#!/usr/bin/env perl
# Using /usr/bin/env above is especially important on the Mac, where
# we don't want to use the system Perl and where the PATH will likely
# contain the appropriate Homebrew or Macports bin directory.

# @(#$Id: rclimg,v 1.5 2008-10-09 06:41:21 dockes Exp $  (C) 2007 Cedric Scott
#######################################################
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
######################################################

#
# Extract image tags with exiftool and convert the data to html for
# recoll indexing.
#

#
# maps image file tags to recoll fields (sort of).
#
$tagMap = {
    # Image tags stored under a recoll field of the same name.
    ( map { $_ => $_ }
        qw(author caption creator date from keyword keywords subject tag title) ),

    # Image tags stored under a differently named recoll field.
    'caption-abstract' => 'caption',
    'comment'          => 'description',
    'headline'         => 'title',
};

# Options handed to ImageInfo() together with the file name.
%options = (
    'DateFormat'  => '%Y-%m-%d %H:%M:%S',
    'CharsetIPTC' => 'UTF8',
);

# Set to non-zero if tags which map to xapian tags are to be output
# in the body as well as in the header.
#
$headAndBody = 1;

# xapianTag
# Returns the xapian tag to be used for an image tag, or undef when the
# image tag has no entry in $tagMap.
#
# Each key of the global $tagMap is used as a case-insensitive pattern
# anchored at both ends of the image tag name.
#
sub xapianTag {
    my $imgtag = shift;
    # Walk a key list instead of using each(): returning from inside an
    # each() loop leaves the hash iterator parked mid-hash, so the next
    # call would resume after the previous hit and miss earlier entries.
    # Sorting makes the lookup order deterministic.
    foreach my $tagre ( sort keys %{$tagMap} ) {
        if ($imgtag =~ /^$tagre$/i) {
            return $tagMap->{$tagre};
        }
    }
    return undef;
}

# _htmlEscape
# Returns its argument with the characters that are special in HTML text
# and in double-quoted attribute values replaced by entities.
#
sub _htmlEscape {
    my $text = shift;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# imgTagsToHtml
# Reads the tags of an image with ImageInfo() and renders them either as
# an HTML document or as JSON text.
#
# Named arguments:
#   filename  file handed to ImageInfo(); an empty value is replaced by '-'
#   json      when true, return JSON text instead of HTML (default 0)
#
# Returns the rendered text, or an empty string when ImageInfo() returns
# a false value.
#
# HTML output: tags for which xapianTag() returns 'title' are joined with
# " - " into <title>; other mapped tags become <meta> elements; unmapped
# tags, and mapped ones too when $headAndBody is set, are listed in the
# body. JSON output: every tag whose value is not a SCALAR or HASH
# reference, plus "completeTitle" when a title was found.
#
sub imgTagsToHtml {
    my %defaults = (
        filename => "",
        json => 0,
    );
    my %opts = (%defaults, @_);  # user args override defaults

    my $imageFile = $opts{filename};
    $imageFile = '-' if $imageFile eq '';
    # %options is the package-level ImageInfo option hash.
    my $info = ImageInfo($imageFile, \%options);
    return "" unless $info;

    my @fields;          # [ xapian tag, value ] pairs for the <meta> header
    my @other;           # [ image tag, value ] pairs for the body
    my %hashfields;      # All tags and values for json output
    my $titleHtmlTag = "";
    foreach my $tagname ( sort keys %{$info} ) {
        my $value = $info->{$tagname};
        my $xapiantag = xapianTag($tagname);
        if (defined $xapiantag) {
            if ($xapiantag eq 'title') {
                # Compare against '' so that a title of "0" is not lost.
                $titleHtmlTag = $titleHtmlTag ne ''
                    ? "$titleHtmlTag - $value"
                    : $value;
                $hashfields{"completeTitle"} = $titleHtmlTag;
            } else {
                push @fields, [ $xapiantag, $value ];
            }
            push @other, [ $tagname, $value ] if $headAndBody;
        } else {
            push @other, [ $tagname, $value ];
        }
        # Leave SCALAR and HASH references out of the json data. Test the
        # reference type itself rather than pattern-matching the
        # stringified value, which also dropped plain strings that merely
        # contained "HASH(".
        my $reftype = ref $value;
        unless ($reftype eq 'SCALAR' || $reftype eq 'HASH') {
            $hashfields{$tagname} = $value;
        }
    }

    if ($opts{"json"}) {
        # Load JSON only when it is asked for, so that HTML output does
        # not depend on the module being installed.
        require JSON;
        return JSON->new->allow_nonref->encode(\%hashfields);
    }

    # Values come from the image file: escape them before building markup.
    my $output = "<html>\n<head>\n";
    if ($titleHtmlTag ne '') {
        $output .= "<title>" . _htmlEscape($titleHtmlTag) . "</title>\n";
    }
    $output .=
        "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\">\n";
    foreach my $pair (@fields) {
        my ($name, $content) = map { _htmlEscape($_) } @{$pair};
        $output .= "<meta name=\"$name\" content=\"$content\">\n";
    }
    $output .= "</head><body>\n";
    foreach my $pair (@other) {
        my ($name, $content) = map { _htmlEscape($_) } @{$pair};
        $output .= sprintf("%30s : %s<br>\n", $name, $content);
    }
    $output .= "</body>\n</html>\n";
    return $output;
}


####################################################################
# Code for the rclexecm filter<->indexer protocol from here

# Get one line from stdin (from recollindex), exit on eof.
# Returns the line as read, including its line terminator if it had one.
sub readlineorexit {
    my $line = <STDIN>;
    # Test for definedness, not truth: a final unterminated "0" line is
    # data, only undef means end of input.
    unless (defined $line) {
        # print STDERR "RCLIMG: EOF\n";
        exit 0;
    }
    return $line;
}

# Read one named parameter from stdin.
#
# A parameter is a header line made of a name and a byte count separated
# by whitespace, followed by that many bytes of data.
#
# Returns (name, data), with the name in lower case. The data is undef
# when the declared size is not greater than zero. An empty line returns
# ("", ""). A malformed header line, a read error or a short read prints
# a message on stderr and exits with status 1.
sub readparam {
    my $s = readlineorexit();
    if ($s eq "\n") {
        return ("","");
    }
    my @l = split(' ', $s);

    if (scalar(@l) != 2) {
        print STDERR "RCLIMG: bad line:", $s;
        exit 1;
    }
    my $paramname = lc $l[0];
    my $paramsize = $l[1];
    # Lexical, so that a zero-size parameter can never return the data
    # left over from a previous call.
    my $paramdata;
    if ($paramsize > 0) {
        my $n = read STDIN, $paramdata, $paramsize;
        unless (defined $n) {
            print STDERR "RCLIMG: [$paramname] read error: $!\n";
            exit 1;
        }
        if ($n != $paramsize) {
            print STDERR "RCLIMG: [$paramname] expected $paramsize, got $n\n";
            exit 1;
        }
    }
    # print STDERR "RCLIMG: [$paramname] $paramsize bytes: [$paramdata]\n";
    return ($paramname, $paramdata);
}

#
# Main program starts here. Talks the rclexecm protocol
#
use strict;
use Getopt::Std;

# JFD: replaced the "use" call with a runtime load with error checking,
# for compat with the missing filter detection code.
# use Image::ExifTool qw(:Public);
eval {require Image::ExifTool; Image::ExifTool->import(qw(:Public));};
if ($@) {
    print "RECFILTERROR HELPERNOTFOUND Perl::Image::ExifTool\n";
    exit(1);
}

# Command line switches. Only -d and -j are looked at below; -f is
# accepted but not used.
my %options=();
getopts("dfj", \%options);

# If we have an argument, we go into command line test mode, no cmdtalk loop.
# Test for a non-empty string rather than for truth, so that a file
# named "0" is processed too.
if (defined $ARGV[0] && $ARGV[0] ne '') {
    my $json = defined $options{j} ? 1 : 0;

    my $data = imgTagsToHtml(
        filename => $ARGV[0],
        json => $json
        );
    # The result is only printed when -d or -j was given.
    if (defined $options{d} || defined $options{j}) {
        print("$data\n");
    }
    exit(0);
}

# Cmdtalk processing loop

binmode(STDIN)      || die "cannot binmode STDIN";
binmode(STDOUT)     || die "cannot binmode STDOUT";
# Force fflush after every write
$| = 1;

while (1) {
    # print STDERR "RCLIMG: waiting for command\n";

    my %params = ();
    # Read at most 10 parameters (we only actually use one), stop at empty line
    for (1 .. 10) {
        my ($name, $value) = readparam();
        last if $name eq "";
        $params{$name} = $value;
    }

    # Parameter names are stored in lower case and keep their trailing colon.
    my $filename = $params{"filename:"};
    # An empty file name is handled like a missing one: imgTagsToHtml()
    # would otherwise replace it with '-' and the protocol stream on
    # stdin would be read as image data.
    unless (defined $filename && $filename ne '') {
        print STDERR "RCLIMG: no filename ??\n";
        # Recoll is requesting next subdocument (it shouldn't cause we
        # returned eofnext last time), but we have none, just say so:
        print "Eofnow:0\nDocument: 0\n\n";
        next;
    }

    # Output parameters use the same framing as the input ones: a name
    # and size line, then the data with no terminator.
    print "Mimetype: 9\ntext/html";
    my $data = imgTagsToHtml(
        filename => $filename
        );
    my $size = length($data);
    print "Document: $size\n";
    # print STDERR "RCLIMG: writing $size bytes of data\n";
    print $data;
    # Say we have no further documents for this file
    print "Eofnext: 0\n";
    # End of output parameters: print empty line
    print "\n";
    # print STDERR "RCLIMG: done writing data\n";
}
#print STDERR "RCLIMG: Exiting\n";
