#!/usr/bin/perl
# Copyright (c) 2010-2012 Index Data, http://www.indexdata.com
#
# mpstat - dump CF/metaproxy log
#

use Getopt::Long;
use IO::File;
use File::stat;

use strict;
use warnings;

# Version of this script.
our $VERSION = 1.0;

# Command line settings with their built-in defaults. GetOptions() in the
# main section stores the values given by the user in these variables.
my $debug           = 0;                      # --debug
my $help;                                     # --help
my $offset          = 0;                      # --offset
my $offset_store    = 0;                      # --offset-store
my $offset_filename;                          # --offset-filename
my $prefix          = "pazpar2-statistic";    # --prefix

# Build the command line help text.
#
# The text interpolates the program name ($0) and the current values of the
# file-level variables $debug, $offset, $offset_store and $prefix, which are
# shown as the defaults. Nothing is printed here; the text is returned as a
# single string.
sub usage () {
    my $help_text = <<EOF;
usage: $0 [ options ] metaproxy.log ....

--debug=0..2      	debug option, default: $debug
--offset		start at offset in log file, default: $offset
--offset-filename=/path/to/offset.txt		keep offset of last run
--offset-store		select an offset-filename automatically, default: $offset_store
--prefix=name		prefix file name for offset storage, default: $prefix

EOF

    return $help_text;
}

# Read the byte offset stored by a previous run.
#
# Parameters:
#   $file - path of the offset file
#
# Returns the non-negative integer found on the first line of $file.
# Returns 0 when the file does not exist, is empty, or when its first line
# is not a plain non-negative integer (a warning is printed in the latter
# case). Dies if an existing file cannot be opened.
sub read_offset {
    my $file = shift;

    if ( !-e $file ) {
        warn "offset file $file does not (yet) exists, skip\n" if $debug;
        return 0;
    }

    my $fh = IO::File->new( $file, "r" ) or die "open $file: $!\n";

    # Only the first line matters. Read it into a lexical so that the
    # caller's $_ is left untouched.
    my $line = <$fh>;
    $fh->close;

    # empty file: nothing stored yet
    return 0 if !defined $line;

    # tolerate surrounding white space, e.g. a trailing "\r\n"
    $line =~ s/\A\s+//;
    $line =~ s/\s+\z//;

    # The value is later used in numeric comparisons and as a seek()
    # position, so anything but a plain integer is rejected here.
    if ( $line !~ /\A\d+\z/ ) {
        warn "offset file $file does not contain a valid offset, "
          . "start at offset 0\n";
        return 0;
    }

    return $line;
}

# Store a byte offset for the next run.
#
# Parameters:
#   $file   - path of the offset file; an existing file is overwritten
#   $offset - offset to store, a false or missing value is stored as 0
#
# The offset is written without a trailing newline.
# Returns a true value on success. Dies if the file cannot be opened,
# written or closed.
sub write_offset {
    my $file = shift;
    my $offset = shift || 0;

    warn "open $file and write offset: $offset\n" if $debug;
    my $fh = IO::File->new( $file, "w" ) or die "open > $file: $!\n";

    print {$fh} $offset or die "write $file: $!\n";

    # Output is buffered, so errors such as a full disk often show up only
    # when the handle is closed.
    $fh->close or die "close $file: $!\n";

    return 1;
}

#

#
# IndexData metaproxy log file parsing
#
# print_log(%args) - copy log files line by line to the selected output
#
# Named arguments:
#   files           - array reference with the log file names (a single
#                     name is accepted as well); the name 'STDIN', or an
#                     empty list, reads from standard input
#   offset          - byte position to start reading at, default 0
#   offset_filename - file that keeps the read position between runs. If
#                     set, the stored position replaces 'offset', and the
#                     position reached at the end of this run is stored.
#
# A start offset is only applied, and only recorded, if exactly one regular
# log file is given. For several files a warning is printed and all files
# are dumped completely.
#
# Dies if a log file cannot be opened or if the read position cannot be
# determined.
#
# log example:
# 17:09:30-28/08 [log] - tcp:64.34.162.111:51734 51734 0.000000 Z3950 initRequest oa/oa36 - 81 Index Data PazPar2/ZOOM-C/YAZ 1.1.0/3.0.52 e687cb7eb87c841f0d1a374174d51d30371f2d97
# 14:34:54-22/02 [log] - tcp:93.220.79.199:17 17 0.000000 Z3950 searchRequest wikipedia 1 - RPN @attrset Bib-1 berlin
# 08:59:53-10/11 [log] - tcp:147.52.108.98:11609 11629 0.536000 Z3950 searchResponse Failure DIAG 114 21
#
sub print_log {
    my %args = @_;

    my $offset          = $args{'offset'} || 0;
    my $files           = $args{'files'};
    my $offset_filename = $args{'offset_filename'};

    my @files =
      grep { defined $_ } ( ref $files eq 'ARRAY' ? @$files : $files );
    push @files, 'STDIN' if !@files;

    my $single_file  = scalar(@files) == 1;
    my $want_offset  = $offset > 0 || $offset_filename;

    if ( $want_offset && !$single_file ) {
        warn
          "Ignore offset because more than one filename is given: ",
          join( ", ", @files ), "\n";
    }

    for my $logfile (@files) {
        my $is_stdin = $logfile eq 'STDIN';
        my $fh;

        if ($is_stdin) {
            $fh = \*STDIN;
            warn "read data from $fh\n" if $debug;
        }
        else {
            warn "open $logfile\n" if $debug;
            $fh = IO::File->new( $logfile, "r" )
              or die "open $logfile: $!\n";

            if ( $want_offset && $single_file ) {
                $offset = read_offset($offset_filename) if $offset_filename;

                # stat() may fail; then the size checks are skipped
                my $st   = stat($logfile);
                my $size = defined $st ? $st->size : undef;

                if ( defined $size && $offset > $size ) {

                    # start from the beginning of the file
                    warn "Offset $offset is larger than file size ",
                      $size, " for file $logfile, ignored.\n",
"Maybe you run a different file, or the file was log rotated?\n";
                }
                else {
                    warn "Start at offset: $offset\n" if $debug;

                    # on failure the handle stays at the start of the file
                    seek( $fh, $offset, 0 )
                      or warn "seek $logfile to offset $offset: $!\n";
                    warn "Seek to end of file, no new data\n"
                      if $debug && defined $size && $offset == $size;
                }
            }
        }

        while ( my $line = <$fh> ) {
            print $line;
        }

        # Record the offset for the next run. Use the position that was
        # really reached: data appended after we hit end of file must not
        # be skipped next time, as it would be with the current file size.
        if ( $offset_filename && $single_file && !$is_stdin ) {
            my $position = tell($fh);
            die "tell: $logfile\n" if $position < 0;
            write_offset( $offset_filename, $position );
        }

        $fh->close if !$is_stdin;
    }

    return;
}

# Generate a file name to keep the offset automatically: a file below
# /var/tmp, named after the file-level $prefix and the real user id ($<).
sub get_offset_filename {
    return '/var/tmp/' . $prefix . '-offset-' . $< . '.txt';
}

######################################################################
#
# main
#

# Parse the command line. Unknown or malformed options abort the program
# with the usage text.
my $options_ok = GetOptions(
    "help"              => \$help,
    "debug=i"           => \$debug,
    "offset=i"          => \$offset,
    "offset-filename=s" => \$offset_filename,
    "prefix=s"          => \$prefix,
    "offset-store"      => \$offset_store,
);
die usage() unless $options_ok;

# --help: show the usage text (via die) and stop
die usage() if $help;

# --offset-store replaces any name given with --offset-filename by the
# automatically generated one
$offset_filename = get_offset_filename() if $offset_store;

# all remaining arguments are log file names
print_log(
    offset          => $offset,
    files           => \@ARGV,
    offset_filename => $offset_filename,
);

1;
