#!/usr/bin/perl -w

# To allow access to USI Torus2, use
#	export TORUS2AUTH="usi.indexdata.com:80/USI Admin/id/id3636"
#
# So a typical run of all supported targets in the test realm is:
#	$ TORUS2AUTH="usi.indexdata.com:80/USI Admin/id/id3636" ./test-usi-targets -f "http://id:id3636@usi.indexdata.com/torus2/searchable.production/records/" -s .skip -u http://ebsco2.indexdata.com:9000 http://usi.indexdata.com/torus2/searchable.test

use strict;
use warnings;
use Getopt::Std;
use ZOOM;
use XML::LibXML;
use IO::File;
use LWP::Simple;
use XML::Simple;

use lib '../lib';
use Masterkey::Admin::Dummy;
use Masterkey::Admin::Torus;

#   ###	This is a hacky way to do things.  The following table is more
#	or less the inverse of the list of <fieldmap> directives in
#	the ZOOM-filter definition of
#	ebsco-usi/config/metaproxy/metaproxy-filter-usi.xml
#	It's the same as the table in ../web/htdocs/mc/ztest-inner.mc
our %ccl2cql_index = (
    # No need to handle "term"
    au => "dc.creator",
    ti => "dc.title",
    su => "dc.subject",
    isbn => "bath.isbn",
    issn => "bath.issn",
    date => "[HANDLED SEPARATELY]",
    type => "dc.format",
    jt => "dc.source",
    src => "dc.source",
    publisher => "dc.publisher",
    peerreviewed => "id.peerReviewed",
    fulltext => "id.fullText",
    description => "dc.description",
    language => "dc.language",
    id => "rec.identifier",
    );

my %opts = (l => "", "q" => "cql.allRecords = 1",
	    u => "http://ebsco1.indexdata.com:9000");
if (!getopts('q:s:t:l:u:nf:', \%opts) || @ARGV != 1) {
print STDERR <<__EOT__;
Usage: $0 [-q <query>] [-s <skipFile>] [-t <testFile>] [-l <logLevel>] [-u <url>] [-n] [-f <filterRealm>] <realm>
	-q: query to use within realm [defaults to finding all records]
	-s: specifies a file listing UDBs of targets to skip
	-t: specifies a file listing UDBs of targets to test, skipping others
	-l: sets the specified logging level (e.g. "torus,wsput")
	-u: use the specified USI [$opts{u}]
	-n: no 'Testing N records at URL' message at start of output
	-f: filter targets to only those also in specified <filterRealm>
e.g. $0 -q science -l torus http://usi.indexdata.com/torus2/searchable.test
__EOT__
    exit 1;
}

my %skip = read_file_into_hash($opts{s});
my %test = read_file_into_hash($opts{t});

sub read_file_into_hash {
    my($name) = @_;
    my %hash;

    if ($name) {
	my $f = new IO::File("<$name")
	    or die "$0: can't open list-file $name: $!";
	while (my $udb = <$f>) {
	    chomp($udb);
	    $udb =~ s/#.*//;
	    $udb =~ s/[ \t]+$//;
	    next if $udb eq "";
	    $hash{$udb} = 1;
	}
	$f->close();
    }

    return %hash;
}

my %filter;
if ($opts{f}) {
    my $url = $opts{f};
    warn "filtering on realm $url";
    my $xml = get($url) or die "can't fetch '$url'";
    my $ref = XMLin($xml);
    my $data = $ref->{record};
    foreach my $item (@$data) {
	my $udb = $item->{layer}->{udb};
	$filter{$udb} = 1;
    }
}

my($ws, $realm) = ($ARGV[0] =~ /(.*)\.(.*)/);
my $admin = new Masterkey::Admin::Dummy(logprefix => $0, loglevel => $opts{l});
my $torus = new Masterkey::Admin::Torus($admin, $ws);
my $query = $opts{q};
my $rs = $torus->records($realm, "$query sortby displayName");

$| = 1;
print "Testing ", $rs->alln(), " records at $opts{u}\n" if !$opts{n};

for (my $i = 0; $i < $rs->alln(); $i++) {
    my $record = $rs->record($i);
    my $udb = $record->field("udb");
    my $error;
    if ($skip{$udb} || (%test && !$test{$udb})) {
	$error = "skip";
    } elsif (%filter && !$filter{$udb}) {
	$error = "filtered";
    } else {
	$error = ztest($realm, $record);
    }
    print((defined $error ? $error : "OK") ," [$udb] ", $record->displayName(), "\n");
}


sub ztest {
    my($realm, $record) = @_;

    my $apperror;
    eval {
	$apperror = doit($realm, $record);
    }; if ($@) {
	(my $syserror = $@) =~ s/(.*) at .*/$1/s;
	print STDERR "\n[", $record->field("udb"), "] Z39.50 error: $syserror\n";
	return "autherror" if $syserror =~ /ZOOM error 3 /;
	return "proxyerror" if $syserror =~ /ZOOM error 1074 /;
	return "syserror";
    } elsif (defined $apperror) {
	print STDERR "\n[", $record->field("udb"), "] Application error: $apperror\n";
	return $apperror;
    }

    return undef;
}


sub doit {
    my($realm, $record) = @_;
    my $admin = $record->torus()->admin();

    my $zurl = $opts{u} . "/" . $record->field("udb"). ",realm=$realm";
    my $conn = create ZOOM::Connection();
    $conn->option(sru => "get");
    $conn->connect($zurl);
    $conn->option(timeout => 60);
    maybe_set_attribute($conn, rpnCharset => $record->field("queryEncoding"));

    # Test searching
    my $saved = undef;
    my $nSearchesTried = 0;
    foreach my $field ($record->fields(), "*") {
	my $index = $field;
	my $attrs = undef;
	if ($field eq "*") {
	    last if defined $saved;
	    # Special case: if no searches are configured, try one with no attributes
	    $index = "term";
	} elsif (!($index =~ s/^cclmap_//)) {
	    next;
	} else {
	    $attrs = $record->field($field);
	    next if !defined $attrs || $attrs eq "";
	}

	my $term = ($record->field("testingTerm") ||
		    $admin->configValue("z3950TestingTerm") ||
		    "water");
	my $qstr;
	if ($index eq "term") {
	    $qstr = $term;
	} elsif ($index eq "date") {
	    $qstr = "$term and dc.date=" .
		($record->field("dateTestingTerm") ||
		 $admin->configValue("z3950DateTestingTerm") ||
		 "1998");
	} elsif ($index eq "peerreviewed" || $index eq "fulltext") {
	    $qstr = "$term and " . $ccl2cql_index{$index} . "=1";
	} else {
	    $qstr = $ccl2cql_index{$index} . "=$term";
	}

	my $query = new ZOOM::Query::CQL($qstr, $conn);
	$nSearchesTried++;
	eval {
	    my $oldtime = time();
	    my $rs = $conn->search($query);
	    my $newtime = time();
	    my $secs = $newtime-$oldtime;
	    my $n = $rs->size();
	    if (!defined $saved && $n > 0) {
		$saved = [ $qstr, $n ];
	    }
	    $rs->destroy();
	}; if ($@ && ref $@ && ref $@ eq "ZOOM::Exception" &&
	       ($@->code() == 114 ||
		($@->code() == 3 && $@->addinfo() eq "Task not found") ||
		($@->code() == 16 && $@->diagset() eq "info:srw/diagnostic/1") ||
		($@->code() == 2 && $@->addinfo() =~ /Unsupported index in CQL/))){
	    # Index not supported
	    # The second of these conditions is a stopgap while the CF Engine is broken
	    # The last is for ProQuest's SRU servers, which return non-SRU diagnostics
	} elsif ($@ && ref $@ && ref $@ eq "ZOOM::Exception" &&
		 ($@->code() == 2 && $@->addinfo() =~ /in addition to the indexes/)) {
	    # Index not can not be used on its own, only with others
	} elsif ($@) {
	    die $@;
	}
    }

    if ($nSearchesTried == 0) {
	return "nosearch";
    } elsif (!defined $saved) {
	return "norecords";
    }

    # Test retrieval
    my($qstr, $oldn) = @$saved;
    my $query = new ZOOM::Query::CQL($qstr, $conn);
    my $rs = $conn->search($query);
    my $n = $rs->size();
    if ($n != $oldn && 
	($n == 0 || abs($n-$oldn)/$n > 0.1)) {
	warn "count changed from $oldn to $n ($qstr)";
	return "countchanged";
    }
    maybe_set_attribute($rs, preferredRecordSyntax => $record->field("requestSyntax"));
    maybe_set_attribute($rs, elementSetName => $record->field("elementSet"));
    # There is no similar setting for schema

    my $rec;
    eval {
	$rec = $rs->record(0);
    }; if ($@ && ref $@ && ref $@ eq "ZOOM::Exception" &&
	   ($@->code() == 239)) {
	return "recsyn";
    } elsif ($@) {
	die $@;
    }

    # The record will be in USI's extended-MODS 3.4 format
    my $xmlText = $rec->render();
    my $dom = XML::LibXML->load_xml(string => $xmlText);
    my $root = $dom->documentElement();
    my $xpc = new XML::LibXML::XPathContext($root);
    $xpc->registerNs('m', 'http://www.loc.gov/mods/v3');
    my $title = $xpc->findvalue("/m:mods/m:titleInfo/m:title");
    return "notitle" if !defined $title || $title eq "";
    my @url = $xpc->findnodes("/m:mods/m:location/m:url[\@usage='primary']");
    return "nolink" if @url == 0;

    $rs->destroy();
    $conn->destroy();
    return undef;
}


sub maybe_set_attribute {
    my($obj, $key, $value) = @_;
    $obj->option($key, $value) if defined $value;
}


=head1 NAME

test-usi-targets - test targets defined in a Torus realm using the USI

=head1 SYNOPSIS

test-usi-targets
[
-q
I<query>
]
[
-s
I<skipFile>
]
[
-l
I<logLevel>
]
[
-u
I<url>
]
I<realm>

=head1 DESCRIPTION

C<test-usi-targets>
runs through all the targets described by records in a nominated Torus
realm, invoking a suite of tests on each one by means of the USI
(aka. MasterKey Connect).  It emits a terse report that indicates the
status of each target, one per line, each line in the form:

I<status>
[I<udb>]
I<displayName>

for example:

 OK [galeaone] Academic One File (SRU)
 skip [gale_aone_sru-content] Academic One File (SRU) (CONTENT)
 OK [galebcrc] Business and Company Resource Center TEST
 OK [galecpi] Canadian Periodical Index (SRU)
 syserror [galecd] Computer Database (SRU)

This format is designed for automated comparison.  If the output of an
old run is saved, then the output of a new run can be compared using
diff(1), and it becomes easy to identify targets which have either
stopped or started working in the intervening period.

In addition, more detail about errors is emitted on standard error.
Typically, this is redirected to a separate file, and can be consulted
when trying to debug failing targets.

The following statuses may be generated:

=over 4

=item OK

The target tested correctly.

=item skip

The target was skipped, i.e. it was not tested because its UDB was one
of those listed in a skip-file specified by the B<-s> option.

=item syserror

A system error occurred and an exception was thrown.  Details on
standard error.

=item nosearch

No indexes were configured, so no searching could be attempted.

=item norecords

One or more searches were successfully attempted, but none of them
found any records, so no retrieval could be attempted.

=item countchanged

A search was done that found one or more records, but when it was
retried for the retrieval test, a different number of records were
returned.

=item recsyn

The configured record-syntax was not supported for retrieval, e.g. the
target is said to support XML record but does not.

=item notitle

A record was successfully retrieved, but had no title.

=item nolink

A record was successfully retrieved, but had no link.

=back

=head1 OPTIONS

=over 4

=item -q I<query>

Test targets whose records within the realm match I<query>.  If not
specified, then all records' targets are tested.

=item -s I<skipFile>

If specified, then I<skipFile> is the name of a file that contains the
UDBs of targets, one per line, which are not to be tested.  Targets
that have a UDB in this list are skipped.  This is useful to avoid
testing content connectors and other known-bad targets.

=item -l I<loglevel>

Level of logging to use in the Torus client library.  If specified,
should be a comma-separated list of log-level names, such as

 torusop,torus,config

=item -u I<url>

Address of the USI instance to use for testing.  If not specified,
uses a sensible default which is emitted in the first line of output.

=back

=head1 EXAMPLES

An example invocation:

 $ test-usi-targets -q aone -s .skip -l torusop,torus http://usi.indexdata.com/torus2/searchable.test >output.new 2>error
 $ diff output.old output.new

Test targets that contain the word "aone" (i.e. those pertaining to
Gale's Academic ONEFILE database), skipping those listed in the file
B<.skip> (probably including the content connector
B<gale_aone_sru-content>), logging Torus API operations and their
corresponding Web-Service URLs.  Use the USI "testing" realm as the
source of target information.  Save output in B<output.new> and
detailed diagnostics for failed targets in B<error>.  Compare the
results of this run with those of an earlier run.

=head1 AUTHOR

Copyright (C) 2011 by Index Data Aps E<lt>info@indexdata.comE<gt>

=cut
