#!/usr/bin/perl -w

# For full logging, invoke as:
#	TRACE=1 LOG_SHOW_CATEGORY=1 ./torus-sed -d -v -a id/id3636 mkc-torus.indexdata.com/torus2/searchable.test_mike udb=solr-5902 m%categories | cat -v

use 5.008;
use strict;
use warnings;

use Getopt::Long qw(:config no_ignore_case);
use LWP;
#use Log::Any::App;
#use Net::HTTP::Methods::Patch::LogRequest; # Run with TRACE=1 to see HTTP messages
use URI::Escape;
use XML::Simple qw(:strict);

# centos5 RPM packages
require Log::Any::App;
require Net::HTTP::Methods::Patch::LogRequest; # Run with TRACE=1 to see HTTP messages

sub expand_field {
    my($value, $i, $layer1, $layer2) = @_;

    my $sub;
    while ($value =~ /(.*?)\$\{(.*?)\}(.*)/) {
	if ($2 eq "#i") {
	    $sub = $i;
	} else {
	    $sub = $layer1->{$2} || $layer2->{$2} || "";
	}

	$value = $1 . $sub . $3;
    }

    return $value;
}


sub merge_field {
    my($field, @layers) = @_;

    my %hash;
    for (my $i = 0; $i < @layers; $i++) {
	my $layer = $layers[$i];
	my $value = $layer->{$field} || "";
	my @keys = split(/,/, $value);
	foreach my $key (@keys) {
	    $hash{$key} = 1;
	}
    }

    return join(',', sort keys %hash);
}


my($verbose, $dryrun, $safe, $superSafe, $auth);
my $skip = 0;
if (!GetOptions('verbose'       => \$verbose,
		'dryrun'        => \$dryrun,
		'skip|k=i'      => \$skip,
		'safe|s'        => \$safe,
		'super-safe|S'  => \$superSafe,
                'auth|a=s'      => \$auth,
    ) || @ARGV < 3) {
    print STDERR qq[\
Usage: $0 [options] <realmUrl> <query> <field1>=<value1> d%<field2>...
    -v, --verbose                    Verbose: commentate on what is being done
    -d, --dry-run                    Dry run only: show what WOULD be done
    -k, --skip NUM                   Skip the first NUM records (do not edit)
    -s, --safe                       Safe: do not overwrite existing local values
    -S, --super-safe                 Safe: do not override inherited value
    -a, --auth USER/PASS             HTTP Basic authentication with USER and PASS
e.g. $0 -d -a mike/swordfish mkc-torus.indexdata.com/torus2/searchable.test_mike udb=plos_api supported=1 d%authentication
];
    exit 1;
}


my $url = shift @ARGV;
my $query = shift @ARGV;

my $ua = new LWP::UserAgent();
$url =~ s/^http:\/\///; # Remove any leading protocol indicator
my $sourceUrl = 'http://' . $url . '/records/?layers=override,original&query=' . uri_escape($query);
print "fetching '$sourceUrl'\n" if $verbose;
my $req = new HTTP::Request(GET => $sourceUrl);
if ($auth) {
    my($user, $pass) = ($auth =~ /(.*)\/(.*)/);
    $req->authorization_basic("$user", "$pass" );
    print "authenticating with user='$user' pass='$pass'\n" if $verbose;
}

my $res = $ua->request($req);
if ($res->is_error()) {
    die "HTTP GET error ", $res->code(), ": ", $res->message(), "\n", $res->content();
}

my $xml = XMLin($res->content, ForceArray => [ qw(record layer) ], KeyAttr => { layer => 'name' });
print "processing ", $xml->{count}, " records\n" if $verbose;
$| = 1;

my $records = $xml->{record};
for (my $i = 0; $i < @$records; $i++) {
    my $rec = $records->[$i];
    my $overrideLayer = $rec->{layer}->{override};
    my $originalLayer = $rec->{layer}->{original};
    my $id = $overrideLayer->{id};

    print "$i ($id): ";
    if ($i < $skip) {
	print "skipping\n";
	next;
    }

    my $substitutions = '';
    foreach my $kv (@ARGV) {
	my($key, $value) = split('=', $kv, 2);

	if ($safe) {
	    my $old = $overrideLayer->{$key};
	    if ($old) {
		print "old override $key = '$old'\n" if $verbose;
		next;
	    }
	}

	if ($superSafe) {
	    my $old = $originalLayer->{$key};
	    if ($old) {
		print "old original $key = '$old'\n" if $verbose;
		next;
	    }
	}

	if ($value) {
	    my $tmp = expand_field($value, $i, $overrideLayer, $originalLayer);
	    if ($tmp ne $value) {
		print "expanded $key '$value' -> '$tmp'" if $verbose;
		$value = $tmp;
	    }
	    ### Should use XML::Generator, here and elsewhere
	    $substitutions .= "    <$key>" . XML::Simple::escape_value(undef, $value) . "</$key>\n";
	} elsif ($kv =~ /^d%/) {
	    my $field = $kv;
	    $field =~ s/^d%//;
	    $substitutions .= "    <$field gone='yes'/>\n";
	} elsif ($kv =~ /^m%/) {
	    my $field = $kv;
	    $field =~ s/^m%//;
	    my $s = merge_field($field, $originalLayer, $overrideLayer);
	    $substitutions .= "    <$field>" . XML::Simple::escape_value(undef, $s) . "</$field>>\n";
	} else {
	    die "unrecognised substitution '$kv'";
	}
    }

    if ($substitutions eq '') {
	print "no substitutions for record $1 (id=$id), skipping";
	next;
    }

    my $xmlText = qq[
<record type="searchable">
  <layer name="override">
$substitutions  </layer>
</record>
];

    my $fullurl = "http://$url/records/" . uri_escape($id) . "/";
    if ($dryrun) {
	print "PUT $fullurl\n";
	print "$xmlText" if $verbose;
    } else {
	my $req = new HTTP::Request(PUT => $fullurl);
	$req->header('Content-Type' => 'application/xml');
	$req->content($xmlText);

	### duplicate code ahead
	if ($auth) {
	    my($user, $pass) = ($auth =~ /(.*)\/(.*)/);
	    $req->authorization_basic("$user", "$pass" );
	}

	my $res = $ua->request($req);
	# Don't use is_error(), as this thinks 3xx codes are OK
	if ($res->code() !~ /^2/) {
	    die "HTTP PUT error ", $res->code(), ": ", $res->message(), "\n", $res->content();
	}

	print "PUT $fullurl OK\n";
    }
}




=head1 NAME

torus-sed - set fields in the records of a Torus realm

=head1 SYNOPSIS

torus-sed
[
C<-v> | C<--verbose>
]
[
C<-d> | C<--dry-run>
]
[
C<-k> I<num> | C<--skip> I<num>
]
[
C<-s> | C<--safe>
]
[
C<-S> | C<--super-safe>
]
[
C<-a> | C<--auth> I<USER>C</>I<PASS>
]
I<realmUrl>
I<query>
I<instruction>
...

=head1 DESCRIPTION

C<torus-sed>
allows the values of fields in Torus records to be modified. Although
its present capabilities are limited, the intention is that as
necessary it will be extended until it can perform on Torus records all
the same kinds of operations that
C<sed>
can perform on text streams.

It is invoked with the URL of a realm, a query that is used to narrow
down the records within that realm that will be affected, and a series
of one or more instructions.

Instructions can take one of several forms:

=over 4

=item B<assignment>

This is of the form I<field>C<=>I<value>, and sets an override for the
named field to the specified value. Values may contain sequences of
the form C<${>I<name>C<}>, which will be replaced by the value of the
field called I<name> in the present record, whether that value is from
the override layer or inherited from the parent record. The special
fieldname C<#i> (i.e. the sequence C<${#i}>)is recognised, and is
replaced by the record's ordinal number within the set being
processed.

=item B<deletion>

This is of the form C<d%>I<field>, and simply deletes any override for
the named field.

=item B<merging>

This is of the form C<m%>I<field>, and merges together the existing
value in the C<override> layer with that in the C<original> layer,
using the result as the new override. The field is assumed to be made
up of a comma-separated list of values, as is the case for the
C<categories> field for example. The result is a new list containing
all the values from both layers, sorted and deduplicated.

Note that this operation is idempotent: running it multiple times will
leave the records in the same state as running it only once.

=back

=head1 OPTIONS

=over 4

=item C<-v> or C<--verbose>

Emit verbose commentary. Probably of interest only to people
developing or debugging the program.

=item C<-d> or C<--dry-run>

Do not actually make any changes to the records in the Torus realm,
just note what I<would> be done.

=item C<-k num> or C<--skip num>

Skip the first I<num> records. The is useful for re-starting a run
that was broken off part-way through.

=item C<-s> or C<--safe>

Do not replace any existing overrides. When setting a field, proceed
only if that field is empty, or has only a value inherited from the
parent record.

=item C<-S> or C<--super-safe>

Do not replace any existing values, whether locally overridden or
inherited. When setting a field, proceed only if that field is empty.

=item C<-a> I<USER>C</>I<PASS> or C<--auth> I<USER>C</>I<PASS>

Send HTTP Basic authentication with the specified username and
password.

=back

=head1 EXAMPLES

An example invocation:

 $ torus-sed -d -a mike/swordfish mkc-torus.indexdata.com/torus2/searchable.test_mike udb=plos_api supported=1 d%authentication

In the C<test_mike> realm of the MKC Torus,
Find the record whose UDB is C<plos_api>
and show what would be done if setting its C<supported> field to 1 and
deleting the C<authentication> field.
Authenticate onto the Torus using HTTP Basic authentication with
username C<mike> and password C<swordfish>.

=head1 AUTHOR

Copyright (C) 2014 by Index Data Aps E<lt>info@indexdata.comE<gt>

=cut
