var EXPORTED_SYMBOLS = ["NormalizeDate"];
Components.utils.import('resource://indexdata/runtime/Step.js');
Components.utils.import('resource://indexdata/runtime/StepError.js');
Components.utils.import('resource://indexdata/util/xmlHelper.js');
Components.utils.import('resource://indexdata/util/xulHelper.js');
Components.utils.import("resource://indexdata/util/logging.js");
Components.utils.import('resource://indexdata/util/jsonPathHelper.js');

/**
 * Constructor for the NormalizeDate step.
 *
 * Sets up this.conf with its three keys:
 *   "in"         - left undefined here (run() and renderArgs() fall back to
 *                  the "date" key when it is unset)
 *   out          - left undefined here
 *   noMatchFail  - false unless overridden by configuration; the original
 *                  author notes this default is intended to let old
 *                  connectors upgrade gracefully
 */
var NormalizeDate = function () {
  var defaults = {};
  defaults["in"] = undefined;
  defaults.out = undefined;
  defaults.noMatchFail = false;
  this.conf = defaults;
};
// Inherit from Step by installing a Step instance as the prototype, then
// point `constructor` back at NormalizeDate, since replacing the prototype
// object would otherwise leave it referring to Step's constructor.
NormalizeDate.prototype = new Step();
NormalizeDate.prototype.constructor = NormalizeDate;

// Intentionally empty: this step performs no work in init().
NormalizeDate.prototype.init = function() {};

/**
 * Build the step's editor UI on the given surface.
 *
 * Creates a two-tab box: the first tab only shows a caption, the second
 * holds the in/out JSONPath mapping field and the "noMatchFail" checkbox.
 *
 * @param surface  parent node handed to xulHelper.tabBox()
 */
NormalizeDate.prototype.draw = function(surface) {
    var tabLabels = [ "No configuration", "Configuration" ];
    var panels = xulHelper.tabBox(surface, tabLabels, { flex: 1 });

    // First tab: nothing to configure, just a centred caption.
    var captionAttrs = { flex: 1, pack: "center", align: "center" };
    var captionBox = xmlHelper.appendNode(panels[0], "vbox", null, captionAttrs);
    xmlHelper.appendNode(captionBox, "caption", "It Just Works");

    // Second tab: the actual configuration widgets.
    var configAttrs = { flex: 1, pack: "top", align: "center" };
    var configBox = xmlHelper.appendNode(panels[1], "vbox", null, configAttrs);

    // Two separate (but equal) path specs are passed on purpose, exactly as
    // two distinct objects -- presumably the initial values for "in" and
    // "out"; confirm against xulHelper.jsonPathMapField.
    var inSpec = {path:"$.output.results[*]", key:"date"};
    var outSpec = {path:"$.output.results[*]", key:"date"};
    xulHelper.jsonPathMapField(configBox, this, "in", "out", inSpec, outSpec);

    xulHelper.checkbox(configBox, this, "noMatchFail", "Fail if unnormalized?");
};

// Logger stored on the constructor: the static helpers
// NormalizeDate.normalize() and NormalizeDate.make() log through this one.
NormalizeDate.logger = logging.getLogger();
// Module-level logger: used by unitTest().
var logger = logging.getLogger();

// Fallback patterns tried in order by NormalizeDate.normalize(); the first
// one that matches wins, so more specific patterns must precede more general
// ones (e.g. full ISO date before ISO year-month).
//
// Each entry is: [ regexp, yearGroup, monthGroup, dayGroup ] where the
// numbers are capture-group indexes and null means "not captured".
//
// Letter classes are written [A-Za-z] rather than [A-z]: the latter is a
// code-point range that also admits "[", "\", "]", "^", "_" and "`", which
// would let strings such as "1968 ___" match as a month.
NormalizeDate.regexps = [
        [ /^(\d{4})$/,                 1, null, null ],   // 1968
        [ /([a-z]+)[ -](\d+),?[ -](\d+)/i, 3, 1, 2 ],     // march-12, 1968
        [ /^([a-z]+) (\d+)$/i,         2, 1, null ],      // March 1968
        // The next one assumes the order MM-DD-YYYY -- no good in England!
        [ /^(\d{2})\/(\d{2})\/(\d{4})$/, 3, 1, 2 ],
        [ /^(\d\d?) ([a-z]+) (\d\d(\d\d)?)/i, 3, 2, 1 ],  // 12 Mar 68
        [ /^\s*(\d{4}) ([A-Za-z]{3})\b/, 1, 2, null ],     // 1968 Mar
        [ /^\s*(\d{1,2})[ -]([A-Za-z]{3})[ -](\d{4})\b/, 3, 2, 1 ], // 12-MAR-1968
        [ /^\s*([A-Za-z]+) (\d+)((st)|(nd)|(rd)|(th))[, ]+(\d{4})\b/, 8, 1, 2 ], // Mar 12th 1968
        [ /^\s*(\d{4})-(\d{2})-(\d{2})/, 1,2,3 ],  // ISO date 2014-06-18
        [ /^\s*(\d{4})-(\d{2})/, 1,2,null ],  // ISO date 2014-06
        //[ /^\s*(\d{4})\.(\d{2})\.(\d{2})/, 1,2,3 ],  // 2014.06.18 as on old Indexdata frontpage
                                                     // and our regression test pages
        // And finally, the emergency backup regexp ...
        //      [ /(\d{4})/,                   1, null, null ],
];

// Lower-case English month names in calendar order, so that (index + 1) is
// the month number. NormalizeDate.make() compares a lower-cased month token
// against the leading characters of each name, which is how abbreviations
// such as "mar" resolve to a month.
NormalizeDate.months = [
        "january", "february", "march",
        "april", "may", "june",
        "july", "august", "september",
        "october", "november", "december",
];

/**
 * Execute the step: pass every element selected by the configured "in" path
 * through NormalizeDate.normalize() via jsonPathHelper.mapElements(), using
 * the configured "out" spec as the destination.
 *
 * When no "in" spec is configured, the "date" key of "$.output.results[*]"
 * is used.
 *
 * @param task  task object; its `data` property is handed to mapElements()
 */
NormalizeDate.prototype.run = function (task) {
  var step = this;
  var source = step.conf['in'] || {path:"$.output.results[*]", key:"date"};
  var target = step.conf['out'];

  // normalize() reads this.conf.noMatchFail, so it must be called with the
  // step instance as `this`.
  var normalizeOne = function (value) {
    return NormalizeDate.normalize.call(step, value);
  };

  jsonPathHelper.mapElements(source, target, normalizeOne, task.data);
};

/*
 * Normalize one date string to "YYYY-MM-DD", "YYYY-MM" or "YYYY".
 *
 * Strategy: if Date.parse() doesn't work, try matching against a
 * sequence of well-known regular expressions.  When we get the first
 * match, stop: at this point we have the day, month and year in some
 * form.  Then use well-known heuristics to translate months from
 * names to number, to zero-pad each number to the right number of
 * digits, and to glue the resulting chunks together into a known
 * format.
 *
 * A potentially useful refinement would be to allow the connector
 * author to specify one or more regular expressions that are known to
 * work for the back-end in question.  This would be applied before,
 * or perhaps instead of, the standard regexps.
 *
 * Must be called with `this` bound to a step instance (as run() and
 * unitTest() do), because this.conf.noMatchFail selects what happens
 * when nothing matches.
 *
 * @param s  the date string to normalize; undefined or null is logged
 *           as a missing source and returned unchanged
 * @returns  the normalized date string, or the (lightly cleaned) input
 *           when nothing matched and noMatchFail is false
 * @throws StepError  when nothing matched and noMatchFail is true, or
 *                    when NormalizeDate.make() rejects the captured parts
 */
NormalizeDate.normalize = function(s) {
  if (s === undefined || s === null) {
    // null is treated like undefined: it has no replace() method, so
    // letting it through would abort with a TypeError further down.
    NormalizeDate.logger.warn("Source not found.");
    return s;
  }

  // Light clean-up: collapse the first run of commas/full stops followed by
  // whitespace into one space, and strip leading non-alphanumerics.
  var cleaned = s.
    replace(/[,.]+\s+/, " ").
    replace(/^[^0-9a-z]+/i, "");
  if (cleaned !== s) {
      NormalizeDate.logger.debug("transformed '" + s + "' to '" + cleaned + "'");
      s = cleaned;
  }

  if (!s.match(/^\d{4}$/)  &&
       !s.match(/^\d{4}-\d{2}/)  ) {
    // Special case: Date.parse() handles four-digit numbers, but
    // returns not the year but the first day of that year. Avoid
    // invoking it for such dates.
    // Another special case: If it looks like an ISO date, starts with
    // YYYY-MM  (day is optional here), we do not use date.parse, because
    // it assumes UTC for ISO dates, and localtime for anything else. Since
    // it defaults to 00:00, time zones can push it to another date.
    // CP-3813
    var ms = Date.parse(s);
    // Date.parse() signals failure with NaN. A plain truthiness test would
    // also reject 0, i.e. a date that parses to exactly the epoch, and send
    // it on to the regexps, which can mis-read a string carrying a time part.
    if (!isNaN(ms)) {
      var date = new Date(ms);
      NormalizeDate.logger.debug("Date.parse() parsed '" + s + "' to " + ms + " which is " + date);
      // Local-time accessors are used deliberately (not the getUTC*
      // variants): non-ISO strings are parsed as local time.
      var year = date.getFullYear();
      var month = date.getMonth()+1; // getMonth() is zero-based
      var day = date.getDate();
      return year + "-" + ("00"+month).slice(-2) + "-" + ("00"+day).slice(-2);
    }
  }

  NormalizeDate.logger.debug("Date.parse() can't parse '" + s + "'");
  for (var i = 0; i < NormalizeDate.regexps.length; i++) {
    // ref is [ regexp, yearGroup, monthGroup, dayGroup ]; a null group index
    // yields undefined for that part.
    var ref = NormalizeDate.regexps[i];
    var matches = ref[0].exec(s);
    if (matches) {
      NormalizeDate.logger.debug("'" + s + "' matches " + ref[0]);
      var y = matches[ref[1]];
      var m = matches[ref[2]];
      var d = matches[ref[3]];
      return NormalizeDate.make(s, y, m, d);
    } else {
      NormalizeDate.logger.debug("'" + s + "' does not match " + ref[0]);
    }
  }

  // Out of options: mark the date so it can be reported
  NormalizeDate.logger.error("can't normalise date '" + s + "'");
  if (this.conf.noMatchFail) {
      NormalizeDate.logger.info("noMatchFail true: HARD ERROR");
      throw new StepError("Date '" + s + "' cannot be normalized");
  } else {
      // Fail silently -- just return what we started with
      NormalizeDate.logger.info("noMatchFail false: returning '" + s + "'");
      return s;
  }
};

/*
 * Assemble a normalized date string from separately captured parts.
 *
 * - A month containing letters is lower-cased and matched as a prefix of
 *   the names in NormalizeDate.months; the first hit gives its number.
 *   A name that matches nothing leaves the month unknown.
 * - A year below 100 is promoted: below 40 becomes 20xx, otherwise 19xx.
 * - Month and day are zero-padded to two digits.
 *
 * The result is as precise as the known parts allow: "YYYY-MM-DD" when
 * month and day are known, "YYYY-MM" when only the month is, else "YYYY".
 * A day without a usable month is dropped rather than emitted next to a
 * bogus month.
 *
 * @param s  the original string, used only in the error message
 * @param y  year (string, or number)
 * @param m  month name, abbreviation or number; may be undefined
 * @param d  day of month; may be undefined
 * @returns  the assembled date, always as a string
 * @throws StepError  when no year is available
 */
NormalizeDate.make = function(s, y, m, d) {
  NormalizeDate.logger.debug("make(): y='" + y + "', " + "m='" + m + "', " + "d='" + d + "'");
  if (typeof m === "string" && m.match(/[a-z]/i)) {
    var mlc = m.toLowerCase();
    var monthNumber;
    for (var i = 0; i < NormalizeDate.months.length; i++) {
      // Prefix comparison, so "mar", "marc" and "march" all hit "march".
      if (NormalizeDate.months[i].slice(0, mlc.length) === mlc) {
        monthNumber = i+1;
        break;
      }
    }
    NormalizeDate.logger.debug("Transformed month '" + m + "' -> '" + monthNumber + "'");
    // Stays undefined when the word is not a month (e.g. "Winter").
    m = monthNumber;
  }

  if (y < 100) {
    // Almost certainly a two-digit year: promote heuristically
    // The use of unary plus below is to coerce the year, which is
    // initially a string, into a number so that the binary plus
    // operator does arithmetic rather than concatenation.
    var newy;
    if (y < 40) {
      newy = 2000 + +y;
    } else {
      newy = 1900 + +y;
    }
    NormalizeDate.logger.debug("promoted year '" + y + "' to '" + newy + "'");
    y = newy;
  }

  if (!y) {
    // Checked first so a missing year can never be rendered as the literal
    // text "undefined" in front of a month or day.
    throw new StepError("year not defined for '" + s + "'");
  }

  if (m && d) {
    return y + "-" + ("00"+m).slice(-2) + "-" + ("00"+d).slice(-2);
  }
  if (m) {
    return y + "-" + ("00"+m).slice(-2);
  }
  // Year only. String() keeps the return type uniform: a promoted two-digit
  // year is a number at this point.
  return String(y);
};

// Returns the step's class name, which matches the exported symbol.
NormalizeDate.prototype.getClassName = function () {
  return "NormalizeDate";
};

// Returns the human-readable name of the step.
NormalizeDate.prototype.getDisplayName = function () {
  return "Normalize date";
};

// Returns a one-sentence description of the step.
// NOTE(review): the text promises YYYY-MM-DD, but NormalizeDate.make() can
// also produce the shorter forms YYYY-MM and YYYY.
NormalizeDate.prototype.getDescription = function () {
  return "Normalizes dates in the results by checking each against a sequence of known patterns, using the first match, and transforming to ISO format (YYYY-MM-DD).";
};

// Returns the step's version, as a string.
NormalizeDate.prototype.getVersion = function () {
  return "1.0";
};

/**
 * Produce a short "source->destination" summary of the configuration.
 *
 * The source is the key of the "in" spec, or "date" when "in" is unset;
 * the destination is the key of the "out" spec, or the source key when
 * "out" is unset.
 *
 * @returns  string of the form "<srcKey>-><dstKey>"
 */
NormalizeDate.prototype.renderArgs = function () {
  let inSpec = this.conf.in;
  let outSpec = this.conf.out;

  let sourceKey = "date";
  if (inSpec) {
    sourceKey = inSpec.key;
  }

  let targetKey = sourceKey;
  if (outSpec) {
    targetKey = outSpec.key;
  }

  return sourceKey + "->" + targetKey;
};

/**
 * Upgrade a stored configuration to the current step version.
 *
 * @param confVer  version the configuration was saved with
 * @param curVer   version of this step
 * @param conf     not used by this implementation, which operates on
 *                 this.conf instead
 * @returns  false when the configuration is newer than the step (no
 *           upgrade possible), true otherwise
 */
NormalizeDate.prototype.upgrade = function (confVer, curVer, conf) {
  // A connector saved by a newer step cannot be upgraded by an older one.
  var connectorIsNewer = confVer > curVer;
  if (connectorIsNewer) {
    return false;
  }

  // Configurations older than 0.3 go through the post-processing upgrade.
  var needsPostProcUpgrade = confVer < 0.3;
  if (needsPostProcUpgrade) {
    jsonPathHelper.upgradePostProc(this.conf);
  }

  return true;
};

/**
 * Self-test: run NormalizeDate.normalize() over a table of sample inputs
 * and compare each result with the expected normalized string.
 *
 * normalize() is called with this step as `this`, so the step's own
 * noMatchFail setting is in effect. Every sample is logged, as info on
 * success and as error on mismatch.
 *
 * @returns  true when every sample produced the expected output
 */
NormalizeDate.prototype.unitTest = function () {
    // Each entry: [ input, expected output ]
    const samples = [
        [ "Mar 12, 1968",                               "1968-03-12" ],
        [ "1968/03/12",                                 "1968-03-12" ],
        [ "Mon, 12 Marc 1968 01:29:00 GMT",             "1968-03-12" ],
        [ "1968",                                       "1968" ],
        [ "March 12, 1968",                             "1968-03-12" ],
        [ "march-12, 1968",                             "1968-03-12" ],
        [ "MARCH 12-1968",                              "1968-03-12" ],
        [ "Around MAR-12 1968 as far as I remember",    "1968-03-12" ],
        [ "March 1968",                                 "1968-03" ],
        [ "Mar 1968",                                   "1968-03" ],
        [ "03/12/1968",                                 "1968-03-12" ],
        [ "12 Mar 68",                                  "1968-03-12" ],
        [ "12 Mar. 68",                                 "1968-03-12" ],
        [ "12 March, 68",                               "1968-03-12" ],
        [ "12 Mar., 68",                                "1968-03-12" ],
        [ "Winter, 1968",                               "1968" ],
        [ ": March 1968",                               "1968-03" ],
        [ "March, 1968",                                "1968-03" ],
        [ "1968 MAR",                                   "1968-03" ],
        [ "12-MAR-1968",                                "1968-03-12" ],
        [ "Mar 12th 1968",                              "1968-03-12" ],
        [ "2014 06 18",                                 "2014-06-18" ],

        // ISO dates, parsed with a regexp. CP-3813
        [ "2014-06-18",                                 "2014-06-18" ],
        [ "1968-03-12",                                 "1968-03-12" ],
        [ "1968-03",                                    "1968-03" ],
        [ "1968-03-12T00:00:00+0200",                   "1968-03-12" ],

        // This one fails, returning 1968-03-11, because of the time zone
        // combined with midnight -- at least when run in Copenhagen
        //[ "Mar 12, 1968 00:00:00 GMT+1200",           "1968-03-12" ],
    ];

    var step = this;
    var failures = 0;
    samples.forEach(function (sample) {
        var input = sample[0];
        var expected = sample[1];
        var actual = NormalizeDate.normalize.call(step, input);
        if (actual !== expected) {
            logger.error("parsed '" + input + "' to '" + actual + "' (expected '" + expected + "')");
            failures++;
            return;
        }
        logger.info("parsed '" + input + "' correctly");
    });

    return failures === 0;
};
