// Names this module exports; presumably read by Components.utils.import
// when another file loads this one (the same mechanism used below).
var EXPORTED_SYMBOLS = ["XpatternMaker"];

// Produce an xpattern that matches a given set of DOM nodes
//
// This is kind of related to the job of matching a pattern,
// except that the "pattern" we match is just a selection of 
// nodes, and the result is a path through the DOM tree that
// covers all the nodes, expressed as an Xpattern node tree

// TODO: Better handling of repeating nodes!
// for example, if we have marked a B node as a target, and told
// that it can repeat, and the dom tree has a sequence of B's
// followed by a A, try not to create a pattern like
//  B + : B : B : B : A
// Instead, figure that those B's are repeats and should be covered
// by the B+, and come up with
//  B+ : A
// Even this is not always right, but it is a better guess
// Most important for repeating groups!

// TODO: Regrouping patterns like
//  ... : TABLE { TR { ( TD : TD : TD ) + } } : ...
// into
//  ... : TABLE { ( TR { TD : TD : TD } ) + } : ...
// if that produces better matches on the page
// But not (necessarily?) into
// ... ( TABLE {  TR { TD : TD : TD } } ) + : ...
// as the most likely case is that the table row repeats.
// Do not assume what is most likely, try them all.
// Perhaps this should be in a separate module?

Components.utils.import('resource://indexdata/util/xpattern.js');
Components.utils.import('resource://indexdata/util/xpatternText.js');
Components.utils.import('resource://indexdata/util/xmlHelper.js');
Components.utils.import('resource://indexdata/util/logging.js');
// Module-wide logger used by every method below.
var logger = logging.getLogger();
// Master switch for the verbose tracing guarded by "if (debug)" throughout
// this file. Some logger.debug calls are not guarded and run regardless.
var debug = false;  // Produces a very detailed dump of the process
// Uncomment the next line to turn the detailed dump on.
//debug = true;


// Constructor.
// startnode is the DOM node that marks the area where the search for
// targets begins (often /html/body).
function XpatternMaker( startnode ) {
    // Group bookkeeping. Numbering starts at 1 because group number 0
    // means "no group", so groups[0] is never filled in. See addGroup.
    this.nextgroupnumber = 1;
    this.groups = [];
    // Target nodes registered through addNode.
    this.targets = [];
    this.startnode = startnode;
}

// Register a DOM node that the generated pattern has to cover.
//   node        - the DOM node itself
//   variable    - variable name to attach to the pattern node
//   cardinality - cardinality to attach to the pattern node
//   attrs       - attribute specification (stored under "attr")
//   group       - number of the group the node belongs to
//   modifiers   - list of modifiers for the pattern node
// Every new target starts out as not yet seen.
XpatternMaker.prototype.addNode =
    function (node, variable, cardinality, attrs, group, modifiers ) {
      this.targets.push({
          "node": node,
          "variable": variable,
          "cardinality": cardinality,
          "attr": attrs,
          "group": group,
          "modifiers": modifiers,
          "seen": false
      });
};

// Add a group definition.
//   node, variable, cardinality, attrs - stored on the group record
//                 (attrs is stored under "attr")
//   grouptype   - "|" makes _makegroup turn the group into an or-bag
//   parent      - number of the enclosing group, 0 if not nested
//   groupnum    - optional explicit group number (used by the unit tests)
// Normally the group gets the next free number. Returns the number used.
XpatternMaker.prototype.addGroup =
    function (node, variable, cardinality, attrs, grouptype, parent, groupnum ) {
      if ( typeof(groupnum) == "undefined" ) {
        groupnum = this.nextgroupnumber++;
      } else {
        // Keep the automatic counter ahead of explicitly numbered groups,
        // so a later automatic number can not overwrite this group.
        // Non-numeric values give NaN here and leave the counter alone.
        var explicit = Math.floor(Number(groupnum));
        if ( explicit >= this.nextgroupnumber )
          this.nextgroupnumber = explicit + 1;
      }
      var grp = { "node": node,
              "variable": variable,
              "cardinality": cardinality,
              "attr" : attrs,
              "group" : groupnum,
              "grouptype": grouptype,
              "parent" : parent };
      this.groups[groupnum] = grp;
      return groupnum;
};

// Mark every target as not yet seen, so a new matching attempt starts
// from a clean state.
XpatternMaker.prototype._clearseen = function () {
    // Indexed loop with a local counter: the loop variable used to leak
    // into the global scope.
    for ( var i = 0; i < this.targets.length; i++ ) {
        this.targets[i].seen = false;
    }
};

// Check that all targets have been seen.
// Returns true when every target has its seen flag set (also when there
// are no targets at all), false as soon as one unseen target is found.
XpatternMaker.prototype._allseen = function () {
  // Indexed loop with a local counter: the loop variable used to leak
  // into the global scope.
  for ( var i = 0; i < this.targets.length; i++ ) {
    if ( ! this.targets[i].seen )
      return false;
  }
  return true;
};

// Look up the group that encloses group g.
// Gives 0 when g is 0 (not in a group at all) or when no group with that
// number has been registered; otherwise the "parent" value stored on the
// group record.
XpatternMaker.prototype._parentgroup = function (g) {
  var grp = ( g != 0 ) ? this.groups[g] : null;
  return grp ? grp.parent : 0;
};


// Dump all targets and groups to the debug log.
// Does nothing unless the module-level debug flag is set.
XpatternMaker.prototype.dumpTargets = function () {
    if (!debug)
      return;
    // Local loop counters: the loop variables used to leak into the
    // global scope.
    for ( var t = 0; t < this.targets.length; t++ ) {
      var tg = this.targets[t];
      var nam = "";
      if (tg.node) {
        nam = tg.node.localName || tg.node.nodeName;
      }
      var mods = " -none";
      if ( tg.modifiers )
        mods = " -" + tg.modifiers.join( " -" );
      logger.debug("xpmk_dump: target " + t + ":" + nam + " " +
          "$" + tg.variable + " " + tg.cardinality +
          " (" + tg.group + ") " + mods );
    }
    for ( var g = 0; g < this.groups.length; g++ ) {
      var gp = this.groups[g];
      if ( !gp )
        continue; // slot 0 is never used, and explicit numbering can leave holes
      logger.debug("xpmk_dump: group " + g + ":" + gp.parent +
          " $" + gp.variable + " " + gp.cardinality );
    }
};


// Generate the pattern.
// Resets the seen flags, searches the DOM from the start node for the
// registered targets, and then moves the resulting pattern nodes into
// their groups.
// Returns the finished pattern, or null if no pattern could be made
// (nothing found, or the groups could not be arranged).
XpatternMaker.prototype.getPattern = function () {
    this._clearseen(); // was a bare property reference, so nothing got reset
    logger.debug("xpmk: Starting to make a pattern");
    this.dumpTargets();
    var res = this._findmatch(this.startnode);
    if (debug && res)
      logger.debug("xpmk: Raw pattern " + res.dumpString(-1) );
    if (res)
      res = this._fixgroups(res);
    if (debug)
      logger.debug("xpmk: Done " );
    return res;
};

// Helper to set the variable, cardinality, modifiers and attribute(s)
// of an xpattern node.
//   xp - the pattern node to fill in
//   v  - variable name; skipped when empty or "none"
//   c  - cardinality; skipped when empty
//   a  - attribute specification text, parsed onto xp by XpatternTextParser
//   m  - list of modifiers; each one is handed to xp.setModifier
XpatternMaker.prototype._setxpfields = function (xp, v,c,a,m) {
    if (v && v != "none" ) {
      xp.setVariable(v);
    }
    if (c) {
      xp.setCardinality(c);
    }
    if (m) {
      for ( var i = 0; i < m.length; i++ ) {
        var ok = xp.setModifier(m[i]);
        if ( !ok ) // Should never happen
          // Report the modifier that was rejected, not the whole list
          logger.warn("Unknown modifier '" + m[i] + "' for " + v );
      }
    }
    if (a) {
      var parser = new XpatternTextParser(a);
      parser.parseAttr(xp);
    }
};

// Build the pattern node for one DOM node.
//   nType - node type (tag name) for the new pattern node
//   tg    - the target registered for that DOM node, or null/undefined
//           when the node is only structural
// With a target, the target is marked as seen and its variable,
// cardinality, attributes and modifiers are copied onto the pattern node.
// The pattern node's "group" records the target's group for the later
// grouping pass; nodes without a target get group 0.
XpatternMaker.prototype._nodepattern = function (nType, tg) {
    var pattern = new Xpattern();
    pattern.setType(nType);
    if ( !tg ) {
      pattern.group = 0;
      return pattern;
    }
    tg.seen = true;
    this._setxpfields(pattern, tg.variable, tg.cardinality,
                      tg.attr, tg.modifiers);
    pattern.group = tg.group; // remember for later use
    return pattern;
};

// Build the (still empty) pattern node for one group.
// The node always gets type "(" - or-bags need a containing group node
// too. When the group number is known, the group's variable, cardinality
// and attributes are copied onto the node. An unknown group number should
// not happen; it is logged and a bare "(" node is returned.
XpatternMaker.prototype._grouppattern = function (groupno) {
    var grp = this.groups[groupno];
    var pattern = new Xpattern();
    pattern.setType( "(" );
    if ( grp ) {
      this._setxpfields(pattern, grp.variable, grp.cardinality, grp.attr, []);
    } else {
      logger.debug("xpmk_groupattern: no group " + groupno +" ###" );
    }
    return pattern;
};


// Find the target registered for DOM node n.
// Returns the target record, or null when n is not a target. When the
// same node was registered more than once, the most recently added
// target wins.
XpatternMaker.prototype._nodematch = function (n) {
    // Walk backwards so that the first hit is the last registration
    for ( var i = this.targets.length - 1; i >= 0; i-- ) {
        var candidate = this.targets[i];
        if ( candidate.node === n )
            return candidate;
    }
    return null;
};

// Scan through the dom tree, looking for the target nodes.
// If it finds the first target, but fails to find all targets
// after/under that node, tries higher up in the dom tree.
//
// n is the DOM node to scan from; null gives null.
// Returns the pattern produced by _followmatch once all targets have been
// seen, or null when no such pattern was found.
XpatternMaker.prototype._findmatch = function (n) {
    if (n == null) 
        return null;
    var retrycount = 0;
    var tg = this._nodematch(n);    
    if (tg != null) { // this node matched
        if (debug)
            logger.debug("xpmk_fi: node " + n.nodeName + " $" + tg.variable +
               " matched in _findmatch");
        // Note: sp is function-scoped (var), the else branch below uses it too
        var sp = null;
        do { // Check that all target nodes were found here.
             // If not, try one level higher up, or same place, with
             // newly added targets (if group failed)
            if (debug) {
              logger.debug("xpmk_fi: Checking node at ===== " );
              logger.debug(xmlHelper.serializexml(n) );
            }
            // Every attempt starts with all targets unseen
            this._clearseen();
            sp = this._followmatch(n);
            if (debug)
              logger.debug("xpmk_fi: after follow: "+
                  " sp=" + sp );
            // NOTE(review): this gives up as soon as the attempt was made at
            // the start node, even if that attempt saw all targets. Looks
            // like a pattern rooted at the start node itself is never
            // returned - confirm that this is intended.
            if ( n == this.startnode ) {
                return null;
            }
            // Prepare the next attempt one level up. n is moved even when
            // this attempt succeeded, but sp already holds the result.
            n = n.parentNode;
            retrycount++;
        } while ( !this._allseen() );
        if (sp != null) {
            if (debug)
              logger.debug("xpmk_fi: Found a pattern after " + (retrycount-1) +
                " retries");
            return sp;
        }
    } else { // node didn't match
        // Depth-first search for the first target: children before siblings
        var cp = this._findmatch(n.firstChild);
        if (cp != null)  { // but its children did
            return cp;
        }
        sp = this._findmatch(n.nextSibling);
        // or its siblings did
        return sp; // null or not
    }
    return null;
};



// Build the raw, ungrouped pattern for node n, its following siblings and
// everything below them. Groups are not handled here at all; getPattern
// applies them afterwards through _fixgroups. (The original note on this
// function called it the old version of _followmatch without the groups,
// kept because the flow is so much easier to see in it.)
//
// Text and comment nodes are skipped. A DOM node gets a pattern node when
// it is a target itself, or when something among its later siblings or
// its children produced a pattern. Returns null when nothing from here on
// is of interest.
XpatternMaker.prototype._followmatch = function (n) {
  // Skip text and comment nodes with a loop rather than recursion; on
  // long pages with many such nodes recursion may lead to
  // "too much recursion".
  var cur = n;
  while ( cur != null &&
          ( cur.nodeName == "#text" || cur.nodeName == "#comment" ) ) {
    cur = cur.nextSibling;
  }
  if ( cur == null ) {
    return null;
  }
  if (debug)
    logger.debug("xpmk_fo: " + cur.nodeName );
  var siblingPattern = this._followmatch(cur.nextSibling);
  var childPattern = this._followmatch(cur.firstChild);
  var target = this._nodematch(cur);
  if ( siblingPattern == null && target == null && childPattern == null ) {
    return null;
  }
  var pattern = this._nodepattern(cur.localName || cur.nodeName, target);
  pattern.nextSibling = siblingPattern; // null or not, doesn't matter
  pattern.firstChild = childPattern;
  return pattern;
};

//////////////////////////////
// Fixing the groups
// Takes an ungrouped pattern, and moves things inside groups where needed.
// Iterates the groups in reverse order, so inner groups get done before
// outer ones. Afterwards the pattern is cleaned up: some TR nodes are
// made repeatable, and nodes around an or-bag that the or-bag already
// covers are removed.
// Returns the head of the finished pattern, or null if some group could
// not be built (impossible combination).
XpatternMaker.prototype._fixgroups = function(patt) {
  // create a dummy node, and put the pattern as its child,
  // so we can handle the beginning of a pattern the same way as
  // any childnode
  var dummyroot = new Xpattern();
  dummyroot.firstChild = patt;
  dummyroot.setType("DummyRoot");
  for ( var n = this.groups.length - 1; n > 0; n-- ) {
    // Always start from the current head of the pattern. An earlier pass
    // may have wrapped the original head node in a group; starting from
    // that stale node would only look inside the wrapping group and miss
    // members of this group that come after it.
    var ok = this._fixonegroup(dummyroot.firstChild, dummyroot, null, n);
    if (debug)
      logger.debug("After fixing group " + n + " ok=" + ok + " " +
         dummyroot.dumpString(-1) );
    if ( !ok )
      return null; // impossible combination
  }
  // Clean up the pattern.
  // Make some TR nodes repeatable
  this._fixRepeatedTr(dummyroot.firstChild);

  // Trim surrounding nodes that could be part of an or-bag
  // f.ex the first br in A : BR : ( B $b | U $u | BR )+ : BR
  var merges = 0;
  while( this._mergeOrBagPrecedents(dummyroot.firstChild, dummyroot, null) ) {
    merges++;  // try again if we found something to merge, there might be more than one
    logger.debug("After merging or-bag precedent " + merges + " " + dummyroot.dumpString(-1) );
    // (note, this loop only writes debug, the work is done in the condition)
  }
  return dummyroot.firstChild;
};


// Helper to see if a pattern node is in a given group.
// Also true if it is inside a group that... is inside the given group.
//   xp      - the pattern node to check
//   groupno - the group to look for
//   unknown - the answer to give for pattern nodes that have neither a
//             group nor a variable
XpatternMaker.prototype._ingroup = function (xp, groupno, unknown) {
  if ( !xp.group && ! xp.variable )  // not in a group
    return unknown;
  var g = xp.group;
  while (g) {
    if ( g == groupno )
      return true;
    // _parentgroup gives 0 for a group number that was never registered,
    // so a dangling group reference ends the walk instead of throwing.
    g = this._parentgroup(g);
  }
  return false;
}; // _ingroup


// Tell whether anything reachable from xp belongs to the given group:
// xp itself, any of its following siblings, or anything below them.
// Pattern nodes without a group count as not belonging. An empty (null)
// start gives false.
XpatternMaker.prototype._containsGroup = function (xp, group) {
  // Siblings are walked with a loop, children by recursion
  for ( var node = xp; node; node = node.nextSibling ) {
    if ( this._ingroup(node, group, false) )
      return true;
    if ( this._containsGroup(node.firstChild, group) )
      return true;
  }
  return false;
}; // _containsGroup

// Verify a (newly created) group: every node reachable from xp - through
// children, siblings and alternatives - must be allowed in the group.
// A node is allowed when it is in the group (directly or through nested
// groups), or when it has neither a group nor a variable. A node with a
// variable that belongs elsewhere makes the check fail.
// Reaching the end of a chain (null) counts as fine.
XpatternMaker.prototype._canallbeingroup = function (xp, group) {
  if (!xp)
    return true;  // reached the end of a chain, ok
  // Each step is only taken while everything so far was acceptable
  var ok = this._ingroup(xp,group,true);
  if (ok)
    ok = this._canallbeingroup(xp.firstChild, group);
  if (ok)
    ok = this._canallbeingroup(xp.nextSibling, group);
  if (ok)
    ok = this._canallbeingroup(xp.nextAlternative, group);
  if(debug)
    logger.debug("xpmk:can " + xp.dumpString(-2) +
      " xp.g=" + xp.group + " looking for " + group +
      " ingroup=" + this._ingroup(xp,group,true) +
      " result=" + ok );
  return ok;
}; // _canallbeingroup

// Fix one group: locate the place in the pattern where group number
// groupno has to start, and build the group there.
//   xp     - the pattern node to look at
//   parent - xp's parent, and
//   prev   - xp's previous sibling (null for a first child), so that the
//            node can be moved into a group and the firstChild/nextSibling
//            pointers updated
// Returns what _makegroup returns for the group, or true when no node of
// the group was found from here on.
XpatternMaker.prototype._fixonegroup = function(xp, parent, prev, groupno) {
  var inChildren = this._containsGroup(xp.firstChild, groupno);
  var inSiblings = this._containsGroup(xp.nextSibling, groupno );
  var inThis = this._ingroup(xp, groupno, false);
  if (debug)
    logger.debug("xpmk_fix: Looking for " + groupno +
        " at " + xp.dumpString(-2) +
        " t=" + inThis + " n=" + inSiblings + " c=" + inChildren );
  // The node is in the group itself, or members are spread over both its
  // children and its later siblings: the group must start at this node
  if ( inThis || ( inChildren && inSiblings ) )
    return this._makegroup(xp, parent, prev, groupno);
  // All members are further down
  if ( inChildren )
    return this._fixonegroup( xp.firstChild, xp, null, groupno );
  // All members are further along
  if ( inSiblings )
    return this._fixonegroup( xp.nextSibling, parent, xp, groupno);
  // Can happen if we have a group with nothing in it.
  // That is not really correct, but we can accept the rest of the
  // pattern. No need to bail out on that.
  return true;
}; // _fixonegroup

// Turn a node into a group.
//   xp      - the node the group starts at (the position is actually
//             taken from prev/parent; xp itself is not used)
//   parent  - parent of that node
//   prev    - previous sibling of that node, or null for a first child
//   groupno - number of the group to build
// Two cases: If we have prev, we can put the group into its nextSibling,
// otherwise we need to put it into the firstChild of the parent.
// In any case, we follow the nextSibling of the node and move things into
// the group as long as they belong in it.
// Returns true if everything that ended up inside the group is allowed
// to be there, false for an impossible combination.
XpatternMaker.prototype._makegroup = function(xp, parent, prev, groupno) {
  var group = this._grouppattern(groupno);
  group.group = groupno;
  if ( prev ) { // insert group in middle of list
    group.nextSibling = prev.nextSibling;
    prev.nextSibling = group;
    if (debug)
      logger.debug("xpmk_mk: Inserting a group after " + prev.dumpString(-2) );
  } else { // insert group as the first child of parent
    group.nextSibling = parent.firstChild;
    parent.firstChild = group;
    if (debug)
      logger.debug("xpmk_mk: Inserting a group as first child of " +
         parent.dumpString(-2) );
  }

  // Now we have a (empty) group node in the right place. Its nextSibling
  // is known to belong to the group, so move it there.
  group.firstChild = group.nextSibling;
  group.nextSibling = group.firstChild.nextSibling; // may be null.
  group.firstChild.nextSibling = null;
  // And then move siblings until all that belongs to the group is there
  var last = group.firstChild;
  while ( this._containsGroup(group.nextSibling, groupno) ) {
    last.nextSibling = group.nextSibling;
    group.nextSibling = group.nextSibling.nextSibling;
    last = last.nextSibling;
    last.nextSibling = null;
  }
  var ok = this._canallbeingroup(group.firstChild,groupno);
  if (debug)
    logger.debug("xpmk_mk: Check for " + group.dumpString(-1) +
      " returns " + ok );
  // convert into a OR-bag if that's what we need
  var grpdef = this.groups[groupno]; // missing for an unregistered number
  if ( ok && grpdef && grpdef.grouptype == "|" ) {
    this._makeorbag(group);
  }
  return ok;
}; // _makegroup

// Helper for _makegroup: turn the children of a group node into the
// alternatives of an or-node placed inside the group.
// Duplicates are dropped. Regular pattern nodes keep their order; plain
// nodes are collected and appended after them.
XpatternMaker.prototype._makeorbag = function(group) {
  var ornode = new Xpattern();  // the or-node inside the group
  ornode.setType( "|" );
  ornode.firstChild = group.firstChild;
  group.firstChild = ornode;
  var seen = {};      // deduplicate
  var tail = [];      // plain nodes, to be appended at the end
  var lastalt = null; // last node linked into the alternative chain so far
  // Point the end of the chain at 'next'. As long as no alternative has
  // been linked yet, that end is the or-node's firstChild, so an or-bag
  // that begins with a plain node works too.
  function relink(next) {
    if ( lastalt )
      lastalt.nextAlternative = next;
    else
      ornode.firstChild = next;
  }
  // Turn all children into alternatives
  var child = ornode.firstChild;
  while ( child ) {
    var nodestr = child.dumpString(-2);  // this node only
    // Can't use -1 here, it dumps child's nextSibling chain too
    if ( child.firstChild )
      nodestr += " { " + child.firstChild.dumpString(-1) + " }"; // and all children
    if(debug)
      logger.debug("xpmk_mk_or: '" + nodestr + "' " +
         "p=" + child.isPlainNode() + " pv=" + child.isPlainVarNode() );
    var next = child.nextSibling;
    if ( seen[nodestr] ) { // seen it before, skip it
      relink(next);
      logger.debug("xpmk_mk_or:   skipping '" + nodestr + "': " + seen[nodestr] );
    } else if ( child.isPlainNode() ) { // a plain node, collect into tail
      logger.debug("xpmk_mk_or:  collected tail '" + nodestr + "'" );
      tail.push(child);
      seen[nodestr] = 2;
      relink(next);
    } else { // regular pattern node
      child.nextAlternative = next;
      child.nextSibling = null;
      lastalt = child;
      seen[nodestr] = 1;
      // Also remember the form without the variable, so that a plain
      // node of the same kind counts as already covered
      var plain = nodestr.replace ( /\$\w+\s*/, "" ); // remove first $var
      logger.debug("xpmk_mk_or:   plain: '" + plain + "'" );
      seen[plain] = 1;
    }
    child = next;
  }
  for ( var i=0; i<tail.length; i++){
    tail[i].nextSibling = null;
    logger.debug("xpmk_mk_or:  Appending tail " + tail[i].dumpString(-1) );
    relink(tail[i]);
    lastalt = tail[i];
    lastalt.nextAlternative = null;
  }
}; // _makeorbag

// Recurse through the pattern, looking for or-bags that are preceded
// or followed by things that are already included in the or-bag. Remove
// such nodes, the or-bag will handle them. For example the first and
// last BR in
//   A : BR : ( U $u | B $b | BR ) : BR
//   node   - first node of the sibling chain to check
//   parent - parent of that first node, and
//   prev   - previous sibling; only one of the two is used: parent
//            while looking at a first child, prev after that
// Returns true if a preceding node was removed, in which case it should
// be called again. Nodes following an or-bag are removed on the spot,
// without ending the pass.
XpatternMaker.prototype._mergeOrBagPrecedents = function(node, parent, prev) {
  var cur = node;
  while (cur) {
    if ( this._mergeCandidate(cur) ) {
      // cur is covered by the or-bag after it: unlink cur and report it
      if ( parent ) {
        parent.firstChild = cur.nextSibling;
        logger.debug("xpmk_merge: Merged firstChild " + parent.dumpString(-1) );
      } else {
        prev.nextSibling = cur.nextSibling;
        logger.debug("xpmk_merge: Merged nextSibling " + prev.dumpString(-1) );
      }
      return true;
    }
    if ( this._tailmergeCandidate(cur) ) {
      // cur is an or-bag that covers its follower: drop the follower and
      // look at cur again, there may be more of them
      cur.nextSibling = cur.nextSibling.nextSibling;
      logger.debug("xpmk_merge: Tail merge " + cur.dumpString(-1) );
      continue;
    }
    // nothing removed here, look below and then move on
    if ( this._mergeOrBagPrecedents(cur.firstChild, cur, null) )
      return true;
    prev = cur;
    parent = null;  // from now on we are past the first child
    cur = cur.nextSibling;
  } // node loop
  return false;
}; // _mergeOrBagPrecedents

// Tell whether a node can be dropped because the or-bag right after it
// already covers it. That takes all of:
//  - the node is a plain node
//  - its next sibling is a group node "(" holding an or-node "|"
//  - the first alternative of the same type as the node is plain too
XpatternMaker.prototype._mergeCandidate = function(node) {
  if ( ! node.isPlainNode() )
    return false; // only plain nodes count
  var follower = node.nextSibling;
  if ( !follower || follower.nodeType != "(" )
    return false; // not followed by a group node
  var ornode = follower.firstChild;
  if ( !ornode || ornode.nodeType != "|" )
    return false; // not an or-bag
  for ( var alt = ornode.firstChild; alt; alt = alt.nextAlternative ) {
    if ( alt.nodeType == node.nodeType ) {
      // Only the first alternative of this type decides; alternatives
      // with variables etc are disregarded
      return alt.isPlainNode() ? true : false;
    }
  }
  return false; // didn't find anything similar
};

// Tell whether the node right after an or-bag can be dropped because the
// or-bag already covers it. That takes all of:
//  - the node is a group node "(" holding an or-node "|"
//  - it has a next sibling
//  - the first alternative of the same type as that sibling is plain
XpatternMaker.prototype._tailmergeCandidate = function(node) {
  if ( node.nodeType != "(" )
    return false;
  var ornode = node.firstChild;
  if ( !ornode || ornode.nodeType != "|" )
    return false; // not an or-bag, must be a regular group
  var follower = node.nextSibling;
  if ( !follower )
    return false; // nothing after it
  for ( var alt = ornode.firstChild; alt; alt = alt.nextAlternative ) {
    if ( alt.nodeType == follower.nodeType ) {
      // Only the first alternative of this type decides; alternatives
      // with variables etc are disregarded
      return alt.isPlainNode() ? true : false;
    }
  }
  return false;
};

// Check the sibling chain starting at n: is everything the user has
// explicitly defined in it repeatable?
//  - a group node ("(" or "|") must be repeatable itself; what is inside
//    it is not looked at
//  - a regular node with a variable must be repeatable, and the same
//    must hold for everything below any regular node
// An empty chain counts as all repeatable.
XpatternMaker.prototype._allrepeatable = function(n) {
  for ( var cur = n; cur; cur = cur.nextSibling ) {
    var isgroup = ( cur.nodeType == "(" || cur.nodeType == "|" );
    if ( isgroup ) {
      if ( ! cur.repeatable )
        return false;
      continue; // the group repeats, no need to look at its children
    }
    if ( cur.variable && ! cur.repeatable )
      return false;
    if ( !this._allrepeatable(cur.firstChild) )
      return false;
  }
  return true;
};

// Go through every node of the pattern (using the node's own
// foreachnode), and make some TR nodes repeatable. A TR is made
// repeatable only if
//  - it is not repeatable already, and has children
//  - all children of the TR are repeatable (see _allrepeatable)
//  - the TR is not followed by another TR that the pattern looks into
// Plain TR nodes directly following such a TR are removed from the
// pattern.
XpatternMaker.prototype._fixRepeatedTr = function(node) {
  var self = this;
  var isTr = function (x) {
    return x.nodeType.toUpperCase() == "TR";
  };
  node.foreachnode( function (row) {
    // Not a TR, already repeating, a plain empty TR, or non-repeatable
    // user-defined nodes somewhere inside: leave it alone
    var skip = !isTr(row) ||
               row.repeatable ||
               !row.firstChild ||
               !self._allrepeatable(row.firstChild);
    if ( skip )
      return;
    var follower = row.nextSibling;
    if ( follower && isTr(follower) && !follower.isPlainNode() )
      return; // pattern looks in the next TR already
    // If we get so far, we have a TR that should be repeatable
    row.repeatable = true;
    logger.debug("xpmk_trfix: Found a TR that should repeat!");
    // remove following simple TR nodes from pattern
    for ( follower = row.nextSibling;
          follower && isTr(follower) && follower.isPlainNode();
          follower = row.nextSibling ) {
      row.nextSibling = follower.nextSibling;
      logger.debug("xpmk_trfix: Removed following plain TR");
    }
  });
};

///////////////////////////////
// Unit tests

XpatternMaker.prototype.unitTest = function ( ) {

  const testpage =
    "<body>" +
    "<h1>Test page for XPatternMaker</h1>" +
    "<div>" +    // div 1 - linear list of tags
      "<A href='http://indexdata.com'>Title</A><br/>"+
      "<U>Author</U><br/>" +
      "<B>Publisher</B><br/>" +
      "<I>Description</I><br/>" +
    "</div>"+
    "<div>" +    // div 2 - table 
      "<A href='http://indexdata.com'>Title</A><br/>"+
      "<TABLE>"+
        "<TR>"+
          "<TD>Author</TD>" +
          "<TD>Publisher</TD>" +
          "<TD>Description</TD>" +
        "</TR>"+
        "<TR>"+
          "<TD>Publisher2</TD>" +
        "</TR>"+
        "<TR>"+
          "<TD>Description2</TD>" +
        "</TR>"+
      "</TABLE>"+
    "</div>"+
    "<div>" + // div 3 - test for CP-3412, optimized or-bags
      "<H4>Title</H4>" +
      "<U>First Author </U>" +
      "<U>Second Author</U>" + 
      "<B>Publisher</B>" +
    "</div>"+
    "<div>" + // div 4 - another test for CP-3412, optimized or-bags
      "<TR>" +
      "<TD><U>First Author </U></TD>" +
      "<TD><U>Second Author</U></TD>" +
      "<TD><B>Publisher</B></TD>" +
      "</TR>" +
    "</div>"+
    "<div>" + // div 5 - for the TR fixes
      "<I>Title</I>" +
      "<table>" +
      "<TR><TD><U>First Author </U></TD></TR>" +
      "<TR><TD><U>Second Author</U></TD></TR>" +
      "<TR><TD><B>Publisher</B></TD></TR>" +
      "</table>" +
    "</div>"+
    "</body>";

  const tests = [
    { name: "trivial",
      targets: [ { p:"//body/div[1]/A", v: "title" } ],
      ex: "A $title",
    },
    { name: "linear-1",
      targets: [ { p:"//body/div[1]/A", v: "title" },
                 { p:"//body/div[1]/U", v: "author" }  ],
      ex: "A $title : br : U $author",
    },
    { name: "linear-2",
      targets: [ { p:"//body/div[1]/A", v: "title" },
                 { p:"//body/div[1]/B", v: "publisher" }  ],
      ex: "A $title : br : U : br : B $publisher",
    },
    { name: "linear-3-backwards",
      targets: [ { p:"//body/div[1]/B", v: "publisher" },
                 { p:"//body/div[1]/A", v: "title" }  ],
      ex: "A $title : br : U : br : B $publisher",
    },
    { name: "Table-row-1",
      targets: [
        { p:"//body/div[2]/TABLE/TR/TD[1]", v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", v: "publisher" }
      ],
      ex: "TD $author : TD $publisher",
    },
    { name: "Table-row-2",
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", v: "publisher" }
      ],
      ex: "A $title : br : TABLE  { TR  { TD $author : TD $publisher } }",
    },
    { name: "Table-1",
      targets: [
        { p:"//body/div[2]/TABLE/TR[1]/TD[1]", v: "author" },
        { p:"//body/div[2]/TABLE/TR[2]/TD[1]", v: "publisher" }
      ],
      ex: "TR { TD $author } : TR  { TD $publisher }",
    },
    { name: "Table-2",
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR[1]/TD[1]", v: "author" },
        { p:"//body/div[2]/TABLE/TR[2]/TD[1]", v: "publisher" }
      ],
      ex: "A $title : br : TABLE  { TR  { TD $author } : TR  { TD $publisher } }",
    },
    { name: "Group-1",
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:1, v: "publisher" },
      ],
      ex: "A $title : br : TABLE  { TR  { ( TD $author : TD $publisher )} }",
    },
    { name: "Group-2",
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:1, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:0, v: "description" },
      ],
      ex: "A $title : br : TABLE  { TR  { ( TD $author : TD $publisher ): TD $description } }",
    },
    { name: "Group-3",
      targets: [
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/TABLE/TR[1]/TD", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR[2]/TD", group:1, v: "publisher" },
      ],
      ex: "??",
      fail: "CP-3375. Can not back up to the table level to find tr[2]/TD",
    },
    { name: "Group-4",
      targets: [
        { p:"//body/div[2]/A", group:1, parent:0 },
        { p:"//body/div[2]/A", group:1, v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:1, v: "publisher" },
      ],
      ex: "( A $title : br : TABLE  { TR  { TD $author : TD $publisher } } )",
    },
    { name: "Bad-Group-1",
      // Can not have the description outside the group, if title is in it,
      // that forces the whole table into the group
      targets: [
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/A", group:1, v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:1, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:0, v: "description" },
      ],
      ex: "",
    },
    { name: "TwoGroup-1",
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:2, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:2, v: "description" },
      ],
      ex: "A $title : br : TABLE  { TR  { ( TD $author ): TD : ( TD $description )} }",
    },
    { name: "TwoGroup-2",
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:2, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:1, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:2, v: "description" },
      ],
      ex: "A $title : br : TABLE  { TR  { ( TD $author : TD $publisher ): ( TD $description )} }",
    },
    { name: "TwoGroup-Bad",
      // Can not have group-2 inside group-1, when they are not nested
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:2, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:2, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:1, v: "description" },
      ],
      ex: "",
    },
    { name: "Nested-1",
      // Now group-2 is nested in group1, so this is possible.
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:2, parent:1 },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:2, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:1, v: "description" },
      ],
      ex: "A $title : br : TABLE  { TR  { ( TD $author : ( TD $publisher ): TD $description )} }",
    },
    { name: "Nested-2",
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:2, parent:1 },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:2, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:2, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:1, v: "description" },
      ],
      ex: "A $title : br : TABLE  { TR  { ( ( TD $author : TD $publisher ): TD $description )} }",
    },
    { name: "Nested-3",
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:2, parent:1 },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:2, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:2, v: "description" },
      ],
      ex: "A $title : br : TABLE  { TR  { ( TD $author : ( TD $publisher : TD $description ))} }",
    },
    { name: "Nested-4",
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:2, parent:1 },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:2, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:2, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:2, v: "description" },
      ],
      ex: "A $title : br : TABLE  { TR  { ( ( TD $author : TD $publisher : TD $description ))} }",
    },
    { name: "Nested-Bad",
      // Group-2 can not be discontinuous
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0 },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:2, parent:1 },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:2, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:1, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:2, v: "description" },
      ],
      ex: "",
    },
    { name: "Orbag-1",
      targets: [
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0,
                   type:"|", cardinality:"+" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:1, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:1, v: "description" },
      ],
      ex: "( TD $author  | TD $publisher  | TD $description ) +",
    },
    { name: "Orbag-2",
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0,
                   type:"|", cardinality:"+" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR/TD[2]", group:1, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR/TD[3]", group:1, v: "description" },
      ],
      ex: "A $title : br : TABLE  { TR + { ( TD $author  | TD $publisher  | TD $description ) +} }",
    },
    { name: "Orbag-3",
      // Check that the repeated BR tags only come once in the list
      targets: [
        { p:"//body/div[1]/A", group:1, parent:0,
                   type:"|", cardinality:"+" },
        { p:"//body/div[1]/A", v: "title", group:1 },
        { p:"//body/div[1]/B", v: "publisher", group:1 }
      ],
      //ex: "( A $title  | br  | U  | B $publisher ) +",
      ex: "( A $title | B $publisher | br | U ) +",
    },
    { name: "Orbag-4",
      // Match the table rows instead
      targets: [
        { p:"//body/div[2]/A", v: "title" },
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0,
                   type:"|", cardinality:"+" },
        { p:"//body/div[2]/TABLE/TR[1]/TD", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR[2]/TD", group:1, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR[3]/TD", group:1, v: "description" },
      ],
      ex: "A $title : br : TABLE  { ( TR  { TD $author }  | TR  { TD $publisher }  | TR  { TD $description } ) +}",
    },
    { name: "Orbag-5",
      // Match the table rows instead
      targets: [
        { p:"//body/div[2]/TABLE/TR/TD[1]", group:1, parent:0,
                   type:"|", cardinality:"+" },
        { p:"//body/div[2]/TABLE/TR[1]/TD", group:1, v: "author" },
        { p:"//body/div[2]/TABLE/TR[2]/TD", group:1, v: "publisher" },
        { p:"//body/div[2]/TABLE/TR[3]/TD", group:1, v: "description" },
      ],
      ex: "( TR  { TD $author }  | TR  { TD $publisher }  | TR  { TD $description } ) +",
    },
    { name: "Orbag-6",
      // Test case for CP-3412, should not get confused about the second U
      targets: [
        { p:"//body/div[3]/H4", v: "title" },
        { p:"//body/div[3]", group:1, parent:0,
                   type:"|", cardinality:"+" },
        { p:"//body/div[3]/U", group:1, v: "author" },
        { p:"//body/div[3]/B", group:1, v: "publisher" },
      ],
      ex: "H4 $title : ( U $author  | B $publisher ) +",
    },
    { name: "Orbag-7",
      // Another test case for CP-3412,
      // When multiple U's, the one without a $var should be last
      targets: [
        { p:"//body/div[3]", group:1, parent:0,
                   type:"|", cardinality:"+" },
        { p:"//body/div[3]/H4", group:1, v: "title" },
        { p:"//body/div[3]/U[2]", group:1, v: "author" },
        { p:"//body/div[3]/B", group:1, v: "publisher" },
      ],
      ex: "( H4 $title | U $author  | B $publisher | U ) +",
    },
    { name: "Orbag-8",
      // Test case for CP-3412, should not get confused about the second U
      targets: [
        { p:"//body/div[4]", group:1, parent:0,
                   type:"|", cardinality:"+" },
        { p:"//body/div[4]/TR/TD/U", group:1, v: "author" },
        { p:"//body/div[4]/TR/TD/B", group:1, v: "publisher" },
      ],
      ex: "( TD { U $author } | TD { B $publisher } | TD ) +",
    },
    { name: "Orbagprefix",
      // Also CP-3412, No BR before the or-bag
      targets: [
        { p:"//body/div[1]/A", v: "title" },
        { p:"//body/div[1]", group:1, parent:0,
                   type:"|", cardinality:"+" },
        { p:"//body/div[1]/U", group:1, v: "author" },
        { p:"//body/div[1]/B", group:1, v: "publisher" },
      ],
      ex: "A $title : ( U $author | B $publisher | br ) +",
    },
    { name: "Orbagsuffix",
      // Also CP-3412, No BR after the or-bag
      targets: [
        { p:"//body/div[1]", group:1, parent:0,
                   type:"|", cardinality:"+" },
        { p:"//body/div[1]/U", group:1, v: "author" },
        { p:"//body/div[1]/B", group:1, v: "publisher" },
        { p:"//body/div[1]/I", v: "description" },
      ],
      //ex: "( U $author | B $publisher | br ) +: br : I $description",
      ex: "( U $author | B $publisher | br ) +: I $description",
    },
    { name: "Trfix-1",
      // Check that the TR becomes repeatable
      targets: [
        { p:"//body/div[5]/I", v: "title"  },
        { p:"//body/div[5]/table/TR[1]/TD/U", v: "author" , cardinality:"+" },
      ],
      ex: "I $title : table { TR + { TD { U + $author } } }",
    },
    { name: "Trfix-2",
      // Not repeating here
      targets: [
        { p:"//body/div[5]/I", v: "title"  },
        { p:"//body/div[5]/table/TR[1]/TD/U", v: "author" , cardinality:"?" },
      ],
      ex: "I $title : table { TR { TD { U ? $author } } }",
    },
    { name: "Trfix-3",
      // Only the second TR should repeat since pattern continues with TRs
      targets: [
        { p:"//body/div[5]/I", v: "title"  },
        { p:"//body/div[5]/table/TR[1]/TD/U", v: "author" , cardinality:"+" },
        { p:"//body/div[5]/table/TR[3]/TD/B", v: "publisher" , cardinality:"+" },
      ],
      ex: "I $title : table { TR + { TD { U + $author } } : TR + { TD { B + $publisher } } }",
    },
    
  ]; // test array


  // Normalizes a dumped xpattern string so that two dumps can be compared:
  // leading and trailing whitespace is stripped, and every remaining run
  // of whitespace is collapsed into a single space.
  function cleanstr(s) {
    return s.replace( /^\s*/,"" )     // leading
            .replace( /\s*$/,"" )     // trailing
            .replace( /\s+/g, " ");   // long spaces
  };
  
  // Registers one entry of a test's targets list with the maker.
  //   maker  the XpatternMaker under test
  //   doc    document in which the xpath test.p is looked up
  //   test   target spec: p (xpath), and optionally v (variable name),
  //          cardinality, group, and - for group definitions - parent
  //          and type
  // An entry that carries a 'parent' property defines a group and is passed
  // to maker.addGroup(); any other entry is a plain node for maker.addNode().
  function addtest( maker, doc, test ) {
    var target = xmlHelper.getElementByXpath(doc, test.p);
    var cardinality = test.cardinality || "";
    var attributes = "";  // TODO
    var group = test.group || 0;
    if ( typeof(test.parent) == "undefined" ) { // plain node target
      // test.v is handed over as is, so it may be undefined here
      maker.addNode(target, test.v, cardinality, attributes, group );
      return;
    }
    // Group definition
    var grouptype = test.type || "("; // default to normal group
    maker.addGroup(target, test.v || "", cardinality, attributes,
                   grouptype, test.parent, group);
  }; // addtest

  logger.debug("Unit test for XPatternMaker starting");
  var doc = xmlHelper.docFromString(testpage);
  var knownfailures = 0;
  logger.debug("Test page: " + xmlHelper.serializexml(doc) );
  
  for ( var testno = 0; testno < tests.length; testno++) {
    var t = tests[testno];
    logger.debug("Test " + t.name + " starting");
    if ( t.failure ) {
      
    }
    var maker = new XpatternMaker( doc );
    for ( var targno = 0; targno < t.targets.length; targno++ ) {
      logger.debug("Test " + testno + ":" + t.name + " target " + targno +
        ":" + t.targets[targno].p );
      addtest( maker, doc, t.targets[targno] );
    }
    var pat = maker.getPattern();
    var res = "";
    if (pat)
      res = cleanstr(pat.dumpString(-1));
    var ex = cleanstr(t.ex);
    if ( !t.fail ) { 
      if ( res == ex )
        logger.info("Test " + testno + ":" + t.name + " OK: " + res );
      else {
        maker.dumpTargets();
        logger.info("Test " + testno + ":" + t.name + " FAILED" );
        logger.info(" Expected '" + t.ex +"'");
        logger.info(" But got  '" + res + "'");
        return false;
      }
    } else {  // Test known to fail
      if ( res != ex ) {
        logger.info("Test " + testno + ":" + t.name + " is a KNOWN failure: "
          + t.fail );
        knownfailures++;
      } else {
        logger.info("Test " + testno + ":" + t.name +
           " OK, although it is a known failure!" );
        logger.info( t.fail );
        logger.info( "Got pattern: " + res );
        return false;
      }
    }
  }
  var failmsg = "";  
  if ( knownfailures )
    failmsg = "(But with " + knownfailures + " known failures!)";  
  logger.debug("Unit test for XPatternMaker OK. " + failmsg);
  return true; // all ok
};
