var EXPORTED_SYMBOLS = ["Xpattern", "XpatternHit"];

// Xpattern - a node in a pattern language, to be matched against a DOM tree
// The main method is match(), which returns an array of XpatternHits, possibly
// empty.

Components.utils.import('resource://indexdata/util/xmlHelper.js');

// Master switch for the debug output written by Xpattern.prototype.trace()
// and Xpattern.prototype.match().
// Warning - this produces a *lot* of debug output, and slows things down
// considerably. Be sure to leave it off in production systems!
var xpatternDebug= false;
//xpatternDebug= true;
// TODO - Read this from an environment variable or something similar.
// (Not so easy, there is no good access to the environment. Maybe a file in
// the current directory?)

// Xpattern constructor - creates one node of the pattern tree.
// Only the fields that are needed all the time are initialized here. The
// rarely used ones are created on demand, which saves some memory per node:
//   nextAlternative - alternative branches of an OR-node, assigned when
//                     making OR-nodes
//   negation        - the 'but not' pattern
//   nodesToo        - set by the builder to request nodes in the hits
//   stringoffset    - offset into the string that defined the pattern, only
//                     used in the builder to highlight
//   modifiers       - created by setModifier()
// xpatternMaker may also set .group while working; that is ignored here.
function Xpattern() {
  // What to match: often a tag like "TD". Or a group, or something special...
  this.nodeType = "any";

  // Cardinality; greedy defaults to true for historical reasons
  this.repeatable = false;
  this.optional = false;
  this.greedy = true;

  // Links to other Xpattern objects
  this.attributes = null;  // head of the attribute list
  this.firstChild = null;  // first child node
  this.nextSibling = null; // next node on the same level

  // Where to store the result
  this.variable = "";

  // Attribute matching
  this.attrValue = "";     // the attribute value to match against
  this.relationValue = ""; // how to match the attribute, regex or equal
} // Xpattern constructor

// All known modifiers. setModifier() accepts only these, and dumpString()
// prints them in this order.
const xpatternModifiers = [ "html", "xml", "textcontent", "whitespace" ];
// The following do affect $variables: recalulateVariable() appends each one
// that is set to the variable name, as "/modifier"
const variableModifiers = [ "html", "xml", "textcontent", "whitespace" ];

// Constructor for XpatternHit
// A XpatternHit contains all the $variable fields collected during the 
// recursion into the Dom tree.
// In the end the hits array will be something like
// this.hits[0]["name"]="author"
// this.hits[0]["value"]=""
// this.hits[1]["name"]="author"
// this.hits[1]["value"]="Smith"
// this.hits[2]["name"]="author"
// this.hits[2]["value"]=""
// this.hits[3]["name"]="author"
// this.hits[3]["value"]="Wesson"
// this.hits[4]["name"]="title"
// this.hits[4]["value"]=""
// this.hits[5]["name"]="title"
// this.hits[5]["value"]="Making Peace"
// this.hits[6]["name"]="title"
// this.hits[6]["value"]="in the West"
// As it is now, each #text tag goes into its own line in the hits array
// We can combine them on some higher level
// A hit with a name but no content indicates that here we should start a new
// field (as in the authors above, and in contrast to the two parts of the 
// title).
// Groups are started by a hit with a name that ends in a slash, as in "item/".
// Groups end with a hit with a plain "/" as name.
// For the builder, we can mark some interesting nodes with an empty name and
// value.
// Variable names include the modifiers too, as in $author/html
// These are checked where they may make a difference, and stripped much later
// in the process.
function XpatternHit() {
  // The collected name/value entries, in the order they were found
  this.hits = [];
  // The last DOM node matching the end of the (sub)pattern
  this.lastDomNode = null;
} // XpatternHit constructor

// Append one piece of text to the hits array, as a name/value entry.
//   key     - variable name, including any modifiers ("author/html")
//   txt     - the text to store; null or undefined is silently ignored
//   domNode - the DOM node the text came from
//   patNode - the pattern node that produced the text; may be missing
// Unless the key contains "/whitespace", runs of whitespace are collapsed
// into a single space and leading whitespace is removed. Text that is empty
// after that is not stored. The DOM and pattern nodes are stored in the
// entry only if the pattern node has nodesToo set.
XpatternHit.prototype.appendString = function (key, txt, domNode, patNode) {
  if (txt == null) {
    return; // should not really happen, but I've seen it
  }
  if ( key.indexOf("/whitespace") == -1 ) {
    txt = txt.replace(/[\s]+/g," ");
    txt = txt.replace(/^[\s]+/g,"");
  }
  if ( txt == "" ) {
    return; // no need to save pure whitespace. Would confuse breaks etc.
  }
  var v = [];
  v["name"] = key;
  v["value"] = txt;
  // Guard against a missing pattern node, the same way break() does
  if ( patNode && patNode.nodesToo ) {
    v["dom"] = domNode;
    v["pattern"] = patNode;
  }
  this.hits.push(v);
};

// Push an empty hit in the hits array to mark the beginning of a new variable
// instance. These will be filtered out later.
//   name    - the variable name; "none" is ignored
//   domNode - the DOM node where the variable starts
//   patNode - the pattern node; may be missing
// Breaks with no name are hints to the builder about interesting nodes,
// so we only pass them on if the builder has asked for nodes to be remembered
// (nodesToo set on the pattern node).
XpatternHit.prototype.break = function (name, domNode, patNode) {
  if ( name == "none" ) {
    return;
  }
  // Logical tests, so that a missing pattern node is never dereferenced
  var wantNodes = !!( patNode && patNode.nodesToo );
  if ( name == "" && !wantNodes ) {
    return;
  }
  var v = [];
  v["name"] = name;
  v["value"] = "";
  if ( wantNodes ) {
    v["dom"] = domNode;
    v["pattern"] = patNode;
  }
  this.hits.push(v);
};

// Append the nodeValue of a DOM node to the hits, under the name 'key'.
// Nothing is appended if the key is empty or "none", or if the DOM node
// or its nodeValue is missing. The text handling is left to appendString().
XpatternHit.prototype.appendValue = function (key, domNode, patNode) {
  if ( key == "" || key == "none" ) {
    return;
  }
  if ( domNode == null || domNode.nodeValue == null ) {
    return;
  }
  this.appendString(key, domNode.nodeValue, domNode, patNode);
}; // appendValue

// Append the hits from otherhits to the hits of this one.
//   otherhits - another XpatternHit; null is ignored
//   copylast  - if true, also take over the lastDomNode of otherhits,
//               provided it has one
XpatternHit.prototype.appendHits = function (otherhits, copylast) {
  if (otherhits != null) {
    this.hits = this.hits.concat(otherhits.hits);
    var takeLast = copylast && otherhits.lastDomNode;
    if (takeLast) {
      this.lastDomNode = otherhits.lastDomNode;
    }
  }
}; // appendHits


// Debug dump of the hits array, written with the global dump() function.
// Prints one line per hit, with its index, name and value. Hits that carry
// both a DOM node and a pattern node get those printed too, the DOM node
// as its dompath() and the pattern node as a one-node dumpString(-2).
XpatternHit.prototype.dump = function ( ) {
  var last="";
  if (this.lastDomNode != null) {
    last="last node:" + dompath(this.lastDomNode) ;
  }
  if (this.hits.length==0) {
    dump("  no hits to dump. " + last + "\n");
    return;
  }
  dump("  dumping " + this.hits.length + " hits. " + last + "\n");
  // A declared index instead of an undeclared 'i' in a for-in loop, so that
  // nothing leaks into the global scope.
  for ( var i = 0; i < this.hits.length; i++ ) {
    var hit = this.hits[i];
    var line = "    hit [" + i + "] "+
        " '" + hit["name"] + "' :" +
        " '" + hit["value"] + "'";
    if ( hit["dom"] && hit["pattern"] ) {
      line += " dom:" + dompath(hit["dom"]) +
          " pat:" + hit["pattern"].dumpString(-2);
    }
    dump(line + "\n");
  }
}; // dump


// Get a displayable name for the dom node, in the form
// depth.sibling.tag. Used in debug traces only.
//   depth   - the number of nodes from this one up to the root, inclusive
//   sibling - the position among its siblings, starting from 1
//   tag     - the localName, or the nodeName if there is no localName
// Returns the string "null" if there is no node.
function dompath(domNode) {
  if (domNode == null) {
    return "null";
  }
  var depth = 0;
  for (var up = domNode; up != null; up = up.parentNode) {
    depth++;
  }
  var position = 0;
  for (var left = domNode; left != null; left = left.previousSibling) {
    position++;
  }
  var label = domNode.localName || domNode.nodeName;
  return "" + depth + "." + position + "." + label;
}

// Debug log. Does nothing unless xpatternDebug is set.
//   msg     - the message to log
//   domNode - the DOM node being worked on, can be null
//   hits    - if not null, these hits are dumped after the message
// The line shows the message, this pattern node (as a one-node dumpString)
// and the path and the cleaned-up value of the DOM node.
Xpattern.prototype.trace = function(msg, domNode, hits) {
  if (!xpatternDebug) {
    return;
  }
  var where = dompath(domNode);
  var what = cleanedNodeValue(domNode);
  var line = "Xpattern: " + msg + " (" + this.dumpString(-2) + ") " +
      "(" + where + ":" + what + ") \n";
  dump(line);
  if (hits != null) {
    hits.dump();
  }
}; // trace


// Simple setters and getters for various fields

// Set the node type: a tag name, or one of the special types
Xpattern.prototype.setType = function(t) {
  this.nodeType = t;
};

// Get the node type
Xpattern.prototype.getType = function() {
  var t = this.nodeType;
  return t;
};

// Set the variable to store the result in. The stored name is then
// rebuilt by recalulateVariable(), so the modifiers set on this node get
// appended to it.
Xpattern.prototype.setVariable = function(v) {
  this.variable = v;
  this.recalulateVariable();
};

// Get the variable name, as stored (including appended modifiers)
Xpattern.prototype.getVariable = function() {
  var v = this.variable;
  return v;
};

// Get the variable itself, without any appended modifiers: everything
// from the first slash that has something after it is cut off.
Xpattern.prototype.getPlainVariable = function() {
  var modifierTail = /\/.+$/;
  return this.variable.replace( modifierTail, "" );
};

// Combine the modifiers and plain variable: rebuild this.variable as the
// plain name followed by "/modifier" for each variable-affecting modifier
// that is set on this node, in the order of variableModifiers.
Xpattern.prototype.recalulateVariable = function() {
  var name = this.getPlainVariable();
  var active = this.modifiers;
  if ( active ) {
    for (var idx = 0; idx < variableModifiers.length; idx++ ) {
      var mod = variableModifiers[idx];
      if ( active[mod] ) {
        name += "/" + mod;
      }
    }
  }
  this.variable = name;
};

// Get the array of all known modifiers (for the builder menu).
// This is the xpatternModifiers array itself, not a copy.
Xpattern.getAllModifiers = function() {
  var all = xpatternModifiers;
  return all;
};

// Check if the variable has a modifier appended to it, that is, if the
// variable name contains "/" followed by the modifier.
Xpattern.prototype.varhasmodifier = function(variable,mod) {
  var needle = "/" + mod;
  var found = variable.indexOf(needle) != -1;
  return found;
};

// Sets a modifier on this node. Returns true if all went well, false in
// case of a bad modifier (one that is not in xpatternModifiers).
// The modifiers object is created on the first use. If the modifier is one
// that affects variables, the variable name is recalculated.
Xpattern.prototype.setModifier = function(mod) {
  var known = xpatternModifiers.indexOf(mod) != -1;
  if ( !known ) {
    return false; // oops, didn't recognize it
  }
  if ( !this.modifiers ) {
    this.modifiers = {};
  }
  this.modifiers[mod] = true;
  if ( variableModifiers.indexOf(mod) != -1 ) {
    this.recalulateVariable();
  }
  return true;
};

// Get an array of the names of the modifiers set on this node (usually
// only one, but can be more). The array is empty if there are none.
Xpattern.prototype.getModifiers = function() {
  return this.modifiers ? Object.keys(this.modifiers) : [];
};

// Set the cardinality flags from a cardinality string:
//   "+"  repeatable          "+?"  repeatable, not greedy
//   "*"  repeatable+optional "*?"  repeatable+optional, not greedy
//   "?"  optional            "??"  optional, not greedy
// Flags are only ever switched on (or greedy off) here, never reset; any
// other string leaves the node as it is.
// "??" also sets optional: getCardinality() produces "??" for an optional
// non-greedy node, so the string has to round-trip through here.
Xpattern.prototype.setCardinality = function(c) {
  if ( c == "+" || c == "*" || c== "+?" || c == "*?" ) {
    this.repeatable = true;
  }
  if ( c == "?" || c == "*" || c == "*?" || c == "??" ) {
    this.optional = true;
  }
  if ( c == "+?" || c == "*?" || c == "??" ) {
    this.greedy = false;
  }
};

// Get the cardinality string that corresponds to the flags of this node:
// "*" for repeatable and optional, "+" for repeatable, "?" for optional,
// each followed by another "?" if the node is not greedy. A node that is
// neither repeatable nor optional gives an empty string.
Xpattern.prototype.getCardinality = function() {
  var c = "";
  if ( this.repeatable ) {
    c = this.optional ? "*" : "+";
  } else if ( this.optional ) {
    c = "?";
  }
  if ( c != "" && !this.greedy ) {
    c += "?";
  }
  return c;
};

// True if the node is optional
Xpattern.prototype.isOptional = function() {
  var flag = this.optional;
  return flag;
};

// True if the node can be repeated
Xpattern.prototype.isRepeatable = function() {
  var flag = this.repeatable;
  return flag;
};

// Get the stringoffset of this node. The constructor does not create the
// field, so this is undefined until setStringoffset() has been called.
Xpattern.prototype.getStringoffset = function() {
    var so = this.stringoffset;
    return so;
};

// Set the stringoffset of this node
Xpattern.prototype.setStringoffset = function( so ) {
    this.stringoffset = so;
};


// Add a node as the nextSibling of this node.
// That is, insert it in the sequence right after this one; what used to
// follow this node now follows the new one.
Xpattern.prototype.addAfter = function ( sib ) {
  var follower = this.nextSibling;
  sib.nextSibling = follower;
  this.nextSibling = sib;
};

// Add a node as the last child of this node: as the firstChild if there
// are no children yet, otherwise at the end of the sibling chain.
Xpattern.prototype.appendChild = function ( ch ) {
  var last = this.firstChild;
  if (last == null) {
    this.firstChild = ch;
    return;
  }
  for ( ; last.nextSibling != null; last = last.nextSibling ) {
    // just walking to the end of the chain
  }
  last.nextSibling = ch;
}; // appendChild

// Add a node at the end of the chain of alternatives that starts at
// this node.
Xpattern.prototype.appendAlternative = function ( alt ) {
  var tail;
  for ( tail = this; tail.nextAlternative != null; tail = tail.nextAlternative ) {
    // just walking to the end of the chain
  }
  tail.nextAlternative = alt;
}; // appendAlternative

// Set the negation (the 'but not' pattern) of this node.
// An existing negation is silently replaced.
Xpattern.prototype.setNegation = function ( neg ) {
    // TODO - throw an exception if this.negation is already set, we
    // should not have multiple negations in one node.
    // Should never happen, anyway. The parser won't do it
    this.negation = neg;
}; // setNegation

// Add an attribute pattern to this node. The attributes form a list, linked
// through nextSibling; the new one goes to the end of the list.
Xpattern.prototype.addAttribute = function ( a ) {
  if (this.attributes == null) {
    this.attributes = a;
    return;
  }
  var tail = this.attributes;
  for ( ; tail.nextSibling != null; tail = tail.nextSibling ) {
    // just walking to the end of the list
  }
  tail.nextSibling = a;
}; // addAttribute

// Set the attribute value to match against
Xpattern.prototype.setAttrValue = function(v) {
  var value = v;
  this.attrValue = value;
};

// Set the relation, that is, how the attribute value is to be matched
Xpattern.prototype.setRelationValue = function(v) {
  var relation = v;
  this.relationValue = relation;
};


// Helper to recurse through an Xpattern, and call func for each node
// along the way. func gets the node as its only argument. It is called
// for this node first, and then the recursion goes through the children,
// the siblings, the alternatives, the negation, and the attributes,
// in that order.
Xpattern.prototype.foreachnode = function( func ) {
    func( this );
    var links = [ "firstChild", "nextSibling", "nextAlternative",
                  "negation", "attributes" ];
    for (var k = 0; k < links.length; k++) {
        var linked = this[links[k]];
        if (linked != null) {
            linked.foreachnode(func);
        }
    }
};


// Set the internal flag nodesToo to true on all pattern nodes reachable
// from this one. This makes the hits carry the corresponding pattern
// nodes and DOM nodes as well as the text. Used in the editor
// for highlighting, not in the engine.
Xpattern.prototype.askForNodes = function() {
    var markNode = function(node) {
        node.nodesToo = true;
    };
    this.foreachnode( markNode );
};

// Routines to set and get a "seen" flag in the pattern; it is cleared by
// setting it to a false value.
// Used in parse_xpattern to detect repeated elements
Xpattern.prototype.setSeen = function(v) {
  var flag = v;
  this.seen = flag;
};

// The constructor does not create the flag, so this is undefined until
// setSeen() has been called.
Xpattern.prototype.getSeen = function() {
  var flag = this.seen;
  return flag;
};

// Produce a displayable string of the xPattern
// indent -1 means all in one line
// indent -2 means one-liner, only this one node
// indent -3 is internal use only, comma-separated one-liner (@attrlist)
// non-negative values indicate indentation level
// (toString is a reserved word, seem not to be able to override that #&!)
Xpattern.prototype.dumpString = function ( indent ) {
  var nextindent=indent;
  var indentation="";
  var newline="";
  var openbrace = " { ";
  var closebrace = "} ";
  var afterbrace = "";
  var s = "";
  var relation = "";
  
  if (indent>=0) {
    nextindent++;
    newline="\n";
    for(i=0;i<indent;i++) {
      indentation = indentation + "  ";
    }
  }

  var ntype = this.nodeType;
  if (ntype == "(" ) {
    openbrace = "( ";
    closebrace = ")";
    ntype = "";
  }

  var cardinality = this.getCardinality();
  if ( cardinality != "" ) {
    cardinality = " " + cardinality;
  }

  var assign="";
  if (this.attrValue) {
      relation = this.relationValue ? this.relationValue : "=";
      assign = " " + relation + " \"" + this.attrValue + "\"";
  }

  var variable = "";
  if (this.variable != "" ) {
    variable = " $" + this.getPlainVariable() ;
  }
  var modifier = "";
  for (var m in xpatternModifiers ) {
    var mod = xpatternModifiers[m];
    if (this.modifiers && this.modifiers[mod] )
      modifier += " -" + mod ;
  }

  var attr="";
  if ( this.attributes != null ) {
    attr = " [ ";
    attr += this.attributes.dumpString( -3 );
      // display attributes always on one line
    attr += " ]";
  }
  
  if ( this.nodeType == "(" ) {
    afterbrace = cardinality + variable ;
  } else if ( this.nodeType == '|' ) {
    openbrace = ""; // hide the fact that this is a group
    closebrace = "";
    indentation = "";
    newline = "";
    if ( indent == -2 )
      s="|"; // when tracing only one node, show this is a hidden Or-node
  } else if ( this.nodeType == "/" ) {
    s = indentation + "/" + this.attrValue + "/"; 
  } else { // regular node
    s = indentation + ntype + cardinality + assign + variable + modifier + attr+ " " ;
  }
  
  if (this.firstChild != null && indent != -2 ) {
    if ( this.nodeType == "(" )
        s += indentation;
    s += openbrace  + newline;
    s += this.firstChild.dumpString(nextindent);
    s += indentation + closebrace + afterbrace;
  } else if (this.nodeType == "(" && indent == -2 ) {
    s += "(...)" + afterbrace;
  }
  if (this.negation != null ) {
    if ( indent >= 0 ) {
        s += "! " + newline ;
        s += this.negation.dumpString(indent+1).replace(/\n$/,"");
          // Need to replace the tailing newline away, a new one will
          // be added soon. This is ugly, and coult probably be done
          // nicer. Some day Real Soon Now...
    } else if ( indent == -1 ) {
      s += "! ";
      s += this.negation.dumpString(indent);
    }
  }
  if (this.nextSibling != null && indent != -2 ) {
    if ( indent == -3 )
        s += ", "; // attrlists are comma-separated
    else
        s += ": "; 
    s += newline;
    s += this.nextSibling.dumpString(indent);
  } else if (this.nextAlternative != null && indent != -2 ) {
    s += ' | ' + newline;
    s += this.nextAlternative.dumpString(indent );
  } else {
    s += newline;
  }
  return s;
}; // dumpString

// Simple debug helper to get a displayable value from a dom node:
// the nodeValue in single quotes, with runs of spaces and newlines
// collapsed into one space, and cut to at most 30 characters.
// Gives "(null node)" if there is no node, and an empty string if the
// node has no nodeValue.
function cleanedNodeValue( domNode ) {
  if (domNode == null) {
    return "(null node)";
  }
  var raw = domNode.nodeValue;
  if (raw == null) {
    return "";
  }
  var squeezed = raw.replace(/[ \n]+/g, " ");
  return "'" + squeezed.substring(0, 30) + "'";
}

/////////////////////////////////////////////////////////////////
// Actual matching starts here
// Matches the pattern that starts at this node against the DOM tree under
// domNode. Returns an array of XpatternHits, one for each place where the
// pattern matches. The array is empty if none was found.
// With xpatternDebug set, the pattern is dumped first, in three formats.
Xpattern.prototype.match = function ( domNode ) {
  if (xpatternDebug) {
    dump("match: (-2):" + this.dumpString(-2) + "\n");
    dump("match: (-1):" + this.dumpString(-1) + "\n");
    dump("match: \n" + this.dumpString(1));
  }
  var hitsArray = [];
  this.trace ("starting match", domNode, null);
  this.findMatch(domNode, hitsArray, false );
  this.trace("Match done. Returning " + hitsArray.length + " hits:",null,null);
  for ( var idx = 0; idx < hitsArray.length; idx++ ) {
    this.trace("Match result: hit " + idx + ":", null, hitsArray[idx] );
  }
  return hitsArray;
}; // match


// Try to match the pattern against every node in the tree.
// Collect hits into the hitsArray
// siblingstoo tells if it should continue to the nextSibling of the domNode.
// Should be false on the initial call (where domNode is the whole hit area),
// and true on recursion to its childrens.
Xpattern.prototype.findMatch = function ( domNode, hitsArray, siblingstoo ) {
  if (domNode == null ) {
    return ;
  }
  while (domNode != null ) {
    this.trace("findMatch",domNode,null);
    var dnn = domNode.localName || domNode.nodeName;
      // Normally we are only interested in localnames (without ns prefixes)
      // but some nodes (#text, #comment, and who knows what else) do not
      // have localNames. 
    if ( (dnn != "#text" && dnn != "#comment" ) ||
        dnn.toUpperCase() == this.nodeType.toUpperCase() || 
        this.nodeType == "|" || this.nodeType == "(" ) {
      // texts are normally collected while matching real nodes, comments ignored.
      // but be more careful if the pattern node is one of those too!
      // Also, or and group patterns need to be checked
      var hit = this.matchHere( domNode, "", true );
        // we pretend we are in a group, so we don't collect the rest
        // of the document into a hit
      if ( hit != null ) {
        this.trace("Found a complete hit ", domNode, hit);
        hitsArray.push(hit);
        if (hit.lastDomNode != null) {
          this.trace("  hit ends at ", hit.lastDomNode, null);
          domNode = hit.lastDomNode; 
          // this is needed in case of a repeated pattern, for example
          // A+ matching against A : A : A, this would result in
          // 3 matches (A:A:A, A:A, and A).
        }
      } else {
        this.findMatch( domNode.firstChild, hitsArray, true );
      }
    }
    if (siblingstoo)
      domNode = domNode.nextSibling;
    else
      domNode = null; // reached the end of the original target node
  }
  return ;
}; // findMatch

// Matches the pattern node against the current domNode.
// Actually, this is mostly concerned about alternatives and optional nodes,
// the matching itself happens in matchNonOptional() below.
//   domNode - the DOM node to match against
//   varname - name of the variable currently being collected into
//   ingroup - determines how the rest is collected: inside a group (true)
//             the rest of the DOM nodes is not collected into the hit
// Returns a XpatternHit with collected hits, if it matches (can be empty),
// or null if the match fails.
Xpattern.prototype.matchHere = function ( domNode, varname, ingroup ) {
  var thishits = this.matchNonOptional( domNode, varname, ingroup );
  if ( thishits ) {
    this.trace("matchHere returning hit ", domNode, thishits);
    return thishits;
  }
  // no hit, do we have an alternative
  if ( this.nextAlternative ) {
    this.trace("matchHere: mismatch, trying alternative", domNode, null);
    var althits = this.nextAlternative.matchHere( domNode, varname, ingroup);
    if ( althits ) {
      this.trace("matchHere: Found a good alternative, returning that",
                 domNode, althits);
      return althits;
    }
  }
  // still no hit, only an optional node can save the day
  if ( !this.optional ) {
    return null; // a plain mismatch, always null and never undefined
  }
  if ( this.nextSibling ) {
    // skip this node, and match the rest of the pattern at the same DOM node
    this.trace("matchHere: Optional node, didn't match, proceeding",
               domNode, null);
    return this.nextSibling.matchHere( domNode, varname, ingroup );
  }
  this.trace("matchHere: Optional node didn't match, nothing to follow, " +
      "returning empty hits (and collecting rests)", domNode, null );
  if ( ingroup ) {
    // inside a group the hits must come from inside!
    this.trace("matchHere: not collecting hits, since inside a group",
        domNode, null);
    return new XpatternHit();
  }
  // Collect also this node itself, as it has failed to match, so
  // we don't have it in the hits.
  var resthits =  this.collectRest(domNode, varname);
  this.trace("matchHere: After collecting rest ", domNode, resthits);
  return resthits;
}; // matchHere


// Matches the pattern node against the current domNode and, by recursion,
// the following pattern nodes against the following DOM siblings.
// Failing optional nodes and alternatives are not handled here, but
// in the caller, matchHere().
//   domNode - the DOM node to start matching at, can be null
//   varname - name of the variable currently being collected into
//   ingroup - determines how restmatches are collected: only outside a
//             group are the DOM siblings after the end of the pattern
//             collected into the hits (with collectRest)
// returns a XpatternHit with collected hits, if matches (can be empty)
// or null if the match fails.
// The lastDomNode of the returned hits tells how far the match got.
Xpattern.prototype.matchNonOptional = function ( domNode, varname, ingroup ) {
  // #comment nodes don't matter at all, skip here
  // (without recursion)! - unless a comment pattern or a group
  while (domNode != null && domNode.nodeName.toUpperCase() == "#COMMENT" &&
         this.nodeType.toUpperCase() != "#COMMENT" &&
         this.nodeType != "(" && this.nodeType != "|" ) {
    this.trace("matchNonOpt skipping comment", domNode, null);
    domNode = domNode.nextSibling;
  }
  if (domNode == null) { // nothing to match against
    if (this.allOptional() ) {
      this.trace("matchNonOpt beginning, no node, optional pattern",
         domNode, null);
      return new XpatternHit();
    } else {
      return null; // mismatch
    }
  }
  this.trace("matchNonOpt starting (ingroup=" + ingroup + ")", domNode,null);

  // Check non-greedy optional: see first if the rest of the pattern
  // matches right here, without this node
  if ( this.optional && ! this.greedy && this.nextSibling != null ) {
    this.trace("matchNonOpt: trying non-greedy optional next", domNode, null);
    var nexthits = this.nextSibling.matchHere( domNode, varname, ingroup);
    if (nexthits != null) {
      this.trace("matchNonOpt: non-greedy optional matched ", domNode, nexthits);
      return nexthits;
    }
    this.trace("matchNonOpt: non-greedy optional failed to match ", domNode, nexthits);
  }

  // match the node itself
  // first special cases, then text nodes if we want to be greedy, then
  // groups and or-groups, then text nodes again, finally regular nodes
  // The order is important! See bug 4508 for the greedy vs non-greedy
  // collecting of #text nodes (and unit tests marked 4508)

  var thishits;
  if (this.nodeType.toUpperCase() == "#TEXT" ) {
    thishits = this.matchTextNode(domNode, varname);
  } else if (this.nodeType.toUpperCase() == "#COMMENT" ) {
    thishits = this.matchCommentNode(domNode, varname);

  } else if (domNode.nodeName == "#text" && !ingroup
        && this.nodeType != "(" && this.nodeType != "|"  ) {
    // Collect the #text node in a greedy way, since we are not
    // doing a group
    //    DIV $foo {B $bar}    <div>some<b>more</b>text</div>
    // collects the "some" into $foo, and recurses to the B with "more"
    return this.collectTextNode( domNode, varname, ingroup, "greedy"  );
  } else if ( this.nodeType == '(' ) { // group
      thishits = this.matchGroup(domNode, varname);
      if (thishits != null && thishits.lastDomNode != null ) {
          this.trace("Skipping matched group from " + dompath(domNode) +
          " to " + dompath(thishits.lastDomNode), domNode, thishits );
       domNode = thishits.lastDomNode;  // skip the whole group
      }
  } else if (this.nodeType == '|' ) { // or-group
      thishits = this.matchOr(domNode, varname);
      if (thishits != null && thishits.lastDomNode != null ) {
          this.trace("Skipping matched OR group from " + dompath(domNode) +
          " to " + dompath(thishits.lastDomNode),
          domNode, thishits );
          domNode = thishits.lastDomNode;  // skip the whole group
      }

  } else if (domNode.nodeName == "#text" ) {
    // Second possible place for this  (??)
    // now we can safely collect the text
    // a dom #text node matches against any real pattern node
    return this.collectTextNode( domNode, varname, ingroup, "nongreedy");
    // TODO - Is this greedy/nongreedy trickery needed any more, after the
    // matchNonOptional stuff (CP-3451)? Seems not to harm.
  } else { // regular node
    thishits = this.matchNode(domNode, varname);
    // including matches from attributes and childnodes
  }

  this.trace("matchNonOpt after matchNode", domNode, thishits);

  // check negation: if the 'but not' pattern matches too, this is
  // no match after all
  if (thishits && this.negation ) {
      var neghits = this.negation.matchHere( domNode, varname, ingroup );
      if (neghits) {
          this.trace("matchNonOpt: Negation matched, nulling hits",
                     domNode, neghits);
          thishits = null;
      }
  }

  if (thishits == null) { // this node failed to match
    this.trace("matchNonOpt: matchNode did not match", domNode, thishits);
    return null; // declare a mismatch, optional stuff is handled higher up
                 // in matchHere()
  } else {  // This node matches
    thishits.lastDomNode = domNode; // Remember end marker
    this.trace("matchNonOpt: matchNode matched", domNode, thishits);
    if (domNode.nextSibling == null) { // dom tree ends here
        if (this.nextSibling != null) {
            // oops, pattern continues, but nothing to match
            if (this.nextSibling.allOptional()) {
              this.trace("matchNonOpt: Mismatch, but optional all the way. "+
                         "Call it a hit.", domNode,thishits);
                return thishits; // luckily it was all optional nodes
            }
            this.trace("matchNonOpt:  Mismatch, pattern continues, " +
                       "but nothing to match", domNode,null);
            return null; // must be a mismatch!
        } else { // both dom and pattern end. That matches.
            return thishits;
        }
    } else { // Dom tree continues
      if (this.repeatable && this.greedy) { // greedy repeat
        // try this same pattern node again on the next DOM sibling
        this.trace("matchNonOpt: checking (greedy) repeat",domNode,thishits);
        var rephits = this.matchNonOptional(domNode.nextSibling, varname, ingroup );
        if (rephits != null) {
          this.trace("matchNonOpt: Got repeated hits",domNode,rephits);
          thishits.appendHits(rephits,true);
          return thishits;
        }
      }
      if (this.nextSibling == null)
      {
          // end of pattern, now we have a real match
        thishits.lastDomNode = domNode; // remember how far we got
        this.trace("matchNonOpt: End of pattern chain. Setting last. " +
                    "Collecting rest into '$" + varname + "' " +
                    "ingroup= " + ingroup, domNode, thishits);
        var resthits; // collect the rest of the hits
        if ( !ingroup ) {
          // not inside a group, collect all the way to the end
          resthits = this.collectRest(domNode.nextSibling, varname);
          thishits.appendHits(resthits,false);
          // no need to set lastDomNode here, I suppose. Hence the 'false'.
        } // the group has collected its stuff already
        this.trace("matchNonOpt: End of pattern chain ", domNode, thishits );
        return thishits;
      } else { // check the rest of the pattern
        var nexthits = this.nextSibling.matchHere( domNode.nextSibling,
                        varname, ingroup );
        if (nexthits == null) {
          if (this.repeatable && !this.greedy ) {
            // try a non-greedy repeat only after regular childnode failed
            this.trace("matchNonOpt: nongreedyrepeat check", domNode, null );
            nexthits = this.matchNonOptional( domNode.nextSibling, varname, ingroup);
            //this.trace("matchNonOpt: nongreedyrepeat: ", domNode, nexthits );
            if ( nexthits ) {
              this.trace("matchNonOpt: nongreedyrepeat matched: ",
                         domNode, nexthits );
              //dump("matchNonOpt: DEBUG nongreedyrepeat match. " +
              // "thishits.last = '" + dompath(thishits.lastDomNode) +"' " +
              // "nexthits.last = '" + dompath(nexthits.lastDomNode)+ "'" );
              thishits.appendHits(nexthits,true);
              return thishits;
            } else {
              this.trace("matchNonOpt: nongreedyrepeat mismatch: ",
                         domNode, nexthits );
              return null;
            }
          }
          if (this.optional) {
               // we have already tried non-greedy optional earlier,
               // so if we are non-greedy, we know nextSibling won't match!
            this.trace("matchNonOpt: NextSibling mismatch, " +
                "but this was optional",
                domNode,null);
            return this.nextSibling.matchHere(domNode,
                    varname, ingroup );
            // FIXME - Do we ever get here, or does matchHere return an
            // empty list on optional nodes anyway (not null)?
            // If we do get here, what happens to thishits?
          }
          this.trace("matchNonOpt: nextSibling mismatch", domNode,null);
          return null; // didn't match anyway
        } else {
          this.trace("matchNonOpt: NextSibling match.", domNode,nexthits);
          //this.trace("matchNonOpt DEBUG: " +
          //    "thishits.last = '" + dompath(thishits.lastDomNode) + "' " +
          //    "nexthits.last = '" + dompath(nexthits.lastDomNode)+ "'",
          //    domNode, thishits);
          thishits.appendHits(nexthits,true);
          // Even if it was non-greedy repeat, it is fine to return here.
          // That's what non-greedy means!
          return thishits;
        }
      }
    } // dom has nextSibling
  } // this node matches
  // We should never reach here, all branches are supposed to return...
  this.trace("matchNonOpt: Reached the end?!!!",domNode,thishits);
  return thishits;
}; // matchNonOptional


// Matches a (...) group, by matching its children against domNode and
// the DOM nodes that follow it.
//   domNode - the DOM node where the group should start
//   varname - name of the variable currently being collected into; a group
//             with a variable of its own collects into "<variable>/"
// Returns the hits of the group, or null if the group does not match. The
// end of the match is in the lastDomNode of the hits, so we can fast-forward
// past all that has been matched.
// Produces breaks for group begin and end, if the group has a variable.
// Since the slash is not allowed in variable names, we can use
// that for signalling start and stop.
Xpattern.prototype.matchGroup = function (domNode, varname) {
  this.trace("matchGroup: group node", domNode, null);
  if ( !this.firstChild ) {
    // An empty group has nothing to match with. Treat it as a mismatch,
    // the way matchOr() does, instead of crashing on the missing child.
    this.trace("matchGroup: empty group, mismatch", domNode, null);
    return null;
  }
  var newvarname = varname;
  var thishits = new XpatternHit();
  if (this.variable != "") {
    newvarname = this.variable + "/"; // signal group begin
    this.trace("matchGroup: at break $" + newvarname, domNode, thishits);
    thishits.break(newvarname, domNode, this);
  }
  var grphits = this.firstChild.matchHere(domNode,newvarname, true);
  if ( grphits == null ) {
    this.trace("matchGroup: mismatch", domNode, null);
    return null; // mismatch
  }
  thishits.appendHits(grphits,true);
  this.trace("matchGroup: after group ", domNode, thishits);
  if (this.variable != "") {
    thishits.break("/", domNode, this); // signal end of group
  }
  return thishits;
}; // matchGroup

// Matches an OR group
// Trivially match the first child, and let matchHere worry about alternatives.
//   domNode - the DOM node to match against
//   varname - name of the variable currently being collected into
// The child is matched as if inside a group. Returns the hits of the child,
// or null if it does not match or if there is no child at all.
Xpattern.prototype.matchOr = function (domNode, varname) {
  this.trace("matchOr ", domNode, null);
  // No locals needed here; there used to be an unused XpatternHit
  // allocated on every call.
  if ( !this.firstChild ) {
    return null;
  }
  return this.firstChild.matchHere(domNode, varname, true);
}; // matchOr


// Match a #text pattern node against one DOM text node.
// Both sides must be text: the pattern's nodeType is compared to "#TEXT"
// ignoring case, the DOM nodeName must be exactly "#text".
// The node is stored with appendValue() under this pattern's own $variable
// if it has one (after a break for that variable), otherwise under the
// inherited varname. The attribute conditions are then checked through
// matchAttributes() and their hits appended.
// Returns the hits, or null if the node type or the attributes mismatch.
Xpattern.prototype.matchTextNode = function (domNode, varname){
  var bothAreText = this.nodeType.toUpperCase() == "#TEXT" &&
                    domNode.nodeName == "#text";
  if ( !bothAreText ) {
    this.trace("matchTextNode: mismatch ", domNode, null);
    return null;
  }

  var textHits = new XpatternHit();
  var hasOwnVar = (this.variable != "");
  var target = hasOwnVar ? this.variable : varname;
  if (hasOwnVar) {
    // here starts a new var
    this.trace("matchTextNode: at break $" + target, domNode, textHits);
    textHits.break(target, domNode, this);
  }
  textHits.appendValue(target, domNode, this);

  var attrHits = this.matchAttributes(domNode);
  if ( attrHits == null ) {
    this.trace("matchTextNode: attribute mismatch ", domNode, attrHits);
    return null;
  }
  textHits.appendHits(attrHits, false);

  this.trace("matchTextNode: at end " + target, domNode, textHits);
  return textHits;
}; // matchTextNode

// Match a #comment pattern node against one DOM comment node.
// Both sides must be comments: the pattern's nodeType is compared to
// "#COMMENT" ignoring case, the DOM nodeName must be exactly "#comment".
// The node is stored with appendValue() under this pattern's own $variable
// if it has one (after a break for that variable), otherwise under the
// inherited varname. The attribute conditions are then checked through
// matchAttributes() and their hits appended.
// Returns the hits, or null if the node type or the attributes mismatch.
Xpattern.prototype.matchCommentNode = function (domNode, varname){
  var bothAreComments = this.nodeType.toUpperCase() == "#COMMENT" &&
                        domNode.nodeName == "#comment";
  if ( !bothAreComments ) {
    this.trace("matchCommentNode: mismatch ", domNode, null);
    return null;
  }

  var commentHits = new XpatternHit();
  var hasOwnVar = (this.variable != "");
  var target = hasOwnVar ? this.variable : varname;
  if (hasOwnVar) {
    // here starts a new var
    this.trace("matchCommentNode: at break $" + target, domNode, commentHits);
    commentHits.break(target, domNode, this);
  }
  commentHits.appendValue(target, domNode, this);

  var attrHits = this.matchAttributes(domNode);
  if ( attrHits == null ) {
    this.trace("matchCommentNode: attribute mismatch ", domNode, attrHits);
    return null;
  }
  commentHits.appendHits(attrHits, false);

  this.trace("matchCommentNode: at end " + target, domNode, commentHits);
  return commentHits;
}; // matchCommentNode


// Check one pattern node against one DOM node. For a match
//   - the node type must agree (compared ignoring case, against the DOM
//     node's localName, or its nodeName when there is no localName);
//     the pattern type ANY agrees with every node
//   - the attributes must match (see matchAttributes)
//   - the child patterns, if any, must match the DOM children
// Returns the hits collected from the whole node, or null on mismatch.
// Details:
//   - a $variable on the pattern starts a new variable with a break
//   - without a $variable, a node that had attribute conditions but produced
//     no hits gets a nameless break when nodesToo is set (for the builder)
//   - the modifiers -html, -xml and -textcontent (tried in that order, first
//     one wins) store the node's innerHTML, serialized XML or textContent in
//     the variable; the children are then collected under the inherited
//     varname instead, so the inner nodes do not end up in it twice
//   - a pattern without children still collects the DOM children through
//     collectRest()
Xpattern.prototype.matchNode = function ( domNode, varname ) {
  var domName = domNode.localName || domNode.nodeName;
  var wanted = this.nodeType.toUpperCase();
  var actual = domName.toUpperCase();
  if ( wanted != actual && wanted != "ANY" ) {
    return null;
  }

  var nodeHits = this.matchAttributes( domNode );
  if ( nodeHits == null ) {
    return null;
  }
  this.trace("matchNode: After attributes", domNode, nodeHits);

  var target = varname;
  if (this.variable != "") {
    target = this.variable;
    this.trace("matchNode: at break $" + target, domNode, nodeHits);
    nodeHits.break(target, domNode, this); // here starts a new var
  } else if ( this.attributes &&          // had attributes, and they matched
              nodeHits.hits.length == 0 && // but nothing collected so far
              this.nodesToo ) {            // and we are in the builder
    nodeHits.break("", domNode, this); // mark the node as interesting (no name)
    this.trace("matchnode: Interesting non-variable node", domNode, nodeHits);
  }

  // Modifiers for alternative content. Work out which one applies and what
  // it yields; storing and tracing is the same for all three.
  var modifier = null;
  var content;
  if ( this.varhasmodifier(target, "html") ) {
    modifier = "html";
    content = domNode.innerHTML;
    if ( typeof(content) == "undefined" ) {
      // Happens in unit tests, where we have a much simpler DOM to work with.
      content = xmlHelper.serializexml(domNode);
    }
  } else if ( this.varhasmodifier(target, "xml") ) {
    modifier = "xml";
    content = xmlHelper.serializexml(domNode);
    if ( typeof(content) == "undefined" ) {
      content = "";
    }
  } else if ( this.varhasmodifier(target, "textcontent") ) {
    modifier = "textcontent";
    content = domNode.textContent;
    if ( typeof(content) == "undefined" ) {
      content = "";
    }
  }
  if ( modifier != null ) {
    nodeHits.appendString(target, content, domNode, this);
    this.trace("matchNode: found a -" + modifier + " modifier: $" + target +
               ": " + content, domNode, nodeHits);
    // Continue with the old varname (often nothing), so as not to collect
    // the inner nodes twice.
    target = varname;
  }

  if (this.firstChild == null) {
    // no firstChild in the pattern, collect hits anyway
    var restHits = this.collectRest(domNode.firstChild, target);
    nodeHits.appendHits(restHits, false);
    this.trace("End of pattern chain, collected child hits", domNode, restHits);
    return nodeHits;
  }

  // Match even if domNode.firstChild is null; the child patterns may all be
  // optional.
  var childHits = this.firstChild.matchHere( domNode.firstChild, target, false);
  if (childHits == null) {
    this.trace("firstChild didn't match, quitting", domNode, null);
    return null;
  }
  this.trace("firstChild matched all right.", domNode, childHits);
  nodeHits.appendHits(childHits, false);
  return nodeHits;
};


// Match the attribute conditions of this pattern node against one DOM node.
// this.attributes is a chain of attribute patterns (linked by nextSibling);
// each of them is one of these cases:
//  1 Attribute and $variable: Get the value into the $variable. If no attr, fail
//  2 Attribute, $variable, and optional: Get value if there. Succeed always
//  3 Attribute and value: Require the value to match
//  4 Attribute, value, and optional: If attribute exists, its value must match
//  5 Regexp, check against the text content of the node
// In cases 3 and 4 the value is compared as a plain string, or as a regexp
// when the relation is "~" ( [ @attr ~ "fo+" ] ).
// A missing attribute and an attribute with an empty value are treated alike.
// DOM nodes without getAttribute() (text and comment nodes) are treated as
// having no attributes at all.
// Attributes can not be repeatable.
// Returns the collected hits (possibly none), or null on mismatch.
Xpattern.prototype.matchAttributes = function ( domNode ) {
  var thishits = new XpatternHit();
  for ( var a = this.attributes; a != null; a = a.nextSibling ) {
    if ( a.nodeType == "/" )  { // 5: /regexp/
      if ( ! a.regexp ) {
        a.trace("parsed regexp '" + a.attrValue + "'" );
        a.regexp = new RegExp(a.attrValue);
        // cache the parsed regexp in the attr node
      }
      if ( a.regexp.exec(domNode.textContent) == null ) {
        a.trace("regexp mismatch", domNode, null);
        return null;
      }
      a.trace("regexp match", domNode, null);
      continue;
    }

    // must be @attr - TODO: check that it really is!
    var av = null;
    if ( domNode.getAttribute ) {
      av = domNode.getAttribute( a.nodeType.substring(1) ); // skip the @
    }
    if ( av == null ) {
      // getAttribute() returns null for a missing attribute. Normalize that
      // to "", which is what all the tests below take to mean "not there".
      // Otherwise null would be saved as a value, or be matched as the
      // string "null" by a regexp.
      av = "";
    }

    // string compare "=" (the default) or regex "~"
    var useRegexp = ( a.relationValue == "~" );
    if ( useRegexp && ! a.regexp ) {
      a.trace("parsed regexp '" + a.attrValue + "'" );
      a.regexp = new RegExp(a.attrValue);
      // cache the parsed regexp in the attr node
    }

    if ( a.attrValue != "" ) {
      var valueOk = useRegexp ? ( a.regexp.exec(av) != null )
                              : ( av == a.attrValue );
      if ( a.optional ) { // 4
        // Only an attribute that is there can mismatch.
        if ( av != "" && ! valueOk ) {
          a.trace("matchAttributes (opt) mismatch",domNode,null);
          return null;
        }
      } else if ( ! valueOk ) { // 3
        if ( useRegexp ) {
          a.trace("regexp mismatch " + av, domNode, null);
        } else {
          a.trace("matchAttributes mismatch '" + av + "'", domNode,null);
        }
        return null;
      }
    } // attrValue

    if ( a.variable ) {
      if ( av != "" ) { // 1
        thishits.break(a.variable, domNode, this);
        thishits.appendString( a.variable, av, domNode, this );
        a.trace("matchAttributes saving '"+av+"'",domNode,thishits);
      } else if ( ! a.optional ) { // not there, and 2 does not apply
        a.trace("matchAttributes. no such attribute. mismatch",
                domNode,null);
        return null;
      }
    } // variable
  }
  return thishits;
}; // matchAttributes

// Store a #text DOM node under varname, then go on matching this same
// pattern node at the DOM node's next sibling (through matchHere, with the
// ingroup flag passed along).
// The greedy argument is not acted upon here, it only shows up in the trace.
// Returns the text hit followed by the hits of the rest, or null if the
// rest did not match.
Xpattern.prototype.collectTextNode = function ( domNode, varname, ingroup, greedy ) {
    var textHits = new XpatternHit();
    textHits.appendValue(varname, domNode, this);
    this.trace("collectTextNode: collected (" + greedy + ") text into $" + varname,
               domNode, textHits);

    // Remember end marker.
    // Still not quite sure about this, but fixes a double-tail problem
    // Heikki 11-Oct-2012
    textHits.lastDomNode = domNode;

    var restHits = this.matchHere( domNode.nextSibling, varname, ingroup);
    this.trace("collectTextNode: after (greedy) text recursion", domNode, restHits);
    if (restHits != null) {
        textHits.appendHits(restHits, true);
        return textHits;
    }
    return null;
};



// Collect the text of domNode, of everything below it, and of all the
// siblings following it, into varname.
// Only #text and #cdata-section nodes contribute a value; all other nodes
// are just walked through (first the children, then the next sibling).
// Returns null when there is no domNode, an empty hit when varname is ""
// (nobody wants the data, so the tree is not walked), and otherwise the
// collected hits.
Xpattern.prototype.collectRest = function ( domNode, varname ) {
  if (domNode == null) {
    return null;
  }
  if (varname == "" ) {
    // no need to follow if we don't want the data
    this.trace("collectRest: not bothering, no $variable", domNode, null);
    return new XpatternHit(); // success, albeit empty
  }
  this.trace("collectRest into $" + varname, domNode, null);

  var collected = new XpatternHit();
  var kind = domNode.nodeName;
  var hasText = ( kind == "#text" || kind == "#cdata-section" );
  if ( hasText ) {
    collected.appendValue(varname, domNode, this);
  }
  // Either recursion may come back with null (nothing there), which is
  // passed on to appendHits as it is.
  var below = this.collectRest( domNode.firstChild, varname );
  collected.appendHits(below, false);
  var after = this.collectRest( domNode.nextSibling, varname );
  collected.appendHits(after, false);
  return collected;
}; // collectRest

// Check whether this node and all its siblings, to the end of the chain,
// are optional.
// A group node, "(" or "|", that is not marked optional itself still counts
// as optional if its contents are allOptional, so that
//   ( B? : A? ) counts as optional. Useful in cases like
//   DIV { ( B? : A? ) }
// A node that is not optional is also accepted if the chain starting at its
// nextAlternative is allOptional: an OR-group is optional if any of its
// members is. In practice, only the last one makes sense.
// Returns true or false.
Xpattern.prototype.allOptional = function ( ) {
    this.trace("allOptional check " );
    var verdict = true;
    for ( var node = this; node; node = node.nextSibling ) {
        var isGroup = ( node.nodeType == "(" || node.nodeType == "|" );
        var nodeOk;
        // firstChild should always be there for a group node
        if ( ! node.optional && isGroup && node.firstChild ) {
            if ( node.firstChild.allOptional() ) {
                nodeOk = true;
                node.trace("allOptional: found a group, but it was alloptional " );
            } else {
                nodeOk = false;
                node.trace("allOptional: found a group, and it was not alloptional " );
            }
        } else { // regular node
            nodeOk = node.optional ? true : false;
            if ( ! nodeOk ) {
                node.trace("allOptional: not optional ");
            }
        }
        if ( ! nodeOk &&
             node.nextAlternative &&
             node.nextAlternative.allOptional() ) {
            node.trace("allOptional: but found an optional alternative " );
            nodeOk = true;
        }
        if ( ! nodeOk ) {
            // one mandatory node settles it, no need to look any further
            verdict = false;
            break;
        }
    }
    this.trace("allOptional done: " + verdict);
    return verdict;
};


///////////////////////
// Unit tests
// Placeholder unit test for XpatternHit: there is nothing to run, it just
// dumps a notice saying so. Always returns true.
XpatternHit.prototype.unitTest = function ( ) {
  var notice = "unitTest not (yet?) implemented for XpatternHit \n";
  dump(notice);
  return true;
};
// Placeholder unit test for Xpattern: it only dumps a pointer to the file
// that holds the real tests. Always returns true.
Xpattern.prototype.unitTest = function ( ) {
  var notice = "The unit tests for xpattern are in a separate file, " +
    "util/xpatternTester.js\n";
  dump(notice);
  return true;
};

