var EXPORTED_SYMBOLS = ["XpatternMaker"];

// Produce an xpattern that matches a given set of DOM nodes.
//
// This is related to the job of matching a pattern, except that
// the "pattern" we match is just a selection of nodes, and the
// result is a path through the DOM tree that covers all of those
// nodes, expressed as an Xpattern node tree.

// TODO: Better handling of repeating nodes!
// for example, if we have marked a B node as a target, and told
// that it can repeat, and the dom tree has a sequence of B's
// followed by a A, try not to create a pattern like
//  B + : B : B : B : A
// Instead, figure that those B's are repeats and should be covered
// by the B+, and come up with
//  B+ : A
// Even this is not always right, but it is a better guess
// Most important for repeating groups!

// TODO: Regrouping patterns like
//  ... : TABLE { TR { ( TD : TD : TD ) + } } : ...
// into
//  ... : TABLE { ( TR { TD : TD : TD } ) + } : ...
// if that produces better matches on the page
// But not (necessarily?) into
// ... ( TABLE {  TR { TD : TD : TD } } ) + : ...
// as the most likely case is that the table row repeats.
// Do not assume what is most likely, try them all.
// Perhaps this should be in a separate module?

Components.utils.import('resource://indexdata/util/xpattern.js');
Components.utils.import('resource://indexdata/util/xpatternText.js');
Components.utils.import('resource://indexdata/util/xmlHelper.js');
Components.utils.import('resource://indexdata/util/logging.js');
var logger = logging.getLogger();


// Constructor.
//  startnode - the DOM node marking the area to search, often /html/body
// A fresh maker has no targets and no groups, and no retry pending.
function XpatternMaker( startnode ) {
    // Set by _followmatch when it has added a target and wants
    // _findmatch to run the same scan again
    this.needretry = false;
    // Group definitions, stored under their group id (see addGroup)
    this.groups = [];
    // Target records, in the order they were added (see addNode)
    this.targets = [];
    this.startnode = startnode;
}

// Add a node to be included in the pattern.
//  node        - the DOM node the pattern has to cover
//  variable    - variable to set on the pattern node ("none" or empty
//                means no variable, see _setxpfields)
//  cardinality - cardinality to set on the pattern node, if any
//  attrs       - attribute text, later parsed by XpatternTextParser
//  group       - id of the group the target belongs to
// The new target starts out as not seen.
XpatternMaker.prototype.addNode =
    function (node, variable, cardinality, attrs, group ) {
      // Declared with var: this record used to leak into a global
      // 'targ', which also fails outright in strict mode.
      var targ = { "node": node,
                   "variable": variable,
                   "cardinality": cardinality,
                   "attr" : attrs,
                   "group" : group,
                   "seen" : false };
      this.targets.push(targ);
};

// Add a group definition.
//  node        - stored with the definition (not read in this file)
//  variable    - variable to set on the group's pattern node
//  cardinality - cardinality to set on the group's pattern node
//  attrs       - attribute text for the group's pattern node
//  group       - id of the group; the definition is stored under it,
//                replacing any earlier definition with the same id
//  grouptype   - "|" turns the group into an or-bag (see _followmatch)
//  parent      - id of the enclosing group; _parentgroup documents 0
//                as the value for "not nested"
XpatternMaker.prototype.addGroup =
    function (node, variable, cardinality, attrs, group, grouptype, parent ) {
      // Declared with var: this record used to leak into a global
      // 'grp', which also fails outright in strict mode.
      var grp = { "node": node,
                  "variable": variable,
                  "cardinality": cardinality,
                  "attr" : attrs,
                  "group" : group,
                  "grouptype": grouptype,
                  "parent" : parent };
      this.groups[group] = grp;
};

// Mark every target as not yet seen, before a new scan of the DOM.
XpatternMaker.prototype._clearseen = function () {
    // Plain indexed loop with a local counter: for..in with an
    // undeclared variable leaked a global and would also pick up
    // any enumerable property added to Array.prototype.
    for ( var i = 0; i < this.targets.length; i++ ) {
        this.targets[i].seen = false;
    }
};

// Check that all targets have been seen.
//  ingroup - if set (truthy), only the targets of that group count;
//            otherwise every target counts
// Returns true when no relevant target is still unseen, which
// includes the case of having no targets at all.
XpatternMaker.prototype._allseen = function (ingroup) {
  // Indexed loop with local variables; the old for..in loop used an
  // undeclared loop variable that leaked into the global scope.
  for ( var i = 0; i < this.targets.length; i++ ) {
    var tg = this.targets[i];
    if ( tg.seen )
      continue;
    // Loose comparison kept on purpose, so that a group id given as
    // a string still matches the same id given as a number.
    if ( !ingroup || tg.group == ingroup )
      return false;
  }
  return true;
};

// Look up the group that encloses group g.
// Gives 0 when g is 0 (not in a group at all) or when no definition
// is stored for g; otherwise gives whatever 'parent' value the
// definition of g carries.
XpatternMaker.prototype._parentgroup = function (g) {
  var def = ( g != 0 ) ? this.groups[g] : null;
  if ( !def ) {
    return 0;
  }
  return def.parent;
};


// Dump all targets and groups for debugging.
// Writes one line per target (index, node name, variable, cardinality
// and group) and one line per group definition (id, parent, variable
// and cardinality) through dump().
XpatternMaker.prototype.dumpTargets = function () {
    // Loop variables are local now; they used to leak as globals.
    for ( var i = 0; i < this.targets.length; i++ ) {
      var tg = this.targets[i];
      var nam = "";
      if (tg.node) {
        nam = tg.node.localName || tg.node.nodeName;
      }
      dump("xpmk_dump: target " + i + ":" + nam + " " +
          "$" + tg.variable + " " + tg.cardinality +
          " (" + tg.group + ")\n" );
    }
    // The groups are stored under their group id, so the list may have
    // holes; for..in visits only the ids that are actually present.
    for ( var group in this.groups ) {
      if ( !Object.prototype.hasOwnProperty.call(this.groups, group) ) {
        continue; // skip anything inherited from the prototype chain
      }
      var gp = this.groups[group];
      dump("xpmk_dump: group " + group + ":" + gp.parent +
          " $" + gp.variable + " " + gp.cardinality + "\n");
    }
};


// Generate the pattern.
// Resets the seen flags of all targets and then searches from the
// start node. Returns whatever _findmatch returns: the root of the
// generated Xpattern tree, or null if no pattern could be made.
XpatternMaker.prototype.getPattern = function () {
    // This used to read "this._clearseen;" - a bare property access
    // that never called the function, so the flags were not reset.
    this._clearseen();
    //dump("xpmk: Starting to make a pattern\n");
    //this.dumpTargets();
    var res = this._findmatch(this.startnode);
    //dump("xpmk: Done\n");
    return res;
};

// Copy the optional fields onto an xpattern node.
//  xp - the xpattern node to fill in
//  v  - variable; skipped when empty or the literal "none"
//  c  - cardinality; skipped when empty
//  a  - attribute text; when present it is handed to an
//       XpatternTextParser, whose parseAttr fills in xp
XpatternMaker.prototype._setxpfields = function (xp, v,c,a) {
    var hasvariable = Boolean(v) && v != "none";
    if ( hasvariable ) {
      xp.setVariable(v);
    }
    if ( c ) {
      xp.setCardinality(c);
    }
    if ( !a ) {
      return;
    }
    new XpatternTextParser(a).parseAttr(xp);
};

// Build the xpattern node for one dom node.
//  nType - the type to give the pattern node
//  tg    - the target record for the dom node, or null/undefined if
//          the node is not a target
// A target is marked as seen, and its variable, cardinality and
// attributes are copied onto the new pattern node.
XpatternMaker.prototype._nodepattern = function (nType, tg) {
    var pat = new Xpattern();
    pat.setType(nType);
    if ( !tg ) {
      return pat; // plain path node, nothing more to fill in
    }
    tg.seen = true;
    this._setxpfields(pat, tg.variable, tg.cardinality, tg.attr);
    return pat;
};

// Build the xpattern node that represents one group.
// The node always gets the type "(" - an or-bag needs a containing
// group too. If a definition is stored for groupno, its variable,
// cardinality and attributes are copied onto the node; without a
// definition (which should not happen) the bare "(" node is returned.
XpatternMaker.prototype._grouppattern = function (groupno) {
    var gxp = new Xpattern();
    gxp.setType( "(" );
    var def = this.groups[groupno];
    if ( def ) {
      this._setxpfields(gxp, def.variable, def.cardinality, def.attr);
    }
    return gxp;
};


// Check if a single dom node is one of the targets.
// Returns the target record for n, or null if n is not a target.
// If several targets refer to the same node, the one added last wins.
XpatternMaker.prototype._nodematch = function (n) {
    // Walk backwards so the first hit is the most recently added
    // target. The counter is local; the old for..in loop used an
    // undeclared variable that leaked into the global scope.
    for ( var i = this.targets.length - 1; i >= 0; i-- ) {
        if ( this.targets[i].node == n ) {
            return this.targets[i];
        }
    }
    return null;
};

// Scan through the dom tree, looking for the target nodes.
// Nodes that are not targets are searched depth first: children
// before following siblings. At the first target found, a pattern is
// built from that node with _followmatch. If that does not cover all
// targets, the same is tried from one level higher up in the dom
// tree, or from the same place again when _followmatch has added a
// target and asked for a retry.
// Returns the root of the pattern, or null when n is null, when no
// target is found, or when the climb reaches the parent of the start
// node.
XpatternMaker.prototype._findmatch = function (n) {
    if (n == null)
        return null;
    var tg = this._nodematch(n);
    if (tg == null) { // node didn't match
        // cp is local now; it used to leak into the global scope
        var cp = this._findmatch(n.firstChild);
        if (cp != null)  { // but its children did
            return cp;
        }
        // or its siblings did - null or not
        return this._findmatch(n.nextSibling);
    }

    // this node matched
    //dump("xpmk_fi: node " + n.nodeName + " $" + tg.variable +
    //     " matched in _findmatch \n");
    var retrycount = 0;
    var sp = null;
    do { // Check that all target nodes were found here.
         // If not, try one level higher up, or same place, with
         // newly added targets (if group failed)
        //logger.debug("xpmk_fi: Checking node at ===== (retry=" + this.needretry + ")" );
        //xmlHelper.dumpxml(n);
        this.needretry = false;
        this._clearseen();
        // spa is local now; it used to leak into the global scope
        var spa = this._followmatch(n,0);
        sp = spa[0];
        //logger.debug("xpmk_fi: after follow: retry=" + this.needretry +
        //    " spa[0]=" + spa[0] + " spa[1]=" + spa[1] );
        if ( !this.needretry ) {
          n = n.parentNode;
        }
        // Never climb out of the area given to the constructor
        if ( n == this.startnode.parentNode ) {
            return null;
        }
        retrycount++;
    } while ( this.needretry || !this._allseen() );
    if (sp != null) {
        logger.debug("xpmk_fi: Found a pattern after " + (retrycount-1) +
            " retries");
        return sp;
    }
    return null;
};



// This is the old version of _followmatch, without the groups.
// It is left here as documentation, as the flow can be seen so much
// easier here.
// Returns an xpattern node for n when n itself, something under it,
// or something after it is a target; otherwise returns null.
XpatternMaker.prototype._TRIVIAL_followmatch = function (n) {
  if (n == null) {
    return null;
  }
  while ( n.nodeName == "#text" || n.nodeName == "#comment"  ){
        // Could be done with recursion, but on long pages with many
        // such nodes, may lead to "too much recursion".
    n = n.nextSibling;
    if ( n == null ) {
      return null;
    }
  }
    //dump("Following " + n.nodeName + "\n");
  // Recurse into this same simple version. The grouped _followmatch
  // returns [pattern, node] pairs, which are never null, so calling
  // it from here made every node look like a match.
  var sp = this._TRIVIAL_followmatch(n.nextSibling);
  var cp = this._TRIVIAL_followmatch(n.firstChild);
  var tg = this._nodematch(n);
  if ( sp != null || tg != null || cp != null ) {
    // _nodepattern takes the node type, not the dom node itself
    var xp = this._nodepattern(n.localName || n.nodeName, tg);
    xp.nextSibling = sp; // null or not, doesn't matter
    xp.firstChild = cp;
    return xp;
  }
  return null;
};



// Helper to dump the matches we work with.
// Only used for debugging.
//  prefix  - tag for the dump line
//  dumpstr - description of the node being followed
//  match   - a [pattern, follow-up dom node] pair as returned by
//            _followmatch; may be null
// The actual logging line is commented out, so at the moment this
// only builds the strings.
XpatternMaker.prototype._dumpmatch = function(prefix, dumpstr, match ) {
  var xpstr = "(null)";
  var folstr = "(null)";
  if ( !match ) {
    xpstr = "(null match)";
    folstr = "(null match)";
  } else {
    // Only look inside the pair when there is one; this used to
    // index into a null match and throw.
    if ( match[0] ) {
      xpstr = '"' + match[0].dumpString(-1) + '"' ;
    }
    if ( match[1] ) {
      var n = match[1]; // local now, used to leak into a global 'n'
      folstr = n.localName || n.nodeName;
      folstr += " " + n.textContent;
    }
  }
  //logger.debug ( prefix + " " + dumpstr + " xp: " + xpstr + " fo: " + folstr);
};
var followcounter =0; // counts _followmatch calls; only used to tell debug dump lines apart


// Recurse deeper into dom, collecting a path we've been through.
// In case we meet a target node, return that path.
// Take two, that handles groups.
//  n       - the dom node to continue from; may be null
//  ingroup - id of the group we are currently inside, 0 for none
// Returns a pair [ pattern, domnode ]:
//  pattern - the path as an Xpattern node, or null if nothing in,
//            under, or after n is a target
//  domnode - optional dom node to indicate where the caller should
//            continue matching (the node at which the group ended)
// When a group could not be completed under n, the parent of n is
// added as a target of that group, needretry is set, and the result
// is [ null, null ]; _findmatch then scans again.
XpatternMaker.prototype._followmatch = function (n, ingroup) {
    while ( (n != null) &&
            (n.nodeName == "#text" || n.nodeName == "#comment"  )) {
        // Could be done with recursion, but on long pages with many
        // such nodes, may lead to "too much recursion".
        n = n.nextSibling;
    }
    if ( n == null ) {
      //logger.debug("xpmk_fo 0: null ");
      return [ null, null ];
    }
    if ( this._allseen() ) {
      //logger.debug("xpmk_fo 0: All targets seen, stop hunting");
      return [ null, null ];
      // TODO - seems not to be happening ??!!
    }
    // TODO: Can we check that allsee(ingroup) ??
    // Not yet, optimize later. Watch out with end-group
    // conditions!

    var tg = this._nodematch(n);

    followcounter++; // set up debug tracing

    var dumpstr = "(c=" + followcounter +")";
    dumpstr += " i:" + ingroup + " " + n.nodeName +" ";
    if ( tg && tg.variable ) {
        dumpstr += "$" + tg.variable;
    }
    if ( this.needretry ) {
      //logger.debug("xpmk_fo: Not following. need a retry anyway " + dumpstr );
      return [ null,null ];
    }
    //logger.debug("xpmk_fo: Following " + dumpstr );

    var newgroup = ingroup;
    if ( tg ) { // found a target node
      // Climb from the target's own group towards the top, stopping at
      // the group that sits directly inside ingroup (or at top level).
      newgroup = tg.group;
      var parent = newgroup;
      while ( parent != ingroup && parent != 0 ) {
        newgroup = parent;
        parent = this._parentgroup(newgroup);
      }
      //logger.debug("xpmk_fo A: target has new group " + newgroup );
      dumpstr = "n:" + newgroup + " " + dumpstr;
      if ( ingroup &&
           ( newgroup != ingroup ) &&  // group ends
           ( this._parentgroup(newgroup) != ingroup ) // not nested
         ) {
        // group ends here
        //logger.debug("xpmk_fo: End of (any) group " + dumpstr );
        return [ null, n ];
        // return this dom node, n, to the caller, who will
        // then match from here on to get the stuff after the group.
      }
    }

    if ( newgroup != ingroup ) {
      // Create a group node
      var gxp = this._grouppattern(newgroup);
      //logger.debug("xpmk_fo Ga: about to recurse into group " + dumpstr );
      var gpa = this._followmatch(n, newgroup);
      //this._dumpmatch("xpmk_fo Gb:group ", dumpstr, gpa);
      gxp.firstChild = gpa[0];
      //logger.debug("xpmk_fo Gc: about to follow up: " + dumpstr + "\n");
      var fpa = this._followmatch( gpa[1], ingroup );
      //this._dumpmatch("xpmk_fo Gd: follow up", dumpstr, fpa );
      gxp.nextSibling = fpa[0];
      //logger.debug("xpmk_fo Gd: produced group node " + gxp.dumpString(-1) + "\n");
      if ( ! this._allseen(newgroup) ) {
        //logger.debug("xpmk_fo Ge: Did not see the whole group, no good");
        // This node was not enough to contain all targets.
        // Mark the parent node as a target, and signal that we need to retry
        if (n.parentNode) {
          this.addNode(n.parentNode, "","","",newgroup);
          this.needretry = true;
          //logger.debug("xpmk_fo Gf: Added retry for parent " + n.parentNode.nodeName );
          return [ null, null ];
        }
      }
      // A target may name a group that has no definition. _grouppattern
      // copes with that, so do not fall over here either.
      var grp = this.groups[newgroup];
      if ( grp && grp.grouptype == "|" ) { // convert into a or-bag
        this._makeorbag(gxp);
        //logger.debug("xpmk_fo: Converted the group into a or-list");
      }
      return [ gxp, fpa[1] ];
    }

    // No need to look at needretry again here: it was checked above,
    // and nothing on the way to this point can set it.

    // Check the matches for children and siblings
    //logger.debug("xpmk_fo: About to recurse into firstchild " + dumpstr);
    var cpa = this._followmatch(n.firstChild, newgroup);
    this._dumpmatch("xpmk_fo Cb:child ", dumpstr, cpa );

    //logger.debug("xpmk_fo: About to recurse into nextSibling " + dumpstr);
    var spa = this._followmatch(n.nextSibling, newgroup);
    this._dumpmatch("xpmk_fo Cc:sibling", dumpstr, spa );

    // Create a xpattern node for this dom node ...
    // (only now, so that this target counts as seen only after its
    // children and siblings have been followed)
    var nn = n.localName || n.nodeName;
    var xp = this._nodepattern(nn,tg);
    xp.firstChild = cpa[0]; // ... and the rest of the pattern
    xp.nextSibling = spa[0]; // null or not, doesn't matter

    if ( tg != null || spa[0] != null || cpa[0] != null ) {
      //logger.debug("xpmk_fo C: group continues. " + dumpstr );
      // The follow-up node may well be null, if we reach the end of
      // the sibling chain
      return [ xp, spa[1] ];
    }

    // Could not find anything in, under, or after this node.
    // return a miss.
    //logger.debug("xpmk_fo Cf: " + dumpstr + " returning a miss");
    return [ null, null ]; // nothing found in this node

};

// Helper for _followmatch: turn the children of a group node into
// an or-bag.
// Puts a "|" node between the group node and its children, and
// relinks the chain of children from nextSibling links into
// nextAlternative links. A child without variable, attributes or
// children of its own, whose type has already been met, is an
// uninteresting duplicate and is dropped from the chain.
XpatternMaker.prototype._makeorbag = function (gxp) {
    var op = new Xpattern();  // the or-node inside the group
    op.setType( "|" );
    op.firstChild = gxp.firstChild;
    gxp.firstChild = op;
    var p = op.firstChild;
    var seen = {};
    var prev = null;
    // p is null for a group that ended up without children
    while ( p && p.nextSibling ) {
      var typ = p.getType();
      if ( p.variable || p.attributes || p.firstChild ||
           typ=="|" || typ=="(" ||
           ! seen[typ] || !prev ) { // interesting node
        p.nextAlternative = p.nextSibling;
        p.nextSibling = null;
        seen[typ] = 1;
        prev = p;
        p = p.nextAlternative;
      } else { // skip uninteresting duplicate node
        p = p.nextSibling;
        prev.nextAlternative = p;
      }
    }
};