// Names made available to code that imports this file as a module.
var EXPORTED_SYMBOLS = ["XpatternTextParser"];

// Presumably provides the Xpattern node class instantiated in _newxpnode
// (Xpattern is not defined in this file) - TODO confirm.
Components.utils.import('resource://indexdata/util/xpattern.js');

// Debug switch: when true, the constructor dumps the pattern it was given
// and _getToken dumps every token it scans.
var xptp_debug=false;
//xptp_debug=true;

Components.utils.import('resource://indexdata/util/logging.js');
// NOTE(review): logger is not referenced anywhere else in this file as seen
// here; it may be unused.
var logger = logging.getLogger();

/* 

Maps the following grammar into Xpattern nodes

<pattern> ::= <orlist> | <orlist> ':' <pattern>

<orlist> ::= <node> | <node> '|' <orlist>

<node> ::= <elementname> <opt-cardinality> <opt-assignment> <opt-modifierlist>
           <opt-attrlist> <opt-childnodes> <opt-negation> |
      '(' <pattern> ')' <opt-cardinality> <opt-assignment>

<elementname> ::= <name> | "any" | "#text" | "#comment"

<opt-attrlist> ::= "" | "[" <attrlist> "]"

<attrlist> ::= <attr> | <attr> "," <attrlist>

<attr> ::= "@" <name> <opt-relation> <opt-assignment> | <regexp>

<opt-relation> ::= "" | <compare> <value>

<compare> ::= "=" | "~"

<opt-modifierlist> ::= "" | <modifier> <opt-modifierlist>

<modifier> ::= "-" <name>      e.g. "-html"; the name must be accepted by Xpattern.setModifier
TODO - List all valid modifier names here

<opt-childnodes> ::= "" | "{" <pattern> "}"

<opt-assignment> ::= "" | "$" <name>

<opt-cardinality> ::= "" | <plain-cardinality> | <plain-cardinality> <nongreedy>

<plain-cardinality> ::=  "?" | "*" | "+"

<nongreedy> ::= "?"

<regexp> ::= "/" STRING "/"

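<value> ::= INTEGER | "\"" STRING "\""
            NOTE: the parser currently accepts only a quoted STRING after "=",
            and a quoted STRING or a <regexp> after "~"; an INTEGER is
            tokenized but rejected as an attribute value.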

<opt-negation> ::= "" | "!" <node>

<name> ::= HTML-friendly name: a run of letters (a-z, A-Z), digits, "_", "#" and "-" that does not start with a digit or with "-". Match is case-insensitive.


*/


// Constructor for the text parser.
//   text: the pattern string to parse. Any falsy value (undefined, null,
//         "") is treated as an empty pattern.
// Initialises the scanner state, the token-kind names and the character
// classes used by the tokenizer.
function XpatternTextParser(text) {
  this.text = text || "";
  if (xptp_debug) {
    dump("Starting to parse '" + text + "'\n");
  }

  // Scanner / parser state
  this.offset = 0;             // index of the next unread character
  this.lastoffset = 0;         // where the current token starts; used for
                               // error positions and node string offsets
  this.offsetcorrection = -1;  // -1: do not store string offsets in nodes
  this.errormsg = "";
  this.char = null;
  this.token = null;
  this.tokenValue = null;

  // Token kinds for the multi-character tokens. Each constant's value is
  // its own name.
  var kinds = ["T_NUMBER", "T_STRING", "T_REGEXP", "T_NAME"];
  for (var i = 0; i < kinds.length; i++) {
    this[kinds[i]] = kinds[i];
  }

  // Character classes
  this.whitespace = "\t\n ";
  this.charTokens = "$=~@:,?*+{}[]()|!-";   // each one is a token by itself
  this.numbers = "0123456789";
  var lowercase = "abcdefghijklmnopqrstuvwxyz";
  // letters, then "_", the digits, "#" and "-"
  this.htmlChars = lowercase + lowercase.toUpperCase() +
                   "_" + this.numbers + "#-";
}

// Returns the bare message of the last error raised through _throwError
// (without the ">>HERE<<" context), or "" if none has been raised.
XpatternTextParser.prototype.getErrorMsg = function () {
    var lastMessage = this.errormsg;
    return lastMessage;
};
// Returns the offset in the pattern text at which the most recently
// scanned token starts; after an error this is where the error was
// reported.
XpatternTextParser.prototype.getErrorPos = function () {
    var position = this.lastoffset;
    return position;
};

// Asks the parser to record string offsets in the Xpattern nodes it
// creates, and sets the amount added to each recorded offset.
// The builder uses this to link highlights in the editor with hits on
// the page. A non-zero correction is needed when only a substring of the
// complete pattern is being parsed, for example when reformatting.
//   oc: the correction to add; -1 switches offset recording off.
XpatternTextParser.prototype.setOffsetCorrection = function (oc) {
    this.offsetcorrection = oc;
};

// Factory for Xpattern nodes.
// When offset recording is on (offsetcorrection is not -1), the new node
// is told where the current token starts, shifted by the correction.
XpatternTextParser.prototype._newxpnode = function () {
    var node = new Xpattern();
    if (this.offsetcorrection == -1) {
        return node;   // offsets not requested
    }
    node.setStringoffset(this.lastoffset + this.offsetcorrection);
    return node;
};

// Parses the complete pattern text and returns the root node.
// The whole input must be consumed: if a token is still pending after
// the pattern has been parsed, a "Syntax error" is raised.
XpatternTextParser.prototype.parse = function () {
  this._getToken();                 // load the first token
  var root = this._pattern();
  var leftover = this.token;
  if (leftover) {
    this._throwError("Syntax error");
  }
  return root;
};

// Parses the text as an attribute list, such as
//   @attr $var : @attr = "str"
// and adds the attributes to the given node xp. Returns nothing.
XpatternTextParser.prototype.parseAttr = function (xp) {
  this._getToken();     // load the first token
  this._attrList(xp);
};

// <pattern> ::= <orlist> | <orlist> ':' <pattern>
// Returns the first node of the sequence; the rest of the sequence is
// chained through nextSibling.
XpatternTextParser.prototype._pattern = function () {
  var head = this._orlist();
  if (this.token !== ':') {
    return head;
  }
  this._getToken();   // skip the ':'
  // Link directly instead of using the addAfter method, because that
  // breaks our chain.
  var rest = this._pattern();
  head.nextSibling = rest;
  return head;
};

// <orlist> ::= <node> | <node> '|' <orlist>
// Returns a '|' node whose firstChild carries the alternatives, or just
// the node itself when there is no alternative at all.
XpatternTextParser.prototype._orlist = function () {
  // Created before the first node is parsed, so that it gets the string
  // offset of the start of the list.
  var alternation = this._newxpnode();
  alternation.setType('|');
  alternation.firstChild = this._node();

  var sawAlternative = false;
  while (this.token === '|') {
    this._getToken();   // skip the '|'
    // Not using the addAfter method, because it breaks our chain.
    var alternative = this._node();
    alternation.firstChild.appendAlternative(alternative);
    sawAlternative = true;
  }

  // No need to make an OR of one node
  return sawAlternative ? alternation : alternation.firstChild;
}; // _orlist

// <node> ::= <elementname> <opt-cardinality> <opt-assignment>
//            <opt-modifierlist> <opt-attrlist> <opt-childnodes>
//            <opt-negation>
//          | '(' <pattern> ')' <opt-cardinality> <opt-assignment>
// Returns the newly built node.
XpatternTextParser.prototype._node = function () {
  var xp = this._newxpnode();

  if (this.token !== '(') {
    // Plain node: element name followed by its optional parts, in the
    // fixed order of the grammar.
    this._elementName(xp);
    this._optCardinality(xp);
    this._optAssignment(xp);
    this._optModifierList(xp);
    this._optAttrList(xp);
    this._optChildNodes(xp);
    this._optNegation(xp);
    return xp;
  }

  // ( group )
  this._getToken();
  xp.setType('(');
  xp.appendChild(this._pattern());
  if (this.token !== ')') {
    this._throwError("Expected ')'");
  }
  this._getToken();
  this._optCardinality(xp);
  this._optAssignment(xp);
  return xp;
}; // _node

// <opt-negation> ::= "" | "!" <node>
// If a '!' follows, parses the node after it and sets it as the
// negation of xp.
XpatternTextParser.prototype._optNegation = function (xp) {
    if (this.token !== '!') {
        return;
    }
    this._getToken();   // skip the '!'
    var negated = this._node();
    xp.setNegation(negated);
}; // optNegation
    

// Reports a parse error. Never returns normally.
// The bare msg is saved in errormsg (see getErrorMsg). The thrown Error
// carries msg followed by an excerpt of the pattern text, in which
// " >>HERE<< " marks the start of the current token.
XpatternTextParser.prototype._throwError = function (msg) {
  this.errormsg = msg;   // save for future use

  var pos = this.lastoffset;
  var excerpt = this.text.substring(0, pos) +
                " >>HERE<< " +
                this.text.substring(pos);

  // Trim to a decent length around the error: keep at most 25
  // characters before the marker, and at most 50 characters in all.
  if (pos > 25) {
    excerpt = "..." + excerpt.substring(pos - 25);
  }
  if (excerpt.length > 50) {
    excerpt = excerpt.substring(0, 50) + "...";
  }

  throw new Error(msg + "  " + excerpt);
};

// <elementname>: the current token must be a name. It becomes the type
// of xp, and the parser moves on to the next token.
XpatternTextParser.prototype._elementName = function (xp) {
  var isName = (this.token == this.T_NAME);
  if (!isName) {
    this._throwError("Expected name ");
  }
  xp.setType(this.tokenValue);
  this._getToken();
};

// <opt-cardinality> ::= "" | <plain-cardinality>
//                          | <plain-cardinality> <nongreedy>
//   '+' : repeatable, not optional
//   '?' : optional, not repeatable
//   '*' : repeatable and optional
// A '?' directly after a node that is repeatable or optional makes it
// non-greedy.
XpatternTextParser.prototype._optCardinality = function (xp) {
  switch (this.token) {
    case '+':
      xp.repeatable = true;
      xp.optional = false;
      this._getToken();
      break;
    case '?':
      xp.repeatable = false;
      xp.optional = true;
      this._getToken();
      break;
    case '*':
      xp.repeatable = true;
      xp.optional = true;
      this._getToken();
      break;
    default:
      break;   // no cardinality given; leave xp as it is
  }

  var hasCardinality = xp.repeatable || xp.optional;
  if (hasCardinality && this.token === '?') {
    xp.greedy = false;
    this._getToken();
  }
};

// <opt-modifierlist> ::= "" | <modifier> <opt-modifierlist>
// Each modifier is a hyphen followed by a name. The name is handed to
// xp.setModifier, and a false result from that is reported as a bad
// modifier.
XpatternTextParser.prototype._optModifierList = function (xp) {
  for (;;) {
    if (this.token !== "-") {
      break;
    }
    this._getToken();   // skip the hyphen
    if (this.token != this.T_NAME) {
      this._throwError("Expected modifier name");
    }
    var modifier = this.tokenValue;
    this._getToken();
    var accepted = xp.setModifier(modifier);
    if (!accepted) {
      this._throwError("Bad modifier " + modifier);
    }
  }
};

// <opt-attrlist> ::= "" | "[" <attrlist> "]"
// Parses a bracketed attribute list, if there is one, into xp.
XpatternTextParser.prototype._optAttrList = function (xp) {
  if (this.token !== '[') {
    return;   // no attribute list
  }
  this._getToken();   // skip the '['
  this._attrList(xp);
  if (this.token !== ']') {
    this._throwError("Expected ']'");
  }
  this._getToken();   // skip the ']'
};

// <attrlist> ::= <attr> | <attr> "," <attrlist>
// <attr>     ::= "@" <name> <opt-relation> <opt-assignment> | <regexp>
// Parses one or more attributes and adds each one to xp.
XpatternTextParser.prototype._attrList = function (xp) {
  var attr;
  var more;
  do {
    if (this.token === '@') {
      // Named attribute: @name, optionally with a relation and a variable
      this._getToken();
      if (this.token != this.T_NAME) {
        this._throwError("Expected attribute name");
      }
      attr = this._newxpnode();
      attr.setType("@" + this.tokenValue);
      this._getToken();
      this._optRelation(attr);
      this._optAssignment(attr);
      xp.addAttribute(attr);
    } else if (this.token == this.T_REGEXP) {
      // Bare /regexp/
      attr = this._newxpnode();
      attr.setType("/");
      attr.setAttrValue(this.tokenValue);
      xp.addAttribute(attr);
      this._getToken();
    } else {
      this._throwError("Expected '@' or '/' in beginning of an attribute");
    }

    // The syntax says to use a comma, but historically colon was
    // accepted too. Remove the colon in some later version.
    more = (this.token === ":" || this.token === ',');
    if (more) {
      this._getToken();   // skip the separator and go for the next one
    }
  } while (more);
};

// <opt-relation> ::= "" | <compare> <value>
//   '=' must be followed by a string
//   '~' must be followed by a string or a /regexp/
// Stores the value and the relation in the attribute node xp.
XpatternTextParser.prototype._optRelation = function (xp) {
  var relation = this.token;
  if (relation !== '=' && relation !== '~') {
    return;   // no relation given
  }
  this._getToken();   // skip the comparison operator

  var isString = (this.token == this.T_STRING);
  if (relation === '=') {
    if (!isString) {
      this._throwError("Expected string for attribute value");
    }
  } else if (!isString && this.token != this.T_REGEXP) {
    this._throwError("Expected string or regex for attribute value");
  }

  xp.setAttrValue(this.tokenValue);
  xp.setRelationValue(relation);
  this._getToken();
};


// <opt-childnodes> ::= "" | "{" <pattern> "}"
// Parses a braced child pattern, if there is one, and appends it to xp.
XpatternTextParser.prototype._optChildNodes = function (xp) {
  if (this.token !== '{') {
    return;   // no children
  }
  this._getToken();   // skip the '{'
  xp.appendChild(this._pattern());
  if (this.token !== '}') {
    this._throwError("Expected '}'");
  }
  this._getToken();   // skip the '}'
};

// <opt-assignment> ::= "" | "$" <name>
// If a '$' follows, the name after it is set as the variable of xp.
XpatternTextParser.prototype._optAssignment = function (xp) {
  if (this.token !== '$') {
    return;   // no variable
  }
  this._getToken();   // skip the '$'
  if (this.token != this.T_NAME) {
    this._throwError("Expected variable");
  }
  xp.setVariable(this.tokenValue);
  this._getToken();
};

// The tokenizer: scans the next token from the text.
// Sets this.token to the token kind (the character itself for the
// single-character tokens, otherwise one of the T_ constants) and
// this.tokenValue to its text ("" for single-character tokens).
// this.lastoffset is set to where the token starts.
// Returns the token kind, or null at the end of the text.
XpatternTextParser.prototype._getToken = function () {
  this.tokenValue = "";
  this.token = null;

  // Skip leading whitespace
  while (this._isSpace()) {
    this._getChar();
  }
  this.lastoffset = this.offset;

  if (this._end()) {
    return null;
  }

  var first = this.text.charAt(this.offset);
  if (this._isMember(this.charTokens)) {
    // Single-character token
    this.token = this._getChar();
  } else if (first == "\"") {
    // "quoted string"
    this._parseString();
    this.token = this.T_STRING;
  } else if (first == "/") {
    // /regexp/
    this._parseString("/");
    this.token = this.T_REGEXP;
  } else if (this._isNumber()) {
    this._parseKind(this.numbers);
    this.token = this.T_NUMBER;
  } else if (this._isName()) {
    this._parseKind(this.htmlChars);
    this.token = this.T_NAME;
  } else {
    this._throwError("Syntax Error");
  }

  if (xptp_debug) {
    dump("xptp: " + this.token +  " '" + this.tokenValue + "'\n");
  }
  return this.token;
};

// Scans a delimited string that starts at the current offset, and
// appends its contents (without the delimiters) to this.tokenValue.
//   quote: the closing delimiter. Defaults to a double quote; the
//          tokenizer passes "/" for a /regexp/.
// There is no escape mechanism: the string ends at the first delimiter.
XpatternTextParser.prototype._parseString = function (quote) {
  var closer = (quote == undefined) ? "\"" : quote;

  this._getChar();   // the opening delimiter
  while (!(this._end() || this._isMember(closer))) {
    this.tokenValue += this._getChar();
  }
  if (!this._isMember(closer)) {
    // ran out of text before the closing delimiter
    this._throwError("Unterminated string");
  }
  this._getChar();   // the closing delimiter
};

// Collects into this.tokenValue the longest run of characters, starting
// at the current offset, that all belong to the given set.
XpatternTextParser.prototype._parseKind = function (set) {
  this.tokenValue = "";
  for (;;) {
    if (this._end() || !this._isMember(set)) {
      break;
    }
    this.tokenValue += this._getChar();
  }
};

// Tells if the character at the current offset is one of the characters
// in the string set. Returns 1 if so, and 0 if not or when at the end
// of the text.
XpatternTextParser.prototype._isMember = function (set) {
  if (this._end()) {
    return 0;
  }
  var current = this.text.charAt(this.offset);
  return (set.indexOf(current) >= 0) ? 1 : 0;
};

// Returns the character at the current offset and moves the offset past
// it. At the end of the text returns null and does not move.
XpatternTextParser.prototype._getChar = function () {
  if (this._end()) {
    return null;
  }
  var current = this.text.charAt(this.offset);
  this.offset++;
  return current;
};

// Tells (via _isMember) if the current character is whitespace.
XpatternTextParser.prototype._isSpace = function () {
  var blanks = this.whitespace;
  return this._isMember(blanks);
};

// Tells (via _isMember) if the current character is a digit.
XpatternTextParser.prototype._isNumber = function () {
  var digits = this.numbers;
  return this._isMember(digits);
};

// Tells (via _isMember) if the current character may be part of a name.
XpatternTextParser.prototype._isName = function () {
  var nameChars = this.htmlChars;
  return this._isMember(nameChars);
};

// Tells if the whole text has been consumed, that is, if the current
// offset is at or beyond the end of the text.
XpatternTextParser.prototype._end = function () {
  var position = this.offset;
  var length = this.text.length;
  return position >= length;
};

/////////////////////////////////////////
// Unit tests

// Patterns that must parse without error, and whose parse tree must
// dump back (with dumpString) to the same string, modulo whitespace.
// unitTest reports a failing entry by its index in this array.
// The unit tests for xpattern itself also test parsing of
// all kind of acceptable strings, so this here can be rather
// short. Test catching errors instead!
var oktests = [
  "TD",
  // variable and attribute with an '=' or '~' relation
  "TD $variable [ @attr=\"value\" ] ",
  "TD $variable [ @attr ~ \"value\" ] ",
  "A [ @href = \"author\" ]",
  "A [ @href ~ \"author\" ]",
  // '-' is allowed inside a name
  "tag-with-dashes [ @href ~ \"author\" ]",
  //"TD $variable [ @attr ~ /value/ ] ", // TODO not implemented yet. Dumps wrong
  // modifiers come after the variable
  "TD $var -html",
  "TD $var -html -xml",
  // variable assigned to an attribute
  "TD $var [ @attr $attrvar ]",
];

// Patterns that must be rejected by the parser (parse() must throw).
// unitTest reports an entry that parses anyway by its index in this array.
var failtests = [
  // Tests that the parsing does not stop too early.
  // see bug 3746
  "ERROR ERROR ",
  // the variable must come before the attribute list
  "TD [ @attr=\"value\" ] $varinwrongplace ", 
  "TD [ @attr=\"value\" ] $varinwrongplace : ANY $rest",
  // the cardinality must come before the variable
  "TD $varinwrongplace +",
  "( TD : TD $missingbracket ", 
  "TR { TD $missingbrace ",
  "A $$wrong",
    // The following is a test for the error message reporting
  "TD $verylongpattern : TD $longfiller : TD $evemorestuff : " +
    "TD $stillmorestuff : TD $$errorhere : TD $lotsofstuffaftererror : " +
    "TD $evenmorefillerstuff : TD $andthatsenough ",
  "TD : $missingtag : TD",
  "TD : A [ @missingbracket : TD",
  "TD : A [ @missingvalue = ] : TD",
  "TD : A [ @ = \"foo\" ] : TD",
  "TD : A [ @attr = \"unterminatedstring ]: TD ",
  "TD : A [ /unterminatedpattern ] : TD",
  "TD : & : TD ",    // '&' is not a valid character anywhere
  "TD : A $ : TD ",  // '$' without a variable name
  "TD : A ! : TD ",  // no negation
  "TD : A | : TD ",  // two separators
  
  // regex
  "TD [ @attr~\"value\" ] $varinwrongplace ", 
  "TD [ @attr~\"value\" ] $varinwrongplace : ANY $rest",
  "TD : A [ @missingvalue ~ ] : TD",
  "TD : A [ @ ~ \"foo\" ] : TD",
  "TD : A [ @attr ~ \"unterminatedstring ]: TD ",

  // Modifiers
  "A $-badvar",  // no dashes in beginning of a name
  "A -html $var", // wrong order
  "A $var -badmodifier", // unknown modifier
  
/* The following actually manage to pass!
  "TD : # : TD ",  // that's ok, we consider # a valid char for a tag, #text
                   // could also consider it a special character...
*/
];


// Returns a copy of str with every whitespace character removed.
// Used by unitTest to compare patterns modulo whitespace.
XpatternTextParser.prototype.cleanstr = function ( str ) {
  var squeezed = str.replace(/[\s]+/g,"");
  return squeezed;
};


// Self-test for the parser.
// First parses every pattern in oktests and checks that the resulting
// tree dumps back to the same text, ignoring whitespace. Then checks
// that every pattern in failtests is rejected.
// Progress is reported with dump().
// Returns true when all tests pass; otherwise returns a string that
// describes the first failure.
XpatternTextParser.prototype.unitTest = function ( ) {
  // All locals are declared here. Assigning to undeclared names (as this
  // function used to do with xp and failmsg) creates global variables,
  // and is a ReferenceError in strict code.
  var ti, patt, par, xp, xpstr, failmsg;

  dump("Unit test for XpatternTextParser starting \n");

  // Plain index loops: for-in over an array would also visit any
  // enumerable property added to Array.prototype.
  for ( ti = 0; ti < oktests.length; ti++ ) {
    patt = oktests[ti];
    try {
        par = new XpatternTextParser(patt);
        xp = par.parse();
            // check that we dump the same string as we parsed,
            // modulo whitespace
        xpstr = xp.dumpString(-1);
        if ( this.cleanstr(xpstr) != this.cleanstr(patt) ) {
            failmsg = "OkTest " + ti + " FAILED! \n";
            failmsg += "Pattern parsed and dumped different strings\n";
            failmsg += "pattern: '" + patt + "'\n";
            failmsg += "dumped:  '" + xpstr + "'\n";
            return failmsg;
        }
    } catch (e) {
        failmsg  = "OkTest " + ti + " FAILED! \n" ;
        failmsg += "Parse error (bad test?) " + e + "\n'" + patt + "'\n" ;
        if ( e.fileName ) { // catches syntax errors in code, and other interesting things
          failmsg += "  in " + e.fileName + "  line " + e.lineNumber + "\n";
        }
        return failmsg;
    }
    dump("OkTest " + ti + " OK " + patt + "\n");
  } // ok tests

  for ( ti = 0; ti < failtests.length; ti++ ) {
    patt = failtests[ti];
    try {
        par = new XpatternTextParser(patt);
        xp = par.parse();
            // Getting here means the bad pattern was accepted, which is
            // the failure. Dump what was parsed, for the report.
        xpstr = xp.dumpString(-1);
        failmsg = "FailTest " + ti + " FAILED! \n";
        failmsg += "Managed to parse the pattern '" + patt + "'\n";
        failmsg += "(which dumped as:  '" + xpstr + "' )\n";
        return failmsg;
    } catch (e) {
        // The expected outcome: the pattern was rejected
        dump("Failtest " + ti + " OK. " + e + "\n");
    }
  } // fail tests


  return true;
};

