diff --git a/mozilla/js/js2/parser.cpp b/mozilla/js/js2/parser.cpp index bab0e3804e3..cf2a1ba1c5f 100644 --- a/mozilla/js/js2/parser.cpp +++ b/mozilla/js/js2/parser.cpp @@ -31,7 +31,7 @@ namespace JS = JavaScript; // Create a Reader reading characters from begin up to but not including end. JS::Reader::Reader(const char16 *begin, const char16 *end): - begin(begin), p(begin), end(end), lineStart(begin), nGetsPastEnd(0) + begin(begin), p(begin), end(end), lineStart(begin), nGetsPastEnd(0), lineNum(1), lineFileOffset(0) { ASSERT(begin <= end); #ifdef DEBUG @@ -140,6 +140,17 @@ JS::StringReader::StringReader(const String &s, const String &source): } +// Set the beginning of the current line. unget cannot be subsequently called past this point. +// This can only be called if the previous character was LF (u000A), CR (u000D), LS (u2028), or PS (u2029). +// Moreover, in these cases this method must be called before reading any more characters. +void JS::StringReader::beginLine() +{ + ++lineNum; + lineStart = p; + lineFileOffset = static_cast(p - begin); +} + + JS::String JS::StringReader::sourceFile() const { return source; @@ -345,7 +356,6 @@ JS::Lexer::Lexer(Reader &reader, World &world): reader(reader), world(world) #ifdef DEBUG nTokensBack = 0; #endif - lineNum = 1; lexingUnit = false; } @@ -418,7 +428,12 @@ void JS::Lexer::syntaxError(const char *message, uint backUp) ch = reader.get(); } while (ch != char16eof && !isLineBreak(char16orEOFToChar16(ch))); reader.unget(); - Exception e(Exception::SyntaxError, widenCString(message), reader.sourceFile(), lineNum, charPos, + + SourcePosition position; + position.lineFileOffset = reader.lineFileOffset; + position.lineNum = reader.lineNum; + position.charPos = charPos; + Exception e(Exception::SyntaxError, widenCString(message), reader.sourceFile(), position, reader.extract(0, reader.charPos())); throw e; } @@ -721,7 +736,7 @@ void JS::Lexer::lexToken(bool preferRegExp) goto next; case CharInfo::IdGroup: - t.charPos = reader.charPos() - 1; + t.pos.charPos = reader.charPos() - 1; readIdentifier: { reader.unget(); @@ -734,7 +749,7 @@ void JS::Lexer::lexToken(bool preferRegExp) case CharInfo::NonIdGroup: case CharInfo::IdContinueGroup: - t.charPos = reader.charPos() - 1; + t.pos.charPos = reader.charPos() - 1; switch (ch) { case '(': kind = Token::OpenParenthesis; // ( @@ -764,7 +779,7 @@ void JS::Lexer::lexToken(bool preferRegExp) kind = Token::Dot; // . ch2 = getChar(); if (isASCIIDecimalDigit(char16orEOFToChar16(ch2))) { - reader.backUpTo(t.charPos); + reader.backUpTo(t.pos.charPos); goto number; // decimal point } else if (ch2 == '.') { kind = Token::DoubleDot; // .. @@ -824,7 +839,6 @@ void JS::Lexer::lexToken(bool preferRegExp) ch = getChar(); if (isLineBreak(char16orEOFToChar16(ch))) { reader.beginLine(); - ++lineNum; t.lineBreak = true; } if (ch == char16eof) @@ -941,12 +955,12 @@ void JS::Lexer::lexToken(bool preferRegExp) case CharInfo::LineBreakGroup: endOfLine: reader.beginLine(); - ++lineNum; t.lineBreak = true; goto next; } } } t.kind = kind; - t.lineNum = lineNum; + t.pos.lineFileOffset = reader.lineFileOffset; + t.pos.lineNum = reader.lineNum; } diff --git a/mozilla/js/js2/parser.h b/mozilla/js/js2/parser.h index a078af9143d..1d6c9a88f38 100644 --- a/mozilla/js/js2/parser.h +++ b/mozilla/js/js2/parser.h @@ -33,19 +33,26 @@ namespace JavaScript { // A Reader reads Unicode characters from some source -- either a file or a string. // get() returns all of the characters followed by a char16eof. + // If get() returns LF (u000A), CR (u000D), LS (u2028), or PS (u2029), then beginLine() + // must be called before getting or peeking any more characters. class Reader { + protected: const char16 *begin; // Beginning of current buffer const char16 *p; // Position in current buffer const char16 *end; // End of current buffer const char16 *lineStart; // Pointer to start of current line uint32 nGetsPastEnd; // Number of times char16eof has been returned + public: + uint32 lineNum; // One-based number of current line + FileOffset lineFileOffset; // Byte or character offset of start of current line relative to all of input + private: String *recordString; // String, if any, into which recordChar() records characters const char16 *recordBase; // Position of last beginRecording() call const char16 *recordPos; // Position of last recordChar() call; nil if a discrepancy occurred protected: - Reader(): nGetsPastEnd(0) {} + Reader(): nGetsPastEnd(0), lineNum(1), lineFileOffset(0) {} public: Reader(const char16 *begin, const char16 *end); private: @@ -57,7 +64,7 @@ namespace JavaScript { char16orEOF peek(); void unget(uint32 n = 1); - void beginLine(); + virtual void beginLine() = 0; uint32 charPos() const; void backUpTo(uint32 pos); @@ -92,17 +99,8 @@ namespace JavaScript { } - // Set the beginning of the current line. unget cannot be subsequently called past this point. - inline void Reader::beginLine() - { - lineStart = p; - #ifdef DEBUG - recordString = 0; - #endif - } - - // Return the character offset relative to the current line. This cannot be called - // if the current position is past the end of the input. + // Return the number of characters between the current position and the beginning of the current line. + // This cannot be called if the current position is past the end of the input. inline uint32 Reader::charPos() const { ASSERT(!nGetsPastEnd); @@ -139,6 +137,7 @@ namespace JavaScript { public: StringReader(const String &s, const String &source); + void beginLine(); String sourceFile() const; }; @@ -298,8 +297,7 @@ namespace JavaScript { Kind kind; // The token's kind bool lineBreak; // True if line break precedes this token - uint32 lineNum; // One-based source line number - uint32 charPos; // Zero-based character offset of this token in source line + SourcePosition pos; // Position of this token StringAtom *identifier; // The token's characters; non-null for identifiers, keywords, and regular expressions only String chars; // The token's characters; valid for strings, units, numbers, and regular expression flags only float64 value; // The token's value (numbers only) @@ -313,7 +311,7 @@ namespace JavaScript { class Lexer { - enum {tokenBufferSize = 3}; // Token lookahead buffer size + enum {tokenBufferSize = 3}; // Token lookahead buffer size public: Reader &reader; World &world; @@ -325,7 +323,6 @@ namespace JavaScript { int nTokensBack; // Number of Tokens on which unget() can be called; these Tokens are beind nextToken bool savedPreferRegExp[tokenBufferSize]; // Circular buffer of saved values of preferRegExp to get() calls #endif - uint32 lineNum; // Current line number bool lexingUnit; // True if lexing a unit identifier immediately following a number public: @@ -351,5 +348,151 @@ namespace JavaScript { void lexToken(bool preferRegExp); public: }; + + + class ParseNode { + enum Kind { + Empty, // Empty (used in array literals, argument lists, etc.) + Id, // Identifier + Num, // Numeral + Str, // String + Unit, // Unit after numeral + RegExp, // Regular expression + + // Punctuators + OpenParenthesis, // ( + CloseParenthesis, // ) + OpenBracket, // [ + CloseBracket, // ] + OpenBrace, // { + CloseBrace, // } + + Comma, // , + Semicolon, // ; + Dot, // . + DoubleDot, // .. + TripleDot, // ... + Arrow, // -> + Colon, // : + DoubleColon, // :: + Pound, // # + At, // @ + + Increment, // ++ + Decrement, // -- + + Complement, // ~ + Not, // ! + + Times, // * + Divide, // / + Modulo, // % + Plus, // + + Minus, // - + LeftShift, // << + RightShift, // >> + LogicalRightShift, // >>> + LogicalAnd, // && + LogicalXor, // ^^ + LogicalOr, // || + And, // & // These must be at constant offsets from LogicalAnd ... LogicalOr + Xor, // ^ + Or, // | + + Assignment, // = + TimesEquals, // *= // These must be at constant offsets from Times ... Or + DivideEquals, // /= + ModuloEquals, // %= + PlusEquals, // += + MinusEquals, // -= + LeftShiftEquals, // <<= + RightShiftEquals, // >>= + LogicalRightShiftEquals, // >>>= + LogicalAndEquals, // &&= + LogicalXorEquals, // ^^= + LogicalOrEquals, // ||= + AndEquals, // &= + XorEquals, // ^= + OrEquals, // |= + + Equal, // == + NotEqual, // != + LessThan, // < + LessThanOrEqual, // <= + GreaterThan, // > // >, >= must be at constant offsets from <, <= + GreaterThanOrEqual, // >= + Identical, // === + NotIdentical, // !== + + Question, // ? + + // Reserved words + Abstract, // abstract + Break, // break + Case, // case + Catch, // catch + Class, // class + Const, // const + Continue, // continue + Debugger, // debugger + Default, // default + Delete, // delete + Do, // do + Else, // else + Enum, // enum + Eval, // eval + Export, // export + Extends, // extends + False, // false + Final, // final + Finally, // finally + For, // for + Function, // function + Goto, // goto + If, // if + Implements, // implements + Import, // import + In, // in + Instanceof, // instanceof + Native, // native + New, // new + Null, // null + Package, // package + Private, // private + Protected, // protected + Public, // public + Return, // return + Static, // static + Super, // super + Switch, // switch + Synchronized, // synchronized + This, // this + Throw, // throw + Throws, // throws + Transient, // transient + True, // true + Try, // try + Typeof, // typeof + Var, // var + Volatile, // volatile + While, // while + With, // with + + // Non-reserved words + Box, // box + Constructor, // constructor + Field, // field + Get, // get + Language, // language + Local, // local + Method, // method + Override, // override + Set, // set + Version // version + }; + }; + + //class Parser: public Lexer { + //}; } #endif diff --git a/mozilla/js2/src/parser.cpp b/mozilla/js2/src/parser.cpp index bab0e3804e3..cf2a1ba1c5f 100644 --- a/mozilla/js2/src/parser.cpp +++ b/mozilla/js2/src/parser.cpp @@ -31,7 +31,7 @@ namespace JS = JavaScript; // Create a Reader reading characters from begin up to but not including end. JS::Reader::Reader(const char16 *begin, const char16 *end): - begin(begin), p(begin), end(end), lineStart(begin), nGetsPastEnd(0) + begin(begin), p(begin), end(end), lineStart(begin), nGetsPastEnd(0), lineNum(1), lineFileOffset(0) { ASSERT(begin <= end); #ifdef DEBUG @@ -140,6 +140,17 @@ JS::StringReader::StringReader(const String &s, const String &source): } +// Set the beginning of the current line. unget cannot be subsequently called past this point. +// This can only be called if the previous character was LF (u000A), CR (u000D), LS (u2028), or PS (u2029). +// Moreover, in these cases this method must be called before reading any more characters. +void JS::StringReader::beginLine() +{ + ++lineNum; + lineStart = p; + lineFileOffset = static_cast(p - begin); +} + + JS::String JS::StringReader::sourceFile() const { return source; @@ -345,7 +356,6 @@ JS::Lexer::Lexer(Reader &reader, World &world): reader(reader), world(world) #ifdef DEBUG nTokensBack = 0; #endif - lineNum = 1; lexingUnit = false; } @@ -418,7 +428,12 @@ void JS::Lexer::syntaxError(const char *message, uint backUp) ch = reader.get(); } while (ch != char16eof && !isLineBreak(char16orEOFToChar16(ch))); reader.unget(); - Exception e(Exception::SyntaxError, widenCString(message), reader.sourceFile(), lineNum, charPos, + + SourcePosition position; + position.lineFileOffset = reader.lineFileOffset; + position.lineNum = reader.lineNum; + position.charPos = charPos; + Exception e(Exception::SyntaxError, widenCString(message), reader.sourceFile(), position, reader.extract(0, reader.charPos())); throw e; } @@ -721,7 +736,7 @@ void JS::Lexer::lexToken(bool preferRegExp) goto next; case CharInfo::IdGroup: - t.charPos = reader.charPos() - 1; + t.pos.charPos = reader.charPos() - 1; readIdentifier: { reader.unget(); @@ -734,7 +749,7 @@ void JS::Lexer::lexToken(bool preferRegExp) case CharInfo::NonIdGroup: case CharInfo::IdContinueGroup: - t.charPos = reader.charPos() - 1; + t.pos.charPos = reader.charPos() - 1; switch (ch) { case '(': kind = Token::OpenParenthesis; // ( @@ -764,7 +779,7 @@ void JS::Lexer::lexToken(bool preferRegExp) kind = Token::Dot; // . ch2 = getChar(); if (isASCIIDecimalDigit(char16orEOFToChar16(ch2))) { - reader.backUpTo(t.charPos); + reader.backUpTo(t.pos.charPos); goto number; // decimal point } else if (ch2 == '.') { kind = Token::DoubleDot; // .. @@ -824,7 +839,6 @@ void JS::Lexer::lexToken(bool preferRegExp) ch = getChar(); if (isLineBreak(char16orEOFToChar16(ch))) { reader.beginLine(); - ++lineNum; t.lineBreak = true; } if (ch == char16eof) @@ -941,12 +955,12 @@ void JS::Lexer::lexToken(bool preferRegExp) case CharInfo::LineBreakGroup: endOfLine: reader.beginLine(); - ++lineNum; t.lineBreak = true; goto next; } } } t.kind = kind; - t.lineNum = lineNum; + t.pos.lineFileOffset = reader.lineFileOffset; + t.pos.lineNum = reader.lineNum; } diff --git a/mozilla/js2/src/parser.h b/mozilla/js2/src/parser.h index a078af9143d..1d6c9a88f38 100644 --- a/mozilla/js2/src/parser.h +++ b/mozilla/js2/src/parser.h @@ -33,19 +33,26 @@ namespace JavaScript { // A Reader reads Unicode characters from some source -- either a file or a string. // get() returns all of the characters followed by a char16eof. + // If get() returns LF (u000A), CR (u000D), LS (u2028), or PS (u2029), then beginLine() + // must be called before getting or peeking any more characters. class Reader { + protected: const char16 *begin; // Beginning of current buffer const char16 *p; // Position in current buffer const char16 *end; // End of current buffer const char16 *lineStart; // Pointer to start of current line uint32 nGetsPastEnd; // Number of times char16eof has been returned + public: + uint32 lineNum; // One-based number of current line + FileOffset lineFileOffset; // Byte or character offset of start of current line relative to all of input + private: String *recordString; // String, if any, into which recordChar() records characters const char16 *recordBase; // Position of last beginRecording() call const char16 *recordPos; // Position of last recordChar() call; nil if a discrepancy occurred protected: - Reader(): nGetsPastEnd(0) {} + Reader(): nGetsPastEnd(0), lineNum(1), lineFileOffset(0) {} public: Reader(const char16 *begin, const char16 *end); private: @@ -57,7 +64,7 @@ namespace JavaScript { char16orEOF peek(); void unget(uint32 n = 1); - void beginLine(); + virtual void beginLine() = 0; uint32 charPos() const; void backUpTo(uint32 pos); @@ -92,17 +99,8 @@ namespace JavaScript { } - // Set the beginning of the current line. unget cannot be subsequently called past this point. - inline void Reader::beginLine() - { - lineStart = p; - #ifdef DEBUG - recordString = 0; - #endif - } - - // Return the character offset relative to the current line. This cannot be called - // if the current position is past the end of the input. + // Return the number of characters between the current position and the beginning of the current line. + // This cannot be called if the current position is past the end of the input. inline uint32 Reader::charPos() const { ASSERT(!nGetsPastEnd); @@ -139,6 +137,7 @@ namespace JavaScript { public: StringReader(const String &s, const String &source); + void beginLine(); String sourceFile() const; }; @@ -298,8 +297,7 @@ namespace JavaScript { Kind kind; // The token's kind bool lineBreak; // True if line break precedes this token - uint32 lineNum; // One-based source line number - uint32 charPos; // Zero-based character offset of this token in source line + SourcePosition pos; // Position of this token StringAtom *identifier; // The token's characters; non-null for identifiers, keywords, and regular expressions only String chars; // The token's characters; valid for strings, units, numbers, and regular expression flags only float64 value; // The token's value (numbers only) @@ -313,7 +311,7 @@ namespace JavaScript { class Lexer { - enum {tokenBufferSize = 3}; // Token lookahead buffer size + enum {tokenBufferSize = 3}; // Token lookahead buffer size public: Reader &reader; World &world; @@ -325,7 +323,6 @@ namespace JavaScript { int nTokensBack; // Number of Tokens on which unget() can be called; these Tokens are beind nextToken bool savedPreferRegExp[tokenBufferSize]; // Circular buffer of saved values of preferRegExp to get() calls #endif - uint32 lineNum; // Current line number bool lexingUnit; // True if lexing a unit identifier immediately following a number public: @@ -351,5 +348,151 @@ namespace JavaScript { void lexToken(bool preferRegExp); public: }; + + + class ParseNode { + enum Kind { + Empty, // Empty (used in array literals, argument lists, etc.) + Id, // Identifier + Num, // Numeral + Str, // String + Unit, // Unit after numeral + RegExp, // Regular expression + + // Punctuators + OpenParenthesis, // ( + CloseParenthesis, // ) + OpenBracket, // [ + CloseBracket, // ] + OpenBrace, // { + CloseBrace, // } + + Comma, // , + Semicolon, // ; + Dot, // . + DoubleDot, // .. + TripleDot, // ... + Arrow, // -> + Colon, // : + DoubleColon, // :: + Pound, // # + At, // @ + + Increment, // ++ + Decrement, // -- + + Complement, // ~ + Not, // ! + + Times, // * + Divide, // / + Modulo, // % + Plus, // + + Minus, // - + LeftShift, // << + RightShift, // >> + LogicalRightShift, // >>> + LogicalAnd, // && + LogicalXor, // ^^ + LogicalOr, // || + And, // & // These must be at constant offsets from LogicalAnd ... LogicalOr + Xor, // ^ + Or, // | + + Assignment, // = + TimesEquals, // *= // These must be at constant offsets from Times ... Or + DivideEquals, // /= + ModuloEquals, // %= + PlusEquals, // += + MinusEquals, // -= + LeftShiftEquals, // <<= + RightShiftEquals, // >>= + LogicalRightShiftEquals, // >>>= + LogicalAndEquals, // &&= + LogicalXorEquals, // ^^= + LogicalOrEquals, // ||= + AndEquals, // &= + XorEquals, // ^= + OrEquals, // |= + + Equal, // == + NotEqual, // != + LessThan, // < + LessThanOrEqual, // <= + GreaterThan, // > // >, >= must be at constant offsets from <, <= + GreaterThanOrEqual, // >= + Identical, // === + NotIdentical, // !== + + Question, // ? + + // Reserved words + Abstract, // abstract + Break, // break + Case, // case + Catch, // catch + Class, // class + Const, // const + Continue, // continue + Debugger, // debugger + Default, // default + Delete, // delete + Do, // do + Else, // else + Enum, // enum + Eval, // eval + Export, // export + Extends, // extends + False, // false + Final, // final + Finally, // finally + For, // for + Function, // function + Goto, // goto + If, // if + Implements, // implements + Import, // import + In, // in + Instanceof, // instanceof + Native, // native + New, // new + Null, // null + Package, // package + Private, // private + Protected, // protected + Public, // public + Return, // return + Static, // static + Super, // super + Switch, // switch + Synchronized, // synchronized + This, // this + Throw, // throw + Throws, // throws + Transient, // transient + True, // true + Try, // try + Typeof, // typeof + Var, // var + Volatile, // volatile + While, // while + With, // with + + // Non-reserved words + Box, // box + Constructor, // constructor + Field, // field + Get, // get + Language, // language + Local, // local + Method, // method + Override, // override + Set, // set + Version // version + }; + }; + + //class Parser: public Lexer { + //}; } #endif