Work on adding parser

git-svn-id: svn://10.0.0.236/trunk@61470 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
waldemar%netscape.com 2000-02-23 01:54:30 +00:00
parent 617fbb3628
commit 64fa1b6c33
4 changed files with 366 additions and 52 deletions

View File

@ -31,7 +31,7 @@ namespace JS = JavaScript;
// Create a Reader reading characters from begin up to but not including end.
JS::Reader::Reader(const char16 *begin, const char16 *end):
begin(begin), p(begin), end(end), lineStart(begin), nGetsPastEnd(0)
begin(begin), p(begin), end(end), lineStart(begin), nGetsPastEnd(0), lineNum(1), lineFileOffset(0)
{
ASSERT(begin <= end);
#ifdef DEBUG
@ -140,6 +140,17 @@ JS::StringReader::StringReader(const String &s, const String &source):
}
// Set the beginning of the current line. unget cannot be subsequently called past this point.
// This can only be called if the previous character was LF (u000A), CR (u000D), LS (u2028), or PS (u2029).
// Moreover, in these cases this method must be called before reading any more characters.
void JS::StringReader::beginLine()
{
++lineNum;
lineStart = p;
lineFileOffset = static_cast<uint32>(p - begin);
}
JS::String JS::StringReader::sourceFile() const
{
return source;
@ -345,7 +356,6 @@ JS::Lexer::Lexer(Reader &reader, World &world): reader(reader), world(world)
#ifdef DEBUG
nTokensBack = 0;
#endif
lineNum = 1;
lexingUnit = false;
}
@ -418,7 +428,12 @@ void JS::Lexer::syntaxError(const char *message, uint backUp)
ch = reader.get();
} while (ch != char16eof && !isLineBreak(char16orEOFToChar16(ch)));
reader.unget();
Exception e(Exception::SyntaxError, widenCString(message), reader.sourceFile(), lineNum, charPos,
SourcePosition position;
position.lineFileOffset = reader.lineFileOffset;
position.lineNum = reader.lineNum;
position.charPos = charPos;
Exception e(Exception::SyntaxError, widenCString(message), reader.sourceFile(), position,
reader.extract(0, reader.charPos()));
throw e;
}
@ -721,7 +736,7 @@ void JS::Lexer::lexToken(bool preferRegExp)
goto next;
case CharInfo::IdGroup:
t.charPos = reader.charPos() - 1;
t.pos.charPos = reader.charPos() - 1;
readIdentifier:
{
reader.unget();
@ -734,7 +749,7 @@ void JS::Lexer::lexToken(bool preferRegExp)
case CharInfo::NonIdGroup:
case CharInfo::IdContinueGroup:
t.charPos = reader.charPos() - 1;
t.pos.charPos = reader.charPos() - 1;
switch (ch) {
case '(':
kind = Token::OpenParenthesis; // (
@ -764,7 +779,7 @@ void JS::Lexer::lexToken(bool preferRegExp)
kind = Token::Dot; // .
ch2 = getChar();
if (isASCIIDecimalDigit(char16orEOFToChar16(ch2))) {
reader.backUpTo(t.charPos);
reader.backUpTo(t.pos.charPos);
goto number; // decimal point
} else if (ch2 == '.') {
kind = Token::DoubleDot; // ..
@ -824,7 +839,6 @@ void JS::Lexer::lexToken(bool preferRegExp)
ch = getChar();
if (isLineBreak(char16orEOFToChar16(ch))) {
reader.beginLine();
++lineNum;
t.lineBreak = true;
}
if (ch == char16eof)
@ -941,12 +955,12 @@ void JS::Lexer::lexToken(bool preferRegExp)
case CharInfo::LineBreakGroup:
endOfLine:
reader.beginLine();
++lineNum;
t.lineBreak = true;
goto next;
}
}
}
t.kind = kind;
t.lineNum = lineNum;
t.pos.lineFileOffset = reader.lineFileOffset;
t.pos.lineNum = reader.lineNum;
}

View File

@ -33,19 +33,26 @@ namespace JavaScript {
// A Reader reads Unicode characters from some source -- either a file or a string.
// get() returns all of the characters followed by a char16eof.
// If get() returns LF (u000A), CR (u000D), LS (u2028), or PS (u2029), then beginLine()
// must be called before getting or peeking any more characters.
class Reader {
protected:
const char16 *begin; // Beginning of current buffer
const char16 *p; // Position in current buffer
const char16 *end; // End of current buffer
const char16 *lineStart; // Pointer to start of current line
uint32 nGetsPastEnd; // Number of times char16eof has been returned
public:
uint32 lineNum; // One-based number of current line
FileOffset lineFileOffset; // Byte or character offset of start of current line relative to all of input
private:
String *recordString; // String, if any, into which recordChar() records characters
const char16 *recordBase; // Position of last beginRecording() call
const char16 *recordPos; // Position of last recordChar() call; nil if a discrepancy occurred
protected:
Reader(): nGetsPastEnd(0) {}
Reader(): nGetsPastEnd(0), lineNum(1), lineFileOffset(0) {}
public:
Reader(const char16 *begin, const char16 *end);
private:
@ -57,7 +64,7 @@ namespace JavaScript {
char16orEOF peek();
void unget(uint32 n = 1);
void beginLine();
virtual void beginLine() = 0;
uint32 charPos() const;
void backUpTo(uint32 pos);
@ -92,17 +99,8 @@ namespace JavaScript {
}
// Mark the current read position as the start of the current line.
// After this call unget cannot be used to back up past this point.
// DEBUG builds additionally reset recordString to null.
inline void Reader::beginLine()
{
#ifdef DEBUG
    recordString = 0;
#endif
    lineStart = p;
}
// Return the character offset relative to the current line. This cannot be called
// if the current position is past the end of the input.
// Return the number of characters between the current position and the beginning of the current line.
// This cannot be called if the current position is past the end of the input.
inline uint32 Reader::charPos() const
{
ASSERT(!nGetsPastEnd);
@ -139,6 +137,7 @@ namespace JavaScript {
public:
StringReader(const String &s, const String &source);
void beginLine();
String sourceFile() const;
};
@ -298,8 +297,7 @@ namespace JavaScript {
Kind kind; // The token's kind
bool lineBreak; // True if line break precedes this token
uint32 lineNum; // One-based source line number
uint32 charPos; // Zero-based character offset of this token in source line
SourcePosition pos; // Position of this token
StringAtom *identifier; // The token's characters; non-null for identifiers, keywords, and regular expressions only
String chars; // The token's characters; valid for strings, units, numbers, and regular expression flags only
float64 value; // The token's value (numbers only)
@ -313,7 +311,7 @@ namespace JavaScript {
class Lexer {
enum {tokenBufferSize = 3}; // Token lookahead buffer size
enum {tokenBufferSize = 3}; // Token lookahead buffer size
public:
Reader &reader;
World &world;
@ -325,7 +323,6 @@ namespace JavaScript {
int nTokensBack; // Number of Tokens on which unget() can be called; these Tokens are behind nextToken
bool savedPreferRegExp[tokenBufferSize]; // Circular buffer of saved values of preferRegExp to get() calls
#endif
uint32 lineNum; // Current line number
bool lexingUnit; // True if lexing a unit identifier immediately following a number
public:
@ -351,5 +348,151 @@ namespace JavaScript {
void lexToken(bool preferRegExp);
public:
};
// A ParseNode currently consists solely of the Kind enumeration below; it declares no data
// members or methods yet.
// NOTE(review): Kind has no access specifier, so under C++'s default class access it is private
// to ParseNode -- confirm whether it is meant to become public once the parser uses it.
// NOTE(review): several enumerator names match Token kinds used by the lexer (OpenParenthesis,
// Dot, DoubleDot, ...); presumably this list is meant to parallel Token's kinds -- confirm.
class ParseNode {
// Kinds of parse node, grouped as: empty/literal kinds, punctuators, reserved words, and
// non-reserved words. The relative order inside the operator runs is significant (see the
// "constant offsets" notes below), so do not insert, remove, or reorder enumerators there
// without updating every group that must stay parallel.
enum Kind {
Empty, // Empty (used in array literals, argument lists, etc.)
Id, // Identifier
Num, // Numeral
Str, // String
Unit, // Unit after numeral
RegExp, // Regular expression
// Punctuators
OpenParenthesis, // (
CloseParenthesis, // )
OpenBracket, // [
CloseBracket, // ]
OpenBrace, // {
CloseBrace, // }
Comma, // ,
Semicolon, // ;
Dot, // .
DoubleDot, // ..
TripleDot, // ...
Arrow, // ->
Colon, // :
DoubleColon, // ::
Pound, // #
At, // @
Increment, // ++
Decrement, // --
Complement, // ~
Not, // !
// Binary operators Times ... Or: this run of 14 is mirrored, in the same order, by the
// compound-assignment run TimesEquals ... OrEquals below.
Times, // *
Divide, // /
Modulo, // %
Plus, // +
Minus, // -
LeftShift, // <<
RightShift, // >>
LogicalRightShift, // >>>
LogicalAnd, // &&
LogicalXor, // ^^
LogicalOr, // ||
// As laid out here, And/Xor/Or are each 3 past LogicalAnd/LogicalXor/LogicalOr.
And, // & // These must be at constant offsets from LogicalAnd ... LogicalOr
Xor, // ^
Or, // |
Assignment, // =
// As laid out here, each XEquals enumerator is 15 past its operator X (14 operators plus Assignment).
TimesEquals, // *= // These must be at constant offsets from Times ... Or
DivideEquals, // /=
ModuloEquals, // %=
PlusEquals, // +=
MinusEquals, // -=
LeftShiftEquals, // <<=
RightShiftEquals, // >>=
LogicalRightShiftEquals, // >>>=
LogicalAndEquals, // &&=
LogicalXorEquals, // ^^=
LogicalOrEquals, // ||=
AndEquals, // &=
XorEquals, // ^=
OrEquals, // |=
Equal, // ==
NotEqual, // !=
// As laid out here, GreaterThan/GreaterThanOrEqual are each 2 past LessThan/LessThanOrEqual.
LessThan, // <
LessThanOrEqual, // <=
GreaterThan, // > // >, >= must be at constant offsets from <, <=
GreaterThanOrEqual, // >=
Identical, // ===
NotIdentical, // !==
Question, // ?
// Reserved words
Abstract, // abstract
Break, // break
Case, // case
Catch, // catch
Class, // class
Const, // const
Continue, // continue
Debugger, // debugger
Default, // default
Delete, // delete
Do, // do
Else, // else
Enum, // enum
Eval, // eval
Export, // export
Extends, // extends
False, // false
Final, // final
Finally, // finally
For, // for
Function, // function
Goto, // goto
If, // if
Implements, // implements
Import, // import
In, // in
Instanceof, // instanceof
Native, // native
New, // new
Null, // null
Package, // package
Private, // private
Protected, // protected
Public, // public
Return, // return
Static, // static
Super, // super
Switch, // switch
Synchronized, // synchronized
This, // this
Throw, // throw
Throws, // throws
Transient, // transient
True, // true
Try, // try
Typeof, // typeof
Var, // var
Volatile, // volatile
While, // while
With, // with
// Non-reserved words
Box, // box
Constructor, // constructor
Field, // field
Get, // get
Language, // language
Local, // local
Method, // method
Override, // override
Set, // set
Version // version
};
};
//class Parser: public Lexer {
//};
}
#endif

View File

@ -31,7 +31,7 @@ namespace JS = JavaScript;
// Create a Reader reading characters from begin up to but not including end.
JS::Reader::Reader(const char16 *begin, const char16 *end):
begin(begin), p(begin), end(end), lineStart(begin), nGetsPastEnd(0)
begin(begin), p(begin), end(end), lineStart(begin), nGetsPastEnd(0), lineNum(1), lineFileOffset(0)
{
ASSERT(begin <= end);
#ifdef DEBUG
@ -140,6 +140,17 @@ JS::StringReader::StringReader(const String &s, const String &source):
}
// Set the beginning of the current line. unget cannot be subsequently called past this point.
// This can only be called if the previous character was LF (u000A), CR (u000D), LS (u2028), or PS (u2029).
// Moreover, in these cases this method must be called before reading any more characters.
void JS::StringReader::beginLine()
{
++lineNum;
lineStart = p;
lineFileOffset = static_cast<uint32>(p - begin);
}
JS::String JS::StringReader::sourceFile() const
{
return source;
@ -345,7 +356,6 @@ JS::Lexer::Lexer(Reader &reader, World &world): reader(reader), world(world)
#ifdef DEBUG
nTokensBack = 0;
#endif
lineNum = 1;
lexingUnit = false;
}
@ -418,7 +428,12 @@ void JS::Lexer::syntaxError(const char *message, uint backUp)
ch = reader.get();
} while (ch != char16eof && !isLineBreak(char16orEOFToChar16(ch)));
reader.unget();
Exception e(Exception::SyntaxError, widenCString(message), reader.sourceFile(), lineNum, charPos,
SourcePosition position;
position.lineFileOffset = reader.lineFileOffset;
position.lineNum = reader.lineNum;
position.charPos = charPos;
Exception e(Exception::SyntaxError, widenCString(message), reader.sourceFile(), position,
reader.extract(0, reader.charPos()));
throw e;
}
@ -721,7 +736,7 @@ void JS::Lexer::lexToken(bool preferRegExp)
goto next;
case CharInfo::IdGroup:
t.charPos = reader.charPos() - 1;
t.pos.charPos = reader.charPos() - 1;
readIdentifier:
{
reader.unget();
@ -734,7 +749,7 @@ void JS::Lexer::lexToken(bool preferRegExp)
case CharInfo::NonIdGroup:
case CharInfo::IdContinueGroup:
t.charPos = reader.charPos() - 1;
t.pos.charPos = reader.charPos() - 1;
switch (ch) {
case '(':
kind = Token::OpenParenthesis; // (
@ -764,7 +779,7 @@ void JS::Lexer::lexToken(bool preferRegExp)
kind = Token::Dot; // .
ch2 = getChar();
if (isASCIIDecimalDigit(char16orEOFToChar16(ch2))) {
reader.backUpTo(t.charPos);
reader.backUpTo(t.pos.charPos);
goto number; // decimal point
} else if (ch2 == '.') {
kind = Token::DoubleDot; // ..
@ -824,7 +839,6 @@ void JS::Lexer::lexToken(bool preferRegExp)
ch = getChar();
if (isLineBreak(char16orEOFToChar16(ch))) {
reader.beginLine();
++lineNum;
t.lineBreak = true;
}
if (ch == char16eof)
@ -941,12 +955,12 @@ void JS::Lexer::lexToken(bool preferRegExp)
case CharInfo::LineBreakGroup:
endOfLine:
reader.beginLine();
++lineNum;
t.lineBreak = true;
goto next;
}
}
}
t.kind = kind;
t.lineNum = lineNum;
t.pos.lineFileOffset = reader.lineFileOffset;
t.pos.lineNum = reader.lineNum;
}

View File

@ -33,19 +33,26 @@ namespace JavaScript {
// A Reader reads Unicode characters from some source -- either a file or a string.
// get() returns all of the characters followed by a char16eof.
// If get() returns LF (u000A), CR (u000D), LS (u2028), or PS (u2029), then beginLine()
// must be called before getting or peeking any more characters.
class Reader {
protected:
const char16 *begin; // Beginning of current buffer
const char16 *p; // Position in current buffer
const char16 *end; // End of current buffer
const char16 *lineStart; // Pointer to start of current line
uint32 nGetsPastEnd; // Number of times char16eof has been returned
public:
uint32 lineNum; // One-based number of current line
FileOffset lineFileOffset; // Byte or character offset of start of current line relative to all of input
private:
String *recordString; // String, if any, into which recordChar() records characters
const char16 *recordBase; // Position of last beginRecording() call
const char16 *recordPos; // Position of last recordChar() call; nil if a discrepancy occurred
protected:
Reader(): nGetsPastEnd(0) {}
Reader(): nGetsPastEnd(0), lineNum(1), lineFileOffset(0) {}
public:
Reader(const char16 *begin, const char16 *end);
private:
@ -57,7 +64,7 @@ namespace JavaScript {
char16orEOF peek();
void unget(uint32 n = 1);
void beginLine();
virtual void beginLine() = 0;
uint32 charPos() const;
void backUpTo(uint32 pos);
@ -92,17 +99,8 @@ namespace JavaScript {
}
// Mark the current read position as the start of the current line.
// After this call unget cannot be used to back up past this point.
// DEBUG builds additionally reset recordString to null.
inline void Reader::beginLine()
{
#ifdef DEBUG
    recordString = 0;
#endif
    lineStart = p;
}
// Return the character offset relative to the current line. This cannot be called
// if the current position is past the end of the input.
// Return the number of characters between the current position and the beginning of the current line.
// This cannot be called if the current position is past the end of the input.
inline uint32 Reader::charPos() const
{
ASSERT(!nGetsPastEnd);
@ -139,6 +137,7 @@ namespace JavaScript {
public:
StringReader(const String &s, const String &source);
void beginLine();
String sourceFile() const;
};
@ -298,8 +297,7 @@ namespace JavaScript {
Kind kind; // The token's kind
bool lineBreak; // True if line break precedes this token
uint32 lineNum; // One-based source line number
uint32 charPos; // Zero-based character offset of this token in source line
SourcePosition pos; // Position of this token
StringAtom *identifier; // The token's characters; non-null for identifiers, keywords, and regular expressions only
String chars; // The token's characters; valid for strings, units, numbers, and regular expression flags only
float64 value; // The token's value (numbers only)
@ -313,7 +311,7 @@ namespace JavaScript {
class Lexer {
enum {tokenBufferSize = 3}; // Token lookahead buffer size
enum {tokenBufferSize = 3}; // Token lookahead buffer size
public:
Reader &reader;
World &world;
@ -325,7 +323,6 @@ namespace JavaScript {
int nTokensBack; // Number of Tokens on which unget() can be called; these Tokens are behind nextToken
bool savedPreferRegExp[tokenBufferSize]; // Circular buffer of saved values of preferRegExp to get() calls
#endif
uint32 lineNum; // Current line number
bool lexingUnit; // True if lexing a unit identifier immediately following a number
public:
@ -351,5 +348,151 @@ namespace JavaScript {
void lexToken(bool preferRegExp);
public:
};
// A ParseNode currently consists solely of the Kind enumeration below; it declares no data
// members or methods yet.
// NOTE(review): Kind has no access specifier, so under C++'s default class access it is private
// to ParseNode -- confirm whether it is meant to become public once the parser uses it.
// NOTE(review): several enumerator names match Token kinds used by the lexer (OpenParenthesis,
// Dot, DoubleDot, ...); presumably this list is meant to parallel Token's kinds -- confirm.
class ParseNode {
// Kinds of parse node, grouped as: empty/literal kinds, punctuators, reserved words, and
// non-reserved words. The relative order inside the operator runs is significant (see the
// "constant offsets" notes below), so do not insert, remove, or reorder enumerators there
// without updating every group that must stay parallel.
enum Kind {
Empty, // Empty (used in array literals, argument lists, etc.)
Id, // Identifier
Num, // Numeral
Str, // String
Unit, // Unit after numeral
RegExp, // Regular expression
// Punctuators
OpenParenthesis, // (
CloseParenthesis, // )
OpenBracket, // [
CloseBracket, // ]
OpenBrace, // {
CloseBrace, // }
Comma, // ,
Semicolon, // ;
Dot, // .
DoubleDot, // ..
TripleDot, // ...
Arrow, // ->
Colon, // :
DoubleColon, // ::
Pound, // #
At, // @
Increment, // ++
Decrement, // --
Complement, // ~
Not, // !
// Binary operators Times ... Or: this run of 14 is mirrored, in the same order, by the
// compound-assignment run TimesEquals ... OrEquals below.
Times, // *
Divide, // /
Modulo, // %
Plus, // +
Minus, // -
LeftShift, // <<
RightShift, // >>
LogicalRightShift, // >>>
LogicalAnd, // &&
LogicalXor, // ^^
LogicalOr, // ||
// As laid out here, And/Xor/Or are each 3 past LogicalAnd/LogicalXor/LogicalOr.
And, // & // These must be at constant offsets from LogicalAnd ... LogicalOr
Xor, // ^
Or, // |
Assignment, // =
// As laid out here, each XEquals enumerator is 15 past its operator X (14 operators plus Assignment).
TimesEquals, // *= // These must be at constant offsets from Times ... Or
DivideEquals, // /=
ModuloEquals, // %=
PlusEquals, // +=
MinusEquals, // -=
LeftShiftEquals, // <<=
RightShiftEquals, // >>=
LogicalRightShiftEquals, // >>>=
LogicalAndEquals, // &&=
LogicalXorEquals, // ^^=
LogicalOrEquals, // ||=
AndEquals, // &=
XorEquals, // ^=
OrEquals, // |=
Equal, // ==
NotEqual, // !=
// As laid out here, GreaterThan/GreaterThanOrEqual are each 2 past LessThan/LessThanOrEqual.
LessThan, // <
LessThanOrEqual, // <=
GreaterThan, // > // >, >= must be at constant offsets from <, <=
GreaterThanOrEqual, // >=
Identical, // ===
NotIdentical, // !==
Question, // ?
// Reserved words
Abstract, // abstract
Break, // break
Case, // case
Catch, // catch
Class, // class
Const, // const
Continue, // continue
Debugger, // debugger
Default, // default
Delete, // delete
Do, // do
Else, // else
Enum, // enum
Eval, // eval
Export, // export
Extends, // extends
False, // false
Final, // final
Finally, // finally
For, // for
Function, // function
Goto, // goto
If, // if
Implements, // implements
Import, // import
In, // in
Instanceof, // instanceof
Native, // native
New, // new
Null, // null
Package, // package
Private, // private
Protected, // protected
Public, // public
Return, // return
Static, // static
Super, // super
Switch, // switch
Synchronized, // synchronized
This, // this
Throw, // throw
Throws, // throws
Transient, // transient
True, // true
Try, // try
Typeof, // typeof
Var, // var
Volatile, // volatile
While, // while
With, // with
// Non-reserved words
Box, // box
Constructor, // constructor
Field, // field
Get, // get
Language, // language
Local, // local
Method, // method
Override, // override
Set, // set
Version // version
};
};
//class Parser: public Lexer {
//};
}
#endif