WIP for push based tokenization
git-svn-id: svn://10.0.0.236/trunk@1683 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
parent
affe63fab7
commit
4e262a1fe0
@ -81,42 +81,49 @@ nsIDTD* CNavDelegate::GetDTD(void) const{
|
||||
* and we know we're at the start of some kind of tagged
|
||||
* element. We don't know yet if it's a tag or a comment.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
* @update gess 5/12/98
|
||||
* @param aChar is the last char read
|
||||
* @param aScanner represents our input source
|
||||
* @param aToken is the out arg holding our new token
|
||||
* @return error code (may return kInterrupted).
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
|
||||
nsAutoString empty("");
|
||||
PRInt32 result=aScanner.GetChar(aChar);
|
||||
|
||||
switch(aChar) {
|
||||
case kForwardSlash:
|
||||
PRUnichar ch;
|
||||
result=aScanner.Peek(ch);
|
||||
if(nsString::IsAlpha(ch))
|
||||
aToken=new CEndToken(empty);
|
||||
else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
|
||||
break;
|
||||
case kExclamation:
|
||||
aToken=new CCommentToken(empty);
|
||||
break;
|
||||
default:
|
||||
if(nsString::IsAlpha(aChar))
|
||||
return ConsumeStartTag(aChar,aScanner,aToken);
|
||||
else if(kEOF!=aChar) {
|
||||
nsAutoString temp("<");
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
} //switch
|
||||
if(kNoError==result) {
|
||||
|
||||
if(0!=aToken) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(result) {
|
||||
delete aToken;
|
||||
aToken=0;
|
||||
}
|
||||
}
|
||||
switch(aChar) {
|
||||
case kForwardSlash:
|
||||
PRUnichar ch;
|
||||
result=aScanner.Peek(ch);
|
||||
if(kNoError==result) {
|
||||
if(nsString::IsAlpha(ch))
|
||||
aToken=new CEndToken(empty);
|
||||
else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
|
||||
}//if
|
||||
break;
|
||||
case kExclamation:
|
||||
aToken=new CCommentToken(empty);
|
||||
break;
|
||||
default:
|
||||
if(nsString::IsAlpha(aChar))
|
||||
return ConsumeStartTag(aChar,aScanner,aToken);
|
||||
else if(kEOF!=aChar) {
|
||||
nsAutoString temp("<");
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
} //switch
|
||||
|
||||
if((0!=aToken) && (kNoError==result)) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(result) {
|
||||
delete aToken;
|
||||
aToken=0;
|
||||
}
|
||||
} //if
|
||||
} //if
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -131,20 +138,26 @@ PRInt32 CNavDelegate::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aTo
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner) {
|
||||
PRBool done=PR_FALSE;
|
||||
nsAutoString as("");
|
||||
PRInt32 result=kNoError;
|
||||
nsAutoString as("");
|
||||
|
||||
while((!done) && (result==kNoError)) {
|
||||
CToken* theToken= new CAttributeToken(as);
|
||||
if(theToken){
|
||||
result= theToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
CToken* theToken= new CAttributeToken(as);
|
||||
if(theToken){
|
||||
result=theToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(kNoError==result){
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
aScanner.Peek(aChar);
|
||||
if(aChar==kGreaterThan) { //you just ate the '>'
|
||||
aScanner.GetChar(aChar); //skip the '>'
|
||||
done=PR_TRUE;
|
||||
}
|
||||
}
|
||||
}//if
|
||||
}//if
|
||||
|
||||
if(kNoError==result){
|
||||
result=aScanner.Peek(aChar);
|
||||
if(aChar==kGreaterThan) { //you just ate the '>'
|
||||
aScanner.GetChar(aChar); //skip the '>'
|
||||
done=PR_TRUE;
|
||||
}//if
|
||||
}//if
|
||||
}//while
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -166,8 +179,7 @@ PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar a
|
||||
endTag.Append(aString);
|
||||
endTag.Append(">");
|
||||
aToken=new CSkippedContentToken(endTag);
|
||||
PRInt32 result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
return result;
|
||||
return aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
|
||||
/**
|
||||
@ -183,38 +195,43 @@ PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar a
|
||||
PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
aToken=new CStartToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
if(aToken) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(((CStartToken*)aToken)->IsAttributed()) {
|
||||
result=ConsumeAttributes(aChar,aScanner);
|
||||
}
|
||||
//now that that's over with, we have one more problem to solve.
|
||||
//In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
|
||||
//consume all the content itself.
|
||||
nsString& str=aToken->GetText();
|
||||
CToken* skippedToken=0;
|
||||
if(str.EqualsIgnoreCase("SCRIPT") ||
|
||||
str.EqualsIgnoreCase("STYLE") ||
|
||||
str.EqualsIgnoreCase("TITLE") ||
|
||||
str.EqualsIgnoreCase("TEXTAREA")) {
|
||||
result=ConsumeContentToEndTag(str,aChar,aScanner,skippedToken);
|
||||
|
||||
if(skippedToken){
|
||||
//now we strip the ending sequence from our new SkippedContent token...
|
||||
PRInt32 slen=str.Length()+3;
|
||||
nsString& skippedText=skippedToken->GetText();
|
||||
|
||||
skippedText.Cut(skippedText.Length()-slen,slen);
|
||||
mTokenDeque.Push(skippedToken);
|
||||
if(kNoError==result) {
|
||||
if(((CStartToken*)aToken)->IsAttributed()) {
|
||||
result=ConsumeAttributes(aChar,aScanner);
|
||||
}
|
||||
//now that that's over with, we have one more problem to solve.
|
||||
//In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
|
||||
//consume all the content itself.
|
||||
if(kNoError==result) {
|
||||
nsString& str=aToken->GetText();
|
||||
CToken* skippedToken=0;
|
||||
if(str.EqualsIgnoreCase("SCRIPT") ||
|
||||
str.EqualsIgnoreCase("STYLE") ||
|
||||
str.EqualsIgnoreCase("TITLE") ||
|
||||
str.EqualsIgnoreCase("TEXTAREA")) {
|
||||
result=ConsumeContentToEndTag(str,aChar,aScanner,skippedToken);
|
||||
|
||||
//In the case that we just read a given tag, we should go and
|
||||
//consume all the tag content itself (and throw it all away).
|
||||
if((kNoError==result) && skippedToken){
|
||||
//now we strip the ending sequence from our new SkippedContent token...
|
||||
PRInt32 slen=str.Length()+3;
|
||||
nsString& skippedText=skippedToken->GetText();
|
||||
|
||||
skippedText.Cut(skippedText.Length()-slen,slen);
|
||||
mTokenDeque.Push(skippedToken);
|
||||
|
||||
//In the case that we just read a given tag, we should go and
|
||||
//consume all the tag content itself (and throw it all away).
|
||||
|
||||
CEndToken* endtoken=new CEndToken(str);
|
||||
mTokenDeque.Push(endtoken);
|
||||
CEndToken* endtoken=new CEndToken(str);
|
||||
mTokenDeque.Push(endtoken);
|
||||
} //if
|
||||
} //if
|
||||
} //if
|
||||
} //if
|
||||
}
|
||||
} //if
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -231,19 +248,22 @@ PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*
|
||||
PRInt32 CNavDelegate::ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRUnichar ch;
|
||||
PRInt32 result=aScanner.GetChar(ch);
|
||||
if(nsString::IsAlpha(ch)) { //handle common enity references &xxx; or �.
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result = aToken->Consume(ch,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
else if(kHashsign==ch) {
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result=aToken->Consume(0,aScanner);
|
||||
}
|
||||
else {
|
||||
//oops, we're actually looking at plain text...
|
||||
nsAutoString temp("&");
|
||||
result=ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
|
||||
if(kNoError==result) {
|
||||
if(nsString::IsAlpha(ch)) { //handle common enity references &xxx; or �.
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result = aToken->Consume(ch,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
else if(kHashsign==ch) {
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result=aToken->Consume(0,aScanner);
|
||||
}
|
||||
else {
|
||||
//oops, we're actually looking at plain text...
|
||||
nsAutoString temp("&");
|
||||
result=ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
}//if
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -336,36 +356,54 @@ PRInt32 CNavDelegate::ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*&
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::GetToken(CScanner& aScanner,CToken*& aToken){
|
||||
PRInt32 result=kNoError;
|
||||
PRUnichar aChar;
|
||||
|
||||
|
||||
aToken=0;
|
||||
if(mTokenDeque.GetSize()>0) {
|
||||
aToken=(CToken*)mTokenDeque.Pop();
|
||||
return result;
|
||||
return kNoError;
|
||||
}
|
||||
aToken=0;
|
||||
while(!aScanner.Eof()) {
|
||||
|
||||
PRInt32 result=kNoError;
|
||||
if(kNoError==result){
|
||||
|
||||
PRUnichar aChar;
|
||||
result=aScanner.GetChar(aChar);
|
||||
switch(aChar) {
|
||||
case kAmpersand:
|
||||
return ConsumeEntity(aChar,aScanner,aToken);
|
||||
case kLessThan:
|
||||
return ConsumeTag(aChar,aScanner,aToken);
|
||||
case kCR: case kLF:
|
||||
return ConsumeNewline(aChar,aScanner,aToken);
|
||||
case kNotFound:
|
||||
switch(result) {
|
||||
case kEOF:
|
||||
break;
|
||||
|
||||
case kInterrupted:
|
||||
aScanner.RewindToMark();
|
||||
break;
|
||||
|
||||
case kNoError:
|
||||
default:
|
||||
if(!nsString::IsSpace(aChar)) {
|
||||
nsAutoString temp(aChar);
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
else return ConsumeWhitespace(aChar,aScanner,aToken);
|
||||
break;
|
||||
switch(aChar) {
|
||||
case kLessThan:
|
||||
return ConsumeTag(aChar,aScanner,aToken);
|
||||
|
||||
case kAmpersand:
|
||||
return ConsumeEntity(aChar,aScanner,aToken);
|
||||
|
||||
case kCR: case kLF:
|
||||
return ConsumeNewline(aChar,aScanner,aToken);
|
||||
|
||||
case kNotFound:
|
||||
break;
|
||||
|
||||
default:
|
||||
if(!nsString::IsSpace(aChar)) {
|
||||
nsAutoString temp(aChar);
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
else return ConsumeWhitespace(aChar,aScanner,aToken);
|
||||
break;
|
||||
} //switch
|
||||
break;
|
||||
} //switch
|
||||
if(result==kEOF)
|
||||
result=0;
|
||||
} //while
|
||||
if(kNoError==result)
|
||||
result=aScanner.Eof();
|
||||
} //while
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -31,6 +31,7 @@
|
||||
#include "CNavDTD.h"
|
||||
#include "prenv.h" //this is here for debug reasons...
|
||||
#include "plstr.h"
|
||||
#include <fstream.h>
|
||||
#ifdef XP_PC
|
||||
#include <direct.h> //this is here for debug reasons...
|
||||
#endif
|
||||
@ -40,11 +41,13 @@ static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID);
|
||||
static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
|
||||
|
||||
static const char* kNullURL = "Error: Null URL given";
|
||||
static const char* kNullFilename= "Error: Null filename given";
|
||||
static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
||||
static const char* kNullToken = "Error: Null token given";
|
||||
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static char* gVerificationOutputDir=0;
|
||||
static int rickGDebug=0;
|
||||
|
||||
/**
|
||||
* This method is defined in nsIParser. It is used to
|
||||
@ -456,92 +459,198 @@ PRBool nsHTMLParser::IterateTokens() {
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This is the main controlling routine in the parsing process.
|
||||
* Note that it may get called multiple times for the same scanner,
|
||||
* since this is a pushed based system, and all the tokens may
|
||||
* not have been consumed by the scanner during a given invocation
|
||||
* of this method.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- const char* containing file to be parsed.
|
||||
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool nsHTMLParser::Parse(nsIURL* aURL){
|
||||
eParseMode theMode=eParseMode_navigator;
|
||||
eParseMode DetermineParseMode() {
|
||||
const char* theModeStr= PR_GetEnv("PARSE_MODE");
|
||||
const char* other="other";
|
||||
eParseMode result=eParseMode_navigator;
|
||||
|
||||
if(theModeStr)
|
||||
if(0==nsCRT::strcasecmp(other,theModeStr))
|
||||
theMode=eParseMode_other;
|
||||
|
||||
return Parse(aURL,theMode);
|
||||
result=eParseMode_other;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This is the main controlling routine in the parsing process.
|
||||
* Note that it may get called multiple times for the same scanner,
|
||||
* since this is a pushed based system, and all the tokens may
|
||||
* not have been consumed by the scanner during a given invocation
|
||||
* of this method.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- const char* containing file to be parsed.
|
||||
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool nsHTMLParser::Parse(nsIURL* aURL,eParseMode aMode){
|
||||
NS_PRECONDITION(0!=aURL,kNullURL);
|
||||
|
||||
PRBool result=PR_FALSE;
|
||||
if(aURL) {
|
||||
/**
 * Creates the tokenizer delegate that matches the requested parse mode
 * and asks that delegate for its DTD.
 *
 * @param   aMode is the parse mode a delegate is wanted for
 * @param   aDelegate is the out arg receiving the new delegate; it is
 *          set to 0 when aMode is not one of the handled modes
 * @param   aDTD is the out arg receiving the delegate's DTD; it is set
 *          to 0 when no delegate was created
 */
void GetDelegateAndDTD(eParseMode aMode,ITokenizerDelegate*& aDelegate,nsIDTD*& aDTD) {
  //Start from a known state: without this an unhandled mode would hand
  //the caller back whatever (possibly stale) pointers it passed in.
  aDelegate=0;
  aDTD=0;

  switch(aMode) {
    case eParseMode_navigator:
      aDelegate=new CNavDelegate();
      break;
    case eParseMode_other:
      aDelegate=new COtherDelegate();
      break;
    default:
      break;
  }

  if(aDelegate)
    aDTD=aDelegate->GetDTD();
}
|
||||
|
||||
result=PR_TRUE;
|
||||
mParseMode=aMode;
|
||||
ITokenizerDelegate* theDelegate=0;
|
||||
|
||||
mDTD=0;
|
||||
switch(mParseMode) {
|
||||
case eParseMode_navigator:
|
||||
theDelegate=new CNavDelegate();
|
||||
if(theDelegate)
|
||||
mDTD=theDelegate->GetDTD();
|
||||
break;
|
||||
case eParseMode_other:
|
||||
theDelegate=new COtherDelegate();
|
||||
if(theDelegate)
|
||||
mDTD=theDelegate->GetDTD();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if(!theDelegate) {
|
||||
NS_ERROR(kNullTokenizer);
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
if(mDTD)
|
||||
mDTD->SetParser(this);
|
||||
mTokenizer=new CTokenizer(aURL, theDelegate, mParseMode);
|
||||
/**
|
||||
* This DEBUG ONLY method is used to simulate a network-based
|
||||
* i/o model where data comes in incrementally.
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param aFilename is the name of the disk file to use for testing.
|
||||
* @return error code (kNoError means ok)
|
||||
*/
|
||||
PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
|
||||
PRInt32 result=kBadFilename;
|
||||
fstream* mFileStream;
|
||||
nsString theBuffer;
|
||||
PRInt32 iter=-1;
|
||||
const int kBufSize=10;
|
||||
|
||||
mFileStream=new fstream(aFilename,ios::in|ios::binary);
|
||||
if(mFileStream) {
|
||||
result=kNoError;
|
||||
while((kNoError==result) || (kInterrupted==result)) {
|
||||
//read some data from the file...
|
||||
|
||||
char buf[kBufSize];
|
||||
buf[kBufSize]=0;
|
||||
|
||||
if(mFileStream) {
|
||||
mFileStream->read(buf,kBufSize);
|
||||
PRInt32 numread=mFileStream->gcount();
|
||||
if(numread>0) {
|
||||
theBuffer.Truncate();
|
||||
theBuffer.Append(buf);
|
||||
mTokenizer->Append(theBuffer);
|
||||
result=ResumeParse(++iter);
|
||||
}
|
||||
}
|
||||
|
||||
mSink->WillBuildModel();
|
||||
#ifdef __INCREMENTAL
|
||||
int iter=-1;
|
||||
for(;;){
|
||||
mSink->WillResume();
|
||||
mTokenizer->TokenizeAvailable(++iter);
|
||||
mSink->WillInterrupt();
|
||||
}
|
||||
#else
|
||||
mTokenizer->Tokenize();
|
||||
#endif
|
||||
result=IterateTokens();
|
||||
mSink->DidBuildModel();
|
||||
mFileStream->close();
|
||||
delete mFileStream;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the main controlling routine in the parsing process.
|
||||
* Note that it may get called multiple times for the same scanner,
|
||||
* since this is a pushed based system, and all the tokens may
|
||||
* not have been consumed by the scanner during a given invocation
|
||||
* of this method.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- const char* containing file to be parsed.
|
||||
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
||||
*/
|
||||
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
|
||||
NS_PRECONDITION(0!=aFilename,kNullFilename);
|
||||
|
||||
PRInt32 status=kBadFilename;
|
||||
|
||||
mIncremental=aIncremental;
|
||||
mParseMode=DetermineParseMode();
|
||||
|
||||
if(aFilename) {
|
||||
|
||||
GetDelegateAndDTD(mParseMode,mDelegate,mDTD);
|
||||
if(mDelegate) {
|
||||
|
||||
if(mDTD)
|
||||
mDTD->SetParser(this);
|
||||
|
||||
mSink->WillBuildModel();
|
||||
|
||||
//ok, time to create our tokenizer and begin the process
|
||||
if(aIncremental) {
|
||||
mTokenizer=new CTokenizer(mDelegate,mParseMode);
|
||||
status=ParseFileIncrementally(aFilename);
|
||||
}
|
||||
else {
|
||||
//ok, time to create our tokenizer and begin the process
|
||||
mTokenizer=new CTokenizer(aFilename,mDelegate,mParseMode);
|
||||
status=ResumeParse(0);
|
||||
}
|
||||
mSink->DidBuildModel();
|
||||
}//if
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the main controlling routine in the parsing process.
|
||||
* Note that it may get called multiple times for the same scanner,
|
||||
* since this is a pushed based system, and all the tokens may
|
||||
* not have been consumed by the scanner during a given invocation
|
||||
* of this method.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- const char* containing file to be parsed.
|
||||
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
||||
*/
|
||||
PRInt32 nsHTMLParser::Parse(nsIURL* aURL,PRBool aIncremental ){
|
||||
NS_PRECONDITION(0!=aURL,kNullURL);
|
||||
|
||||
PRInt32 status=kBadURL;
|
||||
|
||||
if(rickGDebug)
|
||||
status=Parse("c:/temp/temp.html",PR_TRUE);
|
||||
|
||||
mIncremental=aIncremental;
|
||||
mParseMode=DetermineParseMode();
|
||||
|
||||
if(aURL) {
|
||||
|
||||
GetDelegateAndDTD(mParseMode,mDelegate,mDTD);
|
||||
if(mDelegate) {
|
||||
|
||||
if(mDTD)
|
||||
mDTD->SetParser(this);
|
||||
|
||||
mSink->WillBuildModel();
|
||||
|
||||
//ok, time to create our tokenizer and begin the process
|
||||
if(aIncremental) {
|
||||
mTokenizer=new CTokenizer(mDelegate,mParseMode);
|
||||
status=aURL->Open(this);
|
||||
}
|
||||
else {
|
||||
mTokenizer=new CTokenizer(aURL,mDelegate,mParseMode);
|
||||
status=ResumeParse(0);
|
||||
mSink->DidBuildModel();
|
||||
}
|
||||
}//if
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* Call this method if all you want to do is parse 1 string full of HTML text.
|
||||
*
|
||||
* @update gess5/11/98
|
||||
* @param anHTMLString contains a string-full of real HTML
|
||||
* @param appendTokens tells us whether we should insert tokens inline, or append them.
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
PRInt32 nsHTMLParser::Parse(nsString& aSourceBuffer,PRBool appendTokens){
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
mSink->WillBuildModel();
|
||||
mTokenizer->Append(aSourceBuffer);
|
||||
result=ResumeParse(0);
|
||||
mSink->DidBuildModel();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This routine is called to cause the parser to continue
|
||||
@ -553,17 +662,21 @@ PRBool nsHTMLParser::Parse(nsIURL* aURL,eParseMode aMode){
|
||||
* @param
|
||||
* @return PR_TRUE if parsing concluded successfully.
|
||||
*/
|
||||
PRBool nsHTMLParser::ResumeParse() {
|
||||
PRInt32 nsHTMLParser::ResumeParse(PRInt32 anIteration) {
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
mSink->WillResume();
|
||||
int iter=0;
|
||||
PRInt32 errcode=mTokenizer->TokenizeAvailable(iter);
|
||||
if(kInterrupted==errcode)
|
||||
mSink->WillInterrupt();
|
||||
PRBool result=IterateTokens();
|
||||
if(kNoError==result) {
|
||||
result=mTokenizer->Tokenize(anIteration);
|
||||
if(kInterrupted==result)
|
||||
mSink->WillInterrupt();
|
||||
|
||||
if(!rickGDebug)
|
||||
IterateTokens();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess4/22/98
|
||||
@ -1388,5 +1501,64 @@ PRBool nsHTMLParser::ReduceContextStackFor(PRInt32 aChildTag){
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsresult nsHTMLParser::GetBindInfo(void){
|
||||
nsresult result=0;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsresult nsHTMLParser::OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const char *msg){
|
||||
nsresult result=0;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsresult nsHTMLParser::OnStartBinding(void){
|
||||
nsresult result=0;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length){
|
||||
nsresult result=0;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsresult nsHTMLParser::OnStopBinding(void){
|
||||
nsresult result=0;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -61,6 +61,9 @@
|
||||
#include "nsParserNode.h"
|
||||
#include "nsTokenHandler.h"
|
||||
#include "nsParserTypes.h"
|
||||
#include "nsIURL.h"
|
||||
#include "nsIStreamListener.h"
|
||||
#include "nsITokenizerDelegate.h"
|
||||
|
||||
|
||||
#define NS_IHTML_PARSER_IID \
|
||||
@ -75,7 +78,7 @@ class nsIURL;
|
||||
class nsIDTD;
|
||||
|
||||
|
||||
class nsHTMLParser : public nsIParser {
|
||||
class nsHTMLParser : public nsIParser, public nsIStreamListener {
|
||||
|
||||
public:
|
||||
friend class CTokenHandler;
|
||||
@ -104,14 +107,6 @@ friend class CTokenHandler;
|
||||
*/
|
||||
virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);
|
||||
|
||||
/**
|
||||
* Cause parser to parse input from given URL
|
||||
* @update gess5/11/98
|
||||
* @param aURL is a descriptor for source document
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRBool Parse(nsIURL* aURL);
|
||||
|
||||
/**
|
||||
* Cause parser to parse input from given URL in given mode
|
||||
* @update gess5/11/98
|
||||
@ -119,14 +114,31 @@ friend class CTokenHandler;
|
||||
* @param aMode is the desired parser mode (Nav, other, etc.)
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRBool Parse(nsIURL* aURL,eParseMode aMode);
|
||||
virtual PRInt32 Parse(nsIURL* aURL,PRBool aIncremental=PR_FALSE);
|
||||
|
||||
/**
|
||||
* Cause parser to parse input from given file in given mode
|
||||
* @update gess5/11/98
|
||||
* @param aFilename is a path for file document
|
||||
* @param aIncremental tells us whether the file should be fed to the parser incrementally
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* @update gess5/11/98
|
||||
* @param anHTMLString contains a string-full of real HTML
|
||||
* @param appendTokens tells us whether we should insert tokens inline, or append them.
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens);
|
||||
|
||||
/**
|
||||
* This method gets called (automatically) during incremental parsing
|
||||
* @update gess5/11/98
|
||||
* @return TRUE if all went well, otherwise FALSE
|
||||
*/
|
||||
virtual PRBool ResumeParse();
|
||||
virtual PRInt32 ResumeParse(PRInt32 anIteration);
|
||||
|
||||
/**
|
||||
* Retrieve ptr to internal context vector stack
|
||||
@ -230,6 +242,15 @@ friend class CTokenHandler;
|
||||
*/
|
||||
PRBool HandleStyleToken(CToken* aToken);
|
||||
|
||||
//*********************************************
|
||||
// These methods are callback methods used by
|
||||
// net lib to let us know about our inputstream.
|
||||
//*********************************************
|
||||
NS_IMETHOD GetBindInfo(void);
|
||||
NS_IMETHOD OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const char *msg);
|
||||
NS_IMETHOD OnStartBinding(void);
|
||||
NS_IMETHOD OnDataAvailable(nsIInputStream *pIStream, PRInt32 length);
|
||||
NS_IMETHOD OnStopBinding(void);
|
||||
|
||||
protected:
|
||||
|
||||
@ -485,6 +506,10 @@ protected:
|
||||
*/
|
||||
PRBool CreateContextStackFor(PRInt32 aChildTag);
|
||||
|
||||
private:
|
||||
PRInt32 ParseFileIncrementally(const char* aFilename); //XXX ONLY FOR DEBUG PURPOSES...
|
||||
|
||||
protected:
|
||||
//*********************************************
|
||||
// And now, some data members...
|
||||
//*********************************************
|
||||
@ -502,6 +527,8 @@ protected:
|
||||
nsIDTD* mDTD;
|
||||
eParseMode mParseMode;
|
||||
PRBool mHasOpenForm;
|
||||
PRBool mIncremental;
|
||||
ITokenizerDelegate* mDelegate;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -40,6 +40,7 @@
|
||||
|
||||
|
||||
class nsIContentSink;
|
||||
class nsString;
|
||||
|
||||
/**
|
||||
* This class defines the iparser interface. This XPCOM
|
||||
@ -52,10 +53,14 @@ class nsIParser : public nsISupports {
|
||||
public:
|
||||
|
||||
virtual nsIContentSink* SetContentSink(nsIContentSink* aContentSink)=0;
|
||||
virtual PRBool Parse(nsIURL* aURL)=0;
|
||||
virtual PRBool ResumeParse()=0;
|
||||
virtual PRInt32 GetStack(PRInt32* aStackPtr)=0;
|
||||
virtual PRBool HasOpenContainer(PRInt32 aContainer) const=0;
|
||||
|
||||
virtual PRInt32 Parse(nsIURL* aURL,PRBool aIncremental=PR_FALSE)=0;
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0;
|
||||
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;
|
||||
|
||||
virtual PRInt32 ResumeParse(PRInt32 anIterator)=0;
|
||||
virtual PRInt32 GetStack(PRInt32* aStackPtr)=0;
|
||||
virtual PRBool HasOpenContainer(PRInt32 aContainer) const=0;
|
||||
};
|
||||
|
||||
extern NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult);
|
||||
|
||||
@ -37,13 +37,16 @@ enum eParseMode {
|
||||
|
||||
eParseMode_unknown=0,
|
||||
eParseMode_navigator,
|
||||
eParseMode_other
|
||||
eParseMode_other,
|
||||
eParseMode_autodetect
|
||||
};
|
||||
|
||||
const PRInt32 kEOF = 1000000L;
|
||||
const PRInt32 kBadFilename = -4;
|
||||
const PRInt32 kBadURL = -3;
|
||||
const PRInt32 kInterrupted = -2;
|
||||
const PRInt32 kNotFound = -1;
|
||||
const PRInt32 kNoError = 0;
|
||||
const PRInt32 kInterrupted = 2;
|
||||
|
||||
const PRUint32 kNewLine = '\n';
|
||||
const PRUint32 kCR = '\r';
|
||||
|
||||
@ -22,9 +22,8 @@
|
||||
#include "nsIURL.h"
|
||||
#include "nsDebug.h"
|
||||
|
||||
const char* gURLRef;
|
||||
const char* kBadHTMLText1="<HTML><BODY><H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
|
||||
const char* kBadHTMLText2="</BODY></HTML>";
|
||||
const char* gURLRef=0;
|
||||
const char* kBadHTMLText="<H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
|
||||
|
||||
#ifdef __INCREMENTAL
|
||||
const int kBufsize=1;
|
||||
@ -33,31 +32,63 @@ const int kBufsize=64;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aURL -- pointer to URL to be loaded
|
||||
* Use this constructor if you want an incremental (callback)
|
||||
* based input stream.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
CScanner::CScanner(eParseMode aMode) : mBuffer("") {
  //incremental scanner: neither a file stream nor a net stream is opened
  mIncremental=PR_TRUE;
  mFileStream=0;
  mNetStream=0;

  mParseMode=aMode;

  //nothing has been read or marked yet
  mTotalRead=0;
  mOffset=0;
  mMarkPos=-1;
}
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be file based.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
CScanner::CScanner(const char* aFilename,eParseMode aMode) : mBuffer("") {
|
||||
NS_ASSERTION(0!=aFilename,"Error: Null filename!");
|
||||
mOffset=0;
|
||||
mMarkPos=-1;
|
||||
mTotalRead=0;
|
||||
mParseMode=aMode;
|
||||
mNetStream=0;
|
||||
mIncremental=PR_FALSE;
|
||||
mFileStream=new fstream(aFilename,ios::in|ios::binary);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be based on a
|
||||
* non-incremental netstream.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
CScanner::CScanner(nsIURL* aURL,eParseMode aMode) : mBuffer("") {
|
||||
NS_ASSERTION(0!=aURL,"Error: Null URL!");
|
||||
mOffset=0;
|
||||
mStream=0;
|
||||
mMarkPos=-1;
|
||||
mTotalRead=0;
|
||||
mParseMode=aMode;
|
||||
if(aURL) {
|
||||
|
||||
gURLRef=aURL->GetSpec();
|
||||
|
||||
#ifdef __INCREMENTAL
|
||||
mStream=new fstream("c:/temp/temp.html",ios::in|ios::binary);
|
||||
#else
|
||||
int error;
|
||||
mStream=aURL->Open(&error);
|
||||
#endif
|
||||
}
|
||||
mFileStream=0;
|
||||
PRInt32 error=0;
|
||||
mIncremental=PR_FALSE;
|
||||
mNetStream=aURL->Open(&error);
|
||||
gURLRef=aURL->GetSpec();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* default destructor
|
||||
*
|
||||
@ -66,19 +97,107 @@ CScanner::CScanner(nsIURL* aURL,eParseMode aMode) : mBuffer("") {
|
||||
* @return
|
||||
*/
|
||||
CScanner::~CScanner() {
|
||||
#ifdef __INCREMENTAL
|
||||
mStream->close();
|
||||
delete mStream;
|
||||
mStream=0;
|
||||
#else
|
||||
if(mStream) {
|
||||
mStream->Close();
|
||||
mStream->Release();
|
||||
mStream=0;
|
||||
if(mFileStream) {
|
||||
mFileStream->close();
|
||||
delete mFileStream;
|
||||
}
|
||||
#endif
|
||||
else if(mNetStream) {
|
||||
mNetStream->Close();
|
||||
mNetStream->Release();
|
||||
}
|
||||
mFileStream=0;
|
||||
mNetStream=0;
|
||||
gURLRef=0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets current offset position of input stream to marked position.
|
||||
* This allows us to back up to this point if the need should arise,
|
||||
* such as when tokenization gets interrupted.
|
||||
* NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 CScanner::RewindToMark(void){
|
||||
mOffset=mMarkPos;
|
||||
return mOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Records current offset position in input stream. This allows us
|
||||
* to back up to this point if the need should arise, such as when
|
||||
* tokenization gets interrupted.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 CScanner::Mark(void){
|
||||
mMarkPos=mOffset;
|
||||
return mMarkPos;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
*/
|
||||
void _PreCompressBuffer(nsString& aBuffer,PRInt32& anOffset,PRInt32& aMarkPos){
|
||||
//To determine how much of our internal buffer to truncate,
|
||||
//we should check mMarkPos. That represents the point at which
|
||||
//we've guaranteed the client we can back up to, so make sure
|
||||
//you don't lose any of the data beyond that point.
|
||||
if((anOffset!=aMarkPos) && (0<=aMarkPos)) {
|
||||
if(aMarkPos>0) {
|
||||
aBuffer.Cut(0,aMarkPos);
|
||||
if(anOffset>aMarkPos)
|
||||
anOffset-=aMarkPos;
|
||||
}
|
||||
}
|
||||
else aBuffer.Truncate();
|
||||
aMarkPos=0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This method should only be called by the parser when
|
||||
* we're doing incremental i/o over the net.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aBuffer contains next blob of i/o data
|
||||
* @param aSize contains size of buffer
|
||||
* @return 0 if all went well, otherwise error code.
|
||||
*/
|
||||
PRInt32 CScanner::IncrementalAppend(const char* aBuffer,PRInt32 aSize){
|
||||
NS_ASSERTION(((!mFileStream) && (!mNetStream)),"Error: Should only be called during incremental net i/o!");
|
||||
|
||||
PRInt32 result=0;
|
||||
if((!mFileStream) && (!mNetStream)) {
|
||||
|
||||
_PreCompressBuffer(mBuffer,mOffset,mMarkPos);
|
||||
|
||||
//now that the buffer is (possibly) shortened, let's append the new data.
|
||||
if(0<aSize) {
|
||||
mBuffer.Append(aBuffer,aSize);
|
||||
mTotalRead+=aSize;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Grab data from underlying stream.
|
||||
*
|
||||
* @update gess4/3/98
|
||||
* @return error code
|
||||
*/
|
||||
PRBool CScanner::Append(nsString& aBuffer) {
|
||||
_PreCompressBuffer(mBuffer,mOffset,mMarkPos);
|
||||
mBuffer.Append(aBuffer);
|
||||
return PR_TRUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Grab data from underlying stream.
|
||||
@ -89,55 +208,63 @@ CScanner::~CScanner() {
|
||||
PRInt32 CScanner::FillBuffer(void) {
|
||||
PRInt32 anError=0;
|
||||
|
||||
mBuffer.Truncate();
|
||||
if(!mStream) {
|
||||
_PreCompressBuffer(mBuffer,mOffset,mMarkPos);
|
||||
|
||||
if((!mIncremental) && (!mNetStream) && (!mFileStream)) {
|
||||
//This is DEBUG code!!!!!! XXX DEBUG XXX
|
||||
//If you're here, it means someone tried to load a
|
||||
//non-existent document. So as a favor, we emit a
|
||||
//little bit of HTML explaining the error.
|
||||
if(0==mTotalRead) {
|
||||
mBuffer.Append((const char*)kBadHTMLText1);
|
||||
mBuffer.Append((const char*)kBadHTMLText);
|
||||
mBuffer.Append((const char*)gURLRef);
|
||||
mBuffer.Append((const char*)kBadHTMLText2);
|
||||
}
|
||||
else return 0;
|
||||
}
|
||||
else {
|
||||
else if(!mIncremental) {
|
||||
PRInt32 numread=0;
|
||||
char buf[kBufsize+1];
|
||||
buf[kBufsize]=0;
|
||||
|
||||
#ifdef __INCREMENTAL
|
||||
mStream->read(buf,kBufsize);
|
||||
numread=mStream->gcount();
|
||||
#else
|
||||
numread=mStream->Read(&anError,buf,0,kBufsize);
|
||||
#endif
|
||||
if(mFileStream) {
|
||||
mFileStream->read(buf,kBufsize);
|
||||
numread=mFileStream->gcount();
|
||||
}
|
||||
else if(mNetStream) {
|
||||
numread=mNetStream->Read(&anError,buf,0,kBufsize);
|
||||
if(1==anError)
|
||||
anError=kEOF;
|
||||
}
|
||||
mOffset=mBuffer.Length();
|
||||
if((0<numread) && (0==anError))
|
||||
mBuffer.Append((const char*)buf,numread);
|
||||
mTotalRead+=mBuffer.Length();
|
||||
}
|
||||
mTotalRead+=mBuffer.Length();
|
||||
else anError=kInterrupted;
|
||||
|
||||
return anError;
|
||||
}
|
||||
|
||||
/**
|
||||
* determine if the scanner has reached EOF
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return PR_TRUE upon eof condition
|
||||
* @return 0=!eof 1=eof kInterrupted=interrupted
|
||||
*/
|
||||
PRBool CScanner::Eof() {
|
||||
PRInt32 CScanner::Eof() {
|
||||
PRInt32 theError=0;
|
||||
|
||||
if(mOffset>=mBuffer.Length()) {
|
||||
theError=FillBuffer();
|
||||
mOffset=0;
|
||||
if(!mIncremental)
|
||||
theError=FillBuffer();
|
||||
else return kInterrupted;
|
||||
}
|
||||
PRBool result=PR_TRUE;
|
||||
if(0==theError) {
|
||||
result=PRBool(0==mBuffer.Length());
|
||||
}
|
||||
return result;
|
||||
|
||||
if(0==theError)
|
||||
return (0==mBuffer.Length());
|
||||
|
||||
return theError;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -148,11 +275,12 @@ PRBool CScanner::Eof() {
|
||||
* @return error code reflecting read status
|
||||
*/
|
||||
PRInt32 CScanner::GetChar(PRUnichar& aChar) {
|
||||
if(!Eof()) {
|
||||
PRInt32 result=Eof();
|
||||
if(!result) {
|
||||
aChar=mBuffer[mOffset++];
|
||||
return kNoError;
|
||||
result=kNoError;
|
||||
}
|
||||
return kEOF;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -165,11 +293,12 @@ PRInt32 CScanner::GetChar(PRUnichar& aChar) {
|
||||
* @return
|
||||
*/
|
||||
PRInt32 CScanner::Peek(PRUnichar& aChar){
|
||||
if(!Eof()) {
|
||||
PRInt32 result=Eof();
|
||||
if(!result) {
|
||||
aChar=mBuffer[mOffset];
|
||||
return kNoError;
|
||||
result=kNoError;
|
||||
}
|
||||
return kEOF;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -181,7 +310,9 @@ PRInt32 CScanner::Peek(PRUnichar& aChar){
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 CScanner::PutBack(PRUnichar aChar) {
|
||||
mOffset--;
|
||||
if(mOffset>0)
|
||||
mOffset--;
|
||||
else mBuffer.Insert(aChar,0);
|
||||
return kNoError;
|
||||
}
|
||||
|
||||
@ -301,8 +432,8 @@ PRInt32 CScanner::ReadUntil(nsString& aString,nsString& aTerminalSet,PRBool addT
|
||||
PRUnichar ch=0;
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
while(!Eof()) {
|
||||
result=GetChar(ch);
|
||||
while(!result) {
|
||||
result=GetChar(ch);
|
||||
if(kNoError==result) {
|
||||
PRInt32 pos=aTerminalSet.Find(ch);
|
||||
if(kNotFound!=pos) {
|
||||
|
||||
@ -43,7 +43,36 @@ class ifstream;
|
||||
|
||||
class CScanner {
|
||||
public:
|
||||
|
||||
/**
|
||||
* Use this constructor if you want an incremental (callback)
|
||||
* based input stream.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
CScanner(eParseMode aMode=eParseMode_navigator);
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be based on a
|
||||
* non-incremental netstream.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
CScanner(nsIURL* aURL,eParseMode aMode=eParseMode_navigator);
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be file based.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
CScanner(const char* aFilename,eParseMode aMode=eParseMode_navigator);
|
||||
|
||||
~CScanner();
|
||||
|
||||
/**
|
||||
@ -117,7 +146,7 @@ class CScanner {
|
||||
* @update gess 3/25/98
|
||||
* @return PR_TRUE upon eof condition
|
||||
*/
|
||||
PRBool Eof(void);
|
||||
PRInt32 Eof(void);
|
||||
|
||||
/**
|
||||
* Consume characters until you find the terminal char
|
||||
@ -153,6 +182,48 @@ class CScanner {
|
||||
*/
|
||||
PRInt32 ReadWhile(nsString& aString,nsString& anInputSet,PRBool addTerminal);
|
||||
|
||||
/**
|
||||
* Records current offset position in input stream. This allows us
|
||||
* to back up to this point if the need should arise, such as when
|
||||
* tokenization gets interrupted.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 Mark(void);
|
||||
|
||||
/**
|
||||
* Resets current offset position of input stream to marked position.
|
||||
* This allows us to back up to this point if the need should arise,
|
||||
* such as when tokenization gets interrupted.
|
||||
* NOTE: IT IS REALLY BAD FORM TO CALL REWINDTOMARK WITHOUT CALLING MARK FIRST!
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 RewindToMark(void);
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool Append(nsString& aBuffer);
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 IncrementalAppend(const char* aBuffer,PRInt32 aSize);
|
||||
|
||||
static void SelfTest();
|
||||
|
||||
protected:
|
||||
@ -167,15 +238,15 @@ class CScanner {
|
||||
*/
|
||||
PRInt32 FillBuffer(void);
|
||||
|
||||
#ifdef __INCREMENTAL
|
||||
fstream* mStream;
|
||||
#else
|
||||
nsIInputStream* mStream;
|
||||
#endif
|
||||
|
||||
fstream* mFileStream;
|
||||
nsIInputStream* mNetStream;
|
||||
nsString mBuffer;
|
||||
PRInt32 mOffset;
|
||||
PRInt32 mMarkPos;
|
||||
PRInt32 mTotalRead;
|
||||
eParseMode mParseMode;
|
||||
PRBool mIncremental;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@ -39,6 +39,35 @@ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMo
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode) :
|
||||
mTokenDeque() {
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aFilename,aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode) :
|
||||
mTokenDeque() {
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* default destructor
|
||||
@ -54,6 +83,19 @@ CTokenizer::~CTokenizer() {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool CTokenizer::Append(nsString& aBuffer) {
|
||||
if(mScanner)
|
||||
return mScanner->Append(aBuffer);
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve a reference to the internal token deque.
|
||||
*
|
||||
@ -105,31 +147,31 @@ PRBool CTokenizer::WillTokenize(PRBool aIncremental){
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the primary control routine. It iteratively
|
||||
* consumes tokens until an error occurs or you run out
|
||||
* of data.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return error code
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 CTokenizer::Tokenize(void) {
|
||||
PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){
|
||||
CToken* theToken=0;
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
WillTokenize(PR_TRUE);
|
||||
|
||||
while(kNoError==result) {
|
||||
result=GetToken(theToken);
|
||||
if(theToken && (kNoError==result)) {
|
||||
|
||||
if(WillTokenize(PR_FALSE)) {
|
||||
do {
|
||||
result=GetToken(theToken);
|
||||
if(theToken) {
|
||||
#ifdef VERBOSE_DEBUG
|
||||
theToken->DebugDumpToken(cout);
|
||||
#endif
|
||||
if(mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
if(mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
} while(0!=theToken);
|
||||
result=DidTokenize(PR_FALSE);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(kEOF==result)
|
||||
result=kNoError;
|
||||
DidTokenize(PR_TRUE);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -141,20 +183,33 @@ PRInt32 CTokenizer::Tokenize(void) {
|
||||
* @update gess 3/25/98
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 CTokenizer::TokenizeAvailable(int anIteration) {
|
||||
PRInt32 CTokenizer::Tokenize(int anIteration) {
|
||||
CToken* theToken=0;
|
||||
PRInt32 result=kNoError;
|
||||
PRBool done=(0==anIteration) ? (!WillTokenize(PR_TRUE)) : PR_FALSE;
|
||||
|
||||
|
||||
while((PR_FALSE==done) && (kInterrupted!=kInterrupted)) {
|
||||
while((PR_FALSE==done) && (kNoError==result)) {
|
||||
mScanner->Mark();
|
||||
result=GetToken(theToken);
|
||||
if(theToken) {
|
||||
if(mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
if(kNoError==result) {
|
||||
if(theToken) {
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
theToken->DebugDumpToken(cout);
|
||||
#endif
|
||||
|
||||
if(mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
if(theToken)
|
||||
delete theToken;
|
||||
mScanner->RewindToMark();
|
||||
}
|
||||
else done=PR_TRUE;
|
||||
}
|
||||
if((PR_TRUE==done) && (kInterrupted!=result))
|
||||
DidTokenize(PR_TRUE);
|
||||
|
||||
@ -50,19 +50,13 @@ class nsIURL;
|
||||
|
||||
class CTokenizer {
|
||||
public:
|
||||
CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode);
|
||||
|
||||
CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator);
|
||||
CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator);
|
||||
CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator);
|
||||
|
||||
~CTokenizer();
|
||||
|
||||
/**
|
||||
* This control routine causes the entire stream to be
|
||||
* tokenized. You probably want to call TokenizeAvailable()
|
||||
* instead (for incremental tokenization).
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 Tokenize(void);
|
||||
|
||||
/**
|
||||
* This method incrementally tokenizes as much content as
|
||||
* it can get its hands on.
|
||||
@ -70,7 +64,14 @@ class CTokenizer {
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 TokenizeAvailable(int anIteration); //your friendly incremental version
|
||||
PRInt32 Tokenize(int anIteration); //your friendly incremental version
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens=PR_TRUE);
|
||||
|
||||
/**
|
||||
* Cause the tokenizer to consume the next token, and
|
||||
@ -98,6 +99,23 @@ class CTokenizer {
|
||||
*/
|
||||
nsDeque& GetDeque(void);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 4/20/98
|
||||
* @return PR_TRUE if the buffer was handed to the scanner, PR_FALSE if there is no scanner
|
||||
*/
|
||||
PRBool Append(nsString& aBuffer);
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool SetBuffer(nsString& aBuffer);
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
|
||||
@ -81,42 +81,49 @@ nsIDTD* CNavDelegate::GetDTD(void) const{
|
||||
* and we know we're at the start of some kind of tagged
|
||||
* element. We don't know yet if it's a tag or a comment.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
* @update gess 5/12/98
|
||||
* @param aChar is the last char read
|
||||
* @param aScanner represents our input source
|
||||
* @param aToken is the out arg holding our new token
|
||||
* @return error code (may return kInterrupted).
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
|
||||
nsAutoString empty("");
|
||||
PRInt32 result=aScanner.GetChar(aChar);
|
||||
|
||||
switch(aChar) {
|
||||
case kForwardSlash:
|
||||
PRUnichar ch;
|
||||
result=aScanner.Peek(ch);
|
||||
if(nsString::IsAlpha(ch))
|
||||
aToken=new CEndToken(empty);
|
||||
else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
|
||||
break;
|
||||
case kExclamation:
|
||||
aToken=new CCommentToken(empty);
|
||||
break;
|
||||
default:
|
||||
if(nsString::IsAlpha(aChar))
|
||||
return ConsumeStartTag(aChar,aScanner,aToken);
|
||||
else if(kEOF!=aChar) {
|
||||
nsAutoString temp("<");
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
} //switch
|
||||
if(kNoError==result) {
|
||||
|
||||
if(0!=aToken) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(result) {
|
||||
delete aToken;
|
||||
aToken=0;
|
||||
}
|
||||
}
|
||||
switch(aChar) {
|
||||
case kForwardSlash:
|
||||
PRUnichar ch;
|
||||
result=aScanner.Peek(ch);
|
||||
if(kNoError==result) {
|
||||
if(nsString::IsAlpha(ch))
|
||||
aToken=new CEndToken(empty);
|
||||
else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
|
||||
}//if
|
||||
break;
|
||||
case kExclamation:
|
||||
aToken=new CCommentToken(empty);
|
||||
break;
|
||||
default:
|
||||
if(nsString::IsAlpha(aChar))
|
||||
return ConsumeStartTag(aChar,aScanner,aToken);
|
||||
else if(kEOF!=aChar) {
|
||||
nsAutoString temp("<");
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
} //switch
|
||||
|
||||
if((0!=aToken) && (kNoError==result)) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(result) {
|
||||
delete aToken;
|
||||
aToken=0;
|
||||
}
|
||||
} //if
|
||||
} //if
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -131,20 +138,26 @@ PRInt32 CNavDelegate::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aTo
|
||||
*/
|
||||
PRInt32 CNavDelegate::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner) {
|
||||
PRBool done=PR_FALSE;
|
||||
nsAutoString as("");
|
||||
PRInt32 result=kNoError;
|
||||
nsAutoString as("");
|
||||
|
||||
while((!done) && (result==kNoError)) {
|
||||
CToken* theToken= new CAttributeToken(as);
|
||||
if(theToken){
|
||||
result= theToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
CToken* theToken= new CAttributeToken(as);
|
||||
if(theToken){
|
||||
result=theToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(kNoError==result){
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
aScanner.Peek(aChar);
|
||||
if(aChar==kGreaterThan) { //you just ate the '>'
|
||||
aScanner.GetChar(aChar); //skip the '>'
|
||||
done=PR_TRUE;
|
||||
}
|
||||
}
|
||||
}//if
|
||||
}//if
|
||||
|
||||
if(kNoError==result){
|
||||
result=aScanner.Peek(aChar);
|
||||
if(aChar==kGreaterThan) { //you just ate the '>'
|
||||
aScanner.GetChar(aChar); //skip the '>'
|
||||
done=PR_TRUE;
|
||||
}//if
|
||||
}//if
|
||||
}//while
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -166,8 +179,7 @@ PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar a
|
||||
endTag.Append(aString);
|
||||
endTag.Append(">");
|
||||
aToken=new CSkippedContentToken(endTag);
|
||||
PRInt32 result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
return result;
|
||||
return aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
|
||||
/**
|
||||
@ -183,38 +195,43 @@ PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar a
|
||||
PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
aToken=new CStartToken(nsAutoString(""));
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
if(aToken) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(((CStartToken*)aToken)->IsAttributed()) {
|
||||
result=ConsumeAttributes(aChar,aScanner);
|
||||
}
|
||||
//now that that's over with, we have one more problem to solve.
|
||||
//In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
|
||||
//consume all the content itself.
|
||||
nsString& str=aToken->GetText();
|
||||
CToken* skippedToken=0;
|
||||
if(str.EqualsIgnoreCase("SCRIPT") ||
|
||||
str.EqualsIgnoreCase("STYLE") ||
|
||||
str.EqualsIgnoreCase("TITLE") ||
|
||||
str.EqualsIgnoreCase("TEXTAREA")) {
|
||||
result=ConsumeContentToEndTag(str,aChar,aScanner,skippedToken);
|
||||
|
||||
if(skippedToken){
|
||||
//now we strip the ending sequence from our new SkippedContent token...
|
||||
PRInt32 slen=str.Length()+3;
|
||||
nsString& skippedText=skippedToken->GetText();
|
||||
|
||||
skippedText.Cut(skippedText.Length()-slen,slen);
|
||||
mTokenDeque.Push(skippedToken);
|
||||
if(kNoError==result) {
|
||||
if(((CStartToken*)aToken)->IsAttributed()) {
|
||||
result=ConsumeAttributes(aChar,aScanner);
|
||||
}
|
||||
//now that that's over with, we have one more problem to solve.
|
||||
//In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
|
||||
//consume all the content itself.
|
||||
if(kNoError==result) {
|
||||
nsString& str=aToken->GetText();
|
||||
CToken* skippedToken=0;
|
||||
if(str.EqualsIgnoreCase("SCRIPT") ||
|
||||
str.EqualsIgnoreCase("STYLE") ||
|
||||
str.EqualsIgnoreCase("TITLE") ||
|
||||
str.EqualsIgnoreCase("TEXTAREA")) {
|
||||
result=ConsumeContentToEndTag(str,aChar,aScanner,skippedToken);
|
||||
|
||||
//In the case that we just read a given tag, we should go and
|
||||
//consume all the tag content itself (and throw it all away).
|
||||
if((kNoError==result) && skippedToken){
|
||||
//now we strip the ending sequence from our new SkippedContent token...
|
||||
PRInt32 slen=str.Length()+3;
|
||||
nsString& skippedText=skippedToken->GetText();
|
||||
|
||||
skippedText.Cut(skippedText.Length()-slen,slen);
|
||||
mTokenDeque.Push(skippedToken);
|
||||
|
||||
//In the case that we just read a given tag, we should go and
|
||||
//consume all the tag content itself (and throw it all away).
|
||||
|
||||
CEndToken* endtoken=new CEndToken(str);
|
||||
mTokenDeque.Push(endtoken);
|
||||
CEndToken* endtoken=new CEndToken(str);
|
||||
mTokenDeque.Push(endtoken);
|
||||
} //if
|
||||
} //if
|
||||
} //if
|
||||
} //if
|
||||
}
|
||||
} //if
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -231,19 +248,22 @@ PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*
|
||||
PRInt32 CNavDelegate::ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRUnichar ch;
|
||||
PRInt32 result=aScanner.GetChar(ch);
|
||||
if(nsString::IsAlpha(ch)) { //handle common enity references &xxx; or �.
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result = aToken->Consume(ch,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
else if(kHashsign==ch) {
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result=aToken->Consume(0,aScanner);
|
||||
}
|
||||
else {
|
||||
//oops, we're actually looking at plain text...
|
||||
nsAutoString temp("&");
|
||||
result=ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
|
||||
if(kNoError==result) {
|
||||
if(nsString::IsAlpha(ch)) { //handle common enity references &xxx; or �.
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result = aToken->Consume(ch,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
else if(kHashsign==ch) {
|
||||
aToken = new CEntityToken(nsAutoString(""));
|
||||
result=aToken->Consume(0,aScanner);
|
||||
}
|
||||
else {
|
||||
//oops, we're actually looking at plain text...
|
||||
nsAutoString temp("&");
|
||||
result=ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
}//if
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -336,36 +356,54 @@ PRInt32 CNavDelegate::ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*&
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CNavDelegate::GetToken(CScanner& aScanner,CToken*& aToken){
|
||||
PRInt32 result=kNoError;
|
||||
PRUnichar aChar;
|
||||
|
||||
|
||||
aToken=0;
|
||||
if(mTokenDeque.GetSize()>0) {
|
||||
aToken=(CToken*)mTokenDeque.Pop();
|
||||
return result;
|
||||
return kNoError;
|
||||
}
|
||||
aToken=0;
|
||||
while(!aScanner.Eof()) {
|
||||
|
||||
PRInt32 result=kNoError;
|
||||
if(kNoError==result){
|
||||
|
||||
PRUnichar aChar;
|
||||
result=aScanner.GetChar(aChar);
|
||||
switch(aChar) {
|
||||
case kAmpersand:
|
||||
return ConsumeEntity(aChar,aScanner,aToken);
|
||||
case kLessThan:
|
||||
return ConsumeTag(aChar,aScanner,aToken);
|
||||
case kCR: case kLF:
|
||||
return ConsumeNewline(aChar,aScanner,aToken);
|
||||
case kNotFound:
|
||||
switch(result) {
|
||||
case kEOF:
|
||||
break;
|
||||
|
||||
case kInterrupted:
|
||||
aScanner.RewindToMark();
|
||||
break;
|
||||
|
||||
case kNoError:
|
||||
default:
|
||||
if(!nsString::IsSpace(aChar)) {
|
||||
nsAutoString temp(aChar);
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
else return ConsumeWhitespace(aChar,aScanner,aToken);
|
||||
break;
|
||||
switch(aChar) {
|
||||
case kLessThan:
|
||||
return ConsumeTag(aChar,aScanner,aToken);
|
||||
|
||||
case kAmpersand:
|
||||
return ConsumeEntity(aChar,aScanner,aToken);
|
||||
|
||||
case kCR: case kLF:
|
||||
return ConsumeNewline(aChar,aScanner,aToken);
|
||||
|
||||
case kNotFound:
|
||||
break;
|
||||
|
||||
default:
|
||||
if(!nsString::IsSpace(aChar)) {
|
||||
nsAutoString temp(aChar);
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
else return ConsumeWhitespace(aChar,aScanner,aToken);
|
||||
break;
|
||||
} //switch
|
||||
break;
|
||||
} //switch
|
||||
if(result==kEOF)
|
||||
result=0;
|
||||
} //while
|
||||
if(kNoError==result)
|
||||
result=aScanner.Eof();
|
||||
} //while
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -31,6 +31,7 @@
|
||||
#include "CNavDTD.h"
|
||||
#include "prenv.h" //this is here for debug reasons...
|
||||
#include "plstr.h"
|
||||
#include <fstream.h>
|
||||
#ifdef XP_PC
|
||||
#include <direct.h> //this is here for debug reasons...
|
||||
#endif
|
||||
@ -40,11 +41,13 @@ static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID);
|
||||
static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
|
||||
|
||||
static const char* kNullURL = "Error: Null URL given";
|
||||
static const char* kNullFilename= "Error: Null filename given";
|
||||
static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
||||
static const char* kNullToken = "Error: Null token given";
|
||||
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static char* gVerificationOutputDir=0;
|
||||
static int rickGDebug=0;
|
||||
|
||||
/**
|
||||
* This method is defined in nsIParser. It is used to
|
||||
@ -456,92 +459,198 @@ PRBool nsHTMLParser::IterateTokens() {
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This is the main controlling routine in the parsing process.
|
||||
* Note that it may get called multiple times for the same scanner,
|
||||
* since this is a pushed based system, and all the tokens may
|
||||
* not have been consumed by the scanner during a given invocation
|
||||
* of this method.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- const char* containing file to be parsed.
|
||||
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool nsHTMLParser::Parse(nsIURL* aURL){
|
||||
eParseMode theMode=eParseMode_navigator;
|
||||
eParseMode DetermineParseMode() {
|
||||
const char* theModeStr= PR_GetEnv("PARSE_MODE");
|
||||
const char* other="other";
|
||||
eParseMode result=eParseMode_navigator;
|
||||
|
||||
if(theModeStr)
|
||||
if(0==nsCRT::strcasecmp(other,theModeStr))
|
||||
theMode=eParseMode_other;
|
||||
|
||||
return Parse(aURL,theMode);
|
||||
result=eParseMode_other;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This is the main controlling routine in the parsing process.
|
||||
* Note that it may get called multiple times for the same scanner,
|
||||
* since this is a pushed based system, and all the tokens may
|
||||
* not have been consumed by the scanner during a given invocation
|
||||
* of this method.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- const char* containing file to be parsed.
|
||||
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool nsHTMLParser::Parse(nsIURL* aURL,eParseMode aMode){
|
||||
NS_PRECONDITION(0!=aURL,kNullURL);
|
||||
|
||||
PRBool result=PR_FALSE;
|
||||
if(aURL) {
|
||||
/**
 *  Creates the tokenizer delegate that matches the given parse mode
 *  and asks it for its DTD.
 *
 *  Both out args are reset to 0 first, so an unrecognized mode
 *  reports "no delegate, no DTD" rather than whatever values the
 *  caller's variables held before.
 *
 *  @update  gess 5/13/98
 *  @param   aMode is the parse mode to create a delegate for
 *  @param   aDelegate is the out arg receiving the new delegate (or 0)
 *  @param   aDTD is the out arg receiving the delegate's DTD (or 0)
 */
void GetDelegateAndDTD(eParseMode aMode,ITokenizerDelegate*& aDelegate,nsIDTD*& aDTD) {
  aDelegate=0;
  aDTD=0;

  switch(aMode) {
    case eParseMode_navigator:
      aDelegate=new CNavDelegate(); break;
    case eParseMode_other:
      aDelegate=new COtherDelegate(); break;
    default:
      break;
  }
  if(aDelegate)
    aDTD=aDelegate->GetDTD();
}
|
||||
|
||||
result=PR_TRUE;
|
||||
mParseMode=aMode;
|
||||
ITokenizerDelegate* theDelegate=0;
|
||||
|
||||
mDTD=0;
|
||||
switch(mParseMode) {
|
||||
case eParseMode_navigator:
|
||||
theDelegate=new CNavDelegate();
|
||||
if(theDelegate)
|
||||
mDTD=theDelegate->GetDTD();
|
||||
break;
|
||||
case eParseMode_other:
|
||||
theDelegate=new COtherDelegate();
|
||||
if(theDelegate)
|
||||
mDTD=theDelegate->GetDTD();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if(!theDelegate) {
|
||||
NS_ERROR(kNullTokenizer);
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
if(mDTD)
|
||||
mDTD->SetParser(this);
|
||||
mTokenizer=new CTokenizer(aURL, theDelegate, mParseMode);
|
||||
/**
|
||||
* This DEBUG ONLY method is used to simulate a network-based
|
||||
* i/o model where data comes in incrementally.
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param aFilename is the name of the disk file to use for testing.
|
||||
* @return error code (kNoError means ok)
|
||||
*/
|
||||
PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
|
||||
PRInt32 result=kBadFilename;
|
||||
fstream* mFileStream;
|
||||
nsString theBuffer;
|
||||
PRInt32 iter=-1;
|
||||
const int kBufSize=10;
|
||||
|
||||
mFileStream=new fstream(aFilename,ios::in|ios::binary);
|
||||
if(mFileStream) {
|
||||
result=kNoError;
|
||||
while((kNoError==result) || (kInterrupted==result)) {
|
||||
//read some data from the file...
|
||||
|
||||
char buf[kBufSize];
|
||||
buf[kBufSize]=0;
|
||||
|
||||
if(mFileStream) {
|
||||
mFileStream->read(buf,kBufSize);
|
||||
PRInt32 numread=mFileStream->gcount();
|
||||
if(numread>0) {
|
||||
theBuffer.Truncate();
|
||||
theBuffer.Append(buf);
|
||||
mTokenizer->Append(theBuffer);
|
||||
result=ResumeParse(++iter);
|
||||
}
|
||||
}
|
||||
|
||||
mSink->WillBuildModel();
|
||||
#ifdef __INCREMENTAL
|
||||
int iter=-1;
|
||||
for(;;){
|
||||
mSink->WillResume();
|
||||
mTokenizer->TokenizeAvailable(++iter);
|
||||
mSink->WillInterrupt();
|
||||
}
|
||||
#else
|
||||
mTokenizer->Tokenize();
|
||||
#endif
|
||||
result=IterateTokens();
|
||||
mSink->DidBuildModel();
|
||||
mFileStream->close();
|
||||
delete mFileStream;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the main controlling routine in the parsing process.
|
||||
* Note that it may get called multiple times for the same scanner,
|
||||
* since this is a pushed based system, and all the tokens may
|
||||
* not have been consumed by the scanner during a given invocation
|
||||
* of this method.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- const char* containing file to be parsed.
|
||||
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
||||
*/
|
||||
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
|
||||
NS_PRECONDITION(0!=aFilename,kNullFilename);
|
||||
|
||||
PRInt32 status=kBadFilename;
|
||||
|
||||
mIncremental=aIncremental;
|
||||
mParseMode=DetermineParseMode();
|
||||
|
||||
if(aFilename) {
|
||||
|
||||
GetDelegateAndDTD(mParseMode,mDelegate,mDTD);
|
||||
if(mDelegate) {
|
||||
|
||||
if(mDTD)
|
||||
mDTD->SetParser(this);
|
||||
|
||||
mSink->WillBuildModel();
|
||||
|
||||
//ok, time to create our tokenizer and begin the process
|
||||
if(aIncremental) {
|
||||
mTokenizer=new CTokenizer(mDelegate,mParseMode);
|
||||
status=ParseFileIncrementally(aFilename);
|
||||
}
|
||||
else {
|
||||
//ok, time to create our tokenizer and begin the process
|
||||
mTokenizer=new CTokenizer(aFilename,mDelegate,mParseMode);
|
||||
status=ResumeParse(0);
|
||||
}
|
||||
mSink->DidBuildModel();
|
||||
}//if
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the main controlling routine in the parsing process.
|
||||
* Note that it may get called multiple times for the same scanner,
|
||||
* since this is a pushed based system, and all the tokens may
|
||||
* not have been consumed by the scanner during a given invocation
|
||||
* of this method.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- const char* containing file to be parsed.
|
||||
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
||||
*/
|
||||
PRInt32 nsHTMLParser::Parse(nsIURL* aURL,PRBool aIncremental ){
|
||||
NS_PRECONDITION(0!=aURL,kNullURL);
|
||||
|
||||
PRInt32 status=kBadURL;
|
||||
|
||||
if(rickGDebug)
|
||||
status=Parse("c:/temp/temp.html",PR_TRUE);
|
||||
|
||||
mIncremental=aIncremental;
|
||||
mParseMode=DetermineParseMode();
|
||||
|
||||
if(aURL) {
|
||||
|
||||
GetDelegateAndDTD(mParseMode,mDelegate,mDTD);
|
||||
if(mDelegate) {
|
||||
|
||||
if(mDTD)
|
||||
mDTD->SetParser(this);
|
||||
|
||||
mSink->WillBuildModel();
|
||||
|
||||
//ok, time to create our tokenizer and begin the process
|
||||
if(aIncremental) {
|
||||
mTokenizer=new CTokenizer(mDelegate,mParseMode);
|
||||
status=aURL->Open(this);
|
||||
}
|
||||
else {
|
||||
mTokenizer=new CTokenizer(aURL,mDelegate,mParseMode);
|
||||
status=ResumeParse(0);
|
||||
mSink->DidBuildModel();
|
||||
}
|
||||
}//if
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* Call this method if all you want to do is parse 1 string full of HTML text.
|
||||
*
|
||||
* @update gess5/11/98
|
||||
* @param anHTMLString contains a string-full of real HTML
|
||||
* @param appendTokens tells us whether we should insert tokens inline, or append them.
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
PRInt32 nsHTMLParser::Parse(nsString& aSourceBuffer,PRBool appendTokens){
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
mSink->WillBuildModel();
|
||||
mTokenizer->Append(aSourceBuffer);
|
||||
result=ResumeParse(0);
|
||||
mSink->DidBuildModel();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This routine is called to cause the parser to continue
|
||||
@ -553,17 +662,21 @@ PRBool nsHTMLParser::Parse(nsIURL* aURL,eParseMode aMode){
|
||||
* @param
|
||||
* @return PR_TRUE if parsing concluded successfully.
|
||||
*/
|
||||
PRBool nsHTMLParser::ResumeParse() {
|
||||
PRInt32 nsHTMLParser::ResumeParse(PRInt32 anIteration) {
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
mSink->WillResume();
|
||||
int iter=0;
|
||||
PRInt32 errcode=mTokenizer->TokenizeAvailable(iter);
|
||||
if(kInterrupted==errcode)
|
||||
mSink->WillInterrupt();
|
||||
PRBool result=IterateTokens();
|
||||
if(kNoError==result) {
|
||||
result=mTokenizer->Tokenize(anIteration);
|
||||
if(kInterrupted==result)
|
||||
mSink->WillInterrupt();
|
||||
|
||||
if(!rickGDebug)
|
||||
IterateTokens();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess4/22/98
|
||||
@ -1388,5 +1501,64 @@ PRBool nsHTMLParser::ReduceContextStackFor(PRInt32 aChildTag){
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsresult nsHTMLParser::GetBindInfo(void){
|
||||
nsresult result=0;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsresult nsHTMLParser::OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const char *msg){
|
||||
nsresult result=0;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsresult nsHTMLParser::OnStartBinding(void){
|
||||
nsresult result=0;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length){
|
||||
nsresult result=0;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsresult nsHTMLParser::OnStopBinding(void){
|
||||
nsresult result=0;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -61,6 +61,9 @@
|
||||
#include "nsParserNode.h"
|
||||
#include "nsTokenHandler.h"
|
||||
#include "nsParserTypes.h"
|
||||
#include "nsIURL.h"
|
||||
#include "nsIStreamListener.h"
|
||||
#include "nsITokenizerDelegate.h"
|
||||
|
||||
|
||||
#define NS_IHTML_PARSER_IID \
|
||||
@ -75,7 +78,7 @@ class nsIURL;
|
||||
class nsIDTD;
|
||||
|
||||
|
||||
class nsHTMLParser : public nsIParser {
|
||||
class nsHTMLParser : public nsIParser, public nsIStreamListener {
|
||||
|
||||
public:
|
||||
friend class CTokenHandler;
|
||||
@ -104,14 +107,6 @@ friend class CTokenHandler;
|
||||
*/
|
||||
virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);
|
||||
|
||||
/**
|
||||
* Cause parser to parse input from given URL
|
||||
* @update gess5/11/98
|
||||
* @param aURL is a descriptor for source document
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRBool Parse(nsIURL* aURL);
|
||||
|
||||
/**
|
||||
* Cause parser to parse input from given URL in given mode
|
||||
* @update gess5/11/98
|
||||
@ -119,14 +114,31 @@ friend class CTokenHandler;
|
||||
* @param aMode is the desired parser mode (Nav, other, etc.)
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRBool Parse(nsIURL* aURL,eParseMode aMode);
|
||||
virtual PRInt32 Parse(nsIURL* aURL,PRBool aIncremental=PR_FALSE);
|
||||
|
||||
/**
|
||||
* Cause parser to parse input from given file in given mode
|
||||
* @update gess5/11/98
|
||||
* @param aFilename is a path for file document
|
||||
* @param aIncremental is PR_TRUE to parse the file incrementally
|
||||
* @return PRInt32 status code (kBadFilename if parsing never started)
|
||||
*/
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental);
|
||||
|
||||
/**
|
||||
* @update gess5/11/98
|
||||
* @param anHTMLString contains a string-full of real HTML
|
||||
* @param appendTokens tells us whether we should insert tokens inline, or append them.
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens);
|
||||
|
||||
/**
|
||||
* This method gets called (automatically) during incremental parsing
|
||||
* @update gess5/11/98
|
||||
* @return TRUE if all went well, otherwise FALSE
|
||||
*/
|
||||
virtual PRBool ResumeParse();
|
||||
virtual PRInt32 ResumeParse(PRInt32 anIteration);
|
||||
|
||||
/**
|
||||
* Retrieve ptr to internal context vector stack
|
||||
@ -230,6 +242,15 @@ friend class CTokenHandler;
|
||||
*/
|
||||
PRBool HandleStyleToken(CToken* aToken);
|
||||
|
||||
//*********************************************
|
||||
// These methods are callback methods used by
|
||||
// net lib to let us know about our inputstream.
|
||||
//*********************************************
|
||||
NS_IMETHOD GetBindInfo(void);
|
||||
NS_IMETHOD OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const char *msg);
|
||||
NS_IMETHOD OnStartBinding(void);
|
||||
NS_IMETHOD OnDataAvailable(nsIInputStream *pIStream, PRInt32 length);
|
||||
NS_IMETHOD OnStopBinding(void);
|
||||
|
||||
protected:
|
||||
|
||||
@ -485,6 +506,10 @@ protected:
|
||||
*/
|
||||
PRBool CreateContextStackFor(PRInt32 aChildTag);
|
||||
|
||||
private:
|
||||
PRInt32 ParseFileIncrementally(const char* aFilename); //XXX ONLY FOR DEBUG PURPOSES...
|
||||
|
||||
protected:
|
||||
//*********************************************
|
||||
// And now, some data members...
|
||||
//*********************************************
|
||||
@ -502,6 +527,8 @@ protected:
|
||||
nsIDTD* mDTD;
|
||||
eParseMode mParseMode;
|
||||
PRBool mHasOpenForm;
|
||||
PRBool mIncremental;
|
||||
ITokenizerDelegate* mDelegate;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -40,6 +40,7 @@
|
||||
|
||||
|
||||
class nsIContentSink;
|
||||
class nsString;
|
||||
|
||||
/**
|
||||
* This class defines the iparser interface. This XPCOM
|
||||
@ -52,10 +53,14 @@ class nsIParser : public nsISupports {
|
||||
public:
|
||||
|
||||
virtual nsIContentSink* SetContentSink(nsIContentSink* aContentSink)=0;
|
||||
virtual PRBool Parse(nsIURL* aURL)=0;
|
||||
virtual PRBool ResumeParse()=0;
|
||||
virtual PRInt32 GetStack(PRInt32* aStackPtr)=0;
|
||||
virtual PRBool HasOpenContainer(PRInt32 aContainer) const=0;
|
||||
|
||||
virtual PRInt32 Parse(nsIURL* aURL,PRBool aIncremental=PR_FALSE)=0;
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0;
|
||||
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;
|
||||
|
||||
virtual PRInt32 ResumeParse(PRInt32 anIterator)=0;
|
||||
virtual PRInt32 GetStack(PRInt32* aStackPtr)=0;
|
||||
virtual PRBool HasOpenContainer(PRInt32 aContainer) const=0;
|
||||
};
|
||||
|
||||
extern NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult);
|
||||
|
||||
@ -37,13 +37,16 @@ enum eParseMode {
|
||||
|
||||
eParseMode_unknown=0,
|
||||
eParseMode_navigator,
|
||||
eParseMode_other
|
||||
eParseMode_other,
|
||||
eParseMode_autodetect
|
||||
};
|
||||
|
||||
const PRInt32 kEOF = 1000000L;
|
||||
const PRInt32 kBadFilename = -4;
|
||||
const PRInt32 kBadURL = -3;
|
||||
const PRInt32 kInterrupted = -2;
|
||||
const PRInt32 kNotFound = -1;
|
||||
const PRInt32 kNoError = 0;
|
||||
const PRInt32 kInterrupted = 2;
|
||||
|
||||
const PRUint32 kNewLine = '\n';
|
||||
const PRUint32 kCR = '\r';
|
||||
|
||||
@ -22,9 +22,8 @@
|
||||
#include "nsIURL.h"
|
||||
#include "nsDebug.h"
|
||||
|
||||
const char* gURLRef;
|
||||
const char* kBadHTMLText1="<HTML><BODY><H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
|
||||
const char* kBadHTMLText2="</BODY></HTML>";
|
||||
const char* gURLRef=0;
|
||||
const char* kBadHTMLText="<H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
|
||||
|
||||
#ifdef __INCREMENTAL
|
||||
const int kBufsize=1;
|
||||
@ -33,31 +32,63 @@ const int kBufsize=64;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aURL -- pointer to URL to be loaded
|
||||
* Use this constructor if you want an incremental (callback)
|
||||
* based input stream.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
CScanner::CScanner(eParseMode aMode) : mBuffer("") {
|
||||
mOffset=0;
|
||||
mMarkPos=-1;
|
||||
mTotalRead=0;
|
||||
mParseMode=aMode;
|
||||
mNetStream=0;
|
||||
mFileStream=0;
|
||||
mIncremental=PR_TRUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be file based.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
CScanner::CScanner(const char* aFilename,eParseMode aMode) : mBuffer("") {
|
||||
NS_ASSERTION(0!=aFilename,"Error: Null filename!");
|
||||
mOffset=0;
|
||||
mMarkPos=-1;
|
||||
mTotalRead=0;
|
||||
mParseMode=aMode;
|
||||
mNetStream=0;
|
||||
mIncremental=PR_FALSE;
|
||||
mFileStream=new fstream(aFilename,ios::in|ios::binary);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be based on a
|
||||
* non-incremental netstream.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
CScanner::CScanner(nsIURL* aURL,eParseMode aMode) : mBuffer("") {
|
||||
NS_ASSERTION(0!=aURL,"Error: Null URL!");
|
||||
mOffset=0;
|
||||
mStream=0;
|
||||
mMarkPos=-1;
|
||||
mTotalRead=0;
|
||||
mParseMode=aMode;
|
||||
if(aURL) {
|
||||
|
||||
gURLRef=aURL->GetSpec();
|
||||
|
||||
#ifdef __INCREMENTAL
|
||||
mStream=new fstream("c:/temp/temp.html",ios::in|ios::binary);
|
||||
#else
|
||||
int error;
|
||||
mStream=aURL->Open(&error);
|
||||
#endif
|
||||
}
|
||||
mFileStream=0;
|
||||
PRInt32 error=0;
|
||||
mIncremental=PR_FALSE;
|
||||
mNetStream=aURL->Open(&error);
|
||||
gURLRef=aURL->GetSpec();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* default destructor
|
||||
*
|
||||
@ -66,19 +97,107 @@ CScanner::CScanner(nsIURL* aURL,eParseMode aMode) : mBuffer("") {
|
||||
* @return
|
||||
*/
|
||||
CScanner::~CScanner() {
|
||||
#ifdef __INCREMENTAL
|
||||
mStream->close();
|
||||
delete mStream;
|
||||
mStream=0;
|
||||
#else
|
||||
if(mStream) {
|
||||
mStream->Close();
|
||||
mStream->Release();
|
||||
mStream=0;
|
||||
if(mFileStream) {
|
||||
mFileStream->close();
|
||||
delete mFileStream;
|
||||
}
|
||||
#endif
|
||||
else if(mNetStream) {
|
||||
mNetStream->Close();
|
||||
mNetStream->Release();
|
||||
}
|
||||
mFileStream=0;
|
||||
mNetStream=0;
|
||||
gURLRef=0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets current offset position of input stream to marked position.
|
||||
* This allows us to back up to this point if the need should arise,
|
||||
* such as when tokenization gets interrupted.
|
||||
* NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 CScanner::RewindToMark(void){
|
||||
mOffset=mMarkPos;
|
||||
return mOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Records current offset position in input stream. This allows us
|
||||
* to back up to this point if the need should arise, such as when
|
||||
* tokenization gets interrupted.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 CScanner::Mark(void){
|
||||
mMarkPos=mOffset;
|
||||
return mMarkPos;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
*/
|
||||
void _PreCompressBuffer(nsString& aBuffer,PRInt32& anOffset,PRInt32& aMarkPos){
|
||||
//To determine how much of our internal buffer to truncate,
|
||||
//we should check mMarkPos. That represents the point at which
|
||||
//we've guaranteed the client we can back up to, so make sure
|
||||
//you don't lose any of the data beyond that point.
|
||||
if((anOffset!=aMarkPos) && (0<=aMarkPos)) {
|
||||
if(aMarkPos>0) {
|
||||
aBuffer.Cut(0,aMarkPos);
|
||||
if(anOffset>aMarkPos)
|
||||
anOffset-=aMarkPos;
|
||||
}
|
||||
}
|
||||
else aBuffer.Truncate();
|
||||
aMarkPos=0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This method should only be called by the parser when
|
||||
* we're doing incremental i/o over the net.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aBuffer contains next blob of i/o data
|
||||
* @param aSize contains size of buffer
|
||||
* @return 0 if all went well, otherwise error code.
|
||||
*/
|
||||
PRInt32 CScanner::IncrementalAppend(const char* aBuffer,PRInt32 aSize){
|
||||
NS_ASSERTION(((!mFileStream) && (!mNetStream)),"Error: Should only be called during incremental net i/o!");
|
||||
|
||||
PRInt32 result=0;
|
||||
if((!mFileStream) && (!mNetStream)) {
|
||||
|
||||
_PreCompressBuffer(mBuffer,mOffset,mMarkPos);
|
||||
|
||||
//now that the buffer is (possibly) shortened, let's append the new data.
|
||||
if(0<aSize) {
|
||||
mBuffer.Append(aBuffer,aSize);
|
||||
mTotalRead+=aSize;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Grab data from underlying stream.
|
||||
*
|
||||
* @update gess4/3/98
|
||||
* @return error code
|
||||
*/
|
||||
PRBool CScanner::Append(nsString& aBuffer) {
|
||||
_PreCompressBuffer(mBuffer,mOffset,mMarkPos);
|
||||
mBuffer.Append(aBuffer);
|
||||
return PR_TRUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Grab data from underlying stream.
|
||||
@ -89,55 +208,63 @@ CScanner::~CScanner() {
|
||||
PRInt32 CScanner::FillBuffer(void) {
|
||||
PRInt32 anError=0;
|
||||
|
||||
mBuffer.Truncate();
|
||||
if(!mStream) {
|
||||
_PreCompressBuffer(mBuffer,mOffset,mMarkPos);
|
||||
|
||||
if((!mIncremental) && (!mNetStream) && (!mFileStream)) {
|
||||
//This is DEBUG code!!!!!! XXX DEBUG XXX
|
||||
//If you're here, it means someone tried to load a
|
||||
//non-existent document. So as a favor, we emit a
|
||||
//little bit of HTML explaining the error.
|
||||
if(0==mTotalRead) {
|
||||
mBuffer.Append((const char*)kBadHTMLText1);
|
||||
mBuffer.Append((const char*)kBadHTMLText);
|
||||
mBuffer.Append((const char*)gURLRef);
|
||||
mBuffer.Append((const char*)kBadHTMLText2);
|
||||
}
|
||||
else return 0;
|
||||
}
|
||||
else {
|
||||
else if(!mIncremental) {
|
||||
PRInt32 numread=0;
|
||||
char buf[kBufsize+1];
|
||||
buf[kBufsize]=0;
|
||||
|
||||
#ifdef __INCREMENTAL
|
||||
mStream->read(buf,kBufsize);
|
||||
numread=mStream->gcount();
|
||||
#else
|
||||
numread=mStream->Read(&anError,buf,0,kBufsize);
|
||||
#endif
|
||||
if(mFileStream) {
|
||||
mFileStream->read(buf,kBufsize);
|
||||
numread=mFileStream->gcount();
|
||||
}
|
||||
else if(mNetStream) {
|
||||
numread=mNetStream->Read(&anError,buf,0,kBufsize);
|
||||
if(1==anError)
|
||||
anError=kEOF;
|
||||
}
|
||||
mOffset=mBuffer.Length();
|
||||
if((0<numread) && (0==anError))
|
||||
mBuffer.Append((const char*)buf,numread);
|
||||
mTotalRead+=mBuffer.Length();
|
||||
}
|
||||
mTotalRead+=mBuffer.Length();
|
||||
else anError=kInterrupted;
|
||||
|
||||
return anError;
|
||||
}
|
||||
|
||||
/**
|
||||
* determine if the scanner has reached EOF
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return PR_TRUE upon eof condition
|
||||
* @return 0=!eof 1=eof kInterrupted=interrupted
|
||||
*/
|
||||
PRBool CScanner::Eof() {
|
||||
PRInt32 CScanner::Eof() {
|
||||
PRInt32 theError=0;
|
||||
|
||||
if(mOffset>=mBuffer.Length()) {
|
||||
theError=FillBuffer();
|
||||
mOffset=0;
|
||||
if(!mIncremental)
|
||||
theError=FillBuffer();
|
||||
else return kInterrupted;
|
||||
}
|
||||
PRBool result=PR_TRUE;
|
||||
if(0==theError) {
|
||||
result=PRBool(0==mBuffer.Length());
|
||||
}
|
||||
return result;
|
||||
|
||||
if(0==theError)
|
||||
return (0==mBuffer.Length());
|
||||
|
||||
return theError;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -148,11 +275,12 @@ PRBool CScanner::Eof() {
|
||||
* @return error code reflecting read status
|
||||
*/
|
||||
PRInt32 CScanner::GetChar(PRUnichar& aChar) {
|
||||
if(!Eof()) {
|
||||
PRInt32 result=Eof();
|
||||
if(!result) {
|
||||
aChar=mBuffer[mOffset++];
|
||||
return kNoError;
|
||||
result=kNoError;
|
||||
}
|
||||
return kEOF;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -165,11 +293,12 @@ PRInt32 CScanner::GetChar(PRUnichar& aChar) {
|
||||
* @return
|
||||
*/
|
||||
PRInt32 CScanner::Peek(PRUnichar& aChar){
|
||||
if(!Eof()) {
|
||||
PRInt32 result=Eof();
|
||||
if(!result) {
|
||||
aChar=mBuffer[mOffset];
|
||||
return kNoError;
|
||||
result=kNoError;
|
||||
}
|
||||
return kEOF;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -181,7 +310,9 @@ PRInt32 CScanner::Peek(PRUnichar& aChar){
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 CScanner::PutBack(PRUnichar aChar) {
|
||||
mOffset--;
|
||||
if(mOffset>0)
|
||||
mOffset--;
|
||||
else mBuffer.Insert(aChar,0);
|
||||
return kNoError;
|
||||
}
|
||||
|
||||
@ -301,8 +432,8 @@ PRInt32 CScanner::ReadUntil(nsString& aString,nsString& aTerminalSet,PRBool addT
|
||||
PRUnichar ch=0;
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
while(!Eof()) {
|
||||
result=GetChar(ch);
|
||||
while(!result) {
|
||||
result=GetChar(ch);
|
||||
if(kNoError==result) {
|
||||
PRInt32 pos=aTerminalSet.Find(ch);
|
||||
if(kNotFound!=pos) {
|
||||
|
||||
@ -43,7 +43,36 @@ class ifstream;
|
||||
|
||||
class CScanner {
|
||||
public:
|
||||
|
||||
/**
|
||||
* Use this constructor if you want an incremental (callback)
|
||||
* based input stream.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
CScanner(eParseMode aMode=eParseMode_navigator);
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be based on a
|
||||
* non-incremental netstream.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
CScanner(nsIURL* aURL,eParseMode aMode=eParseMode_navigator);
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be file based.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
CScanner(const char* aFilename,eParseMode aMode=eParseMode_navigator);
|
||||
|
||||
~CScanner();
|
||||
|
||||
/**
|
||||
@ -117,7 +146,7 @@ class CScanner {
|
||||
* @update gess 3/25/98
|
||||
* @return PR_TRUE upon eof condition
|
||||
*/
|
||||
PRBool Eof(void);
|
||||
PRInt32 Eof(void);
|
||||
|
||||
/**
|
||||
* Consume characters until you find the terminal char
|
||||
@ -153,6 +182,48 @@ class CScanner {
|
||||
*/
|
||||
PRInt32 ReadWhile(nsString& aString,nsString& anInputSet,PRBool addTerminal);
|
||||
|
||||
/**
|
||||
* Records current offset position in input stream. This allows us
|
||||
* to back up to this point if the need should arise, such as when
|
||||
* tokenization gets interrupted.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 Mark(void);
|
||||
|
||||
/**
|
||||
* Resets current offset position of input stream to marked position.
|
||||
* This allows us to back up to this point if the need should arise,
|
||||
* such as when tokenization gets interrupted.
|
||||
* NOTE: IT IS REALLY BAD FORM TO CALL REWINDTOMARK WITHOUT CALLING MARK FIRST!
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 RewindToMark(void);
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool Append(nsString& aBuffer);
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRInt32 IncrementalAppend(const char* aBuffer,PRInt32 aSize);
|
||||
|
||||
static void SelfTest();
|
||||
|
||||
protected:
|
||||
@ -167,15 +238,15 @@ class CScanner {
|
||||
*/
|
||||
PRInt32 FillBuffer(void);
|
||||
|
||||
#ifdef __INCREMENTAL
|
||||
fstream* mStream;
|
||||
#else
|
||||
nsIInputStream* mStream;
|
||||
#endif
|
||||
|
||||
fstream* mFileStream;
|
||||
nsIInputStream* mNetStream;
|
||||
nsString mBuffer;
|
||||
PRInt32 mOffset;
|
||||
PRInt32 mMarkPos;
|
||||
PRInt32 mTotalRead;
|
||||
eParseMode mParseMode;
|
||||
PRBool mIncremental;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@ -39,6 +39,35 @@ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMo
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode) :
|
||||
mTokenDeque() {
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aFilename,aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode) :
|
||||
mTokenDeque() {
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* default destructor
|
||||
@ -54,6 +83,19 @@ CTokenizer::~CTokenizer() {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool CTokenizer::Append(nsString& aBuffer) {
|
||||
if(mScanner)
|
||||
return mScanner->Append(aBuffer);
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve a reference to the internal token deque.
|
||||
*
|
||||
@ -105,31 +147,31 @@ PRBool CTokenizer::WillTokenize(PRBool aIncremental){
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the primary control routine. It iteratively
|
||||
* consumes tokens until an error occurs or you run out
|
||||
* of data.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return error code
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 CTokenizer::Tokenize(void) {
|
||||
PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){
|
||||
CToken* theToken=0;
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
WillTokenize(PR_TRUE);
|
||||
|
||||
while(kNoError==result) {
|
||||
result=GetToken(theToken);
|
||||
if(theToken && (kNoError==result)) {
|
||||
|
||||
if(WillTokenize(PR_FALSE)) {
|
||||
do {
|
||||
result=GetToken(theToken);
|
||||
if(theToken) {
|
||||
#ifdef VERBOSE_DEBUG
|
||||
theToken->DebugDumpToken(cout);
|
||||
#endif
|
||||
if(mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
if(mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
} while(0!=theToken);
|
||||
result=DidTokenize(PR_FALSE);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(kEOF==result)
|
||||
result=kNoError;
|
||||
DidTokenize(PR_TRUE);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -141,20 +183,33 @@ PRInt32 CTokenizer::Tokenize(void) {
|
||||
* @update gess 3/25/98
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 CTokenizer::TokenizeAvailable(int anIteration) {
|
||||
PRInt32 CTokenizer::Tokenize(int anIteration) {
|
||||
CToken* theToken=0;
|
||||
PRInt32 result=kNoError;
|
||||
PRBool done=(0==anIteration) ? (!WillTokenize(PR_TRUE)) : PR_FALSE;
|
||||
|
||||
|
||||
while((PR_FALSE==done) && (kInterrupted!=kInterrupted)) {
|
||||
while((PR_FALSE==done) && (kNoError==result)) {
|
||||
mScanner->Mark();
|
||||
result=GetToken(theToken);
|
||||
if(theToken) {
|
||||
if(mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
if(kNoError==result) {
|
||||
if(theToken) {
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
theToken->DebugDumpToken(cout);
|
||||
#endif
|
||||
|
||||
if(mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
if(theToken)
|
||||
delete theToken;
|
||||
mScanner->RewindToMark();
|
||||
}
|
||||
else done=PR_TRUE;
|
||||
}
|
||||
if((PR_TRUE==done) && (kInterrupted!=result))
|
||||
DidTokenize(PR_TRUE);
|
||||
|
||||
@ -50,19 +50,13 @@ class nsIURL;
|
||||
|
||||
class CTokenizer {
|
||||
public:
|
||||
CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode);
|
||||
|
||||
CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator);
|
||||
CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator);
|
||||
CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator);
|
||||
|
||||
~CTokenizer();
|
||||
|
||||
/**
|
||||
* This control routine causes the entire stream to be
|
||||
* tokenized. You probably want to call TokenizeAvailable()
|
||||
* instead (for incremental tokenization).
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 Tokenize(void);
|
||||
|
||||
/**
|
||||
* This method incrementally tokenizes as much content as
|
||||
* it can get its hands on.
|
||||
@ -70,7 +64,14 @@ class CTokenizer {
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 TokenizeAvailable(int anIteration); //your friendly incremental version
|
||||
PRInt32 Tokenize(int anIteration); //your friendly incremental version
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens=PR_TRUE);
|
||||
|
||||
/**
|
||||
* Cause the tokenizer to consume the next token, and
|
||||
@ -98,6 +99,23 @@ class CTokenizer {
|
||||
*/
|
||||
nsDeque& GetDeque(void);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 4/20/98
|
||||
* @return PR_TRUE if the buffer was passed to the scanner, PR_FALSE if there is no scanner
|
||||
*/
|
||||
PRBool Append(nsString& aBuffer);
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool SetBuffer(nsString& aBuffer);
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user