WIP for push based tokenization

git-svn-id: svn://10.0.0.236/trunk@1683 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
rickg
1998-05-14 22:19:08 +00:00
parent affe63fab7
commit 4e262a1fe0
18 changed files with 1630 additions and 590 deletions

View File

@@ -31,6 +31,7 @@
#include "CNavDTD.h"
#include "prenv.h" //this is here for debug reasons...
#include "plstr.h"
#include <fstream.h>
#ifdef XP_PC
#include <direct.h> //this is here for debug reasons...
#endif
@@ -40,11 +41,13 @@ static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID);
static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
static const char* kNullURL = "Error: Null URL given";
static const char* kNullFilename= "Error: Null filename given";
static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
static const char* kNullToken = "Error: Null token given";
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
static char* gVerificationOutputDir=0;
static char* gVerificationOutputDir=0;
static int rickGDebug=0;
/**
* This method is defined in nsIParser. It is used to
@@ -456,92 +459,198 @@ PRBool nsHTMLParser::IterateTokens() {
return result;
}
/**
* This is the main controlling routine in the parsing process.
* Note that it may get called multiple times for the same scanner,
* since this is a pushed based system, and all the tokens may
* not have been consumed by the scanner during a given invocation
* of this method.
*
* @update gess 3/25/98
* @param aFilename -- const char* containing file to be parsed.
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
*
*
* @update gess 5/13/98
* @param
* @return
*/
PRBool nsHTMLParser::Parse(nsIURL* aURL){
eParseMode theMode=eParseMode_navigator;
eParseMode DetermineParseMode() {
const char* theModeStr= PR_GetEnv("PARSE_MODE");
const char* other="other";
eParseMode result=eParseMode_navigator;
if(theModeStr)
if(0==nsCRT::strcasecmp(other,theModeStr))
theMode=eParseMode_other;
return Parse(aURL,theMode);
result=eParseMode_other;
return result;
}
/**
* This is the main controlling routine in the parsing process.
* Note that it may get called multiple times for the same scanner,
* since this is a pushed based system, and all the tokens may
* not have been consumed by the scanner during a given invocation
* of this method.
*
* @update gess 3/25/98
* @param aFilename -- const char* containing file to be parsed.
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
*
*
* @update gess 5/13/98
* @param
* @return
*/
PRBool nsHTMLParser::Parse(nsIURL* aURL,eParseMode aMode){
NS_PRECONDITION(0!=aURL,kNullURL);
PRBool result=PR_FALSE;
if(aURL) {
void GetDelegateAndDTD(eParseMode aMode,ITokenizerDelegate*& aDelegate,nsIDTD*& aDTD) {
switch(aMode) {
case eParseMode_navigator:
aDelegate=new CNavDelegate(); break;
case eParseMode_other:
aDelegate=new COtherDelegate(); break;
default:
break;
}
if(aDelegate)
aDTD=aDelegate->GetDTD();
}
result=PR_TRUE;
mParseMode=aMode;
ITokenizerDelegate* theDelegate=0;
mDTD=0;
switch(mParseMode) {
case eParseMode_navigator:
theDelegate=new CNavDelegate();
if(theDelegate)
mDTD=theDelegate->GetDTD();
break;
case eParseMode_other:
theDelegate=new COtherDelegate();
if(theDelegate)
mDTD=theDelegate->GetDTD();
break;
default:
break;
}
if(!theDelegate) {
NS_ERROR(kNullTokenizer);
return PR_FALSE;
}
if(mDTD)
mDTD->SetParser(this);
mTokenizer=new CTokenizer(aURL, theDelegate, mParseMode);
/**
* This DEBUG ONLY method is used to simulate a network-based
* i/o model where data comes in incrementally.
*
* @update gess 5/13/98
* @param aFilename is the name of the disk file to use for testing.
* @return error code (kNoError means ok)
*/
PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
PRInt32 result=kBadFilename;
fstream* mFileStream;
nsString theBuffer;
PRInt32 iter=-1;
const int kBufSize=10;
mFileStream=new fstream(aFilename,ios::in|ios::binary);
if(mFileStream) {
result=kNoError;
while((kNoError==result) || (kInterrupted==result)) {
//read some data from the file...
char buf[kBufSize];
buf[kBufSize]=0;
if(mFileStream) {
mFileStream->read(buf,kBufSize);
PRInt32 numread=mFileStream->gcount();
if(numread>0) {
theBuffer.Truncate();
theBuffer.Append(buf);
mTokenizer->Append(theBuffer);
result=ResumeParse(++iter);
}
}
mSink->WillBuildModel();
#ifdef __INCREMENTAL
int iter=-1;
for(;;){
mSink->WillResume();
mTokenizer->TokenizeAvailable(++iter);
mSink->WillInterrupt();
}
#else
mTokenizer->Tokenize();
#endif
result=IterateTokens();
mSink->DidBuildModel();
mFileStream->close();
delete mFileStream;
}
return result;
}
/**
* This is the main controlling routine in the parsing process.
* Note that it may get called multiple times for the same scanner,
* since this is a pushed based system, and all the tokens may
* not have been consumed by the scanner during a given invocation
* of this method.
*
* @update gess 3/25/98
* @param aFilename -- const char* containing file to be parsed.
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
*/
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
NS_PRECONDITION(0!=aFilename,kNullFilename);
PRInt32 status=kBadFilename;
mIncremental=aIncremental;
mParseMode=DetermineParseMode();
if(aFilename) {
GetDelegateAndDTD(mParseMode,mDelegate,mDTD);
if(mDelegate) {
if(mDTD)
mDTD->SetParser(this);
mSink->WillBuildModel();
//ok, time to create our tokenizer and begin the process
if(aIncremental) {
mTokenizer=new CTokenizer(mDelegate,mParseMode);
status=ParseFileIncrementally(aFilename);
}
else {
//ok, time to create our tokenizer and begin the process
mTokenizer=new CTokenizer(aFilename,mDelegate,mParseMode);
status=ResumeParse(0);
}
mSink->DidBuildModel();
}//if
}
return status;
}
/**
* This is the main controlling routine in the parsing process.
* Note that it may get called multiple times for the same scanner,
* since this is a pushed based system, and all the tokens may
* not have been consumed by the scanner during a given invocation
* of this method.
*
* @update gess 3/25/98
* @param aFilename -- const char* containing file to be parsed.
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
*/
PRInt32 nsHTMLParser::Parse(nsIURL* aURL,PRBool aIncremental ){
NS_PRECONDITION(0!=aURL,kNullURL);
PRInt32 status=kBadURL;
if(rickGDebug)
status=Parse("c:/temp/temp.html",PR_TRUE);
mIncremental=aIncremental;
mParseMode=DetermineParseMode();
if(aURL) {
GetDelegateAndDTD(mParseMode,mDelegate,mDTD);
if(mDelegate) {
if(mDTD)
mDTD->SetParser(this);
mSink->WillBuildModel();
//ok, time to create our tokenizer and begin the process
if(aIncremental) {
mTokenizer=new CTokenizer(mDelegate,mParseMode);
status=aURL->Open(this);
}
else {
mTokenizer=new CTokenizer(aURL,mDelegate,mParseMode);
status=ResumeParse(0);
mSink->DidBuildModel();
}
}//if
}
return status;
}
/**
* Call this method if all you want to do is parse 1 string full of HTML text.
*
* @update gess5/11/98
* @param anHTMLString contains a string-full of real HTML
* @param appendTokens tells us whether we should insert tokens inline, or append them.
* @return TRUE if all went well -- FALSE otherwise
*/
PRInt32 nsHTMLParser::Parse(nsString& aSourceBuffer,PRBool appendTokens){
PRInt32 result=kNoError;
mSink->WillBuildModel();
mTokenizer->Append(aSourceBuffer);
result=ResumeParse(0);
mSink->DidBuildModel();
return result;
}
/**
* This routine is called to cause the parser to continue
@@ -553,17 +662,21 @@ PRBool nsHTMLParser::Parse(nsIURL* aURL,eParseMode aMode){
* @param
* @return PR_TRUE if parsing concluded successfully.
*/
PRBool nsHTMLParser::ResumeParse() {
PRInt32 nsHTMLParser::ResumeParse(PRInt32 anIteration) {
PRInt32 result=kNoError;
mSink->WillResume();
int iter=0;
PRInt32 errcode=mTokenizer->TokenizeAvailable(iter);
if(kInterrupted==errcode)
mSink->WillInterrupt();
PRBool result=IterateTokens();
if(kNoError==result) {
result=mTokenizer->Tokenize(anIteration);
if(kInterrupted==result)
mSink->WillInterrupt();
if(!rickGDebug)
IterateTokens();
}
return result;
}
/**
*
* @update gess4/22/98
@@ -1388,5 +1501,64 @@ PRBool nsHTMLParser::ReduceContextStackFor(PRInt32 aChildTag){
}
/**
*
*
* @update gess 5/12/98
* @param
* @return
*/
nsresult nsHTMLParser::GetBindInfo(void){
nsresult result=0;
return result;
}
/**
*
*
* @update gess 5/12/98
* @param
* @return
*/
nsresult nsHTMLParser::OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const char *msg){
nsresult result=0;
return result;
}
/**
*
*
* @update gess 5/12/98
* @param
* @return
*/
nsresult nsHTMLParser::OnStartBinding(void){
nsresult result=0;
return result;
}
/**
*
*
* @update gess 5/12/98
* @param
* @return
*/
nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length){
nsresult result=0;
return result;
}
/**
*
*
* @update gess 5/12/98
* @param
* @return
*/
nsresult nsHTMLParser::OnStopBinding(void){
nsresult result=0;
return result;
}