WIP for push based tokenization

git-svn-id: svn://10.0.0.236/trunk@1683 18797224-902f-48f8-a5cc-f745e15eee43
1998-05-14 22:19:08 +00:00 · 1998-05-14 22:19:08 +00:00 · 4e262a1fe0
commit 4e262a1fe0
parent affe63fab7
18 changed files with 1630 additions and 590 deletions
--- a/mozilla/htmlparser/src/CNavDelegate.cpp
+++ b/mozilla/htmlparser/src/CNavDelegate.cpp
@ -81,42 +81,49 @@ nsIDTD* CNavDelegate::GetDTD(void) const{
 *  and we know we're at the start of some kind of tagged 
 *  element. We don't know yet if it's a tag or a comment.
 *  
- *  @update  gess 3/25/98
- *  @param   
- *  @return  
+ *  @update  gess 5/12/98
+ *  @param   aChar is the last char read
+ *  @param   aScanner represents our input source
+ *  @param   aToken is the out arg holding our new token
+ *  @return  error code (may return kInterrupted).
 */
 PRInt32 CNavDelegate::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {

   nsAutoString empty("");
   PRInt32 result=aScanner.GetChar(aChar);

-  switch(aChar) {
-    case kForwardSlash:
-      PRUnichar ch; 
-      result=aScanner.Peek(ch);
-      if(nsString::IsAlpha(ch))
-        aToken=new CEndToken(empty);
-      else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
-      break;
-    case kExclamation:
-      aToken=new CCommentToken(empty);
-      break;
-    default:
-      if(nsString::IsAlpha(aChar))
-        return ConsumeStartTag(aChar,aScanner,aToken);
-      else if(kEOF!=aChar) {
-        nsAutoString temp("<");
-        return ConsumeText(temp,aScanner,aToken);
-      }
-  } //switch
+  //the scanner couldn't hand us the char after '<': pass its code straight back
+  if(kNoError!=result)
+    return result;

-  if(0!=aToken) {
-    result= aToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
-    if(result) {
-      delete aToken;
-      aToken=0;
-    }
-  }
+  if(kForwardSlash==aChar) {
+    //"</" followed by a letter is an end tag; anything else is a comment
+    PRUnichar theNext;
+    result=aScanner.Peek(theNext);
+    if(kNoError==result) {
+      if(nsString::IsAlpha(theNext))
+        aToken=new CEndToken(empty);
+      else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
+    }
+  }
+  else if(kExclamation==aChar) {
+    aToken=new CCommentToken(empty);
+  }
+  else if(nsString::IsAlpha(aChar)) {
+    return ConsumeStartTag(aChar,aScanner,aToken);
+  }
+  else if(kEOF!=aChar) {
+    //a '<' that doesn't open a tag is just text
+    nsAutoString temp("<");
+    return ConsumeText(temp,aScanner,aToken);
+  }
+
+  //let the freshly made token swallow the rest of its own text; if it
+  //can't, throw the token away so the caller never sees a partial one
+  if((0!=aToken) && (kNoError==result)) {
+    result=aToken->Consume(aChar,aScanner);
+    if(result) {
+      delete aToken;
+      aToken=0;
+    }
+  }
   return result;
 }

@ -131,20 +138,26 @@ PRInt32 CNavDelegate::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aTo
 */
 PRInt32 CNavDelegate::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner) {
   PRBool done=PR_FALSE;
-  nsAutoString as("");
   PRInt32 result=kNoError;
+  nsAutoString as("");
+
   while((!done) && (result==kNoError)) {
-     CToken* theToken= new CAttributeToken(as);
-      if(theToken){
-        result= theToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
+    //one attribute token per pass; it only joins the deque once fully consumed
+    CToken* theToken= new CAttributeToken(as);
+    if(theToken){
+      result=theToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
+      if(kNoError==result){
         mTokenDeque.Push(theToken);
-      }
-    aScanner.Peek(aChar);
-    if(aChar==kGreaterThan) { //you just ate the '>'
-      aScanner.GetChar(aChar); //skip the '>'
-      done=PR_TRUE;
-    }
-  }
+      }//if
+      else delete theToken;  //never pushed, so nobody else will free it
+    }//if
+    
+    if(kNoError==result){
+      result=aScanner.Peek(aChar);
+      //only trust aChar when the peek actually produced a character
+      if((kNoError==result) && (aChar==kGreaterThan)) { //you just ate the '>'
+        aScanner.GetChar(aChar); //skip the '>'
+        done=PR_TRUE;
+      }//if
+    }//if
+  }//while
   return result;
 }

@ -166,8 +179,7 @@ PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar a
  endTag.Append(aString);
  endTag.Append(">");
  aToken=new CSkippedContentToken(endTag);
-  PRInt32 result= aToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
-  return result;
+  return aToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
 }

 /**
@ -183,38 +195,43 @@ PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar a
 PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
   aToken=new CStartToken(nsAutoString(""));
   PRInt32 result=kNoError;
+
   if(aToken) {
     result= aToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
-    if(((CStartToken*)aToken)->IsAttributed()) {
-      result=ConsumeAttributes(aChar,aScanner);
-    }
-    //now that that's over with, we have one more problem to solve.
-    //In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
-    //consume all the content itself.
-    nsString& str=aToken->GetText();
-    CToken*   skippedToken=0;
-    if(str.EqualsIgnoreCase("SCRIPT") ||
-       str.EqualsIgnoreCase("STYLE") ||
-       str.EqualsIgnoreCase("TITLE") ||
-       str.EqualsIgnoreCase("TEXTAREA")) {
-      result=ConsumeContentToEndTag(str,aChar,aScanner,skippedToken);
-      
-      if(skippedToken){
-          //now we strip the ending sequence from our new SkippedContent token...
-        PRInt32 slen=str.Length()+3;
-        nsString& skippedText=skippedToken->GetText();
-      
-        skippedText.Cut(skippedText.Length()-slen,slen);
-        mTokenDeque.Push(skippedToken);
+    if(kNoError==result) {
+      if(((CStartToken*)aToken)->IsAttributed()) {
+        result=ConsumeAttributes(aChar,aScanner);
+      }
+      //now that that's over with, we have one more problem to solve.
+      //In the case that we just read a <SCRIPT>, <STYLE>, <TITLE> or <TEXTAREA>
+      //tag, we should go and consume all the content itself.
+      if(kNoError==result) {
+        nsString& str=aToken->GetText();
+        CToken*   skippedToken=0;
+        if(str.EqualsIgnoreCase("SCRIPT") ||
+           str.EqualsIgnoreCase("STYLE") ||
+           str.EqualsIgnoreCase("TITLE") ||
+           str.EqualsIgnoreCase("TEXTAREA")) {
+          result=ConsumeContentToEndTag(str,aChar,aScanner,skippedToken);
    
-        //In the case that we just read a given tag, we should go and
-        //consume all the tag content itself (and throw it all away).
+          if((kNoError==result) && skippedToken){
+              //now we strip the ending sequence ("</" + tag + ">") from our new SkippedContent token...
+            PRInt32 slen=str.Length()+3;
+            nsString& skippedText=skippedToken->GetText();
+    
+            skippedText.Cut(skippedText.Length()-slen,slen);
+            mTokenDeque.Push(skippedToken);
+  
+            //The end tag was swallowed along with the content, so queue
+            //a matching end token by hand.

-        CEndToken* endtoken=new CEndToken(str);
-        mTokenDeque.Push(endtoken);
+            CEndToken* endtoken=new CEndToken(str);
+            mTokenDeque.Push(endtoken);
+          } //if
+          else delete skippedToken;  //consume failed: the deque never took it, so free it here
+        } //if
       } //if
     } //if
-  }
+  } //if
   return result;
 }

@ -231,19 +248,22 @@ PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*
 PRInt32 CNavDelegate::ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
    PRUnichar  ch;
    PRInt32 result=aScanner.GetChar(ch);
-   if(nsString::IsAlpha(ch)) { //handle common enity references &xxx; or &#000.
-     aToken = new CEntityToken(nsAutoString(""));
-     result = aToken->Consume(ch,aScanner);  //tell new token to finish consuming text...    
-   }
-   else if(kHashsign==ch) {
-     aToken = new CEntityToken(nsAutoString(""));
-     result=aToken->Consume(0,aScanner);
-   }
-   else {
-     //oops, we're actually looking at plain text...
-     nsAutoString temp("&");
-     result=ConsumeText(temp,aScanner,aToken);
-   }
+
+   if(kNoError==result) {
+     //named (&xxx;) and numeric (&#000;) references share one token type;
+     //the named form passes its first letter on, the numeric form passes 0
+     const PRBool isNamed=nsString::IsAlpha(ch);
+     if(isNamed || (kHashsign==ch)) {
+       aToken = new CEntityToken(nsAutoString(""));
+       if(isNamed)
+         result = aToken->Consume(ch,aScanner);
+       else result = aToken->Consume(0,aScanner);
+     }
+     else {
+       //a lone '&' is plain text after all
+       nsAutoString temp("&");
+       result=ConsumeText(temp,aScanner,aToken);
+     }
+   }//if
    return result;
 }

@ -336,36 +356,54 @@ PRInt32 CNavDelegate::ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*&
 *  @return new token or null 
 */
 PRInt32 CNavDelegate::GetToken(CScanner& aScanner,CToken*& aToken){
-  PRInt32   result=kNoError;
-  PRUnichar aChar;
-
+  
+  aToken=0;
   if(mTokenDeque.GetSize()>0) {
     aToken=(CToken*)mTokenDeque.Pop();
-    return result;
+    return kNoError;  //tokens queued by an earlier call are handed out before scanning for more
   }
-  aToken=0;
-  while(!aScanner.Eof()) {
+
+  PRInt32 result=kNoError;
+  if(kNoError==result){  //always true here: result was initialized on the line above
+    
+    PRUnichar aChar;
     result=aScanner.GetChar(aChar);
-    switch(aChar) {
-      case kAmpersand:
-        return ConsumeEntity(aChar,aScanner,aToken);
-      case kLessThan:
-        return ConsumeTag(aChar,aScanner,aToken);
-      case kCR: case kLF:
-        return ConsumeNewline(aChar,aScanner,aToken);
-      case kNotFound:
+    switch(result) {  //dispatch on the scanner's status first, on the character second
+      case kEOF:  //nothing left: fall out and return kEOF with aToken still 0
         break;
+
+      case kInterrupted:  //ran out of buffered input: back the scanner up to its mark
+        aScanner.RewindToMark();
+        break; 
+
+      case kNoError:
       default:
-        if(!nsString::IsSpace(aChar)) {
-          nsAutoString temp(aChar);
-          return ConsumeText(temp,aScanner,aToken);
-        }
-        else return ConsumeWhitespace(aChar,aScanner,aToken);
-        break;
+        switch(aChar) {
+          case kLessThan:
+            return ConsumeTag(aChar,aScanner,aToken);
+
+          case kAmpersand:
+            return ConsumeEntity(aChar,aScanner,aToken);
+          
+          case kCR: case kLF:
+            return ConsumeNewline(aChar,aScanner,aToken);
+          
+          case kNotFound:
+            break;
+          
+          default:
+            if(!nsString::IsSpace(aChar)) {
+              nsAutoString temp(aChar);
+              return ConsumeText(temp,aScanner,aToken);
+            }
+            else return ConsumeWhitespace(aChar,aScanner,aToken);
+            break;
+        } //switch
+        break; 
     } //switch
-    if(result==kEOF)
-      result=0;
-   } //while
+    if(kNoError==result)  //only reached when no token was produced (the kNotFound case)
+      result=aScanner.Eof();
+  } //if
   return result;
 }

--- a/mozilla/htmlparser/src/nsHTMLParser.cpp
+++ b/mozilla/htmlparser/src/nsHTMLParser.cpp
@ -31,6 +31,7 @@
 #include "CNavDTD.h"
 #include "prenv.h"  //this is here for debug reasons...
 #include "plstr.h"
+#include <fstream.h>
 #ifdef XP_PC
 #include <direct.h> //this is here for debug reasons...
 #endif
@ -40,11 +41,13 @@ static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID);
 static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);

 static const char* kNullURL = "Error: Null URL given";
+static const char* kNullFilename= "Error: Null filename given";
 static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
 static const char* kNullToken = "Error: Null token given";
 static const char* kInvalidTagStackPos = "Error: invalid tag stack position";

-static char* gVerificationOutputDir=0;
+static char*  gVerificationOutputDir=0;
+static int    rickGDebug=0;

 /**
 *  This method is defined in nsIParser. It is used to 
@ -456,92 +459,198 @@ PRBool nsHTMLParser::IterateTokens() {
  return result;
 }

-
 /**
- *  This is the main controlling routine in the parsing process. 
- *  Note that it may get called multiple times for the same scanner, 
- *  since this is a pushed based system, and all the tokens may 
- *  not have been consumed by the scanner during a given invocation 
- *  of this method. 
- *
- *  @update  gess 3/25/98
- *  @param   aFilename -- const char* containing file to be parsed.
- *  @return  PR_TRUE if parse succeeded, PR_FALSE otherwise.
+ *  Picks the parse mode from the PARSE_MODE environment variable:
+ *  the value "other" (compared without regard to case) selects eParseMode_other.
+ *  @update  gess 5/13/98
+ *  @param   (none)
+ *  @return  eParseMode_other as described above, otherwise eParseMode_navigator
  */
-PRBool nsHTMLParser::Parse(nsIURL* aURL){
-  eParseMode  theMode=eParseMode_navigator;
+eParseMode DetermineParseMode() {
   const char* theModeStr= PR_GetEnv("PARSE_MODE");
   const char* other="other";
+  eParseMode  result=eParseMode_navigator;

   if(theModeStr) 
     if(0==nsCRT::strcasecmp(other,theModeStr))
-      theMode=eParseMode_other;
-
-  return Parse(aURL,theMode);
+      result=eParseMode_other;    
+  return result;
 }

+
 /**
- *  This is the main controlling routine in the parsing process. 
- *  Note that it may get called multiple times for the same scanner, 
- *  since this is a pushed based system, and all the tokens may 
- *  not have been consumed by the scanner during a given invocation 
- *  of this method. 
- *
- *  @update  gess 3/25/98
- *  @param   aFilename -- const char* containing file to be parsed.
- *  @return  PR_TRUE if parse succeeded, PR_FALSE otherwise.
+ *  Creates the tokenizer delegate that matches aMode and asks it for its DTD.
+ *  For a mode other than navigator/other, aDelegate keeps the value the caller passed in.
+ *  @update  gess 5/13/98
+ *  @param   aMode selects the delegate class; aDelegate and aDTD are out args (aDTD is only set when aDelegate is non-null)
+ *  @return  nothing; results come back through aDelegate and aDTD
  */
-PRBool nsHTMLParser::Parse(nsIURL* aURL,eParseMode aMode){
-  NS_PRECONDITION(0!=aURL,kNullURL);
-  
-  PRBool result=PR_FALSE;
-  if(aURL) {
+void GetDelegateAndDTD(eParseMode aMode,ITokenizerDelegate*& aDelegate,nsIDTD*& aDTD) {
+  switch(aMode) {
+    case eParseMode_navigator:
+      aDelegate=new CNavDelegate(); break;
+    case eParseMode_other:
+      aDelegate=new COtherDelegate(); break;
+    default:
+      break;
+  }
+  if(aDelegate)
+    aDTD=aDelegate->GetDTD();
+}

-    result=PR_TRUE;
-    mParseMode=aMode;
-    ITokenizerDelegate* theDelegate=0;
-    
-    mDTD=0;
-    switch(mParseMode) {
-      case eParseMode_navigator:
-        theDelegate=new CNavDelegate();
-        if(theDelegate)
-          mDTD=theDelegate->GetDTD();
-        break;
-      case eParseMode_other:
-        theDelegate=new COtherDelegate();
-        if(theDelegate)
-          mDTD=theDelegate->GetDTD();
-        break;
-      default:
-        break;
-    }
-    if(!theDelegate) {
-      NS_ERROR(kNullTokenizer);
-      return PR_FALSE;
-    }

-    if(mDTD)
-      mDTD->SetParser(this);
-    mTokenizer=new CTokenizer(aURL, theDelegate, mParseMode);
+/**
+ *  This DEBUG ONLY method is used to simulate a network-based
+ *  i/o model where data comes in incrementally: the file is read
+ *  kBufSize bytes at a time and every chunk is appended to the
+ *  tokenizer and parsed with ResumeParse().
+ *  
+ *  @update  gess 5/13/98
+ *  @param   aFilename is the name of the disk file to use for testing.
+ *  @return  error code (kNoError means ok; kBadFilename if the file can't be opened)
+ */
+PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
+  PRInt32   result=kBadFilename;
+  fstream*  theFileStream;
+  nsString  theBuffer;
+  PRInt32   iter=-1;
+  const int kBufSize=10;
+
+  theFileStream=new fstream(aFilename,ios::in|ios::binary);
+  if(theFileStream) {
+    //a non-null pointer says nothing about the open itself, so ask the stream
+    if(theFileStream->is_open())
+      result=kNoError;
+    while((kNoError==result) || (kInterrupted==result)) {
+      //read some data from the file...
+
+      char buf[kBufSize+1];  //+1 leaves room for the terminating null
+
+      theFileStream->read(buf,kBufSize);
+      PRInt32 numread=theFileStream->gcount();
+      if(numread<=0) {
+        //end of file (or a read error): without this exit result never
+        //changes and the loop would spin forever
+        break;
+      }
+      buf[numread]=0;  //terminate after the bytes actually read (the last chunk is usually short)
+      theBuffer.Truncate();
+      theBuffer.Append(buf);
+      mTokenizer->Append(theBuffer);
+      result=ResumeParse(++iter);

-    mSink->WillBuildModel();
-#ifdef __INCREMENTAL 
-    int iter=-1;
-    for(;;){
-      mSink->WillResume();
-      mTokenizer->TokenizeAvailable(++iter);
-      mSink->WillInterrupt();
     }
-#else
-    mTokenizer->Tokenize();
-#endif
-    result=IterateTokens();
-    mSink->DidBuildModel();
+    theFileStream->close();
+    delete theFileStream;
   }
   return result;
 }

+/**
+ *  Parses the document stored in the given disk file.
+ *  The parse mode comes from DetermineParseMode(); the sink is told
+ *  WillBuildModel() before and DidBuildModel() after the parse.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aFilename -- const char* containing file to be parsed.
+ *  @param   aIncremental -- PR_TRUE to feed the file to the tokenizer in small
+ *           chunks (see ParseFileIncrementally), PR_FALSE to let the tokenizer
+ *           read the file itself.
+ *  @return  status code: kNoError if all went well; kBadFilename when aFilename
+ *           is null or no delegate could be made; otherwise whatever the parse returned.
+ */
+PRInt32 nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
+  NS_PRECONDITION(0!=aFilename,kNullFilename);
+
+  //the return type is PRInt32 to match the declaration in the class and in nsIParser
+  PRInt32 status=kBadFilename;
+
+  mIncremental=aIncremental;
+  mParseMode=DetermineParseMode();  
+
+  if(aFilename) {
+
+    GetDelegateAndDTD(mParseMode,mDelegate,mDTD);
+    if(mDelegate) {
+
+      if(mDTD)
+        mDTD->SetParser(this);
+
+      mSink->WillBuildModel();
+
+      //ok, time to create our tokenizer and begin the process
+      if(aIncremental) {
+        //the tokenizer starts empty; ParseFileIncrementally pushes data at it
+        mTokenizer=new CTokenizer(mDelegate,mParseMode);
+        status=ParseFileIncrementally(aFilename);
+      }
+      else {
+        //the tokenizer pulls from the file on its own
+        mTokenizer=new CTokenizer(aFilename,mDelegate,mParseMode);
+        status=ResumeParse(0);
+      }
+      mSink->DidBuildModel();
+    }//if
+  }
+  return status;
+}
+
+/**
+ *  Parses the document behind the given URL.
+ *  In incremental mode the tokenizer is created without a source and
+ *  aURL->Open(this) is called; DidBuildModel() is NOT sent on that path.
+ *  NOTE(review): presumably data then arrives through the stream listener
+ *  callbacks -- confirm, they are still empty stubs in this file.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aURL -- descriptor for the document to be parsed.
+ *  @param   aIncremental -- PR_TRUE for the callback-driven path, PR_FALSE to tokenize from the URL right away.
+ *  @return  status code; stays kBadURL when aURL is null or no delegate could be made.
+ */
+PRInt32 nsHTMLParser::Parse(nsIURL* aURL,PRBool aIncremental ){
+  NS_PRECONDITION(0!=aURL,kNullURL);
+
+  PRInt32 status=kBadURL;
+
+  //debug hook: parses a fixed local file first; note that execution then
+  //continues with the URL below and status is overwritten
+  if(rickGDebug)
+    status=Parse("c:/temp/temp.html",PR_TRUE);
+
+  mIncremental=aIncremental;
+  mParseMode=DetermineParseMode();  
+
+  if(aURL) {
+
+    GetDelegateAndDTD(mParseMode,mDelegate,mDTD);
+    if(mDelegate) {
+
+      if(mDTD)
+        mDTD->SetParser(this);
+
+      mSink->WillBuildModel();
+
+      //ok, time to create our tokenizer and begin the process
+      if(aIncremental) {
+        mTokenizer=new CTokenizer(mDelegate,mParseMode);
+        status=aURL->Open(this);
+      }
+      else {
+        mTokenizer=new CTokenizer(aURL,mDelegate,mParseMode);
+        status=ResumeParse(0);
+        mSink->DidBuildModel();
+      }
+    }//if
+  }
+  return status;
+}
+
+/**
+ * Parses one string full of HTML text: the string is appended to the
+ * existing tokenizer and parsed, bracketed by the sink's
+ * WillBuildModel()/DidBuildModel() notifications.
+ *
+ * @update	gess5/11/98
+ * @param   aSourceBuffer contains a string-full of real HTML
+ * @param   appendTokens is not consulted by this implementation
+ * @return  the status code returned by ResumeParse(0)
+ */
+PRInt32 nsHTMLParser::Parse(nsString& aSourceBuffer,PRBool appendTokens){
+  mSink->WillBuildModel();
+  mTokenizer->Append(aSourceBuffer);
+  const PRInt32 theStatus=ResumeParse(0);
+  mSink->DidBuildModel();
+  return theStatus;
+}

 /**
 *  This routine is called to cause the parser to continue
@ -553,17 +662,21 @@ PRBool nsHTMLParser::Parse(nsIURL* aURL,eParseMode aMode){
 *  @param   
 *  @return  PR_TRUE if parsing concluded successfully.
 */
-PRBool nsHTMLParser::ResumeParse() {
+PRInt32 nsHTMLParser::ResumeParse(PRInt32 anIteration) {
+  //tell the sink we're running again, then tokenize what's available;
+  //the value handed back is the tokenizer's status code
   mSink->WillResume();
-  int iter=0;
-  PRInt32 errcode=mTokenizer->TokenizeAvailable(iter);
-  if(kInterrupted==errcode)
-    mSink->WillInterrupt();
-  PRBool result=IterateTokens();
+  PRInt32 result=mTokenizer->Tokenize(anIteration);
+  if(kInterrupted==result)
+    mSink->WillInterrupt();
+
+  //the rickGDebug switch limits the run to tokenization only
+  if(!rickGDebug)
+    IterateTokens();
   return result;
 }

-
 /**
 * 
 * @update  gess4/22/98
@ -1388,5 +1501,64 @@ PRBool nsHTMLParser::ReduceContextStackFor(PRInt32 aChildTag){
 }


+/**
+ *  Stream listener callback (net lib); not implemented yet.
+ *  
+ *  @update  gess 5/12/98
+ *  @param   (none)
+ *  @return  always 0
+ */
+nsresult nsHTMLParser::GetBindInfo(void){
+  return 0;
+}
+
+/**
+ *  Stream listener callback (net lib); not implemented yet.
+ *  
+ *  @update  gess 5/12/98
+ *  @param   Progress, ProgressMax, msg are all ignored
+ *  @return  always 0
+ */
+nsresult nsHTMLParser::OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const char *msg){
+  return 0;
+}
+
+/**
+ *  Stream listener callback (net lib); not implemented yet.
+ *  
+ *  @update  gess 5/12/98
+ *  @param   (none)
+ *  @return  always 0
+ */
+nsresult nsHTMLParser::OnStartBinding(void){
+  return 0;
+}
+
+/**
+ *  Stream listener callback (net lib); not implemented yet, so the
+ *  incoming data is not read.
+ *  
+ *  @update  gess 5/12/98
+ *  @param   pIStream and length are both ignored
+ *  @return  always 0
+ */
+nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length){
+  return 0;
+}
+
+/**
+ *  Stream listener callback (net lib); not implemented yet.
+ *  
+ *  @update  gess 5/12/98
+ *  @param   (none)
+ *  @return  always 0
+ */
+nsresult nsHTMLParser::OnStopBinding(void){
+  return 0;
+}


--- a/mozilla/htmlparser/src/nsHTMLParser.h
+++ b/mozilla/htmlparser/src/nsHTMLParser.h
@ -61,6 +61,9 @@
 #include "nsParserNode.h"
 #include "nsTokenHandler.h"
 #include "nsParserTypes.h"
+#include "nsIURL.h"
+#include "nsIStreamListener.h"
+#include "nsITokenizerDelegate.h"


 #define NS_IHTML_PARSER_IID      \
@ -75,7 +78,7 @@ class nsIURL;
 class nsIDTD;


-class nsHTMLParser : public nsIParser {
+class nsHTMLParser : public nsIParser, public nsIStreamListener {
            
  public:
 friend class CTokenHandler;
@ -104,14 +107,6 @@ friend class CTokenHandler;
     */
    virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);

-    /**
-     * Cause parser to parse input from given URL
-     * @update	gess5/11/98
-     * @param   aURL is a descriptor for source document
-     * @return  TRUE if all went well -- FALSE otherwise
-     */
-    virtual PRBool Parse(nsIURL* aURL);
-
    /**
     * Cause parser to parse input from given URL in given mode
     * @update	gess5/11/98
@ -119,14 +114,31 @@ friend class CTokenHandler;
     * @param   aMode is the desired parser mode (Nav, other, etc.)
     * @return  TRUE if all went well -- FALSE otherwise
     */
-    virtual PRBool Parse(nsIURL* aURL,eParseMode aMode);
+    virtual PRInt32 Parse(nsIURL* aURL,PRBool aIncremental=PR_FALSE);
+
+    /**
+     * Cause parser to parse input from given file in given mode
+     * @update	gess5/11/98
+     * @param   aFilename is a path for file document
+     * @param   aIncremental is PR_TRUE to feed the file to the tokenizer in chunks
+     * @return  status code (kNoError if all went well)
+     */
+    virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental);
+
+    /**
+     * @update	gess5/11/98
+     * @param   anHTMLString contains a string-full of real HTML
+     * @param   appendTokens tells us whether we should insert tokens inline, or append them.
+     * @return  status code (kNoError if all went well)
+     */
+    virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens);

    /**
     * This method gets called (automatically) during incremental parsing
     * @update	gess5/11/98
     * @return  TRUE if all went well, otherwise FALSE
     */
-    virtual PRBool ResumeParse();
+    virtual PRInt32 ResumeParse(PRInt32 anIteration);

    /**
     * Retrieve ptr to internal context vector stack
@ -230,6 +242,15 @@ friend class CTokenHandler;
     */
    PRBool HandleStyleToken(CToken* aToken);

+      //*********************************************
+      // These methods are callback methods used by
+      // net lib to let us know about our inputstream.
+      //*********************************************
+    NS_IMETHOD GetBindInfo(void);
+    NS_IMETHOD OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const char *msg);
+    NS_IMETHOD OnStartBinding(void);
+    NS_IMETHOD OnDataAvailable(nsIInputStream *pIStream, PRInt32 length);
+    NS_IMETHOD OnStopBinding(void);

 protected:

@ -485,6 +506,10 @@ protected:
     */
    PRBool CreateContextStackFor(PRInt32 aChildTag);

+private:
+    PRInt32 ParseFileIncrementally(const char* aFilename);  //XXX ONLY FOR DEBUG PURPOSES...
+
+protected:
    //*********************************************
    // And now, some data members...
    //*********************************************
@ -502,6 +527,8 @@ protected:
    nsIDTD*             mDTD;
    eParseMode          mParseMode;
    PRBool              mHasOpenForm;
+    PRBool              mIncremental;
+    ITokenizerDelegate* mDelegate;
 };


--- a/mozilla/htmlparser/src/nsIParser.h
+++ b/mozilla/htmlparser/src/nsIParser.h
@ -40,6 +40,7 @@


 class nsIContentSink;
+class nsString;

 /**
 *  This class defines the iparser interface. This XPCOM
@ -52,10 +53,14 @@ class nsIParser : public nsISupports {
  public:

    virtual nsIContentSink* SetContentSink(nsIContentSink* aContentSink)=0;
-    virtual PRBool          Parse(nsIURL* aURL)=0;
-    virtual PRBool          ResumeParse()=0;
-    virtual PRInt32         GetStack(PRInt32* aStackPtr)=0;
-    virtual PRBool          HasOpenContainer(PRInt32 aContainer) const=0;
+
+    virtual PRInt32 Parse(nsIURL* aURL,PRBool aIncremental=PR_FALSE)=0;
+    virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0;
+    virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;
+
+    virtual PRInt32 ResumeParse(PRInt32 anIterator)=0;
+    virtual PRInt32 GetStack(PRInt32* aStackPtr)=0;
+    virtual PRBool  HasOpenContainer(PRInt32 aContainer) const=0;
 };

 extern NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult);
--- a/mozilla/htmlparser/src/nsParserTypes.h
+++ b/mozilla/htmlparser/src/nsParserTypes.h
@ -37,13 +37,16 @@ enum  eParseMode {
  
  eParseMode_unknown=0,
  eParseMode_navigator,
-  eParseMode_other
+  eParseMode_other,
+  eParseMode_autodetect
 };

 const PRInt32   kEOF          = 1000000L;
+const PRInt32   kBadFilename  = -4;
+const PRInt32   kBadURL       = -3;
+const PRInt32   kInterrupted  = -2;
 const PRInt32   kNotFound     = -1;
 const PRInt32   kNoError      = 0;
-const PRInt32   kInterrupted  = 2;

 const PRUint32  kNewLine      = '\n';
 const PRUint32  kCR           = '\r';
--- a/mozilla/htmlparser/src/nsScanner.cpp
+++ b/mozilla/htmlparser/src/nsScanner.cpp
@ -22,9 +22,8 @@
 #include "nsIURL.h" 
 #include "nsDebug.h"

-const char* gURLRef;
-const char* kBadHTMLText1="<HTML><BODY><H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
-const char* kBadHTMLText2="</BODY></HTML>";
+const char* gURLRef=0;
+const char* kBadHTMLText="<H3>Oops...</H3>You just tried to read a non-existent document: <BR>";

 #ifdef __INCREMENTAL
 const int   kBufsize=1;
@ -33,31 +32,63 @@ const int   kBufsize=64;
 #endif

 /**
- *  default constructor
- *  
- *  @update  gess 3/25/98
- *  @param   aURL -- pointer to URL to be loaded
+ *  Builds a scanner for incremental (callback based) input: it has
+ *  no stream of its own, so its buffer starts out empty.
+ *
+ *  @update  gess 5/12/98
+ *  @param   aMode represents the parser mode (nav, other)
+ *  @return  
+ */
+CScanner::CScanner(eParseMode aMode) : mBuffer("") {
+  //no backing stream of either kind
+  mFileStream=0;
+  mNetStream=0;
+  mIncremental=PR_TRUE;
+  mParseMode=aMode;
+  //nothing read yet, and no rewind point recorded
+  mTotalRead=0;
+  mOffset=0;
+  mMarkPos=-1;
+}
+
+/**
+ *  Use this constructor if you want i/o to be file based.
+ *
+ *  @update  gess 5/12/98
+ *  @param   aFilename is the path of the file to read; a null path
+ *           leaves the scanner without a stream
+ *  @param   aMode represents the parser mode (nav, other)
+ *  @return  
+ */
+CScanner::CScanner(const char* aFilename,eParseMode aMode) : mBuffer("") {
+  NS_ASSERTION(0!=aFilename,"Error: Null filename!");
+  mOffset=0;
+  mMarkPos=-1;
+  mTotalRead=0;
+  mParseMode=aMode;
+  mNetStream=0;
+  mIncremental=PR_FALSE;
+  //the assertion vanishes in release builds, so never hand fstream a null path
+  mFileStream=(aFilename) ? new fstream(aFilename,ios::in|ios::binary) : 0;
+}
+
+/**
+ *  Use this constructor if you want i/o to be based on a
+ *  non-incremental netstream.
+ *
+ *  @update  gess 5/12/98
+ *  @param   aURL is the document to open; a null URL leaves the scanner without a stream
+ *  @param   aMode represents the parser mode (nav, other)
  *  @return  
  */
 CScanner::CScanner(nsIURL* aURL,eParseMode aMode) : mBuffer("") {
   NS_ASSERTION(0!=aURL,"Error: Null URL!");
   mOffset=0;
-  mStream=0;
+  mMarkPos=-1;
   mTotalRead=0;
   mParseMode=aMode;
-  if(aURL) {
-
-    gURLRef=aURL->GetSpec();
-
-#ifdef  __INCREMENTAL
-    mStream=new fstream("c:/temp/temp.html",ios::in|ios::binary);
-#else
-    int error;
-    mStream=aURL->Open(&error);
-#endif
-  }
+  mFileStream=0;
+  mNetStream=0;
+  mIncremental=PR_FALSE;
+  //the assertion vanishes in release builds, so keep the null check the
+  //previous version of this constructor had
+  if(aURL) {
+    PRInt32 error=0;
+    mNetStream=aURL->Open(&error);
+    gURLRef=aURL->GetSpec();
+  }
 }

+
 /**
 *  default destructor
 *  
@ -66,19 +97,107 @@ CScanner::CScanner(nsIURL* aURL,eParseMode aMode) : mBuffer("") {
 *  @return  
 */
 CScanner::~CScanner() {
-#ifdef __INCREMENTAL
-  mStream->close();
-  delete mStream;
-  mStream=0;
-#else
-  if(mStream) {
-    mStream->Close();
-    mStream->Release();
-    mStream=0;
+  if(mFileStream) {  //file based scanner: close the fstream and free it
+    mFileStream->close();
+    delete mFileStream;
   }
-#endif
+  else if(mNetStream) {  //net based scanner: close the stream and drop our reference
+    mNetStream->Close();
+    mNetStream->Release();
+  }
+  mFileStream=0;
+  mNetStream=0;
+  gURLRef=0;  //file-level global, cleared no matter which scanner set it
 }

+/**
+ *  Resets current offset position of input stream to marked position. 
+ *  This allows us to back up to this point if the need should arise, 
+ *  such as when tokenization gets interrupted.
+ *  NOTE: IT IS REALLY BAD FORM TO CALL REWINDTOMARK WITHOUT CALLING MARK FIRST!
+ *  If no mark has been recorded the offset is left where it is.
+ *
+ *  @update  gess 5/12/98
+ *  @param   
+ *  @return  the offset now in effect
+ */
+PRInt32 CScanner::RewindToMark(void){
+  //mMarkPos starts out as -1; copying that would leave a negative read offset
+  if(0<=mMarkPos)
+    mOffset=mMarkPos;
+  return mOffset;
+}
+
+/**
+ *  Remembers the current read offset as the rewind point, so that
+ *  RewindToMark() can return to it later (for instance when
+ *  tokenization gets interrupted).
+ *
+ *  @update  gess 5/12/98
+ *  @param   
+ *  @return  the offset that was just recorded
+ */
+PRInt32 CScanner::Mark(void){
+  return mMarkPos=mOffset;
+}
+
+
+/**
+ *  Drops the part of the buffer that can no longer be needed, just
+ *  before new data gets added, and re-bases the offset and the mark
+ *  onto the shortened buffer.
+ *
+ *  @update  gess 5/12/98
+ *  @param   aBuffer is the scanner's internal buffer
+ *  @param   anOffset is the read position; reduced by the amount removed
+ *  @param   aMarkPos is the rewind point; always 0 on return
+ */
+void _PreCompressBuffer(nsString& aBuffer,PRInt32& anOffset,PRInt32& aMarkPos){
+  //To determine how much of our internal buffer to truncate, 
+  //we should check mMarkPos. That represents the point at which
+  //we've guaranteed the client we can back up to, so make sure
+  //you don't lose any of the data beyond that point. With no mark
+  //recorded (negative), keep whatever hasn't been read yet instead.
+  PRInt32 theCutLen=(0<=aMarkPos) ? aMarkPos : anOffset;
+  if(theCutLen>aBuffer.Length())
+    theCutLen=aBuffer.Length();
+  if(theCutLen>0)
+    aBuffer.Cut(0,theCutLen);
+  else theCutLen=0;
+  //slide the read position down by what was removed, so it keeps
+  //addressing the same character (a stale offset would skip new data)
+  anOffset=(anOffset>theCutLen) ? anOffset-theCutLen : 0;
+  aMarkPos=0;
+}
+
+
+/**
+ *  Adds the next blob of i/o data to the scanner's buffer. Meant to
+ *  be called by the parser only while doing incremental i/o over the
+ *  net; a scanner that owns a file or net stream ignores the call.
+ *  
+ *  @update  gess 5/12/98
+ *  @param   aBuffer contains next blob of i/o data
+ *  @param   aSize contains size of buffer
+ *  @return  always 0
+ */
+PRInt32 CScanner::IncrementalAppend(const char* aBuffer,PRInt32 aSize){
+  NS_ASSERTION(((!mFileStream) && (!mNetStream)),"Error: Should only be called during incremental net i/o!");
+
+  if(mFileStream || mNetStream)
+    return 0;
+
+  _PreCompressBuffer(mBuffer,mOffset,mMarkPos);
+
+  //the buffer has (possibly) been shortened; now tack on the new data
+  if(aSize>0) {
+    mBuffer.Append(aBuffer,aSize);
+    mTotalRead+=aSize;
+  }
+  return 0;
+}
+
+/** 
+ * Compresses the internal buffer, then appends the given string to it.
+ *
+ * @update  gess4/3/98
+ * @param   aBuffer holds the text to add
+ * @return  always PR_TRUE
+ */
+PRBool CScanner::Append(nsString& aBuffer) {
+  _PreCompressBuffer(mBuffer,mOffset,mMarkPos);
+  mBuffer.Append(aBuffer);
+  return PR_TRUE;
+}

 /** 
 * Grab data from underlying stream.
@ -89,55 +208,63 @@ CScanner::~CScanner() {
 PRInt32 CScanner::FillBuffer(void) {
   PRInt32 anError=0;

-  mBuffer.Truncate();
-  if(!mStream) {
+  //Refill strategy: trim what has been consumed, then either emit the
+  //debug error page (no input source at all), pull from the file or net
+  //stream, or -- for pushed input -- report kInterrupted to the caller.
+  //Returns 0 on success, kEOF when the net stream reports end of data,
+  //kInterrupted for incremental scanners.
+  _PreCompressBuffer(mBuffer,mOffset,mMarkPos);
+
+  if((!mIncremental) && (!mNetStream) && (!mFileStream)) {
     //This is DEBUG code!!!!!!  XXX DEBUG XXX
     //If you're here, it means someone tried to load a
     //non-existent document. So as a favor, we emit a
     //little bit of HTML explaining the error.
     if(0==mTotalRead) {
-      mBuffer.Append((const char*)kBadHTMLText1);
+      mBuffer.Append((const char*)kBadHTMLText);
       mBuffer.Append((const char*)gURLRef);
-      mBuffer.Append((const char*)kBadHTMLText2);
+      //count the canned text as read so it is emitted exactly once;
+      //otherwise every later refill would append it again.
+      mTotalRead+=mBuffer.Length();
     }
     else return 0;
   }
-  else {
+  else if(!mIncremental) {
     PRInt32 numread=0;
     char buf[kBufsize+1];
     buf[kBufsize]=0;

-#ifdef __INCREMENTAL 
-    mStream->read(buf,kBufsize);
-    numread=mStream->gcount();
-#else
-    numread=mStream->Read(&anError,buf,0,kBufsize);
-#endif
+    if(mFileStream) {
+      mFileStream->read(buf,kBufsize);
+      numread=mFileStream->gcount();
+    }
+    else if(mNetStream) {
+      numread=mNetStream->Read(&anError,buf,0,kBufsize);
+      if(1==anError)
+        anError=kEOF;
+    }
+    //new data (if any) starts where the retained text ends.
+    mOffset=mBuffer.Length();
     if((0<numread) && (0==anError))
       mBuffer.Append((const char*)buf,numread);
+    //add only what was appended just now; the buffer may still hold
+    //text retained for a rewind, which has been counted already.
+    mTotalRead+=(mBuffer.Length()-mOffset);
   }
-  mTotalRead+=mBuffer.Length();
+  else anError=kInterrupted;
+
   return anError;
  }

 /**
 *  determine if the scanner has reached EOF
 *  
- *  @update  gess 3/25/98
+ *  @update  gess 5/12/98
 *  @param   
- *  @return  PR_TRUE upon eof condition
+ *  @return  0=!eof 1=eof kInterrupted=interrupted
 */
-PRBool CScanner::Eof() {
+PRInt32 CScanner::Eof() {
   PRInt32 theError=0;
+
   if(mOffset>=mBuffer.Length()) {
-    theError=FillBuffer();
-    mOffset=0;
+    //Everything buffered has been read. Pushed (incremental) input can
+    //only wait for the next append; pulled input tries to refill now.
+    if(!mIncremental)
+      theError=FillBuffer();
+    else return kInterrupted;
   }
-  PRBool result=PR_TRUE;
-  if(0==theError) {
-    result=PRBool(0==mBuffer.Length());
-  }
-  return result;
+
+  //After a clean refill, eof means "still nothing unread". Compare the
+  //offset instead of the raw length: the buffer can legitimately hold
+  //text that was retained for a rewind but has already been consumed.
+  if(0==theError)
+    return (mOffset>=mBuffer.Length());
+
+  return theError;
  }

 /**
@ -148,11 +275,12 @@ PRBool CScanner::Eof() {
 *  @return  error code reflecting read status
 */
 PRInt32 CScanner::GetChar(PRUnichar& aChar) {
-  if(!Eof()) {
+  PRInt32 result=Eof();  //0 means a char is available; any other value is handed back unchanged
+  if(!result) {
     aChar=mBuffer[mOffset++];
-    return kNoError;
+    result=kNoError;  //char delivered and read offset advanced
   }
-  return kEOF;
+  return result;  //kNoError on success, otherwise whatever Eof() reported
  }


@ -165,11 +293,12 @@ PRInt32 CScanner::GetChar(PRUnichar& aChar) {
 *  @return  
 */
 PRInt32 CScanner::Peek(PRUnichar& aChar){
-  if(!Eof()) {
+  PRInt32 result=Eof();  //0 means a char is available; any other value is handed back unchanged
+  if(!result) {
     aChar=mBuffer[mOffset];        
-    return kNoError;
+    result=kNoError;  //char copied out; the read offset is left where it was
   }
-  return kEOF;
+  return result;  //kNoError on success, otherwise whatever Eof() reported
  }


@ -181,7 +310,9 @@ PRInt32 CScanner::Peek(PRUnichar& aChar){
 *  @return  error code
 */
 PRInt32 CScanner::PutBack(PRUnichar aChar) {
-  mOffset--;
+  if(mOffset>0)  //usual case: step the read offset back by one char
+    mOffset--;
+  else mBuffer.Insert(aChar,0);  //offset is already 0: presumably prepends aChar so it is read next -- confirm nsString::Insert argument order
   return kNoError;
  }

@ -301,8 +432,8 @@ PRInt32 CScanner::ReadUntil(nsString& aString,nsString& aTerminalSet,PRBool addT
  PRUnichar ch=0;
  PRInt32   result=kNoError;

-  while(!Eof()) {
-     result=GetChar(ch);
+  while(!result) {
+    result=GetChar(ch);
    if(kNoError==result) {
      PRInt32 pos=aTerminalSet.Find(ch);
      if(kNotFound!=pos) {
--- a/mozilla/htmlparser/src/nsScanner.h
+++ b/mozilla/htmlparser/src/nsScanner.h
@ -43,7 +43,36 @@ class ifstream;

 class CScanner {
  public:
+
+      /**
+       *  Use this constructor if you want an incremental (callback)
+       *  based input stream. No file or URL is given here; data is
+       *  presumably supplied later through Append()/IncrementalAppend().
+       *
+       *  @update  gess 5/12/98
+       *  @param   aMode represents the parser mode (nav, other)
+       */
+      CScanner(eParseMode aMode=eParseMode_navigator);
+      
+      /**
+       *  Use this constructor if you want i/o to be based on a
+       *  non-incremental netstream.
+       *
+       *  @update  gess 5/12/98
+       *  @param   aURL is the URL to read from (constructor body not shown in this change)
+       *  @param   aMode represents the parser mode (nav, other)
+       */
       CScanner(nsIURL* aURL,eParseMode aMode=eParseMode_navigator);
+
+      /**
+       *  Use this constructor if you want i/o to be file based.
+       *
+       *  @update  gess 5/12/98
+       *  @param   aFilename is the name of the file to read from
+       *  @param   aMode represents the parser mode (nav, other)
+       */
+      CScanner(const char* aFilename,eParseMode aMode=eParseMode_navigator);
+
      ~CScanner();

      /**
@ -117,7 +146,7 @@ class CScanner {
       *  @update  gess 3/25/98
       *  @return  PR_TRUE upon eof condition
       */
-      PRBool Eof(void);
+      PRInt32 Eof(void);  //now an error code rather than a bool: 0=not at eof, 1=eof, kInterrupted=interrupted

      /**
       *  Consume characters until you find the terminal char
@ -153,6 +182,48 @@ class CScanner {
       */
      PRInt32 ReadWhile(nsString& aString,nsString& anInputSet,PRBool addTerminal);

+      /**
+       *  Records current offset position in input stream. This allows us
+       *  to back up to this point if the need should arise, such as when
+       *  tokenization gets interrupted.
+       *  
+       *  @update  gess 5/12/98
+       *  @return  the offset that was just recorded as the mark
+       */
+      PRInt32 Mark(void);
+
+      /**
+       *  Resets current offset position of input stream to marked position. 
+       *  This allows us to back up to this point if the need should arise, 
+       *  such as when tokenization gets interrupted.
+       *  NOTE: IT IS REALLY BAD FORM TO CALL REWINDTOMARK WITHOUT CALLING MARK FIRST!
+       *  
+       *  @update  gess 5/12/98
+       *  @return  PRInt32; meaning not visible here (definition is not part of this change)
+       */
+      PRInt32 RewindToMark(void);
+
+
+      /**
+       *  Appends the given string to the scanner's internal buffer.
+       *  
+       *  @update  gess 5/13/98
+       *  @param   aBuffer is the text to add to the end of the buffer
+       *  @return  PR_TRUE (the current implementation never fails)
+       */
+      PRBool Append(nsString& aBuffer);
+
+      /**
+       *  Appends the next blob of incrementally delivered data to the
+       *  internal buffer. Only valid for scanners without a file or net stream.
+       *  
+       *  @update  gess 5/12/98
+       *  @param   aBuffer contains next blob of i/o data
+       *  @param   aSize contains size of buffer
+       *  @return  0 if all went well, otherwise error code
+       */
+      PRInt32 IncrementalAppend(const char* aBuffer,PRInt32 aSize);
+
      static void SelfTest();

  protected:
@ -167,15 +238,15 @@ class CScanner {
       */
      PRInt32 FillBuffer(void);

-#ifdef __INCREMENTAL
-      fstream*        mStream;
-#else
-      nsIInputStream* mStream;
-#endif
+
+      fstream*        mFileStream;  //file input source; FillBuffer reads from it when set
+      nsIInputStream* mNetStream;  //net input source; FillBuffer reads from it when there is no file stream
       nsString        mBuffer;
       PRInt32         mOffset;
+      PRInt32         mMarkPos;  //offset recorded by Mark(); buffer text from here on is kept when compressing
       PRInt32         mTotalRead;
       eParseMode      mParseMode;
+      PRBool          mIncremental;  //when set, running out of buffered data yields kInterrupted instead of a refill
  };
 };

 #endif
--- a/mozilla/htmlparser/src/nsTokenizer.cpp
+++ b/mozilla/htmlparser/src/nsTokenizer.cpp
@ -39,6 +39,35 @@ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMo
  mParseMode=aMode;
 }

+/**
+ *  Constructs a tokenizer whose scanner reads from a named file.
+ *  
+ *  @update gess 3/25/98
+ *  @param  aFilename -- name of file to be tokenized
+ *  @param  aDelegate -- ref to delegate to be used to tokenize
+ *  @param  aMode -- parse mode; stored here and passed on to the scanner
+ */
+CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode) :
+  mTokenDeque() {
+  mDelegate=aDelegate;
+  mScanner=new CScanner(aFilename,aMode);
+  mParseMode=aMode;
+}
+
+/**
+ *  Constructs a tokenizer whose scanner has no file or URL; it is
+ *  created with the mode-only CScanner constructor, so input has to
+ *  be handed in afterwards (see Append).
+ *  
+ *  @update gess 3/25/98
+ *  @param  aDelegate -- ref to delegate to be used to tokenize
+ *  @param  aMode -- parse mode; stored here and passed on to the scanner
+ */
+CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode) :
+  mTokenDeque() {
+  mDelegate=aDelegate;
+  mScanner=new CScanner(aMode);
+  mParseMode=aMode;
+}

 /**
 *  default destructor
@ -54,6 +83,19 @@ CTokenizer::~CTokenizer() {
 }


+/**
+ *  Forwards a chunk of source text to the scanner owned by this tokenizer.
+ *  
+ *  @update  gess 5/13/98
+ *  @param   aBuffer is the text handed on to CScanner::Append
+ *  @return  the scanner's result, or PR_FALSE when there is no scanner
+ */
+PRBool CTokenizer::Append(nsString& aBuffer) {
+  if(!mScanner)
+    return PR_FALSE;
+  return mScanner->Append(aBuffer);
+}
+
 /**
 * Retrieve a reference to the internal token deque.
 *
@ -105,31 +147,31 @@ PRBool CTokenizer::WillTokenize(PRBool aIncremental){
 }

 /**
- *  This is the primary control routine. It iteratively
- *  consumes tokens until an error occurs or you run out
- *  of data.
 *  
 *  @update  gess 3/25/98
- *  @return  error code 
+ *  @return  TRUE if it's ok to proceed
 */
-PRInt32 CTokenizer::Tokenize(void) {
+PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){  //NOTE(review): neither aSourceBuffer nor appendTokens is used in this body
   CToken* theToken=0;
   PRInt32 result=kNoError;
+  
+  WillTokenize(PR_TRUE);  //return value is ignored here
+
+  while(kNoError==result) {  //keep pulling tokens until GetToken reports anything but kNoError
+    result=GetToken(theToken);
+    if(theToken && (kNoError==result)) {  //NOTE(review): a token returned together with an error is not freed here

-  if(WillTokenize(PR_FALSE)) {
-    do {
-      result=GetToken(theToken);
-      if(theToken) {
 #ifdef VERBOSE_DEBUG
         theToken->DebugDumpToken(cout);
 #endif
-        if(mDelegate->WillAddToken(*theToken)) {
-          mTokenDeque.Push(theToken);
-        }
+      if(mDelegate->WillAddToken(*theToken)) {  //the delegate may veto a token; NOTE(review): a vetoed token is not freed here
+        mTokenDeque.Push(theToken);
       }
-    } while(0!=theToken);
-    result=DidTokenize(PR_FALSE);
-  }
+    }
+  } 
+  if(kEOF==result)  //running out of input is the normal way to finish, not an error
+    result=kNoError;
+  DidTokenize(PR_TRUE);  //return value is ignored here
   return result;
  }

@ -141,20 +183,33 @@ PRInt32 CTokenizer::Tokenize(void) {
 *  @update  gess 3/25/98
 *  @return  error code 
 */
-PRInt32 CTokenizer::TokenizeAvailable(int anIteration) {
+PRInt32 CTokenizer::Tokenize(int anIteration) {
  CToken* theToken=0;
  PRInt32 result=kNoError;
  PRBool  done=(0==anIteration) ? (!WillTokenize(PR_TRUE)) : PR_FALSE;
  

-  while((PR_FALSE==done) && (kInterrupted!=kInterrupted)) {
+  while((PR_FALSE==done) && (kNoError==result)) {
+    mScanner->Mark();
    result=GetToken(theToken);
-    if(theToken) {
-      if(mDelegate->WillAddToken(*theToken)) {
-        mTokenDeque.Push(theToken);
+    if(kNoError==result) {
+      if(theToken) {
+
+  #ifdef VERBOSE_DEBUG
+          theToken->DebugDumpToken(cout);
+  #endif
+
+        if(mDelegate->WillAddToken(*theToken)) {
+          mTokenDeque.Push(theToken);
+        }
      }
+
+    }
+    else {
+      if(theToken)
+        delete theToken;
+      mScanner->RewindToMark();
    }
-    else done=PR_TRUE;
  } 
  if((PR_TRUE==done)  && (kInterrupted!=result))
    DidTokenize(PR_TRUE);
--- a/mozilla/htmlparser/src/nsTokenizer.h
+++ b/mozilla/htmlparser/src/nsTokenizer.h
@ -50,19 +50,13 @@ class nsIURL;

 class  CTokenizer {
  public:
-    CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode);
+
+    CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator);  //no file or URL: scanner is built with CScanner(aMode)
+    CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator);  //scanner reads the named file
+    CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator);  //presumably: scanner reads from aURL (body not shown in this change)
+
    ~CTokenizer();
    
-    /**
-     *  This control routine causes the entire stream to be
-     *  tokenized. You probably want to call TokenizeAvailable()
-     *  instead (for incremental tokenization).
-     *  
-     *  @update  gess 3/25/98
-     *  @return  TRUE if it's ok to proceed
-     */
-    PRInt32 Tokenize(void);
-
    /**
     *  This method incrementally tokenizes as much content as
     *  it can get its hands on.
@ -70,7 +64,14 @@ class  CTokenizer {
     *  @update  gess 3/25/98
     *  @return  TRUE if it's ok to proceed
     */
-    PRInt32 TokenizeAvailable(int anIteration); //your friendly incremental version
+    PRInt32 Tokenize(int anIteration); //your friendly incremental version (renamed from TokenizeAvailable); returns an error code
+
+    /**
+     *  Tokenizes until the scanner reports an error or end of input.
+     *  NOTE(review): the current definition reads neither argument.
+     *  
+     *  @update  gess 3/25/98
+     *  @param   aSourceBuffer -- source text (currently unused by the definition)
+     *  @param   appendTokens -- currently unused by the definition
+     *  @return  error code; kEOF is reported as kNoError
+     */
+    PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens=PR_TRUE); 

    /**
     *  Cause the tokenizer to consume the next token, and 
@ -98,6 +99,23 @@ class  CTokenizer {
     */
    nsDeque& GetDeque(void);

+    /**
+     * Forwards the given text to the scanner's Append().
+     *
+     * @update  gess 4/20/98
+     * @param   aBuffer is the text to hand to the scanner
+     * @return  the scanner's result, or PR_FALSE if there is no scanner
+     */
+    PRBool Append(nsString& aBuffer);
+
+
+    /**
+     *  NOTE(review): no definition of SetBuffer is visible in this change;
+     *  its behavior and return value cannot be documented from here.
+     *  
+     *  @update  gess 5/13/98
+     *  @param   aBuffer -- presumably the text to tokenize; confirm against the definition
+     *  @return  
+     */
+    PRBool SetBuffer(nsString& aBuffer);
+
    /**
     *  This debug routine is used to cause the tokenizer to
     *  iterate its token list, asking each token to dump its
--- a/mozilla/parser/htmlparser/src/CNavDelegate.cpp
+++ b/mozilla/parser/htmlparser/src/CNavDelegate.cpp
@ -81,42 +81,49 @@ nsIDTD* CNavDelegate::GetDTD(void) const{
 *  and we know we're at the start of some kind of tagged 
 *  element. We don't know yet if it's a tag or a comment.
 *  
- *  @update  gess 3/25/98
- *  @param   
- *  @return  
+ *  @update  gess 5/12/98
+ *  @param   aChar is the last char read
+ *  @param   aScanner represents our input source
+ *  @param   aToken is the out arg holding our new token
+ *  @return  error code (may return kInterrupted).
 */
 PRInt32 CNavDelegate::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {

   nsAutoString empty("");
   PRInt32 result=aScanner.GetChar(aChar);

-  switch(aChar) {
-    case kForwardSlash:
-      PRUnichar ch; 
-      result=aScanner.Peek(ch);
-      if(nsString::IsAlpha(ch))
-        aToken=new CEndToken(empty);
-      else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
-      break;
-    case kExclamation:
-      aToken=new CCommentToken(empty);
-      break;
-    default:
-      if(nsString::IsAlpha(aChar))
-        return ConsumeStartTag(aChar,aScanner,aToken);
-      else if(kEOF!=aChar) {
-        nsAutoString temp("<");
-        return ConsumeText(temp,aScanner,aToken);
-      }
-  } //switch
+  if(kNoError==result) {  //only classify the tag when the char after '<' could actually be read

-  if(0!=aToken) {
-    result= aToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
-    if(result) {
-      delete aToken;
-      aToken=0;
-    }
-  }
+    switch(aChar) {
+      case kForwardSlash:
+        PRUnichar ch; 
+        result=aScanner.Peek(ch);
+        if(kNoError==result) {  //the peeked char is only meaningful when Peek succeeded
+          if(nsString::IsAlpha(ch))
+            aToken=new CEndToken(empty);
+          else aToken=new CCommentToken(empty); //Special case: </ ...> is treated as a comment
+        }//if
+        break;
+      case kExclamation:
+        aToken=new CCommentToken(empty);
+        break;
+      default:
+        if(nsString::IsAlpha(aChar))
+          return ConsumeStartTag(aChar,aScanner,aToken);
+        else if(kEOF!=aChar) {
+          nsAutoString temp("<");  //not a tag after all: the '<' becomes the start of a text token
+          return ConsumeText(temp,aScanner,aToken);
+        }
+    } //switch
+
+    if((0!=aToken) && (kNoError==result)) {
+      result= aToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
+      if(result) {  //any non-zero result (error or interruption): hand no token back
+        delete aToken;
+        aToken=0;
+      }
+    } //if
+  } //if
   return result;
  }

@ -131,20 +138,26 @@ PRInt32 CNavDelegate::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aTo
 */
 PRInt32 CNavDelegate::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner) {
   PRBool done=PR_FALSE;
-  nsAutoString as("");
   PRInt32 result=kNoError;
+  nsAutoString as("");
+
+  //Consume attribute tokens one after another and queue each on
+  //mTokenDeque, until the closing '>' is found or the scanner reports
+  //an error (including kInterrupted). The error code is returned.
   while((!done) && (result==kNoError)) {
-     CToken* theToken= new CAttributeToken(as);
-      if(theToken){
-        result= theToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
+    CToken* theToken= new CAttributeToken(as);
+    if(theToken){
+      result=theToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
+      if(kNoError==result){
         mTokenDeque.Push(theToken);
-      }
-    aScanner.Peek(aChar);
-    if(aChar==kGreaterThan) { //you just ate the '>'
-      aScanner.GetChar(aChar); //skip the '>'
-      done=PR_TRUE;
-    }
-  }
+      }//if
+      else delete theToken;  //never queued, so nobody else owns it: free it instead of leaking
+    }//if
+    
+    if(kNoError==result){
+      result=aScanner.Peek(aChar);
+      //aChar is only meaningful when the peek succeeded.
+      if((kNoError==result) && (aChar==kGreaterThan)) { //you just ate the '>'
+        aScanner.GetChar(aChar); //skip the '>'
+        done=PR_TRUE;
+      }//if
+    }//if
+  }//while
   return result;
  }

@ -166,8 +179,7 @@ PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar a
  endTag.Append(aString);
  endTag.Append(">");
  aToken=new CSkippedContentToken(endTag);
-  PRInt32 result= aToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
-  return result;
+  return aToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
 }

 /**
@ -183,38 +195,43 @@ PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar a
 PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
   aToken=new CStartToken(nsAutoString(""));
   PRInt32 result=kNoError;
+
   if(aToken) {
     result= aToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...    
-    if(((CStartToken*)aToken)->IsAttributed()) {
-      result=ConsumeAttributes(aChar,aScanner);
-    }
-    //now that that's over with, we have one more problem to solve.
-    //In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
-    //consume all the content itself.
-    nsString& str=aToken->GetText();
-    CToken*   skippedToken=0;
-    if(str.EqualsIgnoreCase("SCRIPT") ||
-       str.EqualsIgnoreCase("STYLE") ||
-       str.EqualsIgnoreCase("TITLE") ||
-       str.EqualsIgnoreCase("TEXTAREA")) {
-      result=ConsumeContentToEndTag(str,aChar,aScanner,skippedToken);
-      
-      if(skippedToken){
-          //now we strip the ending sequence from our new SkippedContent token...
-        PRInt32 slen=str.Length()+3;
-        nsString& skippedText=skippedToken->GetText();
-      
-        skippedText.Cut(skippedText.Length()-slen,slen);
-        mTokenDeque.Push(skippedToken);
+    if(kNoError==result) {  //go on only if the start tag itself was consumed cleanly
+      if(((CStartToken*)aToken)->IsAttributed()) {
+        result=ConsumeAttributes(aChar,aScanner);
+      }
+      //now that that's over with, we have one more problem to solve.
+      //In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
+      //consume all the content itself.
+      if(kNoError==result) {  //attributes (if any) are in; now look for tags whose content is skipped
+        nsString& str=aToken->GetText();
+        CToken*   skippedToken=0;
+        if(str.EqualsIgnoreCase("SCRIPT") ||
+           str.EqualsIgnoreCase("STYLE") ||
+           str.EqualsIgnoreCase("TITLE") ||
+           str.EqualsIgnoreCase("TEXTAREA")) {
+          result=ConsumeContentToEndTag(str,aChar,aScanner,skippedToken);
     
-        //In the case that we just read a given tag, we should go and
-        //consume all the tag content itself (and throw it all away).
+          if((kNoError==result) && skippedToken){  //NOTE(review): on failure skippedToken is neither queued nor deleted here
+              //now we strip the ending sequence from our new SkippedContent token...
+            PRInt32 slen=str.Length()+3;  //presumably the length of the closing tag text (tag name plus 3 delimiter chars) -- confirm against ConsumeContentToEndTag
+            nsString& skippedText=skippedToken->GetText();
+    
+            skippedText.Cut(skippedText.Length()-slen,slen);
+            mTokenDeque.Push(skippedToken);
+  
+            //In the case that we just read a given tag, we should go and
+            //consume all the tag content itself (and throw it all away).

-        CEndToken* endtoken=new CEndToken(str);
-        mTokenDeque.Push(endtoken);
+            CEndToken* endtoken=new CEndToken(str);  //queue an end token with the same tag name right after the skipped content
+            mTokenDeque.Push(endtoken);
+          } //if
+        } //if
       } //if
     } //if
-  }
+  } //if
   return result;
  }

@ -231,19 +248,22 @@ PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*
 PRInt32 CNavDelegate::ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
    PRUnichar  ch;
    PRInt32 result=aScanner.GetChar(ch);
-   if(nsString::IsAlpha(ch)) { //handle common enity references &xxx; or &#000.
-     aToken = new CEntityToken(nsAutoString(""));
-     result = aToken->Consume(ch,aScanner);  //tell new token to finish consuming text...    
-   }
-   else if(kHashsign==ch) {
-     aToken = new CEntityToken(nsAutoString(""));
-     result=aToken->Consume(0,aScanner);
-   }
-   else {
-     //oops, we're actually looking at plain text...
-     nsAutoString temp("&");
-     result=ConsumeText(temp,aScanner,aToken);
-   }
+
+   if(kNoError==result) {  //ch is only valid when GetChar succeeded; otherwise just pass the error back
+     if(nsString::IsAlpha(ch)) { //handle common entity references &xxx; or &#000.
+       aToken = new CEntityToken(nsAutoString(""));
+       result = aToken->Consume(ch,aScanner);  //tell new token to finish consuming text...    
+     }
+     else if(kHashsign==ch) {
+       aToken = new CEntityToken(nsAutoString(""));
+       result=aToken->Consume(0,aScanner);  //'#' case: the token is handed 0 rather than the char just read
+     }
+     else {
+       //oops, we're actually looking at plain text...
+       nsAutoString temp("&");
+       result=ConsumeText(temp,aScanner,aToken);
+     }
+   }//if
    return result;
  }

@ -336,36 +356,54 @@ PRInt32 CNavDelegate::ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*&
 *  @return new token or null 
 */
 PRInt32 CNavDelegate::GetToken(CScanner& aScanner,CToken*& aToken){
-  PRInt32   result=kNoError;
-  PRUnichar aChar;
-
+  
+  aToken=0;  //callers get a null token unless one is produced below
   if(mTokenDeque.GetSize()>0) {
     aToken=(CToken*)mTokenDeque.Pop();
-    return result;
+    return kNoError;  //a token queued earlier (e.g. by ConsumeAttributes) goes out before new input is read
   }
-  aToken=0;
-  while(!aScanner.Eof()) {
+
+  PRInt32 result=kNoError;
+  if(kNoError==result){  //always true here: result was initialized on the line above
+    
+    PRUnichar aChar;
     result=aScanner.GetChar(aChar);
-    switch(aChar) {
-      case kAmpersand:
-        return ConsumeEntity(aChar,aScanner,aToken);
-      case kLessThan:
-        return ConsumeTag(aChar,aScanner,aToken);
-      case kCR: case kLF:
-        return ConsumeNewline(aChar,aScanner,aToken);
-      case kNotFound:
+    switch(result) {  //dispatch on the read status first, then on the char itself
+      case kEOF:
         break;
+
+      case kInterrupted:
+        aScanner.RewindToMark();  //back up to the marked position before reporting the interruption
+        break; 
+
+      case kNoError:  //NOTE(review): shares the default label, so an unexpected error code would switch on an unset aChar
       default:
-        if(!nsString::IsSpace(aChar)) {
-          nsAutoString temp(aChar);
-          return ConsumeText(temp,aScanner,aToken);
-        }
-        else return ConsumeWhitespace(aChar,aScanner,aToken);
-        break;
+        switch(aChar) {
+          case kLessThan:
+            return ConsumeTag(aChar,aScanner,aToken);
+
+          case kAmpersand:
+            return ConsumeEntity(aChar,aScanner,aToken);
+          
+          case kCR: case kLF:
+            return ConsumeNewline(aChar,aScanner,aToken);
+          
+          case kNotFound:
+            break;
+          
+          default:
+            if(!nsString::IsSpace(aChar)) {
+              nsAutoString temp(aChar);
+              return ConsumeText(temp,aScanner,aToken);
+            }
+            else return ConsumeWhitespace(aChar,aScanner,aToken);
+            break;
+        } //switch
+        break; 
     } //switch
-    if(result==kEOF)
-      result=0;
-   } //while
+    if(kNoError==result)  //only reached when no Consume* call returned above
+      result=aScanner.Eof();
+  } //if
   return result;
  }

--- a/mozilla/parser/htmlparser/src/nsHTMLParser.cpp
+++ b/mozilla/parser/htmlparser/src/nsHTMLParser.cpp
@ -31,6 +31,7 @@
 #include "CNavDTD.h"
 #include "prenv.h"  //this is here for debug reasons...
 #include "plstr.h"
+#include <fstream.h>
 #ifdef XP_PC
 #include <direct.h> //this is here for debug reasons...
 #endif
@ -40,11 +41,13 @@ static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID);
 static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);

 static const char* kNullURL = "Error: Null URL given";
+static const char* kNullFilename= "Error: Null filename given";
 static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
 static const char* kNullToken = "Error: Null token given";
 static const char* kInvalidTagStackPos = "Error: invalid tag stack position";

-static char* gVerificationOutputDir=0;
+static char*  gVerificationOutputDir=0;
+static int    rickGDebug=0;  //debug switch: when non-zero, Parse(nsIURL*) first parses c:/temp/temp.html incrementally and ResumeParse skips IterateTokens

 /**
 *  This method is defined in nsIParser. It is used to 
@ -456,92 +459,198 @@ PRBool nsHTMLParser::IterateTokens() {
  return result;
 }

-
 /**
- *  This is the main controlling routine in the parsing process. 
- *  Note that it may get called multiple times for the same scanner, 
- *  since this is a pushed based system, and all the tokens may 
- *  not have been consumed by the scanner during a given invocation 
- *  of this method. 
- *
- *  @update  gess 3/25/98
- *  @param   aFilename -- const char* containing file to be parsed.
- *  @return  PR_TRUE if parse succeeded, PR_FALSE otherwise.
+ *  
+ *  
+ *  @update  gess 5/13/98
+ *  @param   
+ *  @return  
 */
-PRBool nsHTMLParser::Parse(nsIURL* aURL){
-  eParseMode  theMode=eParseMode_navigator;
+eParseMode DetermineParseMode() {  //picks the parse mode from the PARSE_MODE environment variable
   const char* theModeStr= PR_GetEnv("PARSE_MODE");
   const char* other="other";
+  eParseMode  result=eParseMode_navigator;  //navigator mode unless the environment says otherwise

   if(theModeStr) 
     if(0==nsCRT::strcasecmp(other,theModeStr))
-      theMode=eParseMode_other;
-
-  return Parse(aURL,theMode);
+      result=eParseMode_other;    //PARSE_MODE matched "other" (case-insensitive compare)
+  return result;
  }

+
 /**
- *  This is the main controlling routine in the parsing process. 
- *  Note that it may get called multiple times for the same scanner, 
- *  since this is a pushed based system, and all the tokens may 
- *  not have been consumed by the scanner during a given invocation 
- *  of this method. 
- *
- *  @update  gess 3/25/98
- *  @param   aFilename -- const char* containing file to be parsed.
- *  @return  PR_TRUE if parse succeeded, PR_FALSE otherwise.
+ *  
+ *  
+ *  @update  gess 5/13/98
+ *  @param   
+ *  @return  
 */
-PRBool nsHTMLParser::Parse(nsIURL* aURL,eParseMode aMode){
-  NS_PRECONDITION(0!=aURL,kNullURL);
-  
-  PRBool result=PR_FALSE;
-  if(aURL) {
+void GetDelegateAndDTD(eParseMode aMode,ITokenizerDelegate*& aDelegate,nsIDTD*& aDTD) {
+  //Creates the tokenizer delegate that belongs to aMode and fetches the
+  //DTD from it. Both out args are reset first, so an unhandled mode (or
+  //a failed allocation) yields null for both instead of leaving whatever
+  //the caller's variables held before the call.
+  aDelegate=0;
+  aDTD=0;
+  switch(aMode) {
+    case eParseMode_navigator:
+      aDelegate=new CNavDelegate(); break;
+    case eParseMode_other:
+      aDelegate=new COtherDelegate(); break;
+    default:
+      break;
+  }
+  if(aDelegate)
+    aDTD=aDelegate->GetDTD();
+}

-    result=PR_TRUE;
-    mParseMode=aMode;
-    ITokenizerDelegate* theDelegate=0;
-    
-    mDTD=0;
-    switch(mParseMode) {
-      case eParseMode_navigator:
-        theDelegate=new CNavDelegate();
-        if(theDelegate)
-          mDTD=theDelegate->GetDTD();
-        break;
-      case eParseMode_other:
-        theDelegate=new COtherDelegate();
-        if(theDelegate)
-          mDTD=theDelegate->GetDTD();
-        break;
-      default:
-        break;
-    }
-    if(!theDelegate) {
-      NS_ERROR(kNullTokenizer);
-      return PR_FALSE;
-    }

-    if(mDTD)
-      mDTD->SetParser(this);
-    mTokenizer=new CTokenizer(aURL, theDelegate, mParseMode);
+/**
+ *  This DEBUG ONLY method is used to simulate a network-based
+ *  i/o model where data comes in incrementally. The file is read
+ *  in small chunks; each chunk is appended to the tokenizer and
+ *  followed by a ResumeParse() call, until the file runs out or
+ *  ResumeParse() reports something other than kNoError/kInterrupted.
+ *  
+ *  @update  gess 5/13/98
+ *  @param   aFilename is the name of the disk file to use for testing.
+ *  @return  the result of the last ResumeParse() call (kNoError if the
+ *           file produced no data at all).
+ *           NOTE(review): this may be kInterrupted at end of file --
+ *           confirm whether callers expect that to be mapped to kNoError.
+ */
+PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
+  PRInt32   result=kBadFilename;
+  fstream*  mFileStream;
+  nsString  theBuffer;
+  PRInt32   iter=-1;
+  const int kBufSize=10;
+
+  mFileStream=new fstream(aFilename,ios::in|ios::binary);
+  if(mFileStream) {
+    result=kNoError;
+    while((kNoError==result) || (kInterrupted==result)) {
+      //read some data from the file...
+
+      char buf[kBufSize+1];   //+1 leaves room for the terminator
+
+      mFileStream->read(buf,kBufSize);
+      PRInt32 numread=mFileStream->gcount();
+      if(numread<=0)
+        break;                //end of file (or read failure): stop pumping data
+
+      //hand on exactly the chars read this time; a short final read
+      //must not drag along leftovers from the previous chunk.
+      buf[numread]=0;
+      theBuffer.Truncate();
+      theBuffer.Append((const char*)buf,numread);
+      mTokenizer->Append(theBuffer);
+      result=ResumeParse(++iter);

-    mSink->WillBuildModel();
-#ifdef __INCREMENTAL 
-    int iter=-1;
-    for(;;){
-      mSink->WillResume();
-      mTokenizer->TokenizeAvailable(++iter);
-      mSink->WillInterrupt();
     }
-#else
-    mTokenizer->Tokenize();
-#endif
-    result=IterateTokens();
-    mSink->DidBuildModel();
+    mFileStream->close();
+    delete mFileStream;
   }
   return result;
  }

+/**
+ *  Parses the document stored in the given disk file. Picks the parse
+ *  mode, creates the matching delegate/DTD and the tokenizer, and then
+ *  either feeds the file in small pieces (aIncremental) or lets the
+ *  tokenizer read the file by itself.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aFilename -- const char* containing file to be parsed.
+ *  @param   aIncremental -- PR_TRUE to simulate incremental (pushed) input.
+ *  @return  the parse status code, converted to PRBool; kBadFilename if
+ *           no filename was given or no delegate could be created.
+ */
+PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
+  NS_PRECONDITION(0!=aFilename,kNullFilename);
+
+  PRInt32 status=kBadFilename;
+
+  mIncremental=aIncremental;
+  mParseMode=DetermineParseMode();
+
+  if(!aFilename)
+    return status;
+
+  GetDelegateAndDTD(mParseMode,mDelegate,mDTD);
+  if(!mDelegate)
+    return status;
+
+  if(mDTD)
+    mDTD->SetParser(this);
+
+  mSink->WillBuildModel();
+
+  //build the tokenizer that fits the requested i/o style, then run it
+  if(aIncremental) {
+    mTokenizer=new CTokenizer(mDelegate,mParseMode);
+    status=ParseFileIncrementally(aFilename);
+  }
+  else {
+    mTokenizer=new CTokenizer(aFilename,mDelegate,mParseMode);
+    status=ResumeParse(0);
+  }
+
+  mSink->DidBuildModel();
+  return status;
+}
+
+/**
+ *  Parses the document behind the given URL. In incremental mode the
+ *  URL is opened with this parser as its listener and the call returns
+ *  the result of Open(); otherwise the tokenizer reads from the URL and
+ *  the whole parse runs before this method returns.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aURL -- the URL of the document to be parsed.
+ *  @param   aIncremental -- PR_TRUE to have data pushed at the parser.
+ *  @return  status code; kBadURL if no URL was given or no delegate
+ *           could be created (unless the debug file parse set it first).
+ */
+PRInt32 nsHTMLParser::Parse(nsIURL* aURL,PRBool aIncremental ){
+  NS_PRECONDITION(0!=aURL,kNullURL);
+
+  PRInt32 status=kBadURL;
+
+  //debug hook: run the canned local file through the incremental path first
+  if(rickGDebug)
+    status=Parse("c:/temp/temp.html",PR_TRUE);
+
+  mIncremental=aIncremental;
+  mParseMode=DetermineParseMode();
+
+  if(!aURL)
+    return status;
+
+  GetDelegateAndDTD(mParseMode,mDelegate,mDTD);
+  if(!mDelegate)
+    return status;
+
+  if(mDTD)
+    mDTD->SetParser(this);
+
+  mSink->WillBuildModel();
+
+  if(aIncremental) {
+    //data will be pushed at us later; DidBuildModel is not called here
+    mTokenizer=new CTokenizer(mDelegate,mParseMode);
+    status=aURL->Open(this);
+    return status;
+  }
+
+  mTokenizer=new CTokenizer(aURL,mDelegate,mParseMode);
+  status=ResumeParse(0);
+  mSink->DidBuildModel();
+  return status;
+}
+
+/**
+ * Call this method if all you want to do is parse 1 string full of HTML text.
+ * The text is appended to the existing tokenizer and parsed in one go.
+ *
+ * @update	gess5/11/98
+ * @param   aSourceBuffer contains a string-full of real HTML
+ * @param   appendTokens is not used by this implementation
+ * @return  the status code returned by ResumeParse(0)
+ */
+PRInt32 nsHTMLParser::Parse(nsString& aSourceBuffer,PRBool appendTokens){
+  mSink->WillBuildModel();
+  mTokenizer->Append(aSourceBuffer);
+  const PRInt32 status=ResumeParse(0);
+  mSink->DidBuildModel();
+  return status;
+}

 /**
 *  This routine is called to cause the parser to continue
@ -553,17 +662,21 @@ PRBool nsHTMLParser::Parse(nsIURL* aURL,eParseMode aMode){
 *  @param   
 *  @return  PR_TRUE if parsing concluded successfully.
 */
-PRBool nsHTMLParser::ResumeParse() {
+PRInt32 nsHTMLParser::ResumeParse(PRInt32 anIteration) {  //now returns the tokenizer's error code instead of the IterateTokens() result
+  PRInt32 result=kNoError;
+
   mSink->WillResume();
-  int iter=0;
-  PRInt32 errcode=mTokenizer->TokenizeAvailable(iter);
-  if(kInterrupted==errcode)
-    mSink->WillInterrupt();
-  PRBool result=IterateTokens();
+  if(kNoError==result) {  //always true here: result was initialized just above
+    result=mTokenizer->Tokenize(anIteration);
+    if(kInterrupted==result)  //tell the sink that tokenizing stopped for lack of data
+      mSink->WillInterrupt();
+
+    if(!rickGDebug)  //the debug switch suppresses token iteration
+      IterateTokens();  //return value is ignored
+  }
   return result;
  }

-
 /**
 * 
 * @update  gess4/22/98
@ -1388,5 +1501,64 @@ PRBool nsHTMLParser::ReduceContextStackFor(PRInt32 aChildTag){
 }


+/**
+ *  One of the net lib input-stream callbacks (see the class declaration).
+ *  Currently a stub that does no work.
+ *  @update  gess 5/12/98
+ *  @param   (none)
+ *  @return  always 0
+ */
+nsresult nsHTMLParser::GetBindInfo(void){
+  nsresult result=0;
+  return result;
+}
+
+/**
+ *  One of the net lib input-stream callbacks (see the class declaration).
+ *  Currently a stub that does no work.
+ *  @update  gess 5/12/98
+ *  @param   Progress, ProgressMax, msg -- all ignored by this stub
+ *  @return  always 0
+ */
+nsresult nsHTMLParser::OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const char *msg){
+  nsresult result=0;
+  return result;
+}
+
+/**
+ *  One of the net lib input-stream callbacks (see the class declaration).
+ *  Currently a stub that does no work.
+ *  @update  gess 5/12/98
+ *  @param   (none)
+ *  @return  always 0
+ */
+nsresult nsHTMLParser::OnStartBinding(void){
+  nsresult result=0;
+  return result;
+}
+
+/**
+ *  One of the net lib input-stream callbacks (see the class declaration).
+ *  Currently a stub: nothing is read from the stream or handed to the tokenizer.
+ *  @update  gess 5/12/98
+ *  @param   pIStream, length -- both ignored by this stub
+ *  @return  always 0
+ */
+nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length){
+  nsresult result=0;
+  return result;
+}
+
+/**
+ *  One of the net lib input-stream callbacks (see the class declaration).
+ *  Currently a stub that does no work.
+ *  @update  gess 5/12/98
+ *  @param   (none)
+ *  @return  always 0
+ */
+nsresult nsHTMLParser::OnStopBinding(void){
+  nsresult result=0;
+  return result;
+}


--- a/mozilla/parser/htmlparser/src/nsHTMLParser.h
+++ b/mozilla/parser/htmlparser/src/nsHTMLParser.h
@ -61,6 +61,9 @@
 #include "nsParserNode.h"
 #include "nsTokenHandler.h"
 #include "nsParserTypes.h"
+#include "nsIURL.h"
+#include "nsIStreamListener.h"
+#include "nsITokenizerDelegate.h"


 #define NS_IHTML_PARSER_IID      \
@ -75,7 +78,7 @@ class nsIURL;
 class nsIDTD;


-class nsHTMLParser : public nsIParser {
+class nsHTMLParser : public nsIParser, public nsIStreamListener {
            
  public:
 friend class CTokenHandler;
@ -104,14 +107,6 @@ friend class CTokenHandler;
     */
    virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);

-    /**
-     * Cause parser to parse input from given URL
-     * @update	gess5/11/98
-     * @param   aURL is a descriptor for source document
-     * @return  TRUE if all went well -- FALSE otherwise
-     */
-    virtual PRBool Parse(nsIURL* aURL);
-
    /**
     * Cause parser to parse input from given URL in given mode
     * @update	gess5/11/98
@ -119,14 +114,31 @@ friend class CTokenHandler;
     * @param   aMode is the desired parser mode (Nav, other, etc.)
     * @return  TRUE if all went well -- FALSE otherwise
     */
-    virtual PRBool Parse(nsIURL* aURL,eParseMode aMode);
+    virtual PRInt32 Parse(nsIURL* aURL,PRBool aIncremental=PR_FALSE);
+
+    /**
+     * Cause parser to parse input from given file
+     * @update	gess5/11/98
+     * @param   aFilename is a path for file document
+     * @param   aIncremental presumably selects incremental (push) parsing, as in
+     *          the URL overload -- TODO confirm against the definition
+     * @return  PRInt32 status code rather than a boolean; presumably kNoError on success -- TODO confirm
+     */
+    virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental);
+
+    /**
+     * Parse a single string of HTML text.
+     * @update	gess5/11/98
+     * @param   anHTMLString contains a string-full of real HTML
+     * @param   appendTokens tells us whether we should insert tokens inline, or append them.
+     *          NOTE(review): the definition of this method does not read this flag.
+     * @return  PRInt32 status code (the definition returns the result of ResumeParse(0))
+     */
+    virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens);

    /**
     * This method gets called (automatically) during incremental parsing
     * @update	gess5/11/98
     * @return  TRUE if all went well, otherwise FALSE
     */
-    virtual PRBool ResumeParse();
+    virtual PRInt32 ResumeParse(PRInt32 anIteration);

    /**
     * Retrieve ptr to internal context vector stack
@ -230,6 +242,15 @@ friend class CTokenHandler;
     */
    PRBool HandleStyleToken(CToken* aToken);

+      //*********************************************
+      // These methods are callback methods used by
+      // net lib to let us know about our inputstream.
+      //*********************************************
+    NS_IMETHOD GetBindInfo(void);
+    NS_IMETHOD OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const char *msg);
+    NS_IMETHOD OnStartBinding(void);
+    NS_IMETHOD OnDataAvailable(nsIInputStream *pIStream, PRInt32 length);
+    NS_IMETHOD OnStopBinding(void);

 protected:

@ -485,6 +506,10 @@ protected:
     */
    PRBool CreateContextStackFor(PRInt32 aChildTag);

+private:
+    PRInt32 ParseFileIncrementally(const char* aFilename);  //XXX ONLY FOR DEBUG PURPOSES...
+
+protected:
    //*********************************************
    // And now, some data members...
    //*********************************************
@ -502,6 +527,8 @@ protected:
    nsIDTD*             mDTD;
    eParseMode          mParseMode;
    PRBool              mHasOpenForm;
+    PRBool              mIncremental;
+    ITokenizerDelegate* mDelegate;
 };


--- a/mozilla/parser/htmlparser/src/nsIParser.h
+++ b/mozilla/parser/htmlparser/src/nsIParser.h
@ -40,6 +40,7 @@


 class nsIContentSink;
+class nsString;

 /**
 *  This class defines the iparser interface. This XPCOM
@ -52,10 +53,14 @@ class nsIParser : public nsISupports {
  public:

    virtual nsIContentSink* SetContentSink(nsIContentSink* aContentSink)=0;
-    virtual PRBool          Parse(nsIURL* aURL)=0;
-    virtual PRBool          ResumeParse()=0;
-    virtual PRInt32         GetStack(PRInt32* aStackPtr)=0;
-    virtual PRBool          HasOpenContainer(PRInt32 aContainer) const=0;
+
+    virtual PRInt32 Parse(nsIURL* aURL,PRBool aIncremental=PR_FALSE)=0;
+    virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0;
+    virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;
+
+    virtual PRInt32 ResumeParse(PRInt32 anIterator)=0;  // nsHTMLParser's override names this parameter anIteration
+    virtual PRInt32 GetStack(PRInt32* aStackPtr)=0;
+    virtual PRBool  HasOpenContainer(PRInt32 aContainer) const=0;
 };

 extern NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult);
--- a/mozilla/parser/htmlparser/src/nsParserTypes.h
+++ b/mozilla/parser/htmlparser/src/nsParserTypes.h
@ -37,13 +37,16 @@ enum  eParseMode {
  
  eParseMode_unknown=0,
  eParseMode_navigator,
-  eParseMode_other
+  eParseMode_other,
+  eParseMode_autodetect
 };

 const PRInt32   kEOF          = 1000000L;
+const PRInt32   kBadFilename  = -4;
+const PRInt32   kBadURL       = -3;
+const PRInt32   kInterrupted  = -2;
 const PRInt32   kNotFound     = -1;
 const PRInt32   kNoError      = 0;
-const PRInt32   kInterrupted  = 2;

 const PRUint32  kNewLine      = '\n';
 const PRUint32  kCR           = '\r';
--- a/mozilla/parser/htmlparser/src/nsScanner.cpp
+++ b/mozilla/parser/htmlparser/src/nsScanner.cpp
@ -22,9 +22,8 @@
 #include "nsIURL.h" 
 #include "nsDebug.h"

-const char* gURLRef;
-const char* kBadHTMLText1="<HTML><BODY><H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
-const char* kBadHTMLText2="</BODY></HTML>";
+const char* gURLRef=0;
+const char* kBadHTMLText="<H3>Oops...</H3>You just tried to read a non-existent document: <BR>";

 #ifdef __INCREMENTAL
 const int   kBufsize=1;
@ -33,31 +32,63 @@ const int   kBufsize=64;
 #endif

 /**
- *  default constructor
- *  
- *  @update  gess 3/25/98
- *  @param   aURL -- pointer to URL to be loaded
+ *  Use this constructor if you want an incremental (callback)
+ *  based input stream.
+ *
+ *  @update  gess 5/12/98
+ *  @param   aMode represents the parser mode (nav, other)
+ *  @return  
+ */
+CScanner::CScanner(eParseMode aMode) : mBuffer("") {  // scanner with no file or net stream attached
+  mOffset=0;
+  mMarkPos=-1;  // negative: Mark() has not recorded a position yet
+  mTotalRead=0;
+  mParseMode=aMode;
+  mNetStream=0;
+  mFileStream=0;
+  mIncremental=PR_TRUE;  // FillBuffer() reads nothing in this mode and reports kInterrupted; data arrives via IncrementalAppend()/Append()
+}
+
+/**
+ *  Use this constructor if you want i/o to be file based.
+ *  The file is opened for binary input; FillBuffer() then reads it in
+ *  kBufsize-sized pieces.
+ *  NOTE(review): the result of opening the file is not checked here, and
+ *  aFilename is only guarded by NS_ASSERTION before being handed to fstream.
+ *
+ *  @update  gess 5/12/98
+ *  @param   aFilename is the path of the file to read (must not be null)
+ *  @param   aMode represents the parser mode (nav, other)
+ *  @return  
+ */
+CScanner::CScanner(const char* aFilename,eParseMode aMode) : mBuffer("") {
+  NS_ASSERTION(0!=aFilename,"Error: Null filename!");
+  mOffset=0;
+  mMarkPos=-1;
+  mTotalRead=0;
+  mParseMode=aMode;
+  mNetStream=0;
+  mIncremental=PR_FALSE;
+  mFileStream=new fstream(aFilename,ios::in|ios::binary);
+}
+
+/**
+ *  Use this constructor if you want i/o to be based on a
+ *  non-incremental netstream.
+ *  A null aURL is not dereferenced: the scanner is simply left without
+ *  any stream, which makes FillBuffer() take its "non-existent document"
+ *  path.
+ *
+ *  @update  gess 5/12/98
+ *  @param   aURL is the document to open and read from
+ *  @param   aMode represents the parser mode (nav, other)
  *  @return  
  */
 CScanner::CScanner(nsIURL* aURL,eParseMode aMode) : mBuffer("") {
   NS_ASSERTION(0!=aURL,"Error: Null URL!");
   mOffset=0;
-  mStream=0;
+  mMarkPos=-1;
   mTotalRead=0;
   mParseMode=aMode;
-  if(aURL) {
-
-    gURLRef=aURL->GetSpec();
-
-#ifdef  __INCREMENTAL
-    mStream=new fstream("c:/temp/temp.html",ios::in|ios::binary);
-#else
-    int error;
-    mStream=aURL->Open(&error);
-#endif
-  }
+  mFileStream=0;
+  mNetStream=0;
+  mIncremental=PR_FALSE;
+  if(aURL) {  //don't rely on the assertion alone to keep a null URL out
+    PRInt32 error=0;
+    mNetStream=aURL->Open(&error);
+    gURLRef=aURL->GetSpec();
+  }
 }

+
 /**
 *  default destructor
 *  
@ -66,19 +97,107 @@ CScanner::CScanner(nsIURL* aURL,eParseMode aMode) : mBuffer("") {
 *  @return  
 */
 CScanner::~CScanner() {
-#ifdef __INCREMENTAL
-  mStream->close();
-  delete mStream;
-  mStream=0;
-#else
-  if(mStream) {
-    mStream->Close();
-    mStream->Release();
-    mStream=0;
+  if(mFileStream) {  // file-based scanner: close the fstream and free it
+    mFileStream->close();
+    delete mFileStream;
   }
-#endif
+  else if(mNetStream) {  // net-based scanner: the stream is closed and released rather than deleted
+    mNetStream->Close();
+    mNetStream->Release();
+  }
+  mFileStream=0;
+  mNetStream=0;
+  gURLRef=0;  // file-scope pointer set by the URL constructor; cleared no matter which kind of scanner is destroyed
 }

+/**
+ *  Resets current offset position of input stream to marked position. 
+ *  This allows us to back up to this point if the need should arise, 
+ *  such as when tokenization gets interrupted.
+ *  NOTE: IT IS REALLY BAD FORM TO CALL REWINDTOMARK WITHOUT CALLING MARK FIRST!
+ *  If no position has been marked (the mark is negative) the offset is
+ *  left untouched, so the read position can never become negative.
+ *
+ *  @update  gess 5/12/98
+ *  @param   (none)
+ *  @return  the read offset after the call
+ */
+PRInt32 CScanner::RewindToMark(void){
+  if(0<=mMarkPos)
+    mOffset=mMarkPos;
+  return mOffset;
+}
+
+/**
+ *  Records current offset position in input stream. This allows us
+ *  to back up to this point if the need should arise, such as when
+ *  tokenization gets interrupted.
+ *
+ *  @update  gess 5/12/98
+ *  @param   (none)
+ *  @return  the offset that was just recorded as the mark
+ */
+PRInt32 CScanner::Mark(void){
+  mMarkPos=mOffset;
+  return mMarkPos;
+}
+
+
+/**
+ *  Discards the front part of the scanner buffer that can no longer be
+ *  needed, and rebases the read offset and the mark onto what is left.
+ *
+ *  Everything from the mark onward is kept, because that is the point we
+ *  have guaranteed the client it can back up to. If no mark has been set
+ *  (negative mark), everything from the read offset onward is kept instead.
+ *  On return the mark is 0 and the offset refers to the same character it
+ *  did before the call (or to the start of the kept data if it pointed
+ *  into the discarded part).
+ *  
+ *  @update  gess 5/12/98
+ *  @param   aBuffer is the scanner's internal buffer
+ *  @param   anOffset is the current read position within aBuffer
+ *  @param   aMarkPos is the marked position, or a negative value for none
+ */
+void _PreCompressBuffer(nsString& aBuffer,PRInt32& anOffset,PRInt32& aMarkPos){
+  PRInt32 theLength=aBuffer.Length();
+
+  //The first character we have to keep: the mark if there is one,
+  //otherwise the current read position.
+  PRInt32 theKeepPos=(0<=aMarkPos) ? aMarkPos : anOffset;
+  if(theKeepPos<0)
+    theKeepPos=0;
+  if(theKeepPos>theLength)
+    theKeepPos=theLength;
+
+  if(theKeepPos>=theLength)
+    aBuffer.Truncate();   //nothing left that is worth keeping
+  else if(theKeepPos>0)
+    aBuffer.Cut(0,theKeepPos);
+
+  //Rebase the offset; a stale offset would make the scanner skip the
+  //beginning of whatever gets appended next.
+  anOffset=(anOffset>theKeepPos) ? anOffset-theKeepPos : 0;
+  if(anOffset>aBuffer.Length())
+    anOffset=aBuffer.Length();
+  aMarkPos=0;
+}
+
+
+/**
+ *  This method should only be called by the parser when
+ *  we're doing incremental i/o over the net.
+ *  The buffer is compacted first (see _PreCompressBuffer), then the new
+ *  bytes are appended and added to mTotalRead.
+ *  
+ *  @update  gess 5/12/98
+ *  @param   aBuffer contains next blob of i/o data
+ *  @param   aSize contains size of buffer
+ *  @return  always 0 as written; if a file or net stream is attached the data
+ *           is silently ignored (only the NS_ASSERTION flags it)
+ */
+PRInt32 CScanner::IncrementalAppend(const char* aBuffer,PRInt32 aSize){
+  NS_ASSERTION(((!mFileStream) && (!mNetStream)),"Error: Should only be called during incremental net i/o!");
+
+  PRInt32 result=0;
+  if((!mFileStream) && (!mNetStream)) {
+
+    _PreCompressBuffer(mBuffer,mOffset,mMarkPos);
+
+    //now that the buffer is (possibly) shortened, let's append the new data.
+    if(0<aSize) {
+      mBuffer.Append(aBuffer,aSize);
+      mTotalRead+=aSize;
+    }
+  }
+  return result;
+}
+
+/** 
+ * Append the caller's string to the scanner buffer, after compacting the
+ * buffer with _PreCompressBuffer(). Nothing is read from a stream here,
+ * and mTotalRead is not updated (unlike IncrementalAppend()).
+ *
+ * @update  gess4/3/98
+ * @param   aBuffer is the text to add to the end of the scanner buffer
+ * @return  always PR_TRUE
+ */
+PRBool CScanner::Append(nsString& aBuffer) {
+  _PreCompressBuffer(mBuffer,mOffset,mMarkPos);
+  mBuffer.Append(aBuffer);
+  return PR_TRUE;
+}

 /** 
 * Grab data from underlying stream.
@ -89,55 +208,63 @@ CScanner::~CScanner() {
 PRInt32 CScanner::FillBuffer(void) {
   PRInt32 anError=0;
 
-  mBuffer.Truncate();
-  if(!mStream) {
+  //Make room first (drop what lies in front of the mark), then try to
+  //pull in up to kBufsize more bytes. Returns 0 when data was read or a
+  //file simply had nothing more to give, kEOF when the net stream set its
+  //error to 1, any other net stream error unchanged, and kInterrupted for
+  //an incremental scanner (which never reads on its own).
+  _PreCompressBuffer(mBuffer,mOffset,mMarkPos);
+
+  if((!mIncremental) && (!mNetStream) && (!mFileStream)) {
     //This is DEBUG code!!!!!!  XXX DEBUG XXX
     //If you're here, it means someone tried to load a
     //non-existent document. So as a favor, we emit a
     //little bit of HTML explaining the error.
     if(0==mTotalRead) {
-      mBuffer.Append((const char*)kBadHTMLText1);
+      PRInt32 theOldLength=mBuffer.Length();
+      mBuffer.Append((const char*)kBadHTMLText);
       mBuffer.Append((const char*)gURLRef);
-      mBuffer.Append((const char*)kBadHTMLText2);
+      //Count what we just emitted. Otherwise mTotalRead stays 0 and the
+      //message gets appended again on every single call.
+      mTotalRead+=mBuffer.Length()-theOldLength;
     }
     else return 0;
   }
-  else {
+  else if(!mIncremental) {
     PRInt32 numread=0;
     char buf[kBufsize+1];
     buf[kBufsize]=0;
 
-#ifdef __INCREMENTAL 
-    mStream->read(buf,kBufsize);
-    numread=mStream->gcount();
-#else
-    numread=mStream->Read(&anError,buf,0,kBufsize);
-#endif
+    if(mFileStream) {
+      mFileStream->read(buf,kBufsize);
+      numread=mFileStream->gcount();
+    }
+    else if(mNetStream) {
+      numread=mNetStream->Read(&anError,buf,0,kBufsize);
+      if(1==anError)
+        anError=kEOF;
+    }
+    mOffset=mBuffer.Length();
-    if((0<numread) && (0==anError))
-      mBuffer.Append((const char*)buf,numread);
+    if((0<numread) && (0==anError)) {
+      mBuffer.Append((const char*)buf,numread);
+      mTotalRead+=numread;   //only the new bytes, not the data we kept around
+    }
   }
-  mTotalRead+=mBuffer.Length();
+  else anError=kInterrupted;
+
   return anError;
 }

 /**
  *  determine if the scanner has reached EOF
  *  
- *  @update  gess 3/25/98
+ *  When the buffered data is used up, a non-incremental scanner tries to
+ *  refill the buffer; an incremental one reports kInterrupted instead.
+ *
+ *  @update  gess 5/12/98
  *  @param   
- *  @return  PR_TRUE upon eof condition
+ *  @return  0=!eof 1=eof kInterrupted=interrupted, or the error code
+ *           handed back by FillBuffer()
  */
-PRBool CScanner::Eof() {
+PRInt32 CScanner::Eof() {
   PRInt32 theError=0;
+
   if(mOffset>=mBuffer.Length()) {
-    theError=FillBuffer();
-    mOffset=0;
+    if(!mIncremental)
+      theError=FillBuffer();  
+    else return kInterrupted;
   }
-  PRBool result=PR_TRUE;
-  if(0==theError) {
-    result=PRBool(0==mBuffer.Length());
-  }
-  return result;
+  
+  //The buffer may still hold data we kept for a rewind, so an empty
+  //buffer is no longer the right test: we're at eof when nothing is
+  //left to read at or beyond the current offset.
+  if(0==theError) 
+    return (mOffset>=mBuffer.Length());
+
+  return theError;
 }

 /**
@ -148,11 +275,12 @@ PRBool CScanner::Eof() {
 *  @return  error code reflecting read status
 */
 PRInt32 CScanner::GetChar(PRUnichar& aChar) {
-  if(!Eof()) {
+  //Reads the next character and moves past it. Returns kNoError with
+  //aChar set, kEOF at end of input, or the other status codes Eof() can
+  //give (kInterrupted, stream errors); aChar is untouched in those cases.
+  PRInt32 result=Eof();
+  if(!result) {
     aChar=mBuffer[mOffset++];
-    return kNoError;
+    result=kNoError;
   }
-  return kEOF;
+  else if(1==result)
+    result=kEOF;   //Eof() answers a plain 1 for end of input; callers compare against kEOF
+  return result;
 }


@ -165,11 +293,12 @@ PRInt32 CScanner::GetChar(PRUnichar& aChar) {
 *  @return  
 */
 PRInt32 CScanner::Peek(PRUnichar& aChar){
-  if(!Eof()) {
+  //Looks at the next character without moving past it. Returns kNoError
+  //with aChar set, kEOF at end of input, or the other status codes Eof()
+  //can give (kInterrupted, stream errors); aChar is untouched in those cases.
+  PRInt32 result=Eof();
+  if(!result) {
     aChar=mBuffer[mOffset];        
-    return kNoError;
+    result=kNoError;
   }
-  return kEOF;
+  else if(1==result)
+    result=kEOF;   //Eof() answers a plain 1 for end of input; callers compare against kEOF
+  return result;
 }


@ -181,7 +310,9 @@ PRInt32 CScanner::Peek(PRUnichar& aChar){
 *  @return  error code
 */
 PRInt32 CScanner::PutBack(PRUnichar aChar) {
-  mOffset--;
+  if(mOffset>0)
+    mOffset--;  // step back over the previous character; aChar itself is neither written nor compared
+  else mBuffer.Insert(aChar,0);  // already at the buffer start: aChar is inserted (presumably at index 0) and the offset stays 0
   return kNoError;
 }

@ -301,8 +432,8 @@ PRInt32 CScanner::ReadUntil(nsString& aString,nsString& aTerminalSet,PRBool addT
  PRUnichar ch=0;
  PRInt32   result=kNoError;

-  while(!Eof()) {
-     result=GetChar(ch);
+  while(!result) {
+    result=GetChar(ch);
    if(kNoError==result) {
      PRInt32 pos=aTerminalSet.Find(ch);
      if(kNotFound!=pos) {
--- a/mozilla/parser/htmlparser/src/nsScanner.h
+++ b/mozilla/parser/htmlparser/src/nsScanner.h
@ -43,7 +43,36 @@ class ifstream;

 class CScanner {
  public:
+
+      /**
+       *  Use this constructor if you want an incremental (callback)
+       *  based input stream.
+       *
+       *  @update  gess 5/12/98
+       *  @param   aMode represents the parser mode (nav, other)
+       *  @return  
+       */
+      CScanner(eParseMode aMode=eParseMode_navigator);
+      
+      /**
+       *  Use this constructor if you want i/o to be based on a
+       *  non-incremental netstream.
+       *
+       *  @update  gess 5/12/98
+       *  @param   aMode represents the parser mode (nav, other)
+       *  @return  
+       */
      CScanner(nsIURL* aURL,eParseMode aMode=eParseMode_navigator);
+
+      /**
+       *  Use this constructor if you want i/o to be file based.
+       *
+       *  @update  gess 5/12/98
+       *  @param   aMode represents the parser mode (nav, other)
+       *  @return  
+       */
+      CScanner(const char* aFilename,eParseMode aMode=eParseMode_navigator);
+
      ~CScanner();

      /**
@ -117,7 +146,7 @@ class CScanner {
       *  @update  gess 3/25/98
       *  @return  PR_TRUE upon eof condition
       */
-      PRBool Eof(void);
+      PRInt32 Eof(void);

      /**
       *  Consume characters until you find the terminal char
@ -153,6 +182,48 @@ class CScanner {
       */
      PRInt32 ReadWhile(nsString& aString,nsString& anInputSet,PRBool addTerminal);

+      /**
+       *  Records current offset position in input stream. This allows us
+       *  to back up to this point if the need should arise, such as when
+       *  tokenization gets interrupted.
+       *  
+       *  @update  gess 5/12/98
+       *  @param   (none)
+       *  @return  the offset that was recorded as the mark
+       */
+      PRInt32 Mark(void);
+
+      /**
+       *  Resets current offset position of input stream to marked position. 
+       *  This allows us to back up to this point if the need should arise, 
+       *  such as when tokenization gets interrupted.
+       *  NOTE: IT IS REALLY BAD FORM TO CALL REWINDTOMARK WITHOUT CALLING MARK FIRST!
+       *  
+       *  @update  gess 5/12/98
+       *  @param   (none)
+       *  @return  the read offset after rewinding
+       */
+      PRInt32 RewindToMark(void);
+
+
+      /**
+       *  Appends the given string to the scanner's internal buffer
+       *  (nothing is read from a stream).
+       *  @update  gess 5/13/98
+       *  @param   aBuffer is the text to append
+       *  @return  always PR_TRUE in the current definition
+       */
+      PRBool Append(nsString& aBuffer);
+
+      /**
+       *  Appends a block of raw bytes to the scanner's internal buffer.
+       *  Only meant for scanners that have no file or net stream attached.
+       *  @update  gess 5/12/98
+       *  @param   aBuffer is the data to append; aSize is its length in bytes
+       *  @return  always 0 in the current definition
+       */
+      PRInt32 IncrementalAppend(const char* aBuffer,PRInt32 aSize);
+
      static void SelfTest();

  protected:
@ -167,15 +238,15 @@ class CScanner {
       */
      PRInt32 FillBuffer(void);

-#ifdef __INCREMENTAL
-      fstream*        mStream;
-#else
-      nsIInputStream* mStream;
-#endif
+
+      fstream*        mFileStream;
+      nsIInputStream* mNetStream;
      nsString        mBuffer;
      PRInt32         mOffset;
+      PRInt32         mMarkPos;
      PRInt32         mTotalRead;
      eParseMode      mParseMode;
+      PRBool          mIncremental;
 };

 #endif
--- a/mozilla/parser/htmlparser/src/nsTokenizer.cpp
+++ b/mozilla/parser/htmlparser/src/nsTokenizer.cpp
@ -39,6 +39,35 @@ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMo
  mParseMode=aMode;
 }

+/**
+ *  Constructor for file based tokenizing: creates a CScanner that reads
+ *  from the named file.
+ *  
+ *  @update gess 3/25/98
+ *  @param  aFilename -- name of file to be tokenized
+ *  @param  aDelegate -- ref to delegate to be used to tokenize
+ *  @param  aMode -- parse mode, stored here and also handed to the scanner
+ *  @return 
+ */
+CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode) :
+  mTokenDeque() {
+  mDelegate=aDelegate;
+  mScanner=new CScanner(aFilename,aMode);
+  mParseMode=aMode;
+}
+
+/**
+ *  Constructor for tokenizing without a file or URL: creates a CScanner
+ *  via its mode-only constructor, so input has to be supplied afterwards
+ *  (e.g. through Append()).
+ *  
+ *  @update gess 3/25/98
+ *  @param  aDelegate -- ref to delegate to be used to tokenize
+ *  @param  aMode -- parse mode, stored here and also handed to the scanner
+ *  @return 
+ */
+CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode) :
+  mTokenDeque() {
+  mDelegate=aDelegate;
+  mScanner=new CScanner(aMode);
+  mParseMode=aMode;
+}

 /**
 *  default destructor
@ -54,6 +83,19 @@ CTokenizer::~CTokenizer() {
 }


+/**
+ *  Forwards the given text to the scanner, which appends it to its
+ *  input buffer.
+ *  @update  gess 5/13/98
+ *  @param   aBuffer is the text to append
+ *  @return  the scanner's result, or PR_FALSE if there is no scanner
+ */
+PRBool CTokenizer::Append(nsString& aBuffer) {
+  if(mScanner)
+    return mScanner->Append(aBuffer);
+  return PR_FALSE;
+}
+
 /**
 * Retrieve a reference to the internal token deque.
 *
@ -105,31 +147,31 @@ PRBool CTokenizer::WillTokenize(PRBool aIncremental){
 }

 /**
- *  This is the primary control routine. It iteratively
- *  consumes tokens until an error occurs or you run out
- *  of data.
+ *  Fetches tokens with GetToken() until it reports anything other than
+ *  kNoError, pushing every token the delegate accepts onto the deque.
  *  
  *  @update  gess 3/25/98
- *  @return  error code 
+ *  @return  kNoError on success (kEOF is converted to kNoError), otherwise the error code from GetToken()
  */
-PRInt32 CTokenizer::Tokenize(void) {
+PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){  // NOTE(review): neither aSourceBuffer nor appendTokens is referenced in this body
   CToken* theToken=0;
   PRInt32 result=kNoError;
+  
+  WillTokenize(PR_TRUE);  // return value ignored
+
+  while(kNoError==result) {
+    result=GetToken(theToken);
+    if(theToken && (kNoError==result)) {
 
-  if(WillTokenize(PR_FALSE)) {
-    do {
-      result=GetToken(theToken);
-      if(theToken) {
 #ifdef VERBOSE_DEBUG
         theToken->DebugDumpToken(cout);
 #endif
-        if(mDelegate->WillAddToken(*theToken)) {
-          mTokenDeque.Push(theToken);
-        }
+      if(mDelegate->WillAddToken(*theToken)) {  // a rejected token is neither pushed nor deleted here -- presumably handled elsewhere; confirm
+        mTokenDeque.Push(theToken);
       }
-    } while(0!=theToken);
-    result=DidTokenize(PR_FALSE);
-  }
+    }
+  } 
+  if(kEOF==result)  // running out of input is not reported as an error
+    result=kNoError;
+  DidTokenize(PR_TRUE);
   return result;
 }

@ -141,20 +183,33 @@ PRInt32 CTokenizer::Tokenize(void) {
 *  @update  gess 3/25/98
 *  @return  error code 
 */
-PRInt32 CTokenizer::TokenizeAvailable(int anIteration) {
+PRInt32 CTokenizer::Tokenize(int anIteration) {
  CToken* theToken=0;
  PRInt32 result=kNoError;
  PRBool  done=(0==anIteration) ? (!WillTokenize(PR_TRUE)) : PR_FALSE;
  

-  while((PR_FALSE==done) && (kInterrupted!=kInterrupted)) {
+  while((PR_FALSE==done) && (kNoError==result)) {
+    mScanner->Mark();
    result=GetToken(theToken);
-    if(theToken) {
-      if(mDelegate->WillAddToken(*theToken)) {
-        mTokenDeque.Push(theToken);
+    if(kNoError==result) {
+      if(theToken) {
+
+  #ifdef VERBOSE_DEBUG
+          theToken->DebugDumpToken(cout);
+  #endif
+
+        if(mDelegate->WillAddToken(*theToken)) {
+          mTokenDeque.Push(theToken);
+        }
      }
+
+    }
+    else {
+      if(theToken)
+        delete theToken;
+      mScanner->RewindToMark();
    }
-    else done=PR_TRUE;
  } 
  if((PR_TRUE==done)  && (kInterrupted!=result))
    DidTokenize(PR_TRUE);
--- a/mozilla/parser/htmlparser/src/nsTokenizer.h
+++ b/mozilla/parser/htmlparser/src/nsTokenizer.h
@ -50,19 +50,13 @@ class nsIURL;

 class  CTokenizer {
  public:
-    CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode);
+
+    CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator);
+    CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator);
+    CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator);
+
    ~CTokenizer();
    
-    /**
-     *  This control routine causes the entire stream to be
-     *  tokenized. You probably want to call TokenizeAvailable()
-     *  instead (for incremental tokenization).
-     *  
-     *  @update  gess 3/25/98
-     *  @return  TRUE if it's ok to proceed
-     */
-    PRInt32 Tokenize(void);
-
    /**
     *  This method incrementally tokenizes as much content as
     *  it can get its hands on.
@ -70,7 +64,14 @@ class  CTokenizer {
     *  @update  gess 3/25/98
     *  @return  TRUE if it's ok to proceed
     */
-    PRInt32 TokenizeAvailable(int anIteration); //your friendly incremental version
+    PRInt32 Tokenize(int anIteration); //your friendly incremental version
+
+    /**
+     *  Tokenizes until GetToken() reports something other than kNoError,
+     *  pushing each accepted token onto the deque.
+     *  NOTE(review): the definition in this change reads neither argument.
+     *  
+     *  @update  gess 3/25/98
+     *  @return  kNoError on success (kEOF is converted to kNoError), otherwise the error code
+     */
+    PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens=PR_TRUE); 

    /**
     *  Cause the tokenizer to consume the next token, and 
@ -98,6 +99,23 @@ class  CTokenizer {
     */
    nsDeque& GetDeque(void);

+    /**
+     * Hands aBuffer to the scanner so that it is added to the input still
+     * to be tokenized.
+     *
+     * @update  gess 4/20/98
+     * @param   aBuffer is the text to append
+     * @return  the scanner's result, or PR_FALSE if there is no scanner
+     */
+    PRBool Append(nsString& aBuffer);
+
+
+    /**
+     *  NOTE(review): no definition of SetBuffer() appears in the visible part
+     *  of this change -- presumably it replaces the scanner's input with aBuffer;
+     *  confirm, or drop the declaration if it stays unimplemented.
+     *  
+     *  @update  gess 5/13/98
+     *  @param   aBuffer -- presumably the new input text (TODO confirm)
+     *  @return  
+     */
+    PRBool SetBuffer(nsString& aBuffer);
+
    /**
     *  This debug routine is used to cause the tokenizer to
     *  iterate its token list, asking each token to dump its