Mozilla/mozilla/parser/htmlparser/src/nsParser.cpp

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL.  You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation.  Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
 * Reserved.
 */


#include "nsParser.h"
#include "nsIContentSink.h"
#include "nsString.h"
#include "nsCRT.h"
#include "nsScanner.h"
#include "prenv.h"  //this is here for debug reasons...
#include "plstr.h"
#include <fstream.h>
#include "nsIParserFilter.h"
#include "nshtmlpars.h"
#include "nsWellFormedDTD.h"
#include "nsViewSourceHTML.h" //uncomment this to partially enable viewsource...

#undef rickgdebug
#ifdef  rickgdebug
#include "CRtfDTD.h"
#endif


// Interface IDs that nsParser::QueryInterface() compares incoming IIDs against.
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kClassIID, NS_PARSER_IID);
static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
static NS_DEFINE_IID(kIStreamListenerIID, NS_ISTREAMLISTENER_IID);

// Messages handed to the NS_PRECONDITION / NS_POSTCONDITION checks in this file.
static const char* kNullURL = "Error: Null URL given";
static const char* kOnStartNotCalled = "Error: OnStartBinding() must be called before OnDataAvailable()";
static const char* kOnStopNotCalled  = "Error: OnStopBinding() must be called upon termination of netlib process";
static const char* kBadListenerInit  = "Error: Parser's IStreamListener API was not setup correctly in constructor.";
// Filename given to the scanner when parsing straight from an fstream.
static nsString    kUnknownFilename("unknown");
// NOTE(review): despite its name this string holds "unknown", not "". It is
// the argument BuildModel() passes to nsIDTD::Verify(); confirm whether an
// empty string was intended.
static nsString    kEmptyString("unknown");

// NOTE(review): not referenced by the code visible in this file;
// OnDataAvailable() sizes its buffers with CParserContext::eTransferBufferSize.
static const int  gTransferBufferSize=4096;  //size of the buffer used in moving data from iistream


/**
 * Deque functor that destroys each CToken it is handed.
 */
class CTokenDeallocator: public nsDequeFunctor{
public:
  // Interprets anObject as a CToken*, deletes it, and always returns 0.
  virtual void* operator()(void* anObject) {
    delete (CToken*)anObject;
    return 0;
  }
};

// File-scope instance of the token deallocator.
CTokenDeallocator gTokenDeallocator2;

class CDTDDeallocator: public nsDequeFunctor{
public:
  virtual void* operator()(void* anObject) {
    nsIDTD* aDTD =(nsIDTD*)anObject;
    NS_RELEASE(aDTD);
    return 0;
  }
};

/**
 * Deque functor that matches entries by pointer identity against a target
 * DTD. The target is AddRef'd for the lifetime of the finder.
 */
class CDTDFinder: public nsDequeFunctor{
public:
  CDTDFinder(nsIDTD* aDTD) : mTargetDTD(aDTD) {
    NS_IF_ADDREF(mTargetDTD);
  }
  virtual ~CDTDFinder() {
    NS_IF_RELEASE(mTargetDTD);
  }
  // Returns anObject when it is the target DTD, otherwise 0.
  virtual void* operator()(void* anObject) {
    if(anObject==(void*)mTargetDTD)
      return anObject;
    return 0;
  }
  nsIDTD* mTargetDTD;
};

/**
 * Holder for objects shared by every parser instance; currently just the
 * deque of registered DTDs. The constructor pre-registers the well-formed
 * DTD and the view-source DTD.
 */
class CSharedParserObjects {
public:

  CSharedParserObjects() : mDeallocator(), mDTDDeque(mDeallocator) {
    //Start from 0 so a factory that fails without writing its
    //out-param can never cause a garbage pointer to be registered.
    nsIDTD* theDTD=0;
    NS_NewWellFormed_DTD(&theDTD);
    RegisterDTD(theDTD);

    theDTD=0;
    NS_NewViewSourceHTML(&theDTD);
    RegisterDTD(theDTD);
  }

  ~CSharedParserObjects() {
  }

  /**
   * Adds aDTD to the shared deque unless that exact pointer is already
   * present. Null pointers are ignored, because CDTDDeallocator calls
   * NS_RELEASE on every entry.
   *
   * @param   aDTD -- DTD to register (may be null, in which case nothing happens)
   */
  void RegisterDTD(nsIDTD* aDTD){
    if(!aDTD)
      return;
    CDTDFinder theFinder(aDTD);
    if(!mDTDDeque.FirstThat(theFinder))
      mDTDDeque.Push(aDTD);
  }

  /**
   * Not implemented yet; always returns 0.
   */
  nsIDTD*  FindDTD(nsIDTD* aDTD){
    return 0;
  }

  CDTDDeallocator mDeallocator;  //declared before mDTDDeque, which is constructed with it
  nsDeque mDTDDeque;
};

CSharedParserObjects gSharedParserObjects;

//----------------------------------------

// Value stored for byte codes that have no mapping in the table below.
// NOTE(review): 0xfffd is presumably chosen because U+FFFD is the Unicode
// REPLACEMENT CHARACTER -- confirm.
#define NOT_USED 0xfffd

// Replacement UCS2 values for the 32 byte codes 0x80..0x9f.
// MakeConversionTable() copies entry i into gToUCS2[0x80 + i].
// NOTE(review): the values look like the Windows-1252 assignments for this
// range (without 0x80, 0x8e and 0x9e) -- confirm against the entity mapping
// this is meant to mirror.
static PRUint16 PA_HackTable[] = {
	/* entries for 0x80 - 0x8f */
	NOT_USED,
	NOT_USED,
	0x201a,  /* SINGLE LOW-9 QUOTATION MARK */
	0x0192,  /* LATIN SMALL LETTER F WITH HOOK */
	0x201e,  /* DOUBLE LOW-9 QUOTATION MARK */
	0x2026,  /* HORIZONTAL ELLIPSIS */
	0x2020,  /* DAGGER */
	0x2021,  /* DOUBLE DAGGER */
	0x02c6,  /* MODIFIER LETTER CIRCUMFLEX ACCENT */
	0x2030,  /* PER MILLE SIGN */
	0x0160,  /* LATIN CAPITAL LETTER S WITH CARON */
	0x2039,  /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
	0x0152,  /* LATIN CAPITAL LIGATURE OE */
	NOT_USED,
	NOT_USED,
	NOT_USED,

	/* entries for 0x90 - 0x9f */
	NOT_USED,
	0x2018,  /* LEFT SINGLE QUOTATION MARK */
	0x2019,  /* RIGHT SINGLE QUOTATION MARK */
	0x201c,  /* LEFT DOUBLE QUOTATION MARK */
	0x201d,  /* RIGHT DOUBLE QUOTATION MARK */
	0x2022,  /* BULLET */
	0x2013,  /* EN DASH */
	0x2014,  /* EM DASH */
	0x02dc,  /* SMALL TILDE */
	0x2122,  /* TRADE MARK SIGN */
	0x0161,  /* LATIN SMALL LETTER S WITH CARON */
	0x203a,  /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
	0x0153,  /* LATIN SMALL LIGATURE OE */
	NOT_USED,
	NOT_USED,
	0x0178   /* LATIN CAPITAL LETTER Y WITH DIAERESIS */
};

static PRUnichar gToUCS2[256];

static void
MakeConversionTable()
{
  static PRBool firstTime = PR_TRUE;
  if (firstTime) {
    firstTime = PR_FALSE;
    PRUnichar* cp = gToUCS2;
    PRInt32 i;
    for (i = 0; i < 256; i++) {
      *cp++ = PRUnichar(i);
    }
    cp = gToUCS2;
    for (i = 0; i < 32; i++) {
      cp[0x80 + i] = PA_HackTable[i];
    }
  }
}

//----------------------------------------

/**
 *  Default constructor. Puts every member this file reads into a known
 *  state and makes sure the shared byte-to-UCS2 table has been built.
 *
 *  @update  gess 3/25/98
 */
nsParser::nsParser() : mCommand() {
  NS_INIT_REFCNT();
  mStreamListenerState=eNone;
  mParserFilter = 0;
  mObserver = 0;
  mSink=0;
  mParserContext=0;
  mDTDVerification=PR_FALSE;
  //These were previously left uninitialized until EnableParser() or
  //WillBuildModel() happened to be called.
  mParserEnabled=PR_TRUE;
  mMajorIteration=-1;
  mMinorIteration=-1;
  MakeConversionTable();
}


/**
 *  Destructor. Releases the observer, parser filter and content sink (each
 *  only if set) and deletes the topmost parser context.
 *
 *  @update  gess 3/25/98
 */
nsParser::~nsParser() {
  NS_IF_RELEASE(mObserver);
  //SetParserFilter() AddRefs the filter, so drop that reference here.
  NS_IF_RELEASE(mParserFilter);
  //A parser that was never given a sink has mSink==0; plain NS_RELEASE
  //would dereference the null pointer.
  NS_IF_RELEASE(mSink);

  NS_POSTCONDITION(eOnStop==mStreamListenerState,kOnStopNotCalled);
  //don't forget to add code here to delete
  //what may be several contexts...
  delete mParserContext;
}


// Reference-counting boilerplate comes from the NS_IMPL_* macros
// (presumably the AddRef/Release implementations). QueryInterface is written
// out by hand in this file, so NS_IMPL_ISUPPORTS stays commented out.
NS_IMPL_ADDREF(nsParser)
NS_IMPL_RELEASE(nsParser)
//NS_IMPL_ISUPPORTS(nsParser,NS_IHTML_PARSER_IID)


/**
 *  Part of our COM-like interface: hands back a pointer to this object
 *  viewed as the requested interface. A successful lookup AddRefs this.
 *
 *  @update   gess 3/25/98
 *  @param    aIID -- id of the interface being asked for
 *  @param    aInstancePtr -- receives the interface pointer (0 when unsupported)
 *  @return   NS_OK, NS_NOINTERFACE for an unknown id, or
 *            NS_ERROR_NULL_POINTER when aInstancePtr is null
 */
nsresult nsParser::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
  if (NULL == aInstancePtr) {
    return NS_ERROR_NULL_POINTER;
  }

  if(aIID.Equals(kISupportsIID) || aIID.Equals(kIParserIID)) {
    //nsISupports and nsIParser are both answered via the nsIParser base...
    *aInstancePtr = (nsIParser*)(this);
  }
  else if(aIID.Equals(kIStreamListenerIID)) {
    *aInstancePtr = (nsIStreamListener*)(this);
  }
  else if(aIID.Equals(kClassIID)) {
    //the concrete class itself...
    *aInstancePtr = (nsParser*)(this);
  }
  else {
    *aInstancePtr=0;
    return NS_NOINTERFACE;
  }

  NS_ADDREF_THIS();
  return NS_OK;
}


/**
 *  Installs the filter that gets first look at raw stream data in
 *  OnDataAvailable(). The parser holds one reference on the installed
 *  filter; passing null removes the current filter.
 *
 *  @update	gess6/18/98
 *  @param   aFilter -- new filter, or null for none
 *  @return  the previously installed filter pointer as left by the release.
 *           The parser's reference on it has already been dropped, so the
 *           caller must not dereference it.
 */
nsIParserFilter * nsParser::SetParserFilter(nsIParserFilter * aFilter)
{
  nsIParserFilter* thePrevious=mParserFilter;

  //AddRef the incoming filter before releasing the old one, so that
  //re-installing the current filter cannot destroy it in between.
  NS_IF_ADDREF(aFilter);

  //Always replace the member, even with null; otherwise it would keep
  //pointing at the filter we are about to release.
  mParserFilter=aFilter;

  if(thePrevious) {
    NS_RELEASE(thePrevious);
  }
  return thePrevious;
}

/**
 *  Call this once you've created a parser, to tell it which command caused
 *  it to be constructed. The command is stored and later passed to
 *  FindSuitableDTD() when a DTD is chosen (for example, to select a DTD
 *  that can do view-source).
 *
 *  @update  gess 3/25/98
 *  @param   aCommand -- the command string
 *  @return  nothing
 */
void nsParser::SetCommand(const char* aCommand)
{
  mCommand = aCommand;
}

/**
 *  Sets the content sink that this parser will dump nodes to. The parser
 *  holds one reference on the installed sink.
 *
 *  @update  gess 3/25/98
 *  @param   aSink -- the node receiver; asserted to be non-null
 *  @return  the previously installed sink pointer as left by the release.
 *           The parser's reference on it has already been dropped, so the
 *           caller must not dereference it.
 */
nsIContentSink* nsParser::SetContentSink(nsIContentSink* aSink) {
  NS_PRECONDITION(0!=aSink,"sink cannot be null!");
  nsIContentSink* thePrevious=mSink;

  //AddRef the incoming sink before releasing the old one, so that
  //re-installing the current sink cannot destroy it in between.
  NS_IF_ADDREF(aSink);

  //Always replace the member; if a null sink slips past the assertion we
  //must not keep pointing at the sink we are about to release.
  mSink=aSink;

  if(thePrevious) {
    NS_RELEASE(thePrevious);
  }
  return thePrevious;
}

/**
 *  Call this static method to register your dynamic DTD's with the parser.
 *  The DTD is added to the list shared by all parser instances. In
 *  rickgdebug builds an RTF DTD and a well-formed DTD are registered first.
 *
 *  @update  gess 6/9/98
 *  @param   aDTD -- the object to be registered
 *  @return  nothing
 */
void nsParser::RegisterDTD(nsIDTD* aDTD){

#ifdef rickgdebug
  nsIDTD* theDebugDTD=0;
  NS_NewRTF_DTD(&theDebugDTD);
  gSharedParserObjects.RegisterDTD(theDebugDTD);
  NS_NewWellFormed_DTD(&theDebugDTD);
  gSharedParserObjects.RegisterDTD(theDebugDTD);
#endif

  gSharedParserObjects.RegisterDTD(aDTD);
}

/**
 *  Retrieve the scanner owned by the topmost parser context.
 *
 *  @update  gess 6/9/98
 *  @return  ptr to that scanner, or 0 when there is no context
 */
CScanner* nsParser::GetScanner(void){
  return mParserContext ? mParserContext->mScanner : 0;
}


/**
 *  Retrieve the parse mode of the topmost parser context.
 *
 *  @update  gess 6/9/98
 *  @return  that context's mode, or eParseMode_unknown when there is no context
 */
eParseMode nsParser::GetParseMode(void){
  if(!mParserContext)
    return eParseMode_unknown;
  return mParserContext->mParseMode;
}


/**
 *  Picks a DTD for the given context. The DTD already attached to the
 *  context is tried first; failing that, each registered DTD is asked in
 *  registration order, and the first one that can parse the source type /
 *  command creates a new instance into aParserContext.mDTD.
 *
 *  @update  gess 5/13/98
 *  @param   aParserContext -- context whose mSourceType is tested and whose mDTD may be set
 *  @param   aCommand -- command string forwarded to CanParse()
 *  @return  PR_TRUE if the context's own DTD can parse, the accepting DTD's
 *           CanParse() result if a registered DTD matched, else PR_FALSE
 */
PRBool FindSuitableDTD( CParserContext& aParserContext,nsString& aCommand) {

    //The DTD we were given (if any) gets first refusal...
  nsIDTD* theDefaultDTD=aParserContext.mDTD;
  if(theDefaultDTD && (theDefaultDTD->CanParse(aParserContext.mSourceType,aCommand,0)))
    return PR_TRUE;

  nsDequeIterator theIter=gSharedParserObjects.mDTDDeque.Begin();
  nsDequeIterator theEnd=gSharedParserObjects.mDTDDeque.End();

  for(; theIter<theEnd; theIter++){
    nsIDTD* theCandidate=(nsIDTD*)theIter.GetCurrent();
    if(!theCandidate)
      continue;

    PRBool canParse=theCandidate->CanParse(aParserContext.mSourceType,aCommand,0);
    if(canParse){
      theCandidate->CreateNewInstance(&aParserContext.mDTD);
      return canParse;
    }
  }

  return PR_FALSE;
}

/**
 * Call this method if you want the known DTD's to try to detect the
 * document type through analysis of the underlying stream. Each registered
 * DTD is asked in turn; we stop at the first one that answers eValidDetect.
 * The answer of the last DTD asked is stored in the current context's
 * mAutoDetectStatus (eUnknownDetect when no DTD was asked).
 *
 * @update	gess6/22/98
 * @param   aBuffer -- nsString containing sample data to be analyzed.
 * @param   aType -- may hold typename given from netlib; handed to each DTD, which may update it.
 * @return  the status that was stored in the context
 */
eAutoDetectResult nsParser::AutoDetectContentType(nsString& aBuffer,nsString& aType) {

  nsDequeIterator theIter=gSharedParserObjects.mDTDDeque.Begin();
  nsDequeIterator theEnd=gSharedParserObjects.mDTDDeque.End();

    //Until somebody recognizes the content, the answer is "unknown".
  mParserContext->mAutoDetectStatus=eUnknownDetect;

  for(; theIter<theEnd; theIter++){
    nsIDTD* theCandidate=(nsIDTD*)theIter.GetCurrent();
    if(!theCandidate)
      continue;

    mParserContext->mAutoDetectStatus=theCandidate->AutoDetectContentType(aBuffer,aType);
    if(eValidDetect==mParserContext->mAutoDetectStatus)
      break;
  }

  return mParserContext->mAutoDetectStatus;
}


/**
 *  This is called (by WillBuildModel) when it's time to find out what mode
 *  the parser/DTD should run in for this document. (Each parser context
 *  can have its own mode.)
 *
 *  The scanner buffer is checked first: "HTML 4.0" / "html 4.0" selects
 *  eParseMode_raptor, otherwise "noquirks" / "NOQUIRKS" selects
 *  eParseMode_noquirks. If neither is found (or there is no scanner), a
 *  PARSE_MODE environment value of "other" (any case) selects
 *  eParseMode_other; everything else yields eParseMode_navigator.
 *
 *  @update  gess 5/13/98
 *  @param   aParser -- parser whose scanner buffer is inspected
 *  @return  parsermode (defined in nsIParser.h)
 */
eParseMode DetermineParseMode(nsParser& aParser) {
  const char* theModeStr= PR_GetEnv("PARSE_MODE");
  const char* other="other";

  CScanner* theScanner=aParser.GetScanner();
  if(theScanner){
    nsString& theBuffer=theScanner->GetBuffer();

    PRInt32 theVersionPos=theBuffer.Find("HTML 4.0");
    if(kNotFound==theVersionPos)
      theVersionPos=theBuffer.Find("html 4.0");
    if(kNotFound<theVersionPos)
      return eParseMode_raptor;

    PRInt32 theQuirksPos=theBuffer.Find("noquirks");
    if(kNotFound==theQuirksPos)
      theQuirksPos=theBuffer.Find("NOQUIRKS");
    if(kNotFound<theQuirksPos)
      return eParseMode_noquirks;
  }

  if(theModeStr && (0==nsCRT::strcasecmp(other,theModeStr)))
    return eParseMode_other;
  return eParseMode_navigator;
}


/**
 * This gets called just prior to the model actually being constructed.
 * It's important to make this the last thing that happens right before
 * parsing, so we can delay until the last moment the resolution of which
 * DTD to use (unless of course we're assigned one).
 *
 * Resets both iteration counters, determines the context's parse mode and,
 * when a suitable DTD is found, wires it to this parser and the sink and
 * forwards the notification to it.
 *
 * @update	gess5/18/98
 * @param   aFilename -- name passed through to the DTD's WillBuildModel()
 * @return  kNoError, or kInvalidParserContext when there is no parser context
 */
PRInt32 nsParser::WillBuildModel(nsString& aFilename){

  mMajorIteration=-1;
  mMinorIteration=-1;

  if(!mParserContext)
    return kInvalidParserContext;

  mParserContext->mParseMode=DetermineParseMode(*this);
  if(PR_TRUE==FindSuitableDTD(*mParserContext,mCommand)) {
    nsIDTD* theDTD=mParserContext->mDTD;
    theDTD->SetParser(this);
    theDTD->SetContentSink(mSink);
      //The flag tells the DTD whether this is the bottom-most context.
    theDTD->WillBuildModel(aFilename,PRBool(0==mParserContext->mPrevContext));
  }
  return kNoError;
}

/**
 * Tells the current context's DTD that model building is finished and
 * recycles any tokens still sitting in the context's token deque. Does
 * nothing when there is no parser context or the context has no DTD.
 *
 * @update	gess5/18/98
 * @param   anErrorCode -- status of the parse so far; forwarded to the DTD
 * @return  the DTD's DidBuildModel() result, or anErrorCode when no DTD was available
 */
PRInt32 nsParser::DidBuildModel(PRInt32 anErrorCode) {
  //One last thing...close any open containers.
  PRInt32 result=anErrorCode;

  //OnStopBinding() can reach us without a context ever having been pushed,
  //so check the context as well as the DTD (as WillBuildModel() does).
  if(mParserContext && mParserContext->mDTD) {
    result=mParserContext->mDTD->DidBuildModel(anErrorCode,PRBool(0==mParserContext->mPrevContext));

    //Now recycle any tokens that are still hanging around.
    //Come to think of it, there really shouldn't be any.
    nsDeque&  theDeque=mParserContext->mTokenDeque;
    nsITokenRecycler* theRecycler=mParserContext->mDTD->GetTokenRecycler();
    if(theRecycler) {
      CToken* theToken=(CToken*)theDeque.Pop();
      while(theToken) {
        theRecycler->RecycleToken(theToken);
        theToken=(CToken*)theDeque.Pop();
      }
    }
  }

  return result;
}


/**
 * Makes the given parser context the current one; the context that was
 * current until now becomes its predecessor.
 * @update	gess7/22/98
 * @param   aContext -- the new topmost context
 * @return  nada
 */
void nsParser::PushContext(CParserContext& aContext) {
  CParserContext* thePrevious=mParserContext;
  mParserContext=&aContext;
  mParserContext->mPrevContext=thePrevious;
}

/**
 * Removes the topmost context from the stack and hands it to the caller.
 * Its predecessor (if any) becomes the current context.
 * @update	gess7/22/98
 * @return  the context that was on top, or 0 if the stack was empty
 */
CParserContext* nsParser::PopContext() {
  CParserContext* theTop=mParserContext;
  if(!theTop)
    return 0;
  mParserContext=theTop->mPrevContext;
  return theTop;
}

/**
 *  Call this when you want to control whether the parser will parse
 *  and tokenize input (TRUE), or whether it just caches input to be
 *  parsed later (FALSE). This method only records the flag.
 *
 *  @update  gess 9/1/98
 *  @param   aState determines whether we parse/tokenize or just cache.
 *  @return  the state that is now in effect
 */
PRBool nsParser::EnableParser(PRBool aState){
  return (mParserEnabled=aState);
}


/**
 *  Prepares the parser for URL-based (push) parsing: a scanner named after
 *  the URL's spec and a new parser context are created, and the context is
 *  pushed onto the context stack. No data is parsed here.
 *
 *  NOTE: We don't call willbuildmodel here, because it will happen
 *        as a result of calling OnStartBinding later on.
 *
 *  @update  gess 3/25/98
 *  @param   aURL -- the URL to be parsed; asserted to be non-null
 *  @param   aListener -- stream observer handed to the new parser context
 *  @param   aVerifyEnabled -- stored as the DTD-verification flag
 *  @return  NS_OK, or kBadURL when aURL is null
 */
PRInt32 nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerifyEnabled) {
  NS_PRECONDITION(0!=aURL,kNullURL);

  mDTDVerification=aVerifyEnabled;

  if(!aURL)
    return kBadURL;

  nsAutoString theName(aURL->GetSpec());
  CParserContext* theContext=new CParserContext(new CScanner(theName,PR_FALSE),aURL,aListener);
  PushContext(*theContext);
  return NS_OK;
}


/**
 * Cause parser to parse input from the given stream, synchronously. A
 * temporary context (source type "text/html") is pushed for the duration
 * of the call and deleted before returning.
 * @update	gess5/11/98
 * @param   aStream is the i/o source
 * @param   aVerifyEnabled -- stored as the DTD-verification flag
 * @return  result of ResumeParse(), or kNoError if no DTD recognized the content
 */
PRInt32 nsParser::Parse(fstream& aStream,PRBool aVerifyEnabled){

  mDTDVerification=aVerifyEnabled;

  //ok, time to create our tokenizer and begin the process
  CParserContext* theContext=new CParserContext(new CScanner(kUnknownFilename,aStream,PR_FALSE),&aStream,0);
  PushContext(*theContext);
  theContext->mSourceType="text/html";
  mParserContext->mScanner->Eof();

  PRInt32 status=kNoError;
  if(eValidDetect==AutoDetectContentType(mParserContext->mScanner->GetBuffer(),
                                         mParserContext->mSourceType)) {
    WillBuildModel(mParserContext->mScanner->GetFilename());
    status=ResumeParse();
    DidBuildModel(status);
  }

  //We're done with the temporary context...
  delete PopContext();

  return status;
}


/**
 * Call this method if all you want to do is parse 1 string full of HTML text.
 * In particular, this method should be called by the DOM when it has an HTML
 * string to feed to the parser in real-time. A temporary context is pushed
 * for the duration of the call and deleted before returning.
 *
 * @update	gess5/11/98
 * @param   aSourceBuffer contains a string-full of real content
 * @param   anHTMLString when PR_TRUE, the context's source type is set to "text/html"
 * @param   aVerifyEnabled -- stored as the DTD-verification flag
 * @return  result of ResumeParse(), or kNoError if no DTD recognized the content
 */
PRInt32 nsParser::Parse(nsString& aSourceBuffer,PRBool anHTMLString,PRBool aVerifyEnabled){
  mDTDVerification=aVerifyEnabled;

  CParserContext* theContext=new CParserContext(new CScanner(aSourceBuffer),&aSourceBuffer,0);
  PushContext(*theContext);

  if(PR_TRUE==anHTMLString)
    theContext->mSourceType="text/html";

  PRInt32 result=kNoError;
  if(eValidDetect==AutoDetectContentType(aSourceBuffer,mParserContext->mSourceType)) {
    WillBuildModel(mParserContext->mScanner->GetFilename());
    result=ResumeParse();
    DidBuildModel(result);
  }

  //We're done with the temporary context...
  delete PopContext();
  return result;
}


/**
 *  This routine is called to cause the parser to continue
 *  parsing its underlying stream. This call allows the
 *  parse process to happen in chunks, such as when the
 *  content is push based, and we need to parse in pieces.
 *
 *  Nothing happens (and kNoError is returned) while the current context
 *  has no DTD yet -- OnStartBinding() clears the DTD and it is only set
 *  again once content detection succeeds, so data can arrive before then.
 *
 *  @update  gess 3/25/98
 *  @return  the result of Tokenize(); kNoError if there was nothing to do.
 *           BuildModel()'s own result is not propagated.
 */
PRInt32 nsParser::ResumeParse() {
  PRInt32 result=kNoError;

  if(mParserContext && mParserContext->mDTD) {
    mParserContext->mDTD->WillResumeParse();
    result=Tokenize();
    BuildModel();
    if(kInterrupted==result)
      mParserContext->mDTD->WillInterruptParse();
  }
  return result;
}

/**
 *  This is where we loop over the tokens created in the
 *  tokenization phase, and try to make sense out of them.
 *
 *  Tokens are read from the current context's token deque, starting at
 *  the context's mCurrentPos iterator (created on first use), and each is
 *  handed to the DTD of the bottom-most context on the stack. Afterwards,
 *  if that DTD supplies a token recycler, tokens are taken off the deque
 *  and recycled until the deque yields null or the token at mCurrentPos.
 *
 *  @update  gess 3/25/98
 *  @return  kNoError, or the first non-kNoError result from HandleToken()
 */
PRInt32 nsParser::BuildModel() {

  nsDequeIterator e=mParserContext->mTokenDeque.End();
  // NOTE(review): theMarkPos is assigned in the loop below but never read in this function.
  nsDequeIterator theMarkPos(e);

  // Create the persistent read position the first time through.
  if(!mParserContext->mCurrentPos)
    mParserContext->mCurrentPos=new nsDequeIterator(mParserContext->mTokenDeque.Begin());

    //Get the root DTD for use in model building...
  CParserContext* theRootContext=mParserContext;
  while(theRootContext->mPrevContext)
    theRootContext=theRootContext->mPrevContext;

  // NOTE(review): theRootDTD is dereferenced below without a null check.
  nsIDTD* theRootDTD=theRootContext->mDTD;

  PRInt32 result=kNoError;
  while((kNoError==result) && ((*mParserContext->mCurrentPos<e))){
    mMinorIteration++;
    CToken* theToken=(CToken*)mParserContext->mCurrentPos->GetCurrent();
    theMarkPos=*mParserContext->mCurrentPos;
    // Advance before dispatching; PeekToken()/PopToken() read this same iterator.
    ++(*mParserContext->mCurrentPos);
    result=theRootDTD->HandleToken(theToken);
    if(mDTDVerification)
      theRootDTD->Verify(kEmptyString);
  }

    //Now it's time to recycle our used tokens.
    //The current context has a deque full of them,
    //and the ones that precede currentpos are no
    //longer needed. Let's recycle them.
  nsITokenRecycler* theRecycler=theRootDTD->GetTokenRecycler();
  if(theRecycler) {
    nsDeque&  theDeque=mParserContext->mTokenDeque;
    CToken* theCurrentToken=(CToken*)mParserContext->mCurrentPos->GetCurrent();
    for(;;) {
      CToken* theToken=(CToken*)theDeque.Peek();
      if(theToken && (theToken!=theCurrentToken)){
        theDeque.Pop();
        theRecycler->RecycleToken(theToken);
      }
      else break;
    }
    // presumably repositions the iterator at the start of the (now shorter) deque -- TODO confirm
    mParserContext->mCurrentPos->First();
  }

  return result;
}


/**
 * Gives access to the token at the current read position of the current
 * context's token deque. The read position is not moved.
 * @update	gess8/2/98
 * @return  ptr to token
 */
CToken* nsParser::PeekToken() {
  return (CToken*)mParserContext->mCurrentPos->GetCurrent();
}


/**
 * Returns the token at the current read position of the current context's
 * token deque and moves the read position on to the next token. The deque
 * itself is not modified here.
 * @update	gess8/2/98
 * @return  ptr to the token that was at the read position
 */
CToken* nsParser::PopToken() {
  nsDequeIterator& thePos=*mParserContext->mCurrentPos;
  CToken* theResult=(CToken*)thePos.GetCurrent();
  ++thePos;
  return theResult;
}


/**
 * Pushes the given token onto the current context's token deque.
 * @update	gess8/2/98
 * @param   theToken -- token to add
 * @return  the same token that was passed in
 */
CToken* nsParser::PushToken(CToken* theToken) {
  nsDeque& theDeque=mParserContext->mTokenDeque;
  theDeque.Push(theToken);
  return theToken;
}

/*******************************************************************
  These methods are used to talk to the netlib system...
 *******************************************************************/

/**
 *  Stream-listener callback; the parser has nothing to do here.
 *
 *  @update  gess 5/12/98
 *  @param   aURL -- unused
 *  @return  always 0 (NS_OK)
 */
nsresult nsParser::GetBindInfo(nsIURL* aURL){
  return NS_OK;
}

/**
 *  Stream-listener callback; the notification is passed on to the
 *  observer when there is one.
 *
 *  @update  gess 5/12/98
 *  @param   aURL, aProgress, aProgressMax -- forwarded unchanged
 *  @return  always 0 (NS_OK); the observer's own result is ignored
 */
nsresult
nsParser::OnProgress(nsIURL* aURL, PRInt32 aProgress, PRInt32 aProgressMax)
{
  if (mObserver) {
    mObserver->OnProgress(aURL, aProgress, aProgressMax);
  }
  return NS_OK;
}

/**
 *  Stream-listener callback; the status message is passed on to the
 *  observer when there is one.
 *
 *  @update  gess 5/12/98
 *  @param   aURL, aMsg -- forwarded unchanged
 *  @return  always 0 (NS_OK); the observer's own result is ignored
 */
nsresult
nsParser::OnStatus(nsIURL* aURL, const nsString &aMsg)
{
  if (mObserver) {
    mObserver->OnStatus(aURL, aMsg);
  }
  return NS_OK;
}

/**
 *  Stream-listener callback marking the start of a load. The observer (if
 *  any) is notified, then the current context is reset for content
 *  detection: status unknown, no DTD, and the source type netlib reported.
 *
 *  @update  gess 5/12/98
 *  @param   aURL -- forwarded to the observer
 *  @param   aSourceType -- content type; stored in the current context
 *  @return  kNoError
 */
nsresult nsParser::OnStartBinding(nsIURL* aURL, const char *aSourceType){
  NS_PRECONDITION((eNone==mStreamListenerState),kBadListenerInit);

  if (mObserver) {
    mObserver->OnStartBinding(aURL, aSourceType);
  }

  mStreamListenerState=eOnStart;

  CParserContext* theContext=mParserContext;
  theContext->mAutoDetectStatus=eUnknownDetect;
  theContext->mDTD=0;
  theContext->mSourceType=aSourceType;
  return kNoError;
}

/**
 *  Stream-listener callback: drains the input stream into the current
 *  context's scanner and then resumes parsing.
 *
 *  Each chunk is read into the context's transfer buffer, offered to the
 *  parser filter (if any), widened to UCS2 through gToUCS2 and appended to
 *  the scanner. While the content type is still unknown, every chunk also
 *  triggers another auto-detect attempt; a successful one calls
 *  WillBuildModel().
 *
 *  @update  gess 5/12/98
 *  @param   aURL -- unused here
 *  @param   pIStream contains the input chars
 *  @param   length is the number of bytes waiting input (not used; we read until the stream runs dry)
 *  @return  result of ResumeParse(), or kNoError while no DTD has been selected yet
 */
nsresult nsParser::OnDataAvailable(nsIURL* aURL, nsIInputStream *pIStream, PRInt32 length){
/*  if (nsnull != mListener) {
      //Rick potts removed this.
      //Does it need to be here?
    mListener->OnDataAvailable(pIStream, length);
  }
*/
  NS_PRECONDITION(((eOnStart==mStreamListenerState)||(eOnDataAvail==mStreamListenerState)),kOnStartNotCalled);

  mStreamListenerState=eOnDataAvail;
  if(eInvalidDetect==mParserContext->mAutoDetectStatus) {
    if(mParserContext->mScanner) {
      mParserContext->mScanner->GetBuffer().Truncate();
    }
  }

  int len=1; //init to a non-zero value

  if(!mParserContext->mTransferBuffer)
    mParserContext->mTransferBuffer = new char[CParserContext::eTransferBufferSize+1];

  while (len > 0) {
    nsresult rv = pIStream->Read(mParserContext->mTransferBuffer, 0,
                                 mParserContext->eTransferBufferSize, &len);

    //A failed read may leave len untouched (and still positive); stop
    //rather than spin on the same error forever.
    if(rv != NS_OK)
      break;

    if(len>0) {
      if(mParserFilter)
         mParserFilter->RawBuffer(mParserContext->mTransferBuffer, &len);

      //The filter is handed &len and may change it; never let it exceed
      //what the conversion buffer below can hold.
      if(len > CParserContext::eTransferBufferSize)
        len = CParserContext::eTransferBufferSize;

      // XXX kipp was here: this is a temporary piece of code that
      // fixes up the data in the transfer buffer so that the 8 bit
      // ascii is mapped to ucs2 properly. The problem is that for the
      // default character set, some web pages use illegal codes (0x80
      // to 0x9f, inclusive); we already have code to map entities
      // properly in this range. This code maps raw stream data the
      // same way.
      PRUnichar buf[CParserContext::eTransferBufferSize];
      PRUnichar* dst = buf;
      const PRUnichar* table = gToUCS2;
      const char* src = mParserContext->mTransferBuffer;
      const char* end = src + len;
      while (src < end) {
        unsigned char ch = *(unsigned char*)src;
        *dst++ = table[ch];
        src++;
      }

      mParserContext->mScanner->Append(buf, len);

      if(eUnknownDetect==mParserContext->mAutoDetectStatus) {
        if(eValidDetect==AutoDetectContentType(mParserContext->mScanner->GetBuffer(),mParserContext->mSourceType)) {
          WillBuildModel(mParserContext->mScanner->GetFilename());
        } //if
      }
    } //if
  }

  //OnStartBinding() clears the DTD and it is only set again once detection
  //succeeds. Until then the data just accumulates in the scanner; calling
  //ResumeParse() would dereference a null DTD.
  if(!mParserContext->mDTD)
    return kNoError;

  return ResumeParse();
}

/**
 *  Stream-listener callback marking the end of a load. Finishes model
 *  building first, then notifies the observer when there is one.
 *
 *  @update  gess 5/12/98
 *  @param   aURL, aMsg -- forwarded to the observer
 *  @param   status -- forwarded to DidBuildModel() and to the observer
 *  @return  result of DidBuildModel()
 */
nsresult nsParser::OnStopBinding(nsIURL* aURL, PRInt32 status, const nsString& aMsg){
  mStreamListenerState=eOnStop;

  nsresult theResult=DidBuildModel(status);

  if (mObserver) {
    mObserver->OnStopBinding(aURL, status, aMsg);
  }
  return theResult;
}


/*******************************************************************
  Here come the tokenization methods...
 *******************************************************************/


/**
 *  Part of the code sandwich, this gets called right before
 *  the tokenization process begins. The main reason for
 *  this call is to allow the delegate to do initialization.
 *  Nothing is done here at present.
 *
 *  @update  gess 3/25/98
 *  @return  always PR_TRUE (it's ok to proceed)
 */
PRBool nsParser::WillTokenize(){
  return PR_TRUE;
}


/**
 *  This is the primary control routine to consume tokens.
 *  It iteratively asks the current context's DTD to consume tokens from
 *  the scanner until a non-kNoError result comes back. Each token
 *  produced is pushed onto the context's token deque. On any non-kNoError
 *  result the scanner is rewound to the mark taken before that attempt,
 *  and any token handed back along with the failure is deleted.
 *
 *  @update  gess 3/25/98
 *  @return  NS_OK when the DTD reports kProcessComplete, otherwise the
 *           result that ended the loop
 */
PRInt32 nsParser::Tokenize(){
  PRInt32 result=kNoError;

  ++mMajorIteration;

  WillTokenize();
  while(kNoError==result) {
    //Start every attempt with a null token. A pointer left over from the
    //previous iteration already lives in the deque, and must never be
    //pushed a second time or deleted by the error path below.
    CToken* theToken=0;

    mParserContext->mScanner->Mark();
    result=mParserContext->mDTD->ConsumeToken(theToken);
    if(kNoError==result) {
      if(theToken) {

  #ifdef VERBOSE_DEBUG
          theToken->DebugDumpToken(cout);
  #endif
        mParserContext->mTokenDeque.Push(theToken);
      }

    }
    else {
      delete theToken;
      mParserContext->mScanner->RewindToMark();
    }
  }
  if(kProcessComplete==result)
    result=NS_OK;
  DidTokenize();
  return result;
}

/**
 *  This is the tail-end of the code sandwich for the
 *  tokenization process. It gets called once tokenization
 *  has completed. Nothing is done here at present.
 *
 *  @update  gess 3/25/98
 *  @return  always PR_TRUE (all went well)
 */
PRBool nsParser::DidTokenize(){
  return PR_TRUE;
}

/**
 *  This debug routine walks the current context's token deque from
 *  beginning to end, asking each token to dump its contents to the
 *  given output stream.
 *
 *  @update  gess 3/25/98
 *  @param   out -- stream to dump to
 *  @return  nothing
 */
void nsParser::DebugDumpTokens(ostream& out) {
  nsDequeIterator theIter=mParserContext->mTokenDeque.Begin();
  nsDequeIterator theEnd=mParserContext->mTokenDeque.End();

  for(; theIter!=theEnd; ) {
    //the post-increment hands back the current item and moves on
    CToken* theToken=(CToken*)(theIter++);
    theToken->DebugDumpToken(out);
  }
}


/**
 *  This debug routine walks the current context's token deque from
 *  beginning to end, asking each token to dump its source form to the
 *  given output stream.
 *
 *  @update  gess 3/25/98
 *  @param   out -- stream to dump to
 *  @return  nothing
 */
void nsParser::DebugDumpSource(ostream& out) {
  nsDequeIterator theIter=mParserContext->mTokenDeque.Begin();
  nsDequeIterator theEnd=mParserContext->mTokenDeque.End();

  for(; theIter!=theEnd; ) {
    //the post-increment hands back the current item and moves on
    CToken* theToken=(CToken*)(theIter++);
    theToken->DebugDumpSource(out);
  }
}