Mozilla/mozilla/parser/htmlparser/src/nsHTMLParser.cpp

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL.  You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation.  Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
 * Reserved.
 */

//#define __INCREMENTAL 1

#include "nsHTMLParser.h"
#include "nsHTMLContentSink.h"
#include "nsTokenizer.h"
#include "nsHTMLTokens.h"
#include "nsString.h"
#include "nsIURL.h"
#include "nsCRT.h"
#include "COtherDelegate.h"
#include "COtherDTD.h"
#include "CNavDelegate.h"
#include "CNavDTD.h"
#include "prenv.h"  //this is here for debug reasons...
#include "plstr.h"
#include <fstream.h>
#include "nsIInputStream.h"
#ifdef XP_PC
#include <direct.h> //this is here for debug reasons...
#endif

// Interface IDs that nsHTMLParser::QueryInterface compares against.
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID);
static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);

// Messages handed to the NS_PRECONDITION/assertion macros in this file.
static const char* kNullURL = "Error: Null URL given";
static const char* kNullFilename= "Error: Null filename given";
static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
static const char* kNullToken = "Error: Null token given";
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";

// Value of the VERIFY_PARSER environment variable (set in the constructor);
// when non-null, VerifyContextVector() performs its debug verification.
static char*  gVerificationOutputDir=0;
// Debug switch: when non-zero, Parse(nsIURL*) parses a hard-coded local file instead.
static int    rickGDebug=0;
static const int gTransferBufferSize=4096;  //size of the buffer used in moving data from the input stream


/**
 *  This method is defined in nsIParser. It is used to
 *  cause the COM-like construction of an nsHTMLParser.
 *
 *  The out-pointer is validated BEFORE the parser is allocated. Otherwise
 *  a null argument would make QueryInterface fail after construction and
 *  the freshly created (still unreferenced) parser would be leaked.
 *
 *  @update  gess 3/25/98
 *  @param   aInstancePtrResult receives the newly instantiated parser,
 *           already AddRef'd by QueryInterface
 *  @return  NS_ERROR_NULL_POINTER if aInstancePtrResult is null,
 *           NS_ERROR_OUT_OF_MEMORY if allocation failed, otherwise the
 *           result of QueryInterface
 */
NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult)
{
  if (0 == aInstancePtrResult) {
    return NS_ERROR_NULL_POINTER;
  }

  nsHTMLParser *it = new nsHTMLParser();

  if (it == 0) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  return it->QueryInterface(kClassIID, (void **) aInstancePtrResult);
}


/**
 *  Central dispatch routine installed in every default CTokenHandler.
 *  It maps a token type onto the matching nsHTMLParser::HandleXXXToken
 *  method. Start, whitespace, newline and text tokens all go through
 *  HandleStartToken. (Eventually this table and the handlers should move
 *  over to the delegate.)
 *
 *  @update  gess 5/21/98
 *  @param   aType   type of the token being dispatched
 *  @param   aToken  token to hand to the parser
 *  @param   aParser parser that owns the handler methods
 *  @return  result of the handler, or 0 when either pointer is null or
 *           the token type has no handler
 */
PRInt32 DispatchTokenHandler(eHTMLTokenTypes aType,CToken* aToken,nsHTMLParser* aParser){
  if(!aToken || !aParser)
    return 0;

  switch(aType) {
    case eToken_start:
    case eToken_whitespace:
    case eToken_newline:
    case eToken_text:
      return aParser->HandleStartToken(aToken);

    case eToken_end:
      return aParser->HandleEndToken(aToken);

    case eToken_comment:
      return aParser->HandleCommentToken(aToken);

    case eToken_entity:
      return aParser->HandleEntityToken(aToken);

    case eToken_attribute:
      return aParser->HandleAttributeToken(aToken);

    case eToken_style:
      return aParser->HandleStyleToken(aToken);

    case eToken_skippedcontent:
      return aParser->HandleSkippedContentToken(aToken);

    default:
      break;
  }//switch
  return 0;
}

/**
 *  init the set of default token handlers...
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
void nsHTMLParser::InitializeDefaultTokenHandlers() {
  AddTokenHandler(new CTokenHandler(DispatchTokenHandler,eToken_start));

  AddTokenHandler(new CTokenHandler(DispatchTokenHandler,eToken_end));
  AddTokenHandler(new CTokenHandler(DispatchTokenHandler,eToken_comment));
  AddTokenHandler(new CTokenHandler(DispatchTokenHandler,eToken_entity));

  AddTokenHandler(new CTokenHandler(DispatchTokenHandler,eToken_whitespace));
  AddTokenHandler(new CTokenHandler(DispatchTokenHandler,eToken_newline));
  AddTokenHandler(new CTokenHandler(DispatchTokenHandler,eToken_text));

  AddTokenHandler(new CTokenHandler(DispatchTokenHandler,eToken_attribute));
//  AddTokenHandler(new CTokenHandler(DispatchTokenHandler,eToken_script));
  AddTokenHandler(new CTokenHandler(DispatchTokenHandler,eToken_style));
  AddTokenHandler(new CTokenHandler(DispatchTokenHandler,eToken_skippedcontent));
}

/**
 *  Default constructor. Puts every member that the rest of this file
 *  reads into a known state, installs the default token handlers and
 *  caches the VERIFY_PARSER environment variable for debug verification.
 *
 *  mTokenizer and mDelegate must start out null: the destructor tests
 *  mTokenizer before deleting it, and GetDelegateAndDTD/Parse test
 *  mDelegate, so leaving them uninitialized is undefined behavior when
 *  the parser is destroyed before any Parse() call.
 *
 *  @update  gess 3/25/98
 */
nsHTMLParser::nsHTMLParser() {
  NS_INIT_REFCNT();
  mTransferBuffer=0;
  mSink=0;
  mTokenizer=0;
  mDelegate=0;
  mDTD=0;
  mContextStackPos=0;
  mCurrentPos=0;
  mMarkPos=0;
  mIteration=-1;          //same starting value WillBuildModel() uses
  mIncremental=PR_FALSE;
  mParseMode=eParseMode_unknown;
  nsCRT::zero(mContextStack,sizeof(mContextStack));
  nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
  mHasOpenForm=PR_FALSE;
  InitializeDefaultTokenHandlers();
  gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
}


/**
 *  Destructor. Frees the token handlers, the transfer buffer, the
 *  current-position iterator, the tokenizer and the DTD, and drops the
 *  parser's reference on the content sink.
 *
 *  The sink is released only when one was actually installed; mSink
 *  starts out null, so an unconditional release would dereference a
 *  null pointer for a parser that never received a sink.
 *
 *  NOTE(review): mDelegate is not released here; whether the tokenizer
 *  owns it is not visible in this file -- confirm.
 *
 *  @update  gess 3/25/98
 */
nsHTMLParser::~nsHTMLParser() {
  DeleteTokenHandlers();
  if(mTransferBuffer)
    delete [] mTransferBuffer;
  mTransferBuffer=0;
  if(mSink) {
    NS_RELEASE(mSink);
  }
  mSink=0;
  if(mCurrentPos)
    delete mCurrentPos;
  mCurrentPos=0;
  if(mTokenizer)
    delete mTokenizer;
  if(mDTD)
    delete mDTD;
  mTokenizer=0;
  mDTD=0;
}


// AddRef()/Release() for nsHTMLParser come from these macros.
// QueryInterface is written by hand further down, which is presumably why
// the all-in-one NS_IMPL_ISUPPORTS line stays commented out.
NS_IMPL_ADDREF(nsHTMLParser)
NS_IMPL_RELEASE(nsHTMLParser)
//NS_IMPL_ISUPPORTS(nsHTMLParser,NS_IHTML_PARSER_IID)


/**
 *  COM-style interface discovery. Answers for nsISupports, nsIParser
 *  and the concrete nsHTMLParser class id; anything else yields
 *  NS_NOINTERFACE with *aInstancePtr cleared.
 *
 *  @update   gess 3/25/98
 *  @param    aIID          id of the interface being requested
 *  @param    aInstancePtr  receives the AddRef'd interface pointer
 *  @return   NS_OK, NS_NOINTERFACE, or NS_ERROR_NULL_POINTER when
 *            aInstancePtr itself is null
 */
nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
  if (NULL == aInstancePtr)
    return NS_ERROR_NULL_POINTER;

  void* theInterface=0;

  if(aIID.Equals(kISupportsIID) || aIID.Equals(kIParserIID)) {
    //both the IUnknown-style base and IParser are served by the nsIParser view
    theInterface=(nsIParser*)(this);
  }
  else if(aIID.Equals(kClassIID)) {
    theInterface=(nsHTMLParser*)(this);
  }

  *aInstancePtr=theInterface;
  if(0==theInterface)
    return NS_NOINTERFACE;

  ((nsISupports*)theInterface)->AddRef();
  return NS_OK;
}

/**
 *  Reports whether the given container is currently open. Forms are
 *  tracked by the mHasOpenForm flag; every other tag is looked up on
 *  the context stack.
 *
 *  @update  gess 4/2/98
 *  @param   aContainer  eHTMLTags value of the container in question
 *  @return  PR_TRUE if the container is open, PR_FALSE otherwise
 */
PRBool nsHTMLParser::HasOpenContainer(PRInt32 aContainer) const {
  const eHTMLTags theTag=(eHTMLTags)aContainer;

  if(eHTMLTag_form==theTag)
    return mHasOpenForm;

  PRBool isOnStack=(kNotFound!=GetTopmostIndex(theTag));
  return isOnStack;
}

/**
 *  Returns the tag stored in the topmost slot of the context stack.
 *
 *  @update  gess 4/2/98
 *  @return  tag on top of the stack, or eHTMLTag_unknown when the
 *           stack position is zero
 */
eHTMLTags nsHTMLParser::GetTopNode() const {
  return (mContextStackPos)
    ? (eHTMLTags)mContextStack[mContextStackPos-1]
    : eHTMLTag_unknown;
}


/**
 *  Searches the context stack from the top down for the given tag.
 *
 *  @update  gess 4/2/98
 *  @param   aTag  tag to be searched for in the stack
 *  @return  index of the topmost occurrence, or kNotFound
 */
PRInt32 nsHTMLParser::GetTopmostIndex(eHTMLTags aTag) const {
  int theIndex=mContextStackPos;
  while(--theIndex>=0) {
    if(mContextStack[theIndex]==aTag)
      return theIndex;
  }
  return kNotFound;
}


/**
 *  Deletes every registered token handler and clears its slot in the
 *  handler table (slots eToken_unknown up to, not including, eToken_last).
 *
 *  @update  gess 4/2/98
 *  @return  this
 */
nsHTMLParser& nsHTMLParser::DeleteTokenHandlers(void) {
  int theSlot=eToken_unknown;
  while(theSlot<eToken_last) {
    delete mTokenHandlers[theSlot];
    mTokenHandlers[theSlot]=0;
    ++theSlot;
  }
  return *this;
}


/**
 *  Looks up the handler registered for a token type.
 *
 *  @update  gess 4/2/98
 *  @param   aType  type of token to be handled
 *  @return  the registered handler, or null when none is registered or
 *           aType is not strictly between 0 and eToken_last
 */
CTokenHandler* nsHTMLParser::GetTokenHandler(eHTMLTokenTypes aType) const {
  if((aType<=0) || (aType>=eToken_last))
    return 0;
  return mTokenHandlers[aType];
}


/**
 *  Registers a handler for the token type it reports via GetTokenType().
 *  The parser owns registered handlers (DeleteTokenHandlers deletes
 *  them), so a handler that gets replaced is deleted here instead of
 *  being leaked.
 *
 *  Handlers whose type falls outside the table are ignored for now
 *  (dynamic token types are not implemented).
 *
 *  @update  gess 4/2/98
 *  @param   aHandler  handler to install; must not be null
 *  @return  always 0
 */
CTokenHandler* nsHTMLParser::AddTokenHandler(CTokenHandler* aHandler) {
  NS_ASSERTION(0!=aHandler,"Error: Null handler argument");

  if(aHandler)  {
    eHTMLTokenTypes type=aHandler->GetTokenType();
    if((((int)type)>=0) && (type<eToken_last)) {
      CTokenHandler* old=mTokenHandlers[type];
      mTokenHandlers[type]=aHandler;
      if(old!=aHandler)   //re-registering the same handler must not destroy it
        delete old;
    }
    else {
      //add code here to handle dynamic tokens...
    }
  }
  return 0;
}

/**
 *  Installs the content sink that the parser dumps nodes to.
 *
 *  The new sink is AddRef'd BEFORE the previous one is released, so
 *  re-installing the sink that is already set cannot destroy it in
 *  between. mSink is always updated, including to null; otherwise a
 *  null argument would leave mSink pointing at a sink the parser no
 *  longer holds a reference to.
 *
 *  @update  gess 3/25/98
 *  @param   aSink  interface for the node receiver (should not be null)
 *  @return  the value of the previous-sink variable after NS_RELEASE has
 *           been applied to it. The parser holds no reference on that
 *           object any more, so callers must not use or release it
 *           unless they own a reference of their own.
 */
nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) {
  NS_PRECONDITION(0!=aSink,"sink cannot be null!");
  nsIContentSink* old=mSink;
  if(aSink) {
    NS_ADDREF(aSink);
  }
  mSink=(nsHTMLContentSink*)(aSink);
  if(old)
    NS_RELEASE(old);
  return old;
}


/**
 * This debug method allows us to determine whether or not
 * we've seen (and can handle) the given context vector.
 * It only does anything when the VERIFY_PARSER output directory
 * has been set and a DTD is supplied.
 *
 * On Windows builds it also mirrors the context vector as a directory
 * chain (<outputdir>/<tag>/<tag>/...) beneath the output directory.
 * The path buffer and the code that fills it now sit under the same
 * preprocessor condition, and every append is bounds checked, so a long
 * output directory or a deep stack can no longer overrun the buffer.
 *
 * @update  gess4/22/98
 * @param   aTags is an array of eHTMLTags
 * @param   count represents the number of items in the tags array
 * @param   aDTD is the DTD we plan to ask for verification
 * @return  TRUE if we know how to handle it (or verification is off),
 *          else false
 */
PRBool VerifyContextVector(PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) {

  PRBool  result=PR_TRUE;

  if(0!=gVerificationOutputDir) {

    if(aDTD){

#if defined(XP_PC) && defined(NS_WIN32)
      char    path[_MAX_PATH+1];
      size_t  pathLen=strlen(gVerificationOutputDir);

      if(pathLen<=_MAX_PATH) {
        strcpy(path,gVerificationOutputDir);

        int i=0;
        for(i=0;i<count;i++){
          const char* name=GetTagName(aTags[i]);
          if(0==name)
            break;  //unknown tag: nothing sensible to create
          size_t nameLen=strlen(name);
          if(pathLen+1+nameLen>_MAX_PATH)
            break;  //next component would not fit; stop mirroring
          path[pathLen++]='/';
          strcpy(&path[pathLen],name);
          pathLen+=nameLen;
          mkdir(path);
        }
      }
#endif
      //ok, now see if we understand this vector
      result=aDTD->VerifyContextVector(aTags,count);
    }
    if(PR_FALSE==result){
      //add debugging code here to record the fact that we just encountered
      //a context vector we don't know how to handle.
    }
  }

  return result;
}


/**
 *  This is where we loop over the tokens created in the
 *  tokenization phase, and try to make sense out of them.
 *
 *  mCurrentPos persists between calls (it is created on first use), so
 *  a later call continues where the previous one stopped. Handlers may
 *  advance mCurrentPos themselves (see CollectAttributes and
 *  CollectSkippedContent).
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return  kNoError, or the first non-kNoError result returned by a
 *           token handler (the loop stops there).
 */
PRInt32 nsHTMLParser::IterateTokens() {
  nsDeque& deque=mTokenizer->GetDeque();
  nsDequeIterator e=deque.End();
  nsDequeIterator theMarkPos(e);

  //first call: start at the beginning of the token deque
  if(!mCurrentPos)
    mCurrentPos=new nsDequeIterator(deque.Begin());

  PRInt32 result=kNoError;

  while((kNoError==result) && ((*mCurrentPos<e))){

    CToken* theToken=(CToken*)mCurrentPos->GetCurrent();

    eHTMLTokenTypes type=eHTMLTokenTypes(theToken->GetTokenType());
    CTokenHandler* aHandler=GetTokenHandler(type);

    //tokens without a registered handler are skipped
    if(aHandler) {
      //remember where this token started so an interrupted handler can be retried
      theMarkPos=*mCurrentPos;
      result=(*aHandler)(theToken,this);
      VerifyContextVector(mContextStack,mContextStackPos,mDTD);
    }
    ++(*mCurrentPos);
  }

  //interrupted: rewind to the token whose handler reported the interruption
  if(kInterrupted==result)
    *mCurrentPos=theMarkPos;

  return result;
}


/**
 *  Chooses the parse mode from the PARSE_MODE environment variable.
 *  A value of "other" (compared case-insensitively) selects
 *  eParseMode_other; anything else, including an unset variable,
 *  selects eParseMode_navigator.
 *
 *  @update  gess 5/13/98
 *  @return  the parse mode to use
 */
eParseMode DetermineParseMode() {
  const char* theOtherName="other";
  const char* theEnvValue=PR_GetEnv("PARSE_MODE");

  if(theEnvValue && (0==nsCRT::strcasecmp(theOtherName,theEnvValue)))
    return eParseMode_other;

  return eParseMode_navigator;
}


/**
 *  Creates the tokenizer delegate that matches the parse mode and asks
 *  it for its DTD.
 *
 *  Both out-parameters are always assigned. For an unrecognized mode
 *  (or a failed allocation) they are set to null, rather than being
 *  left holding whatever stale or uninitialized value the caller
 *  passed in.
 *
 *  @update  gess 5/13/98
 *  @param   aMode      parse mode selecting the delegate type
 *  @param   aDelegate  receives the new delegate, or null
 *  @param   aDTD       receives the delegate's DTD, or null
 */
void GetDelegateAndDTD(eParseMode aMode,ITokenizerDelegate*& aDelegate,nsIDTD*& aDTD) {
  switch(aMode) {
    case eParseMode_navigator:
      aDelegate=new CNavDelegate(); break;
    case eParseMode_other:
      aDelegate=new COtherDelegate(); break;
    default:
      aDelegate=0; break;
  }
  if(aDelegate)
    aDTD=aDelegate->GetDTD();
  else
    aDTD=0;
}


/**
 * Called before model building starts: resets the iteration counter
 * and, when a sink is installed, forwards the notification to it.
 *
 * @update	gess5/18/98
 * @return  always kNoError
 */
PRInt32 nsHTMLParser::WillBuildModel(void) {
  mIteration=-1;
  if(0!=mSink) {
    mSink->WillBuildModel();
  }
  return kNoError;
}

/**
 * Called once model building is finished. After an error-free parse
 * any containers still on the context stack are closed; then the sink
 * (if any) is notified.
 *
 * @update	gess5/18/98
 * @param   anErrorCode  result of the parse that just ended
 * @return  anErrorCode, unchanged
 */
PRInt32 nsHTMLParser::DidBuildModel(PRInt32 anErrorCode) {
  if(kNoError==anErrorCode) {
    //One last thing...close any open containers.
    if(mContextStackPos>0)
      CloseContainersTo(0);
  }

  if(0!=mSink)
    mSink->DidBuildModel();

  return anErrorCode;
}

/**
 *  This DEBUG ONLY method is used to simulate a network-based
 *  i/o model where data comes in incrementally: the file is read in
 *  small chunks, each chunk is appended to the tokenizer and the parse
 *  is resumed.
 *
 *  The chunk buffer has room for the terminating NUL (the old buffer
 *  was written one byte past its end whenever a full chunk was read),
 *  and a file that cannot be opened now yields kBadFilename instead of
 *  being reported as a successful, empty parse.
 *
 *  @update  gess 5/13/98
 *  @param   aFilename is the name of the disk file to use for testing.
 *  @return  error code (kNoError means ok; kBadFilename when the name
 *           is null or the file cannot be opened)
 */
PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
  PRInt32   result=kBadFilename;
  nsString  theBuffer;
  const int kLocalBufSize=10;

  mIteration=-1;
  if(0==aFilename)
    return result;

#if defined(XP_UNIX) && defined(IRIX)
  /* XXX: IRIX does not support ios::binary */
  fstream theFileStream(aFilename,ios::in);
#else
  fstream theFileStream(aFilename,ios::in|ios::binary);
#endif

  if(theFileStream.is_open()) {
    char buf[kLocalBufSize+1];  //+1 leaves room for the terminator

    result=kNoError;
    while((kNoError==result) || (kInterrupted==result)) {
      //read some data from the file...
      theFileStream.read(buf,kLocalBufSize);
      PRInt32 numread=theFileStream.gcount();
      if(numread<=0)
        break;  //end of file (or read error)

      buf[numread]=0;
      theBuffer.Truncate();
      theBuffer.Append(buf);
      mTokenizer->Append(theBuffer);
      result=ResumeParse();
    }
    theFileStream.close();
  }
  return result;
}

/**
 *  Parses the named file. A delegate/DTD pair is chosen from the
 *  current parse mode, a tokenizer is created, and the file is parsed
 *  either in simulated incremental chunks or in one go.
 *
 *  @update  gess 3/25/98
 *  @param   aFilename    file to be parsed
 *  @param   aIncremental PR_TRUE to feed the file through
 *                        ParseFileIncrementally
 *  @return  the parse status code (kBadFilename when no filename or
 *           delegate is available), passed back through the PRBool
 *           return type
 */
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
  NS_PRECONDITION(0!=aFilename,kNullFilename);

  PRInt32 theStatus=kBadFilename;

  mIncremental=aIncremental;
  mParseMode=DetermineParseMode();

  if(!aFilename)
    return theStatus;

  GetDelegateAndDTD(mParseMode,mDelegate,mDTD);
  if(!mDelegate)
    return theStatus;

  if(mDTD)
    mDTD->SetParser(this);

  WillBuildModel();

  //ok, time to create our tokenizer and begin the process
  if(aIncremental) {
    mTokenizer=new CTokenizer(mDelegate,mParseMode);
    theStatus=ParseFileIncrementally(aFilename);
  }
  else {
    mTokenizer=new CTokenizer(aFilename,mDelegate,mParseMode);
    theStatus=ResumeParse();
  }

  DidBuildModel(theStatus);
  return theStatus;
}

/**
 *  Parses the document behind the given URL. A delegate/DTD pair is
 *  chosen from the current parse mode and a tokenizer is created.
 *
 *  In incremental mode the URL is opened with this parser as argument
 *  and the status of that Open call is returned; DidBuildModel is not
 *  called on this path (presumably it happens when the stream
 *  completes -- confirm). Otherwise the whole document is tokenized and
 *  parsed here.
 *
 *  WillBuildModel() is called exactly once per parse; the
 *  non-incremental path used to call it a second time, notifying the
 *  sink twice.
 *
 *  @update  gess 3/25/98
 *  @param   aURL          url of the document to be parsed
 *  @param   aIncremental  PR_TRUE to parse as data arrives
 *  @return  status code (kBadURL when no URL or delegate is available)
 */
PRInt32 nsHTMLParser::Parse(nsIURL* aURL,PRBool aIncremental ){
  NS_PRECONDITION(0!=aURL,kNullURL);

  PRInt32 status=kBadURL;

  //debug hook: parse a fixed local file instead of the URL
  if(rickGDebug)
    return Parse("c:/temp/temp.html",PR_TRUE);

  mIncremental=aIncremental;
  mParseMode=DetermineParseMode();

  if(aURL) {

    GetDelegateAndDTD(mParseMode,mDelegate,mDTD);
    if(mDelegate) {

      if(mDTD)
        mDTD->SetParser(this);

      WillBuildModel();

      //ok, time to create our tokenizer and begin the process
      if(mIncremental) {
        mTokenizer=new CTokenizer(mDelegate,mParseMode);
        status=aURL->Open(this);
      }
      else {
        mTokenizer=new CTokenizer(aURL,mDelegate,mParseMode);
        status=ResumeParse();
        DidBuildModel(status);
      }
    }//if
  }
  return status;
}

/**
 * Call this method if all you want to do is parse 1 string full of
 * HTML text. The string is appended to the existing tokenizer, which
 * must already have been created.
 *
 * @update	gess5/11/98
 * @param   aSourceBuffer contains a string-full of real HTML
 * @param   appendTokens  not used by this implementation
 * @return  status code returned by ResumeParse
 */
PRInt32 nsHTMLParser::Parse(nsString& aSourceBuffer,PRBool appendTokens){
  WillBuildModel();
  mTokenizer->Append(aSourceBuffer);

  PRInt32 theStatus=ResumeParse();
  DidBuildModel(theStatus);
  return theStatus;
}

/**
 *  Continues parsing the underlying stream: the sink is told we are
 *  resuming, the tokenizer runs its next iteration, and the resulting
 *  tokens are then handled by IterateTokens. This lets the parse
 *  happen in chunks when content is pushed to us piece by piece.
 *
 *  @update  gess 3/25/98
 *  @return  the tokenizer's status code; the result of IterateTokens
 *           is not reflected in the return value
 */
PRInt32 nsHTMLParser::ResumeParse() {
  mSink->WillResume();

  const PRInt32 theTokenizeStatus=mTokenizer->Tokenize(++mIteration);
  if(kInterrupted==theTokenizeStatus) {
    mSink->WillInterrupt();
  }
  IterateTokens();

  return theTokenizeStatus;
}

/**
 * Reports the current depth of the context stack.
 *
 * NOTE(review): aStackPtr is received by value, so pointing it at the
 * context stack below is not visible to the caller; only the returned
 * depth is. Handing the stack itself back would need a different
 * signature.
 *
 * @update  gess4/22/98
 * @param   aStackPtr  not usable as an output (see note above)
 * @return  number of entries currently on the context stack
 */
PRInt32 nsHTMLParser::GetStack(PRInt32* aStackPtr) {
  aStackPtr=mContextStack;  //local copy only; has no effect outside this function
  return mContextStackPos;
}


/**
 * Retrieve the attributes for this node, and add them into
 * the node. Attribute tokens are expected to directly follow the
 * current token in the tokenizer's deque; mCurrentPos is advanced past
 * each attribute that is consumed.
 *
 * If a non-attribute token is found first, the iterator is stepped
 * back onto the last consumed token and collection stops (retrying
 * would only find the same token again). If the deque runs out, or
 * advancing yields no token, kInterrupted is returned. The no-token
 * case used to be a no-op expression statement where a return was
 * clearly intended.
 *
 * @update  gess4/22/98
 * @param   aNode is the node you want to collect attributes for
 * @param   aCount is the # of attributes you're expecting
 * @return  kNoError, or kInterrupted when the tokens are not all
 *          available yet
 */
PRInt32 nsHTMLParser::CollectAttributes(nsCParserNode& aNode,PRInt32 aCount){
  nsDeque&        deque=mTokenizer->GetDeque();
  nsDequeIterator end=deque.End();

  int attr=0;
  for(attr=0;attr<aCount;attr++) {
    if(!(*mCurrentPos<end))
      return kInterrupted;

    CHTMLToken* tkn=(CHTMLToken*)(++(*mCurrentPos));
    if(0==tkn)
      return kInterrupted;

    if(eToken_attribute==eHTMLTokenTypes(tkn->GetTokenType())){
      aNode.AddAttribute(tkn);
    }
    else {
      (*mCurrentPos)--;  //not ours; leave it for the main token loop
      break;
    }
  }
  return kNoError;
}


/**
 * Looks at the token that follows the current position and, when it
 * is a skipped-content token, attaches it to the node and leaves
 * mCurrentPos on it. Any other token (or no token at all) leaves
 * mCurrentPos where it was.
 *
 * The previous loop dereferenced the next token without a null check
 * and never terminated when that token was an attribute token (it
 * stepped forward, stepped back, and tried again forever). At most one
 * token is ever consumed, so a single test is sufficient.
 *
 * @update  gess4/22/98
 * @param   aNode  node that receives the skipped content
 * @return  number of skipped-content tokens attached (0 or 1)
 */
PRInt32 nsHTMLParser::CollectSkippedContent(nsCParserNode& aNode){
  nsDeque&         deque=mTokenizer->GetDeque();
  nsDequeIterator  end=deque.End();
  PRInt32          count=0;

  if(*mCurrentPos!=end) {
    CHTMLToken* tkn=(CHTMLToken*)(++(*mCurrentPos));
    if(tkn && (eToken_skippedcontent==eHTMLTokenTypes(tkn->GetTokenType()))) {
      aNode.SetSkippedContent(tkn);
      count++;
    }
    else (*mCurrentPos)--;  //not skipped content; put the iterator back
  }
  return count;
}


/**
 *  Default handling for a start token. If the container on top of the
 *  context stack cannot contain the new tag, the stack is first
 *  adjusted: an attempt is made to create the needed context, and if
 *  that reports 0 the stack is reduced and, when still unsuitable,
 *  rebuilt. The node is then opened as a container or added as a leaf,
 *  depending on what the DTD says about the tag.
 *
 *  @update  gess 3/25/98
 *  @param   aToken    next (start) token to be handled
 *  @param   aChildTag tag type of that token
 *  @param   aNode     CParserNode representing this start token
 *  @return  result of OpenContainer or AddLeaf; the results of the
 *           stack adjustments are not returned
 */
PRInt32 nsHTMLParser::HandleDefaultStartToken(CToken* aToken,eHTMLTags aChildTag,nsCParserNode& aNode) {
  NS_PRECONDITION(0!=aToken,kNullToken);

  const eHTMLTags theParentTag=(eHTMLTags)GetTopNode();

  if(PR_FALSE==mDTD->CanContain(theParentTag,aChildTag)) {
    PRInt32 theStackStatus=CreateContextStackFor(aChildTag);
    if(PR_FALSE==theStackStatus) {
      //if you're here, then the new topmost container can't contain aToken.
      //You must determine what container hierarchy you need to hold aToken,
      //and create that on the parsestack.
      ReduceContextStackFor(aChildTag);
      if(PR_FALSE==mDTD->CanContain(GetTopNode(),aChildTag)) {
        //we unwound too far; now we have to recreate a valid context stack.
        CreateContextStackFor(aChildTag);
      }
    }
  }

  PRInt32 theStatus=kNoError;
  if(mDTD->IsContainer(aChildTag))
    theStatus=OpenContainer(aNode);
  else
    theStatus=AddLeaf(aNode);
  return theStatus;
}

/**
 *  This method gets called when a start token has been
 *  encountered in the parse process (DispatchTokenHandler also routes
 *  whitespace, newline and text tokens here).
 *
 *  The token's attributes are collected first. Unless the DTD says the
 *  tag can be omitted in the current context, a handful of tags get
 *  special treatment (html, title, textarea, form, meta/link, style,
 *  script, map, head); everything else goes through
 *  HandleDefaultStartToken.
 *
 *  @update  gess 3/25/98
 *  @param   aToken -- next (start) token to be handled
 *  @return  kNoError if all went well; otherwise the error code of the
 *           step that failed (kInterrupted when attributes are not all
 *           available yet)
 */
PRInt32 nsHTMLParser::HandleStartToken(CToken* aToken) {
  NS_PRECONDITION(0!=aToken,kNullToken);

  CStartToken*  st= (CStartToken*)(aToken);
  eHTMLTags     tokenTagType=st->GetHTMLTag();

  //Begin by gathering up attributes...
  nsCParserNode attrNode((CHTMLToken*)aToken);
  PRInt16       attrCount=aToken->GetAttributeCount();
  PRInt32       result=(0==attrCount) ? kNoError : CollectAttributes(attrNode,attrCount);

  if(kNoError==result) {
      //now check to see if this token should be omitted...
    if(PR_FALSE==mDTD->CanOmit(GetTopNode(),tokenTagType)) {

      switch(tokenTagType) {

        case eHTMLTag_html:
          result=OpenHTML(attrNode); break;

        //title: open the head, hand the skipped content to the sink as the title, close the head
        case eHTMLTag_title:
          {
            nsCParserNode theNode(st);
            result=OpenHead(theNode); //open the head...
            if(kNoError==result) {
              CollectSkippedContent(attrNode);
              mSink->SetTitle(attrNode.GetSkippedContent());
              result=CloseHead(theNode); //close the head...
            }
          }
          break;

        //textarea: its content travels with the node as skipped content
        case eHTMLTag_textarea:
          {
            CollectSkippedContent(attrNode);
            result=AddLeaf(attrNode);
          }
          break;

        case eHTMLTag_form:
          result = OpenForm(attrNode);
          break;

        //meta and link are added as leaves inside a temporarily opened head
        //(note: the node added here is built from the bare token, not attrNode)
        case eHTMLTag_meta:
        case eHTMLTag_link:
          {
            nsCParserNode theNode((CHTMLToken*)aToken);
            result=OpenHead(theNode);
            if(kNoError==result)
              result=AddLeaf(theNode);
            if(kNoError==result)
              result=CloseHead(theNode);
          }
          break;

        //style: added (with its skipped content) as a leaf inside a temporarily opened head
        case eHTMLTag_style:
          {
            nsCParserNode theNode((CHTMLToken*)aToken);
            result=OpenHead(theNode);
            if(kNoError==result) {
              CollectSkippedContent(attrNode);
              if(kNoError==result) {
                result=AddLeaf(attrNode);
                if(kNoError==result)
                  result=CloseHead(theNode);
              }
            }
          }
          break;

        case eHTMLTag_script:
          result=HandleScriptToken(st); break;


        case eHTMLTag_map:
          // Put map into the head section
          // (the head stays open here; HandleEndToken closes it at the map end tag)
          result=OpenHead(attrNode);
          if(kNoError==result)
            result=OpenContainer(attrNode);
          break;

        case eHTMLTag_head:
          break; //ignore head tags...

        default:
          result=HandleDefaultStartToken(aToken,tokenTagType,attrNode);
          break;
      } //switch
    } //if
  } //if
  return result;
}

/**
 *  This method gets called when an end token has been encountered in
 *  the parse process. End tags the DTD allows to be omitted in the
 *  current context are dropped. End tags of elements that were handled
 *  completely at their start tag (style, link, meta, textarea, title,
 *  head, script) are ignored. A map end tag closes the map container
 *  and then the head; a form end tag closes the form; for any other
 *  container tag the stack is closed down to that tag.
 *
 *  @update  gess 3/25/98
 *  @param   aToken -- next (end) token to be handled
 *  @return  kNoError if all went well; otherwise an error code
 */
PRInt32 nsHTMLParser::HandleEndToken(CToken* aToken) {
  NS_PRECONDITION(0!=aToken,kNullToken);

  CEndToken*  theEndToken=(CEndToken*)(aToken);
  eHTMLTags   theTag=theEndToken->GetHTMLTag();

  //now check to see if this token should be omitted...
  if(PR_TRUE==mDTD->CanOmitEndTag(GetTopNode(),theTag))
    return kNoError;

  PRInt32       theStatus=kNoError;
  nsCParserNode theNode((CHTMLToken*)aToken);

  switch(theTag) {

    case eHTMLTag_style:
    case eHTMLTag_link:
    case eHTMLTag_meta:
    case eHTMLTag_textarea:
    case eHTMLTag_title:
    case eHTMLTag_head:
    case eHTMLTag_script:
      break;  //nothing to close for these

    case eHTMLTag_map:
      theStatus=CloseContainer(theNode);
      if(kNoError==theStatus)
        theStatus=CloseHead(theNode);
      break;

    case eHTMLTag_form:
      {
        nsCParserNode theFormNode((CHTMLToken*)aToken);
        theStatus=CloseForm(theFormNode);
      }
      break;

    default:
      if(mDTD->IsContainer(theTag))
        theStatus=CloseContainersTo(theTag);
      break;
  }
  return theStatus;
}

/**
 *  This method gets called when an entity token has been encountered
 *  in the parse process. The entity is wrapped in a node and added as
 *  a leaf.
 *
 *  @update  gess 3/25/98
 *  @param   aToken -- entity token to be handled
 *  @return  result of AddLeaf
 */
PRInt32 nsHTMLParser::HandleEntityToken(CToken* aToken) {
  NS_PRECONDITION(0!=aToken,kNullToken);

  nsCParserNode theEntityNode((CHTMLToken*)aToken);
  PRInt32 theStatus=AddLeaf(theEntityNode);
  return theStatus;
}

/**
 *  This method gets called when a comment token has been encountered
 *  in the parse process. Comments are currently dropped: nothing is
 *  passed on to the sink.
 *
 *  @update  gess 3/25/98
 *  @param   aToken -- comment token to be handled
 *  @return  always kNoError
 */
PRInt32 nsHTMLParser::HandleCommentToken(CToken* aToken) {
  NS_PRECONDITION(0!=aToken,kNullToken);

  PRInt32 theStatus=kNoError;
  return theStatus;
}

/**
 *  This method gets called when a skippedcontent token has been
 *  encountered in the parse process. The token is added as a leaf,
 *  but only while a body container is open; otherwise it is ignored.
 *
 *  @update  gess 3/25/98
 *  @param   aToken -- skipped-content token to be handled
 *  @return  result of AddLeaf, or kNoError when no body is open
 */
PRInt32 nsHTMLParser::HandleSkippedContentToken(CToken* aToken) {
  NS_PRECONDITION(0!=aToken,kNullToken);

  if(!HasOpenContainer(eHTMLTag_body))
    return kNoError;

  nsCParserNode theContentNode((CHTMLToken*)aToken);
  return AddLeaf(theContentNode);
}

/**
 *  This method gets called when an attribute token has been
 *  encountered on its own in the parse process. That is an error,
 *  since all attributes should have been collected by the preceding
 *  start token; the condition is reported via NS_ERROR and the token
 *  is otherwise ignored.
 *
 *  @update  gess 3/25/98
 *  @param   aToken -- stray attribute token
 *  @return  always kNoError
 */
PRInt32 nsHTMLParser::HandleAttributeToken(CToken* aToken) {
  NS_PRECONDITION(0!=aToken,kNullToken);
  NS_ERROR("attribute encountered -- this shouldn't happen!");

  return kNoError;
}

/**
 *  This method gets called when a script token has been encountered
 *  in the parse process. Scripts are not processed yet; the token is
 *  accepted and ignored.
 *
 *  @update  gess 3/25/98
 *  @param   aToken -- script token to be handled
 *  @return  always kNoError
 */
PRInt32 nsHTMLParser::HandleScriptToken(CToken* aToken) {
  NS_PRECONDITION(0!=aToken,kNullToken);

  return kNoError;
}

/**
 *  This method gets called when a style token has been encountered
 *  in the parse process. Nothing is done with it here; the token is
 *  accepted and ignored.
 *
 *  @update  gess 3/25/98
 *  @param   aToken -- style token to be handled
 *  @return  always kNoError
 */
PRInt32 nsHTMLParser::HandleStyleToken(CToken* aToken){
  NS_PRECONDITION(0!=aToken,kNullToken);

  return kNoError;
}


/**
 * Opens the HTML container: the sink is notified first, then the
 * node's type is pushed onto our internal context stack.
 *
 * @update  gess4/22/98
 * @param   aNode -- node describing the html element
 * @return  result reported by the sink
 */
PRInt32 nsHTMLParser::OpenHTML(const nsIParserNode& aNode){
  NS_PRECONDITION(mContextStackPos >= 0, kInvalidTagStackPos);

  const PRInt32 theSinkStatus=mSink->OpenHTML(aNode);

  mContextStack[mContextStackPos]=(eHTMLTags)aNode.GetNodeType();
  ++mContextStackPos;

  return theSinkStatus;
}

/**
 * Closes the HTML container: the sink is notified, then the top entry
 * is popped off our internal context stack.
 *
 * The pop is skipped when the stack is already empty, so a stray
 * close can no longer write in front of the stack array in builds
 * where the precondition is compiled out.
 *
 * @update  gess4/6/98
 * @param   aNode -- node describing the html element
 * @return  result reported by the sink
 */
PRInt32 nsHTMLParser::CloseHTML(const nsIParserNode& aNode){
  NS_PRECONDITION(mContextStackPos > 0, kInvalidTagStackPos);
  PRInt32 result=mSink->CloseHTML(aNode);
  if(mContextStackPos>0)
    mContextStack[--mContextStackPos]=eHTMLTag_unknown;
  return result;
}


/**
 * Opens the head container: eHTMLTag_head is pushed onto our internal
 * context stack, then the sink is notified.
 *
 * @update  gess4/6/98
 * @param   aNode -- node passed through to the sink
 * @return  result reported by the sink
 */
PRInt32 nsHTMLParser::OpenHead(const nsIParserNode& aNode){
  mContextStack[mContextStackPos]=eHTMLTag_head;
  ++mContextStackPos;

  return mSink->OpenHead(aNode);
}

/**
 * Closes the head container: the sink is notified, then the top entry
 * is popped off our internal context stack.
 *
 * Like CloseHTML, this asserts that there is something to pop. The
 * pop is also skipped when the stack is empty, so a stray close can
 * no longer write in front of the stack array.
 *
 * @update  gess4/6/98
 * @param   aNode -- node passed through to the sink
 * @return  result reported by the sink
 */
PRInt32 nsHTMLParser::CloseHead(const nsIParserNode& aNode){
  NS_PRECONDITION(mContextStackPos > 0, kInvalidTagStackPos);
  PRInt32 result=mSink->CloseHead(aNode);
  if(mContextStackPos>0)
    mContextStack[--mContextStackPos]=eHTMLTag_unknown;
  return result;
}

/**
 * Opens the body container. The body must sit directly inside the
 * html container, so when html is not on top of the context stack the
 * stack is repaired first: either the containers above an existing
 * html entry are closed, or (when no html is open at all) everything
 * is closed and an html container is opened with a synthesized token.
 * If that went well the sink is notified and the node's type is
 * pushed onto the context stack.
 *
 * @update  gess4/6/98
 * @param   aNode -- node describing the body element
 * @return  kNoError or the error of the step that failed
 */
PRInt32 nsHTMLParser::OpenBody(const nsIParserNode& aNode){
  NS_PRECONDITION(mContextStackPos >= 0, kInvalidTagStackPos);

  PRInt32 theStatus=kNoError;

  if(eHTMLTag_html!=(eHTMLTags)nsHTMLParser::GetTopNode()) {

    PRInt32 theHTMLPos=GetTopmostIndex(eHTMLTag_html);
    if(kNotFound==theHTMLPos) {
      //no HTML tag in the document: close whatever is open, then open html ourselves.
      CloseContainersTo(0);

      nsAutoString  theEmptyString;
      CHTMLToken    theHTMLToken(theEmptyString);
      nsCParserNode theHTMLNode(&theHTMLToken);

      theHTMLToken.SetHTMLTag(eHTMLTag_html);
      theStatus=OpenHTML(theHTMLNode);
    }
    else {
      //html is open, but other tag(s) (e.g. the head) are in the way; close them.
      theStatus=CloseContainersTo(theHTMLPos+1);
    }
  }

  if(kNoError!=theStatus)
    return theStatus;

  theStatus=mSink->OpenBody(aNode);
  mContextStack[mContextStackPos++]=(eHTMLTags)aNode.GetNodeType();
  return theStatus;
}

/**
 * Forwards the close-body notification to the sink and
 * then removes the topmost entry from the context stack.
 * @update  gess4/6/98
 * @param   aNode -- the body node being removed from our model
 * @return  the value returned by the sink's CloseBody()
 */
PRInt32 nsHTMLParser::CloseBody(const nsIParserNode& aNode){
  //popping requires at least one entry (same check as CloseFrameset).
  NS_PRECONDITION(mContextStackPos > 0, kInvalidTagStackPos);
  PRInt32 result=mSink->CloseBody(aNode);

  //never step below the bottom of the stack, even in builds
  //where the precondition above compiles away.
  if(mContextStackPos>0) {
    mContextStack[--mContextStackPos]=eHTMLTag_unknown;
  }
  return result;
}

/**
 * Opens a FORM. Forms are tracked with the mHasOpenForm flag
 * rather than on the context stack: if a form is already open
 * it is closed first, then the new form is passed to the sink.
 * @update  gess4/6/98
 * @param   aNode -- the form node being opened
 * @return  the value returned by the sink's OpenForm()
 */
PRInt32 nsHTMLParser::OpenForm(const nsIParserNode& aNode){
  if(mHasOpenForm)
    CloseForm(aNode);
  PRInt32 result=mSink->OpenForm(aNode);

  //remember that a form is open; CloseForm() only notifies
  //the sink when this flag is set.
  mHasOpenForm=PR_TRUE;
  return result;
}

/**
 * Closes the currently open FORM, if there is one. The
 * mHasOpenForm flag is cleared before the sink is notified.
 * @update  gess4/6/98
 * @param   aNode -- the form node being closed
 * @return  kNoError when no form was open; otherwise the
 *          value returned by the sink's CloseForm()
 */
PRInt32 nsHTMLParser::CloseForm(const nsIParserNode& aNode){
  if(!mHasOpenForm) {
    //nothing to close.
    return kNoError;
  }
  mHasOpenForm=PR_FALSE;
  return mSink->CloseForm(aNode);
}

/**
 * Forwards the open-frameset notification to the sink and
 * then pushes the node's type onto the context stack.
 * @update  gess4/6/98
 * @param   aNode -- the frameset node being added to the model
 * @return  the value returned by the sink's OpenFrameset()
 */
PRInt32 nsHTMLParser::OpenFrameset(const nsIParserNode& aNode){
  NS_PRECONDITION(mContextStackPos >= 0, kInvalidTagStackPos);

  const PRInt32 theSinkResult=mSink->OpenFrameset(aNode);

  //push the new container onto our model.
  mContextStack[mContextStackPos]=(eHTMLTags)aNode.GetNodeType();
  ++mContextStackPos;
  return theSinkResult;
}

/**
 * Forwards the close-frameset notification to the sink and
 * then removes the topmost entry from the context stack.
 * @update  gess4/6/98
 * @param   aNode -- the frameset node being removed from the model
 * @return  the value returned by the sink's CloseFrameset()
 */
PRInt32 nsHTMLParser::CloseFrameset(const nsIParserNode& aNode){
  NS_PRECONDITION(mContextStackPos > 0, kInvalidTagStackPos);

  const PRInt32 theSinkResult=mSink->CloseFrameset(aNode);

  //pop: step back one slot and mark it as unused.
  --mContextStackPos;
  mContextStack[mContextStackPos]=eHTMLTag_unknown;
  return theSinkResult;
}

/**
 * General entry point for opening a container. A few tags
 * are routed to their dedicated methods (html, body, form),
 * a few are deliberately ignored here (style, textarea, head,
 * title), and everything else is passed to the sink and then
 * pushed onto the context stack.
 * @update  gess4/6/98
 * @param   aNode -- the container node being opened
 * @return  the result of the method the node was routed to;
 *          kNoError for the ignored tags
 */
PRInt32 nsHTMLParser::OpenContainer(const nsIParserNode& aNode){
  NS_PRECONDITION(mContextStackPos > 0, kInvalidTagStackPos);

  const PRInt32 theType=aNode.GetNodeType();

  //XXX Hack! We know this is wrong, but it works
  //for the general case until we get it right.
  if(eHTMLTag_html==theType) {
    return OpenHTML(aNode);
  }
  if(eHTMLTag_body==theType) {
    return OpenBody(aNode);
  }
  if(eHTMLTag_form==theType) {
    return OpenForm(aNode);
  }
  if((eHTMLTag_style==theType) || (eHTMLTag_textarea==theType) ||
     (eHTMLTag_head==theType)  || (eHTMLTag_title==theType)) {
    //these are not opened through this path.
    return kNoError;
  }

  //the ordinary case: tell the sink, then record it in the model.
  const PRInt32 theSinkResult=mSink->OpenContainer(aNode);
  mContextStack[mContextStackPos]=(eHTMLTags)theType;
  ++mContextStackPos;
  return theSinkResult;
}

/**
 * General entry point for closing a container. html, body
 * and form are routed to their dedicated methods; style,
 * textarea and head are ignored here; everything else
 * (title included) is passed to the sink and then popped
 * from the context stack.
 * @update  gess4/6/98
 * @param   aNode -- the container node being closed
 * @return  the result of the method the node was routed to;
 *          kNoError for the ignored tags
 */
PRInt32 nsHTMLParser::CloseContainer(const nsIParserNode& aNode){
  NS_PRECONDITION(mContextStackPos > 0, kInvalidTagStackPos);

  const PRInt32 theType=aNode.GetNodeType();

  //XXX Hack! We know this is wrong, but it works
  //for the general case until we get it right.
  if(eHTMLTag_html==theType) {
    return CloseHTML(aNode);
  }
  if(eHTMLTag_body==theType) {
    return CloseBody(aNode);
  }
  if(eHTMLTag_form==theType) {
    return CloseForm(aNode);
  }
  if((eHTMLTag_style==theType) || (eHTMLTag_textarea==theType)) {
    return kNoError;
  }
  if(eHTMLTag_head==theType) {
    //CloseHead(aNode) is intentionally not called from here.
    return kNoError;
  }

  //title and every other tag: tell the sink, then pop our model.
  const PRInt32 theSinkResult=mSink->CloseContainer(aNode);
  --mContextStackPos;
  mContextStack[mContextStackPos]=eHTMLTag_unknown;
  return theSinkResult;
}

/**
 * Closes open containers, topmost first, until the context
 * stack holds exactly anIndex entries. Each container is
 * closed by handing CloseContainer() an end-token node that
 * carries the tag currently on top of the stack.
 * Nothing happens when anIndex is negative or is not below
 * the current stack depth.
 * @update  gess4/6/98
 * @param   anIndex -- stack depth to unwind to
 * @return  kNoError if nothing was closed; otherwise the
 *          result of the last CloseContainer() call
 */
PRInt32 nsHTMLParser::CloseContainersTo(PRInt32 anIndex){
  NS_PRECONDITION(mContextStackPos > 0, kInvalidTagStackPos);
  PRInt32 result=kNoError;

  nsAutoString empty;
  CEndToken aToken(empty);
  nsCParserNode theNode(&aToken);

  if((anIndex<mContextStackPos) && (anIndex>=0)) {
    while(mContextStackPos>anIndex) {
      const PRInt32 theDepthBefore=mContextStackPos;

      aToken.SetHTMLTag((eHTMLTags)mContextStack[mContextStackPos-1]);
      result=CloseContainer(theNode);

      //CloseContainer() does not pop for every tag (head, for
      //instance, is pushed by OpenHead() but ignored on close).
      //Without this check such an entry would keep the loop
      //spinning forever, so drop it from the model ourselves.
      if(mContextStackPos>=theDepthBefore) {
        mContextStackPos=theDepthBefore-1;
        mContextStack[mContextStackPos]=eHTMLTag_unknown;
      }
    }
  }
  return result;
}

/**
 * Closes containers down to (and including) the topmost
 * open instance of aTag. If aTag is not open, the DTD is
 * asked for its default parent; when that parent is open,
 * everything above the parent is closed instead.
 * @update  gess4/6/98
 * @param   aTag -- the tag whose container should be closed
 * @return  the result of the index-based CloseContainersTo(),
 *          or kUnknownError when neither aTag nor its default
 *          parent is on the stack
 */
PRInt32 nsHTMLParser::CloseContainersTo(eHTMLTags aTag){
  NS_PRECONDITION(mContextStackPos > 0, kInvalidTagStackPos);

  PRInt32 theTagPos=GetTopmostIndex(aTag);
  if(kNotFound!=theTagPos) {
    //the tag is indeed open, so close it.
    return CloseContainersTo(theTagPos);
  }

  eHTMLTags theParentTag=(eHTMLTags)mDTD->GetDefaultParentTagFor(aTag);
  PRInt32   theParentPos=GetTopmostIndex(theParentTag);
  if(kNotFound!=theParentPos) {
    //the parent container is open, so close what sits above it
    return CloseContainersTo(theParentPos+1);
  }

  //XXX HACK! This is a real problem -- the unhandled case.!
  return kUnknownError;
}

/**
 * Closes whichever container is currently on top of the
 * context stack, by building an end-token node for that
 * tag and handing it to CloseContainer().
 * @update  gess4/6/98
 * @see     CloseContainer()
 * @param
 * @return  the value returned by CloseContainer()
 */
PRInt32 nsHTMLParser::CloseTopmostContainer(){
  NS_PRECONDITION(mContextStackPos > 0, kInvalidTagStackPos);

  nsAutoString theEmptyString;
  CEndToken    theEndToken(theEmptyString);

  //the token takes on the tag at the top of the stack
  //before the node wrapping it is constructed.
  theEndToken.SetHTMLTag((eHTMLTags)mContextStack[mContextStackPos-1]);

  nsCParserNode theEndNode(&theEndToken);
  return CloseContainer(theEndNode);
}

/**
 * Passes a leaf node straight through to the sink. The
 * context stack is not touched.
 * @update  gess4/6/98
 * @param   aNode -- the leaf node being added
 * @return  the value returned by the sink's AddLeaf()
 */
PRInt32 nsHTMLParser::AddLeaf(const nsIParserNode& aNode){
  return mSink->AddLeaf(aNode);
}

/**
 *  This method gets called to create a valid context stack
 *  for the given child. We compare the current stack to the
 *  default needs of the child, and push new guys onto the
 *  stack until the child can be properly placed.
 *
 *  The DTD is asked to fill a "context vector" (theVector),
 *  first by forward propagation from the current top node and,
 *  failing that, by backward propagation. The vector is read
 *  from its end towards its start (index cnt-1-i), and a start
 *  token is synthesized and handled for each tag that still
 *  has to be opened.
 *
 *  @update  gess 4/8/98
 *  @param   aChildTag is the child for whom we need to
 *           create a new context vector
 *  @return  kNoError, kContextMismatch (forward propagation
 *           produced a vector that does not line up with the
 *           stack top), or the result of CloseContainersTo().
 *           Note that kNoError is also returned when backward
 *           propagation fails, since result is never changed
 *           on that path.
 */
PRInt32 nsHTMLParser::CreateContextStackFor(PRInt32 aChildTag){
  nsAutoString  theVector;  //tags are stored as the characters of this string

  PRInt32 result=kNoError;
  PRInt32 pos=0;            //first vector slot (counted from the end) still to be opened
  PRInt32 cnt=0;            //number of vector slots to consider
  PRInt32 theTop=GetTopNode();

  if(PR_TRUE==mDTD->ForwardPropagate(theVector,theTop,aChildTag)){
    //add code here to build up context stack based on forward propagated context vector...
    pos=0;
    cnt=theVector.Length()-1;
    //NOTE(review): mContextStackPos-1 is -1 when the stack is empty, and cnt
    //is -1 when the vector is empty; presumably ForwardPropagate only succeeds
    //with a non-empty stack and vector -- TODO confirm.
    if(mContextStack[mContextStackPos-1]==theVector[cnt])
      result=kNoError;
    else result=kContextMismatch;
  }
  else {
    PRBool tempResult;
    if(eHTMLTag_unknown!=theTop) {
      tempResult=mDTD->BackwardPropagate(theVector,theTop,aChildTag);
      //the outcome of this second propagation (html down to the
      //current top) is not checked.
      if(eHTMLTag_html!=theTop)
        mDTD->BackwardPropagate(theVector,eHTMLTag_html,theTop);
    }
    else tempResult=mDTD->BackwardPropagate(theVector,eHTMLTag_html,aChildTag);

    if(PR_TRUE==tempResult) {

      //propagation worked, so pop unwanted containers, push new ones, then exit...
      pos=0;
      cnt=theVector.Length();
      result=kNoError;
      //walk the stack from the bottom, matching it against the
      //vector read from its end.
      //NOTE(review): cnt-1-pos goes negative if the stack is deeper
      //than the vector is long -- verify that cannot happen.
      while(pos<mContextStackPos) {
        if(mContextStack[pos]==theVector[cnt-1-pos]) {
          pos++;
        }
        else {
          //if you're here, you have something on the stack
          //that doesn't match your needed tags order.
          result=CloseContainersTo(pos);
          break;
        }
      } //while
    } //if (backward propagation succeeded)
  } //else (forward propagation failed)

    //now, build up the stack according to the tags
    //you have that aren't in the stack...
  if(kNoError==result){
    nsAutoString  empty;
    int i=0;
    for(i=pos;i<cnt;i++) {
      //NOTE(review): the token is heap allocated and never deleted here;
      //presumably HandleStartToken() takes ownership -- TODO confirm.
      CStartToken* st=new CStartToken(empty);
      st->SetHTMLTag((eHTMLTags)theVector[cnt-1-i]);
      HandleStartToken(st);  //return value is ignored
    }
  }
  return result;
}


/**
 *  Pops containers off the context stack until the top
 *  node can contain the given child, either directly or
 *  indirectly (as judged by the DTD), or until GetTopNode()
 *  reports kNotFound.
 *
 *  @update  gess 4/8/98
 *  @param   aChildTag -- the tag that needs a suitable parent
 *  @return  always kNoError
 */
PRInt32 nsHTMLParser::ReduceContextStackFor(PRInt32 aChildTag){
  for(;;) {
    eHTMLTags theTop=(eHTMLTags)nsHTMLParser::GetTopNode();

    if(theTop==kNotFound) {
      break;  //no top node is reported any more
    }
    if(PR_FALSE!=mDTD->CanContain(theTop,aChildTag)) {
      break;  //direct containment is allowed
    }
    if(PR_FALSE!=mDTD->CanContainIndirect(theTop,aChildTag)) {
      break;  //indirect containment is allowed
    }
    CloseTopmostContainer();
  }
  return kNoError;
}


/**
 *  Stream-binding callback. Currently does nothing.
 *
 *  @update  gess 5/12/98
 *  @param
 *  @return  always 0
 */
nsresult nsHTMLParser::GetBindInfo(void){
  return 0;
}

/**
 *  Stream-progress callback. Currently does nothing;
 *  all three arguments are ignored.
 *
 *  @update  gess 5/12/98
 *  @param   Progress -- unused
 *  @param   ProgressMax -- unused
 *  @param   msg -- unused
 *  @return  always 0
 */
nsresult nsHTMLParser::OnProgress(PRInt32 Progress, PRInt32 ProgressMax, const char *msg){
  return 0;
}

/**
 *  Called when stream binding starts. Runs WillBuildModel()
 *  and makes sure the transfer buffer used by
 *  OnDataAvailable() has been allocated.
 *
 *  @update  gess 5/12/98
 *  @param
 *  @return  the value returned by WillBuildModel()
 */
nsresult nsHTMLParser::OnStartBinding(void){
  const nsresult theStartResult=WillBuildModel();

  //allocate the buffer once; the extra byte leaves room
  //for a terminating zero after a full read.
  if(0==mTransferBuffer) {
    mTransferBuffer=new char[gTransferBufferSize+1];
  }
  return theStartResult;
}

/**
 *  Called when the stream has data. Reads the stream in
 *  chunks of up to gTransferBufferSize bytes, appends each
 *  chunk to the tokenizer, and then resumes parsing once a
 *  read returns no data.
 *
 *  @update  gess 5/12/98
 *  @param   pIStream -- the stream to read from
 *  @param   length -- unused
 *  @return  the value returned by ResumeParse()
 */
nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length){

  for(;;) {
    PRInt32 theReadError;  //filled in by Read(); not inspected
    int theCount = pIStream->Read(&theReadError, mTransferBuffer, 0, gTransferBufferSize);
    if(theCount<=0) {
      break;  //nothing (more) to read
    }
    mTransferBuffer[theCount]=0;  //zero-terminate the chunk
    mTokenizer->Append(mTransferBuffer,theCount);
  }

  return ResumeParse();
}

/**
 *  Called when stream binding stops. Hands the status on
 *  to DidBuildModel(); msg is ignored.
 *
 *  @update  gess 5/12/98
 *  @param   status -- passed through to DidBuildModel()
 *  @param   msg -- unused
 *  @return  the value returned by DidBuildModel()
 */
nsresult nsHTMLParser::OnStopBinding(PRInt32 status, const char *msg){
  return DidBuildModel(status);
}