Mozilla/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL.  You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation.  Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation.  All Rights
 * Reserved.
 */

#include <ctype.h>
#include <time.h>
#include <stdio.h>
#include "nsScanner.h"
#include "nsToken.h"
#include "nsHTMLTokens.h"
#include "nsParserTypes.h"
#include "prtypes.h"
#include "nsDebug.h"
#include "nsHTMLTags.h"
#include "nsHTMLEntities.h"
#include "nsCRT.h"

//#define GESS_MACHINE
#ifdef GESS_MACHINE
#include "nsEntityEx.cpp"
#endif

static nsString     gIdentChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-");
static nsString     gAttrTextChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-%.");
static nsString     gAlphaChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
static nsAutoString gDigits("0123456789");
static nsAutoString gWhitespace(" \t\b");
static nsAutoString gOperatorChars("/?.<>[]{}~^+=-!%&*(),|:");
static const char*  gUserdefined = "userdefined";
static const char*  gEmpty = "";


const PRInt32 kMAXNAMELEN=10;


/**************************************************************
  And now for the token classes...
 **************************************************************/

/*
 *  default constructor
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
CHTMLToken::CHTMLToken(const nsString& aName,eHTMLTags aTag) : CToken(aName) {
  mTypeID=aTag;
}

/*
 *  constructor from tag id
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(aTag) {

}

/**
 * Setter method that changes the string value of this token
 * @update	gess5/11/98
 * @param   name is a char* value containing new string value
 */
void CHTMLToken::SetStringValue(const char* name){
  if(name) {
    mTextValue=name;
    mTypeID = NS_TagToEnum(name);
  }
}


/*
 *  constructor from tag id
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) {
  mAttributed=PR_FALSE;
  mEmpty=PR_FALSE;
}

/*
 *  constructor from tag id
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
CStartToken::CStartToken(nsString& aString,eHTMLTags aTag) : CHTMLToken(aString,aTag) {
  mAttributed=PR_FALSE;
  mEmpty=PR_FALSE;
}


/**
 *
 * @update	gess8/4/98
 * @param
 * @return
 */
void CStartToken::Reinitialize(PRInt32 aTag, const nsString& aString){
  CToken::Reinitialize(aTag,aString);
  mAttributed=PR_FALSE;
  mEmpty=PR_FALSE;
}

/*
 *  This method returns the typeid (the tag type) for this token.
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRInt32 CStartToken::GetTypeID(){
  if(eHTMLTag_unknown==mTypeID) {
    nsAutoString tmp(mTextValue);
    tmp.ToUpperCase();
    char cbuf[20];
    tmp.ToCString(cbuf, sizeof(cbuf));
    mTypeID = NS_TagToEnum(cbuf);
    switch(mTypeID) {
      case eHTMLTag_dir:
      case eHTMLTag_menu:
        mTypeID=eHTMLTag_ul;
        break;
      default:
        break;
    }
  }
  return mTypeID;
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
const char*  CStartToken::GetClassName(void) {
  return "start";
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRInt32 CStartToken::GetTokenType(void) {
  return eToken_start;
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
void CStartToken::SetAttributed(PRBool aValue) {
  mAttributed=aValue;
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRBool CStartToken::IsAttributed(void) {
  return mAttributed;
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
void CStartToken::SetEmpty(PRBool aValue) {
  mEmpty=aValue;
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRBool CStartToken::IsEmpty(void) {
  return mEmpty;
}

/*
 *  Consume the identifier portion of the start tag
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult CStartToken::Consume(PRUnichar aChar, CScanner& aScanner) {

  //if you're here, we've already Consumed the < char, and are
   //ready to Consume the rest of the open tag identifier.
   //Stop consuming as soon as you see a space or a '>'.
   //NOTE: We don't Consume the tag attributes here, nor do we eat the ">"

  mTextValue=aChar;
  nsresult result=aScanner.ReadWhile(mTextValue,gIdentChars,PR_FALSE);
  char buffer[300];
  mTextValue.ToCString(buffer,sizeof(buffer)-1);
  mTypeID = NS_TagToEnum(buffer);

   //Good. Now, let's skip whitespace after the identifier,
   //and see if the next char is ">". If so, we have a complete
   //tag without attributes.
  if(NS_OK==result) {
    result=aScanner.SkipWhitespace();
    if(NS_OK==result) {
      result=aScanner.GetChar(aChar);
      if(NS_OK==result) {
        if(kGreaterThan!=aChar) { //look for '>'
         //push that char back, since we apparently have attributes...
          aScanner.PutBack(aChar);
          mAttributed=PR_TRUE;
        } //if
      } //if
    }//if
  }
  return result;
};


/*
 *  Dump contents of this token to givne output stream
 *
 *  @update  gess 3/25/98
 *  @param   out -- ostream to output content
 *  @return
 */
void CStartToken::DebugDumpSource(ostream& out) {
  char buffer[200];
  mTextValue.ToCString(buffer,sizeof(buffer)-1);
  out << "<" << buffer;
  if(!mAttributed)
    out << ">";
}


/*
 *  constructor from tag id
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
CEndToken::CEndToken(eHTMLTags aTag) : CHTMLToken(aTag) {
}


/*
 *  default constructor for end token
 *
 *  @update  gess 3/25/98
 *  @param   aName -- char* containing token name
 *  @return
 */
CEndToken::CEndToken(const nsString& aName) : CHTMLToken(aName) {
}

/*
 *  Consume the identifier portion of the end tag
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult CEndToken::Consume(PRUnichar aChar, CScanner& aScanner) {

  //if you're here, we've already Consumed the <! chars, and are
   //ready to Consume the rest of the open tag identifier.
   //Stop consuming as soon as you see a space or a '>'.
   //NOTE: We don't Consume the tag attributes here, nor do we eat the ">"

  mTextValue="";
  static nsAutoString terminals(">");
  nsresult result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE);

  char buffer[300];
  mTextValue.ToCString(buffer,sizeof(buffer)-1);
  mTypeID= NS_TagToEnum(buffer);

  if(NS_OK==result)
    result=aScanner.GetChar(aChar); //eat the closing '>;
  return result;
};


/*
 *  Asks the token to determine the <i>HTMLTag type</i> of
 *  the token. This turns around and looks up the tag name
 *  in the tag dictionary.
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return  eHTMLTag id of this endtag
 */
PRInt32 CEndToken::GetTypeID(){
  if(eHTMLTag_unknown==mTypeID) {
    nsAutoString tmp(mTextValue);
    tmp.ToUpperCase();
    char cbuf[200];
    tmp.ToCString(cbuf, sizeof(cbuf));
    mTypeID = NS_TagToEnum(cbuf);
    switch(mTypeID) {
      case eHTMLTag_dir:
      case eHTMLTag_menu:
        mTypeID=eHTMLTag_ul;
        break;
      default:
        break;
    }
  }
  return mTypeID;
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
const char*  CEndToken::GetClassName(void) {
  return "/end";
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRInt32 CEndToken::GetTokenType(void) {
  return eToken_end;
}

/*
 *  Dump contents of this token to givne output stream
 *
 *  @update  gess 3/25/98
 *  @param   out -- ostream to output content
 *  @return
 */
void CEndToken::DebugDumpSource(ostream& out) {
  char buffer[200];
  mTextValue.ToCString(buffer,sizeof(buffer)-1);
  out << "</" << buffer << ">";
}

/*
 *  default constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string to init token name with
 *  @return
 */
CTextToken::CTextToken() : CHTMLToken(eHTMLTag_text) {
}


/*
 *  string based constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string to init token name with
 *  @return
 */
CTextToken::CTextToken(const nsString& aName) : CHTMLToken(aName) {
  mTypeID=eHTMLTag_text;
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
const char*  CTextToken::GetClassName(void) {
  return "text";
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRInt32 CTextToken::GetTokenType(void) {
  return eToken_text;
}

/*
 *  Consume as much clear text from scanner as possible.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult CTextToken::Consume(PRUnichar, CScanner& aScanner) {
  static nsAutoString terminals("&<\r\n");
  nsresult result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE);
  return result;
};

/*
 *  default constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string to init token name with
 *  @return
 */
CCommentToken::CCommentToken() : CHTMLToken(eHTMLTag_comment) {
}


/*
 *  Default constructor
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
CCommentToken::CCommentToken(const nsString& aName) : CHTMLToken(aName) {
  mTypeID=eHTMLTag_comment;
}

/*
 *  Consume the identifier portion of the comment.
 *  Note that we've already eaten the "<!" portion.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult CCommentToken::Consume(PRUnichar aChar, CScanner& aScanner) {

  nsresult  result=NS_OK;

  static nsAutoString terminals(">");

  aScanner.GetChar(aChar);
  mTextValue="<!";
  if(kMinus==aChar) {
    mTextValue+="-";
    result=aScanner.GetChar(aChar);
    if(NS_OK==result) {
      if(kMinus==aChar) {
           //in this case, we're reading a long-form comment <-- xxx -->
        mTextValue+="-";
        PRInt32 findpos=-1;
        while((findpos==kNotFound) && (NS_OK==result)) {
          result=aScanner.ReadUntil(mTextValue,terminals,PR_TRUE);
          findpos=mTextValue.RFind("-->");
        }
        return result;
      }
    }
  }

  if(NS_OK==result) {
     //if you're here, we're consuming a "short-form" comment
    mTextValue+=aChar;
    result=aScanner.ReadUntil(mTextValue,terminals,PR_TRUE);
  }
  return result;
}


/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
const char* CCommentToken::GetClassName(void){
  return "/**/";
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRInt32 CCommentToken::GetTokenType(void) {
  return eToken_comment;
}

/*
 *  default constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string to init token name with
 *  @return
 */
CNewlineToken::CNewlineToken() : CHTMLToken(eHTMLTag_newline) {
}


/*
 *  default constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string value to init token name with
 *  @return
 */
CNewlineToken::CNewlineToken(const nsString& aName) : CHTMLToken(aName) {
  mTypeID=eHTMLTag_newline;
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
const char*  CNewlineToken::GetClassName(void) {
  return "crlf";
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRInt32 CNewlineToken::GetTokenType(void) {
  return eToken_newline;
}

/**
 *  This method retrieves the value of this internal string.
 *
 *  @update gess 3/25/98
 *  @return nsString reference to internal string value
 */
nsString& CNewlineToken::GetStringValueXXX(void) {
  static nsAutoString theStr("\n");
  return theStr;
}

/*
 *  Consume as many cr/lf pairs as you can find.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult CNewlineToken::Consume(PRUnichar aChar, CScanner& aScanner) {
  mTextValue=aChar;

    //we already read the \r or \n, let's see what's next!
  PRUnichar nextChar;
  nsresult result=aScanner.Peek(nextChar);

  if(NS_OK==result) {
    switch(aChar) {
      case kNewLine:
        if(kCR==nextChar) {
          result=aScanner.GetChar(nextChar);
          mTextValue+=nextChar;
        }
        break;
      case kCR:
        if(kNewLine==nextChar) {
          result=aScanner.GetChar(nextChar);
          mTextValue+=nextChar;
        }
        break;
      default:
        break;
    }
  }
  return result;
}

/*
 *  default constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string to init token name with
 *  @return
 */
CAttributeToken::CAttributeToken() : CHTMLToken(eHTMLTag_unknown) {
}

/*
 *  string based constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string value to init token name with
 *  @return
 */
CAttributeToken::CAttributeToken(const nsString& aName) : CHTMLToken(aName),
  mTextKey() {
  mLastAttribute=PR_FALSE;
}

/*
 *  construct initializing data to
 *  key value pair
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string value to init token name with
 *  @return
 */
CAttributeToken::CAttributeToken(const nsString& aKey, const nsString& aName) : CHTMLToken(aName) {
  mTextKey = aKey;
  mLastAttribute=PR_FALSE;
}

/**
 *
 * @update	gess8/4/98
 * @param
 * @return
 */
void CAttributeToken::Reinitialize(PRInt32 aTag, const nsString& aString){
  CHTMLToken::Reinitialize(aTag,aString);
  mTextKey.Truncate();
  mLastAttribute=PR_FALSE;
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
const char*  CAttributeToken::GetClassName(void) {
  return "attr";
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRInt32 CAttributeToken::GetTokenType(void) {
  return eToken_attribute;
}

/*
 *  Dump contents of this token to givne output stream
 *
 *  @update  gess 3/25/98
 *  @param   out -- ostream to output content
 *  @return
 */
void CAttributeToken::DebugDumpToken(ostream& out) {
  char buffer[200];
  mTextKey.ToCString(buffer,sizeof(buffer)-1);
  out << "[" << GetClassName() << "] " << buffer << "=";
  mTextValue.ToCString(buffer,sizeof(buffer)-1);
  out << buffer << ": " << mTypeID << endl;
}


/*
 *  This general purpose method is used when you want to
 *  consume a known quoted string.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
PRInt32 ConsumeQuotedString(PRUnichar aChar,nsString& aString,CScanner& aScanner){
  static nsAutoString terminals1(">'");
  static nsAutoString terminals2(">\"");

  PRInt32 result=kNotFound;
  switch(aChar) {
    case kQuote:
      result=aScanner.ReadUntil(aString,terminals2,PR_TRUE);
      break;
    case kApostrophe:
      result=aScanner.ReadUntil(aString,terminals1,PR_TRUE);
      break;
    default:
      break;
  }
  PRUnichar ch=aString.Last();
  if(ch!=aChar)
    aString+=aChar;
  return result;
}

/*
 *  This general purpose method is used when you want to
 *  consume attributed text value.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
PRInt32 ConsumeAttributeValueText(PRUnichar,nsString& aString,CScanner& aScanner){

  PRInt32 result=kNotFound;
  static nsAutoString terminals(" \t\b\r\n>");
  result=aScanner.ReadUntil(aString,terminals,PR_FALSE);
  return result;
}


/*
 *  Consume the key and value portions of the attribute.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult CAttributeToken::Consume(PRUnichar aChar, CScanner& aScanner) {

  aScanner.SkipWhitespace();             //skip leading whitespace
  static nsAutoString kAllButEqualOrGT("=>");
  nsresult result=aScanner.Peek(aChar);
  if(NS_OK==result) {
    if(kQuote==aChar) {               //if you're here, handle quoted key...
      result=aScanner.GetChar(aChar);        //skip the quote sign...
      if(NS_OK==result) {
        mTextKey=aChar;
        result=ConsumeQuotedString(aChar,mTextKey,aScanner);
      }
    }
    else if(kHashsign==aChar) {
      result=aScanner.GetChar(aChar);        //skip the hash sign...
      if(NS_OK==result) {
        mTextKey=aChar;
        result=aScanner.ReadWhile(mTextKey,gDigits,PR_TRUE);
      }
    }
    else {
        //If you're here, handle an unquoted key.
        //Don't forget to reduce entities inline!
      static nsAutoString terminals(" >=\t\b\r\n\"");
      result=aScanner.ReadUntil(mTextKey,terminals,PR_FALSE);
    }

      //now it's time to Consume the (optional) value...
    if(NS_OK == (result=aScanner.SkipWhitespace())) {
      //Skip ahead until you find an equal sign or a '>'...
//    if(NS_OK == (result=aScanner.SkipTo(kAllButEqualOrGT))) {
        if(NS_OK == (result=aScanner.Peek(aChar))) {
          if(kEqual==aChar){
            result=aScanner.GetChar(aChar);  //skip the equal sign...
            if(NS_OK==result) {
              result=aScanner.SkipWhitespace();     //now skip any intervening whitespace
              if(NS_OK==result) {
                result=aScanner.GetChar(aChar);  //and grab the next char.
                if(NS_OK==result) {
                  if((kQuote==aChar) || (kApostrophe==aChar)) {
                    mTextValue=aChar;
                    result=ConsumeQuotedString(aChar,mTextValue,aScanner);
                  }
                  else {
                    mTextValue=aChar;       //it's an alphanum attribute...
                    result=ConsumeAttributeValueText(aChar,mTextValue,aScanner);
                  }
                }//if
                if(NS_OK==result)
                  result=aScanner.SkipWhitespace();
              }//if
            }//if
          }//if
        }//if
//      }if
    }
    if(NS_OK==result) {
      result=aScanner.Peek(aChar);
      mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result));
    }
  }
  return result;
}

/*
 *  Dump contents of this token to givne output stream
 *
 *  @update  gess 3/25/98
 *  @param   out -- ostream to output content
 *  @return
 */
void CAttributeToken::DebugDumpSource(ostream& out) {
  char buffer[200];
  mTextKey.ToCString(buffer,sizeof(buffer)-1);
  out << " " << buffer;
  if(mTextValue.Length()){
    mTextValue.ToCString(buffer,sizeof(buffer)-1);
    out << "=" << buffer;
  }
  if(mLastAttribute)
    out<<">";
}

/*
 *  default constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string to init token name with
 *  @return
 */
CWhitespaceToken::CWhitespaceToken() : CHTMLToken(eHTMLTag_whitespace) {
}


/*
 *  default constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string value to init token name with
 *  @return
 */
CWhitespaceToken::CWhitespaceToken(const nsString& aName) : CHTMLToken(aName) {
  mTypeID=eHTMLTag_whitespace;
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
const char*  CWhitespaceToken::GetClassName(void) {
  return "ws";
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRInt32 CWhitespaceToken::GetTokenType(void) {
  return eToken_whitespace;
}

/*
 *  This general purpose method is used when you want to
 *  consume an aribrary sequence of whitespace.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult CWhitespaceToken::Consume(PRUnichar aChar, CScanner& aScanner) {

  mTextValue=aChar;

  nsresult result=aScanner.ReadWhile(mTextValue,gWhitespace,PR_FALSE);
  if(NS_OK==result) {
    mTextValue.StripChars("\r");
  }
  return result;
}

/*
 *  default constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string to init token name with
 *  @return
 */
CEntityToken::CEntityToken() : CHTMLToken(eHTMLTag_entity) {
}

/*
 *  default constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string value to init token name with
 *  @return
 */
CEntityToken::CEntityToken(const nsString& aName) : CHTMLToken(aName) {
  mTypeID=eHTMLTag_entity;
#ifdef VERBOSE_DEBUG
  if(!VerifyEntityTable())  {
    cout<<"Entity table is invalid!" << endl;
  }
#endif
}


/*
 *  Consume the rest of the entity. We've already eaten the "&".
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult CEntityToken::Consume(PRUnichar aChar, CScanner& aScanner) {
  if(aChar)
    mTextValue=aChar;
  nsresult result=ConsumeEntity(aChar,mTextValue,aScanner);
  return result;
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
const char*  CEntityToken::GetClassName(void) {
  return "&entity";
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRInt32 CEntityToken::GetTokenType(void) {
  return eToken_entity;
}

/*
 *  This general purpose method is used when you want to
 *  consume an entity &xxxx;. Keep in mind that entities
 *  are <i>not</i> reduced inline.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,CScanner& aScanner){

  PRInt32 result=aScanner.Peek(aChar);
  if(kNoError==result) {
    if(kLeftBrace==aChar) {
      //you're consuming a script entity...
      static nsAutoString terminals("}>");
      result=aScanner.ReadUntil(aString,terminals,PR_FALSE);
      if(kNoError==result) {
        result=aScanner.Peek(aChar);
        if(kNoError==result) {
          if(kRightBrace==aChar) {
            aString+=kRightBrace;   //append rightbrace, and...
            result=aScanner.GetChar(aChar);//yank the closing right-brace
          }
        }
      }
    } //if
    else {
      result=aScanner.ReadWhile(aString,gIdentChars,PR_FALSE);
      if(kNoError==result) {
        result=aScanner.Peek(aChar);
        if(kNoError==result) {
          if (kSemicolon == aChar) {
            // consume semicolon that stopped the scan
            result=aScanner.GetChar(aChar);
          }
        }
      }//if
    } //else
  } //if
  return result;
}


/*
 *  This method converts this entity into its underlying
 *  unicode equivalent.
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
  PRInt32 value=0;
  if(nsString::IsDigit(mTextValue[0])) {
    PRInt32 err=0;
    value=mTextValue.ToInteger(&err);
    if(0==err)
      aString.Append(PRUnichar(value));
  }
  else {
    char cbuf[30];
    mTextValue.ToCString(cbuf, sizeof(cbuf));
    value = NS_EntityToUnicode(cbuf);
    if(-1 != value) {
      aString = PRUnichar(value);
    }
  }
  return value;
}

/*
 *  Dump contents of this token to givne output stream
 *
 *  @update  gess 3/25/98
 *  @param   out -- ostream to output content
 *  @return
 */
void CEntityToken::DebugDumpSource(ostream& out) {
  char* cp=mTextValue.ToNewCString();
  out << "&" << *cp;
  delete cp;
}

/*
 *  default constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string to init token name with
 *  @return
 */
CScriptToken::CScriptToken() : CHTMLToken(eHTMLTag_script) {
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
const char*  CScriptToken::GetClassName(void) {
  return "script";
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRInt32 CScriptToken::GetTokenType(void) {
  return eToken_script;
}

/*
 *  default constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string to init token name with
 *  @return
 */
CStyleToken::CStyleToken() : CHTMLToken(eHTMLTag_style) {
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
const char*  CStyleToken::GetClassName(void) {
  return "style";
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
PRInt32 CStyleToken::GetTokenType(void) {
  return eToken_style;
}


/*
 *  string based constructor
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string value to init token name with
 *  @return
 */
CSkippedContentToken::CSkippedContentToken(const nsString& aName) : CAttributeToken(aName) {
  mTextKey = "$skipped-content";/* XXX need a better answer! */
}

/*
 *
 *
 *  @update  gess 3/25/98
 *  @param
 *  @return
 */
const char*  CSkippedContentToken::GetClassName(void) {
  return "skipped";
}

/*
 *  Retrieve the token type as an int.
 *  @update  gess 3/25/98
 *  @return
 */
PRInt32 CSkippedContentToken::GetTokenType(void) {
  return eToken_skippedcontent;
}

/*
 *  Consume content until you find an end sequence that matches
 *  this objects current mTextValue. Note that this is complicated
 *  by the fact that you can be parsing content that itself
 *  contains quoted content of the same type (like <SCRIPT>).
 *  That means we have to look for quote-pairs, and ignore the
 *  content inside them.
 *
 *  @update  gess 7/25/98
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult CSkippedContentToken::Consume(PRUnichar,CScanner& aScanner) {
  PRBool      done=PR_FALSE;
  PRInt32     result=kNoError;
  nsString    temp;

  while((!done) && (kNoError==result)) {
    static nsAutoString terminals(">");
    result=aScanner.ReadUntil(temp,terminals,PR_TRUE);
    done=PRBool(kNotFound!=temp.RFind(mTextValue,PR_TRUE));
  }
  int len=temp.Length();
  temp.Truncate(len-mTextValue.Length());
  mTextKey=temp;
  return result;
}


/**
 *
 * @update	gess4/25/98
 * @param
 * @return
 */
const char* GetTagName(PRInt32 aTag) {
  const char* result = NS_EnumToTag((nsHTMLTag) aTag);
  if (0 == result) {
    if(aTag>=eHTMLTag_userdefined)
      result = gUserdefined;
    else result= gEmpty;
  }
  return result;
}