Mozilla/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 sw=2 et tw=78: */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Blake Kaplan <mrbkap@gmail.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include <ctype.h>
#include <time.h>
#include <stdio.h>
#include "nsScanner.h"
#include "nsToken.h"
#include "nsIAtom.h"
#include "nsHTMLTokens.h"
#include "prtypes.h"
#include "nsDebug.h"
#include "nsHTMLTags.h"
#include "nsHTMLEntities.h"
#include "nsCRT.h"
#include "nsReadableUtils.h"
#include "nsUnicharUtils.h"
#include "nsScanner.h"


// NUL-terminated UTF-16 spelling of "userdefined".
// NOTE(review): no use of this array is visible in this part of the file.
static const PRUnichar sUserdefined[] = {'u', 's', 'e', 'r', 'd', 'e', 'f',
                                         'i', 'n', 'e', 'd', 0};

// NUL-terminated character set: '&', backspace, tab, LF, CR, space and '>'.
// NOTE(review): going by the name these are the characters that end an
// attribute value; the consumer is not visible in this part of the file,
// so confirm against the code that uses it.
static const PRUnichar kAttributeTerminalChars[] = {
  PRUnichar('&'), PRUnichar('\b'), PRUnichar('\t'),
  PRUnichar('\n'), PRUnichar('\r'), PRUnichar(' '),
  PRUnichar('>'),
  PRUnichar(0)
};

// Forward declaration; ConsumeEntity() below calls this with the integer
// value parsed from a numeric character reference (&#...).
static void AppendNCR(nsSubstring& aString, PRInt32 aNCRValue);
/*
 *  Handle an '&' found while consuming text: either reduce the entity that
 *  follows it and append the resulting character to aString, or append the
 *  raw text when it turns out not to be an entity.
 *
 *  @param   aString -- receives the reduced character or the raw text
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag -- If NS_IPARSER_FLAG_VIEW_SOURCE do not reduce entities...
 *  @return  error result
 */
static
nsresult ConsumeEntity(nsScannerSharedSubstring& aString,
                       nsScanner& aScanner,
                       PRInt32 aFlag)
{
  // Look at the character right after the '&'.
  PRUnichar nextChar;
  nsresult rv = aScanner.Peek(nextChar, 1);
  if (NS_FAILED(rv)) {
    return rv;
  }

  if (nsCRT::IsAsciiAlpha(nextChar) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    // Named entity, e.g. &amp;
    nsAutoString entityText;
    rv = CEntityToken::ConsumeEntity(nextChar, entityText, aScanner);
    if (NS_SUCCEEDED(rv)) {
      PRInt32 codePoint = nsHTMLEntities::EntityToUnicode(entityText);
      PRUnichar lastChar = entityText.Last();
      nsSubstring &out = aString.writable();

      // If an entity value is greater than 255 then:
      // Nav 4.x does not treat it as an entity,
      // IE treats it as an entity if terminated with a semicolon.
      // We follow IE here.
      if (codePoint >= 0 && (codePoint <= 255 || lastChar == ';')) {
        // A valid entity, so reduce it.
        out.Append(PRUnichar(codePoint));
      }
      else {
        // Not an entity after all; emit the text as it was written.
        out.Append(kAmpersand);
        out.Append(entityText);
      }
    }
    return rv;
  }

  if (nextChar == kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    // Numeric character reference, e.g. &#160;
    nsAutoString entityText;
    rv = CEntityToken::ConsumeEntity(nextChar, entityText, aScanner);
    if (NS_SUCCEEDED(rv)) {
      nsSubstring &out = aString.writable();
      if (NS_HTMLTOKENS_NOT_AN_ENTITY == rv) {
        // Looked like an entity but it's not: just pass the '&' through.
        PRUnichar ampersand = 0;
        aScanner.GetChar(ampersand);
        out.Append(ampersand);
        rv = NS_OK; // just being safe..
      }
      else {
        PRInt32 conversionError;
        PRInt32 codePoint = entityText.ToInteger(&conversionError, kAutoDetect);
        AppendNCR(out, codePoint);
      }
    }
    return rv;
  }

  // Either we are in view-source or the '&' is not followed by anything
  // that could start an entity: consume the '&' itself as plain text.
  PRUnichar ampersand = 0;
  aScanner.GetChar(ampersand);
  aString.writable().Append(ampersand);
  return rv;
}

/*
 *  This general purpose method is used when you want to
 *  consume attributed text value.
 *  Note: It also reduces entities.
 *
 *  Text is read up to the end condition; what happens next depends on the
 *  character that stopped the read:
 *    - '&'   : handed to ConsumeEntity(), then reading continues;
 *    - CR/LF : if aAllowNewlines, appended to aString ("\r\n" counts as one
 *              newline), aNewlineCount is incremented and reading continues;
 *    - other : consumption stops, leaving that character in the scanner.
 *
 *  @param   aString -- receives the consumed text
 *  @param   aNewlineCount -- the newline count to increment when hitting newlines
 *  @param   aScanner -- controller of underlying input source
 *  @param   aEndCondition -- characters that stop consuming attribute.
 *  @param   aAllowNewlines -- whether to allow newlines in the value.
 *                             XXX it would be nice to roll this info into
 *                             aEndCondition somehow....
 *  @param   aFlag - contains information such as |dtd mode|view mode|doctype|etc...
 *  @return  error result
 */
static
nsresult ConsumeUntil(nsScannerSharedSubstring& aString,
                      PRInt32& aNewlineCount,
                      nsScanner& aScanner,
                      const nsReadEndCondition& aEndCondition,
                      PRBool aAllowNewlines,
                      PRInt32 aFlag)
{
  nsresult result = NS_OK;
  PRBool   done = PR_FALSE;

  do {
    result = aScanner.ReadUntil(aString,aEndCondition,PR_FALSE);
    if(NS_SUCCEEDED(result)) {
      // The result of this Peek is deliberately not propagated, so make sure
      // |ch| has a defined value even if Peek does not write to it. Zero is
      // neither '&', CR nor LF, so it falls into the final branch and simply
      // ends the loop.
      PRUnichar ch = 0;
      aScanner.Peek(ch);
      if(ch == kAmpersand) {
        result = ConsumeEntity(aString,aScanner,aFlag);
      }
      else if(ch == kCR && aAllowNewlines) {
        // Consume the CR, then look ahead to tell "\r\n" from a lone "\r".
        aScanner.GetChar(ch);
        result = aScanner.Peek(ch);
        if (NS_SUCCEEDED(result)) {
          nsSubstring &writable = aString.writable();
          if(ch == kNewLine) {
            writable.AppendLiteral("\r\n");
            aScanner.GetChar(ch);
          }
          else {
            writable.Append(PRUnichar('\r'));
          }
          ++aNewlineCount;
        }
      }
      else if(ch == kNewLine && aAllowNewlines) {
        aScanner.GetChar(ch);
        aString.writable().Append(PRUnichar('\n'));
        ++aNewlineCount;
      }
      else {
        // Hit a real terminator (or a newline that is not allowed).
        done = PR_TRUE;
      }
    }
  } while (NS_SUCCEEDED(result) && !done);

  return result;
}

/**************************************************************
  And now for the token classes...
 **************************************************************/

/*
 *  Construct an HTML token from a tag id.
 *
 *  @update  gess 3/25/98
 *  @param   aTag -- tag id handed on to the CToken base class
 */
CHTMLToken::CHTMLToken(eHTMLTags aTag)
  : CToken(aTag)
{
}


/*
 *  Destructor. The body is intentionally empty.
 */
CHTMLToken::~CHTMLToken()
{
}

/*
 *  Construct a start token from a tag id. The token starts out non-empty
 *  with an unknown container form.
 *
 *  @update  gess 3/25/98
 *  @param   aTag -- tag id of the start tag
 */
CStartToken::CStartToken(eHTMLTags aTag)
  : CHTMLToken(aTag)
{
#ifdef DEBUG
  mAttributed = PR_FALSE;
#endif
  mContainerInfo = eFormUnknown;
  mEmpty = PR_FALSE;
}

/*
 *  Construct a start token from a tag name. The tag id is left as
 *  eHTMLTag_unknown; GetTypeID() resolves it from the stored name.
 *
 *  @param   aName -- tag name to store as the token's text value
 */
CStartToken::CStartToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
#ifdef DEBUG
  mAttributed = PR_FALSE;
#endif
  mTextValue.Assign(aName);
  mContainerInfo = eFormUnknown;
  mEmpty = PR_FALSE;
}

/*
 *  Construct a start token from both a tag name and a tag id.
 *
 *  @param   aName -- tag name to store as the token's text value
 *  @param   aTag -- tag id of the start tag
 */
CStartToken::CStartToken(const nsAString& aName,eHTMLTags aTag)
  : CHTMLToken(aTag)
{
#ifdef DEBUG
  mAttributed = PR_FALSE;
#endif
  mTextValue.Assign(aName);
  mContainerInfo = eFormUnknown;
  mEmpty = PR_FALSE;
}

/*
 *  Return the type id (the tag type) for this token. If the id is still
 *  eHTMLTag_unknown it is looked up from the stored tag name and cached.
 *
 *  @update  gess 3/25/98
 *  @return  tag id of this start tag
 */
PRInt32 CStartToken::GetTypeID()
{
  if (mTypeID != eHTMLTag_unknown) {
    return mTypeID;
  }

  mTypeID = nsHTMLTags::LookupTag(mTextValue);
  return mTypeID;
}

/*
 *  Identify this token as a start token.
 *
 *  @update  gess 3/25/98
 *  @return  eToken_start
 */
PRInt32 CStartToken::GetTokenType(void)
{
  return eToken_start;
}

/*
 *  Set the "empty" flag of this start token.
 *
 *  @update  gess 3/25/98
 *  @param   aValue -- new value of the flag
 */
void CStartToken::SetEmpty(PRBool aValue)
{
  mEmpty = aValue;
}

/*
 *  Read the "empty" flag of this start token.
 *
 *  @update  gess 3/25/98
 *  @return  the value last stored by the constructor or SetEmpty()
 */
PRBool CStartToken::IsEmpty(void)
{
  return mEmpty;
}


/*
 *  Consume the identifier portion of the start tag.
 *
 *  When we get here the '<' has already been consumed. Only the tag
 *  identifier (and, outside of view-source, the whitespace after it) is
 *  consumed: neither the attributes nor the '>' are touched.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag - contains information such as |dtd mode|view mode|doctype|etc...
 *  @return  error result
 */
nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
{
  nsScannerSharedSubstring ident;
  nsresult rv = aScanner.ReadTagIdentifier(ident);

  if (aFlag & NS_IPARSER_FLAG_HTML) {
    mTypeID = (PRInt32)nsHTMLTags::LookupTag(ident.str());
    // Only keep the original tag string around for user-defined tags
    // and for view-source.
    if ((aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) ||
        eHTMLTag_userdefined == mTypeID) {
      mTextValue = ident.str();
    }
  }
  else {
    // Non-HTML: always keep the name and derive the id from it.
    mTextValue = ident.str();
    mTypeID = nsHTMLTags::LookupTag(mTextValue);
  }

  if (NS_SUCCEEDED(rv) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    rv = aScanner.SkipWhitespace(mNewlineCount);
  }

  if (kEOF == rv && !aScanner.IsIncremental()) {
    // No more data is coming, so take what we can get.
    rv = NS_OK;
  }

  return rv;
}


/*
 *  Return the tag name of this token. For a type id strictly between
 *  eHTMLTag_unknown and eHTMLTag_text an empty text value is first filled
 *  in from the tag table.
 *
 *  @return  the token's text value
 */
const nsSubstring& CStartToken::GetStringValue()
{
  const PRBool inTagRange =
    (eHTMLTag_unknown < mTypeID) && (mTypeID < eHTMLTag_text);

  if (inTagRange && mTextValue.IsEmpty()) {
    mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
  }

  return mTextValue;
}

/*
 *  Replace the contents of anOutputString with the source form of this
 *  start tag.
 *
 *  @update  gess 3/25/98
 *  @param   anOutputString will receive the result
 *  @return  nada
 */
void CStartToken::GetSource(nsString& anOutputString)
{
  // Start from an empty string, then reuse the appending variant.
  anOutputString.Truncate();
  AppendSourceTo(anOutputString);
}

/*
 *  Append "<name>" to anOutputString, where name is the stored text value
 *  or, when that is empty, the tag name for the token's type id.
 *
 *  @update  harishd 03/23/00
 *  @param   anOutputString -- the result is appended to this string
 *  @return  nada
 */
void CStartToken::AppendSourceTo(nsAString& anOutputString)
{
  anOutputString.Append(PRUnichar('<'));

  /*
   * Watch out for Bug 15204
   */
  if (mTextValue.IsEmpty()) {
    anOutputString.Append(GetTagName(mTypeID));
  }
  else {
    anOutputString.Append(mTextValue);
  }

  anOutputString.Append(PRUnichar('>'));
}

/*
 *  Construct an end token from a tag id.
 *
 *  @update  gess 3/25/98
 *  @param   aTag -- tag id of the end tag
 */
CEndToken::CEndToken(eHTMLTags aTag)
  : CHTMLToken(aTag)
{
}

/*
 *  Construct an end token from a tag name. The tag id is left as
 *  eHTMLTag_unknown; GetTypeID() resolves it from the stored name.
 *
 *  @param   aName -- tag name to store as the token's text value
 */
CEndToken::CEndToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
  mTextValue.Assign(aName);
}

/*
 *  Construct an end token from both a tag name and a tag id.
 *
 *  @param   aName -- tag name to store as the token's text value
 *  @param   aTag -- tag id of the end tag
 */
CEndToken::CEndToken(const nsAString& aName,eHTMLTags aTag)
  : CHTMLToken(aTag)
{
  mTextValue.Assign(aName);
}

/*
 *  Consume the identifier portion of the end tag and, outside of
 *  view-source, the whitespace that follows it.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag - contains information such as |dtd mode|view mode|doctype|etc...
 *  @return  error result
 */
nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
{
  nsScannerSharedSubstring ident;
  nsresult rv = aScanner.ReadTagIdentifier(ident);

  if (aFlag & NS_IPARSER_FLAG_HTML) {
    mTypeID = (PRInt32)nsHTMLTags::LookupTag(ident.str());
    // Only keep the original tag string around for user-defined tags
    // and for view-source.
    if ((aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) ||
        eHTMLTag_userdefined == mTypeID) {
      mTextValue = ident.str();
    }
  }
  else {
    // Non-HTML: always keep the name and derive the id from it.
    mTextValue = ident.str();
    mTypeID = nsHTMLTags::LookupTag(mTextValue);
  }

  if (NS_SUCCEEDED(rv) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    rv = aScanner.SkipWhitespace(mNewlineCount);
  }

  if (kEOF == rv && !aScanner.IsIncremental()) {
    // No more data is coming, so take what we can get.
    rv = NS_OK;
  }

  return rv;
}


/*
 *  Asks the token to determine the <i>HTMLTag type</i> of
 *  the token. An id that is still eHTMLTag_unknown is looked up
 *  from the tag name in the tag dictionary; a lookup that yields
 *  "dir" or "menu" is stored as "ul".
 *
 *  @update  gess 3/25/98
 *  @return  eHTMLTag id of this endtag
 */
PRInt32 CEndToken::GetTypeID()
{
  if (mTypeID != eHTMLTag_unknown) {
    return mTypeID;
  }

  mTypeID = nsHTMLTags::LookupTag(mTextValue);
  if (eHTMLTag_dir == mTypeID || eHTMLTag_menu == mTypeID) {
    mTypeID = eHTMLTag_ul;
  }

  return mTypeID;
}

/*
 *  Identify this token as an end token.
 *
 *  @update  gess 3/25/98
 *  @return  eToken_end
 */
PRInt32 CEndToken::GetTokenType(void)
{
  return eToken_end;
}

/*
 *  Return the tag name of this token. For a type id strictly between
 *  eHTMLTag_unknown and eHTMLTag_text an empty text value is first filled
 *  in from the tag table.
 *
 *  @return  the token's text value
 */
const nsSubstring& CEndToken::GetStringValue()
{
  const PRBool inTagRange =
    (eHTMLTag_unknown < mTypeID) && (mTypeID < eHTMLTag_text);

  if (inTagRange && mTextValue.IsEmpty()) {
    mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
  }

  return mTextValue;
}

/*
 *  Replace the contents of anOutputString with the source form of this
 *  end tag.
 *
 *  @update  gess 3/25/98
 *  @param   anOutputString will receive the result
 *  @return  nada
 */
void CEndToken::GetSource(nsString& anOutputString)
{
  // Start from an empty string, then reuse the appending variant.
  anOutputString.Truncate();
  AppendSourceTo(anOutputString);
}

/*
 *  Append "</name>" to anOutputString, where name is the stored text value
 *  or, when that is empty, the tag name for the token's type id.
 *
 *  @update  harishd 03/23/00
 *  @param   anOutputString -- the result is appended to this string
 *  @return  nada
 */
void CEndToken::AppendSourceTo(nsAString& anOutputString)
{
  anOutputString.AppendLiteral("</");

  if (mTextValue.IsEmpty()) {
    anOutputString.Append(GetTagName(mTypeID));
  }
  else {
    anOutputString.Append(mTextValue);
  }

  anOutputString.Append(PRUnichar('>'));
}

/*
 *  Default constructor: a text token with tag id eHTMLTag_text.
 *
 *  @update  gess 3/25/98
 */
CTextToken::CTextToken()
  : CHTMLToken(eHTMLTag_text)
{
}


/*
 *  String based constructor: a text token with tag id eHTMLTag_text whose
 *  text value is rebound to aName.
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string to init the token's text with
 */
CTextToken::CTextToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_text)
{
  mTextValue.Rebind(aName);
}

/*
 *  Identify this token as a text token.
 *
 *  @update  gess 3/25/98
 *  @return  eToken_text
 */
PRInt32 CTextToken::GetTokenType(void)
{
  return eToken_text;
}

/*
 *  Report the length of the token's text value.
 *
 *  @return  mTextValue.Length()
 */
PRInt32 CTextToken::GetTextLength(void)
{
  return mTextValue.Length();
}

/*
 *  Consume as much clear text from scanner as possible.
 *
 *  Text is scanned up to the next '&' or '<'. Newlines inside the text are
 *  consumed and counted in mNewlineCount; a lone "\r" is replaced in the
 *  scanner buffer by "\n". On return mTextValue is bound to the range of the
 *  scanner buffer that was consumed (no copy is requested here).
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream; reused as scratch space
 *                    for the peeked/consumed characters
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag -- not read by this method
 *  @return  error result
 */
nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
  // Characters that interrupt the scan: newlines need bookkeeping, while
  // '&' and '<' end the text token.
  static const PRUnichar theTerminalsChars[] =
    { PRUnichar('\n'), PRUnichar('\r'), PRUnichar('&'), PRUnichar('<'),
      PRUnichar(0) };
  static const nsReadEndCondition theEndCondition(theTerminalsChars);
  nsresult  result=NS_OK;
  PRBool    done=PR_FALSE;
  // origin: where this token starts; [start, end): range reported by the
  // most recent ReadUntil.
  nsScannerIterator origin, start, end;

  // Start scanning after the first character, because we know it to
  // be part of this text token (we wouldn't have come here if it weren't)
  aScanner.CurrentPosition(origin);
  start = origin;
  aScanner.EndReading(end);

  NS_ASSERTION(start != end, "Calling CTextToken::Consume when already at the "
                             "end of a document is a bad idea.");

  aScanner.SetPosition(++start);

  while((NS_OK==result) && (!done)) {
    // NOTE(review): ReadUntil appears to leave |end| on the character that
    // stopped the read; the iterator arithmetic below depends on that --
    // confirm against nsScanner.
    result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);
    if(NS_OK==result) {
      result=aScanner.Peek(aChar);

      if(((kCR==aChar) || (kNewLine==aChar)) && (NS_OK==result)) {
        // The result of this GetChar is overwritten by the Peek that follows.
        result=aScanner.GetChar(aChar); //strip off the char
        // NOTE(review): theNextChar is compared below even when this Peek
        // fails; that relies on what Peek stores in it on failure -- confirm.
        PRUnichar theNextChar;
        result=aScanner.Peek(theNextChar);    //then see what's next.
        switch(aChar) {
          case kCR:
            // result=aScanner.GetChar(aChar);
            if(kLF==theNextChar) {
              // If the "\r" is followed by a "\n", don't replace it and
              // let it be ignored by the layout system
              end.advance(2);
              result=aScanner.GetChar(theNextChar);
            }
            else {
              // If it standalone, replace the "\r" with a "\n" so that
              // it will be considered by the layout system
              aScanner.ReplaceCharacter(end, kLF);
              ++end;
            }
            ++mNewlineCount;
            break;
          case kLF:
            // Keep the "\n" as part of the token text.
            ++end;
            ++mNewlineCount;
            break;
        } //switch
      }
      // Stopped on '&' or '<' (or the Peek failed): the token ends here.
      else done=PR_TRUE;
    }
  }

  // Bind the token text to everything between the starting position and the
  // last position reached above.
  aScanner.BindSubstring(mTextValue, origin, end);

  return result;
}

/*
 *  Consume as much clear text from scanner as possible.
 *  The scanner is left on the < of the perceived end tag.
 *
 *  @param   aConservativeConsume -- controls our handling of content with no
 *                                   terminating string.
 *  @param   aIgnoreComments -- whether or not we should take comments into
 *                              account in looking for the end tag.
 *  @param   aScanner -- controller of underlying input source
 *  @param   aEndTagName -- the terminal tag name.
 *  @param   aFlag -- dtd modes and such.
 *  @param   aFlushTokens -- set to PR_TRUE if we found the terminal tag;
 *                           otherwise left untouched.
 *  @return  NS_OK if the terminal tag was found; kFakeEndTag if it was not
 *           found and the scanner is not incremental; kEOF if it was not
 *           found and the scanner is incremental.
 */
nsresult CTextToken::ConsumeCharacterData(PRBool aConservativeConsume,
                                          PRBool aIgnoreComments,
                                          nsScanner& aScanner,
                                          const nsAString& aEndTagName,
                                          PRInt32 aFlag,
                                          PRBool& aFlushTokens) {
  nsresult      result=NS_OK;
  // theStartOffset     -- where the character data begins
  // theCurrOffset      -- where the next search starts
  // theTermStrPos      -- most recent candidate terminal string
  // theStartCommentPos -- first "<!--" seen, if any
  // theAltTermStrPos   -- first bogus (inside an unclosed comment) terminal
  // Each of the last three holds endPos while it is "not set".
  nsScannerIterator theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos;
  PRBool        done=PR_FALSE;
  PRBool        theLastIteration=PR_FALSE;

  aScanner.CurrentPosition(theStartOffset);
  theCurrOffset = theStartOffset;
  aScanner.EndReading(endPos);
  theTermStrPos = theStartCommentPos = theAltTermStrPos = endPos;

  // ALGORITHM: *** The performance is based on correctness of the document ***
  // 1. Look for a '<' character.  This could be
  //    a) Start of a comment (<!--), b) Start of the terminal string, or c) a start of a tag.
  //    We are interested in a) and b). c) is ignored because in CDATA we don't care for tags.
  //    NOTE: Technically speaking in CDATA we should ignore the comments too!! But for compatibility
  //          we don't.
  // 2. Having the offset, for '<', search for the terminal string from there on and record its offset.
  // 3. From the same '<' offset also search for start of a comment '<!--'. If found search for
  //    end comment '-->' between the terminal string and '<!--'.  If you did not find the end
  //    comment, then we have a malformed document, i.e., this section has a premature terminal string
  //    Ex. <SCRIPT><!-- document.write('</SCRIPT>') //--> </SCRIPT>. But record terminal string's
  //    offset if this is the first premature terminal string, and update the current offset to the terminal
  //    string (premature) offset and goto step 1.
  // 4. Amen...If you found a terminal string and '-->'. Otherwise goto step 1.
  // 5. If the end of the document is reached and if we still don't have the condition in step 4. then
  //    assume that the premature terminal string is the actual terminal string and goto step 1. This
  //    will be our last iteration. If there is no premature terminal string and we're being
  //    conservative in our consumption (aConservativeConsume), then don't consume anything
  //    from the scanner. Otherwise, we consume all the way until the end (for <xmp>).

  NS_NAMED_LITERAL_STRING(ltslash, "</");
  const nsString theTerminalString = ltslash + aEndTagName;

  PRUint32 termStrLen=theTerminalString.Length();
  while((result == NS_OK) && !done) {
    PRBool found = PR_FALSE;
    nsScannerIterator gtOffset,ltOffset = theCurrOffset;
    // Walk over every '<' that still has at least termStrLen characters
    // after it, looking for one that starts the terminal string.
    while (FindCharInReadable(PRUnichar(kLessThan), ltOffset, endPos) &&
           ((PRUint32)ltOffset.size_forward() >= termStrLen ||
            Distance(ltOffset, endPos) >= termStrLen)) {
      // Make a copy of the (presumed) end tag and
      // do a case-insensitive comparison

      nsScannerIterator start(ltOffset), end(ltOffset);
      end.advance(termStrLen);

      // The name must be followed by '>' or whitespace (or by the end of the
      // buffer), so that e.g. "</scriptx" does not match "</script".
      if (CaseInsensitiveFindInReadable(theTerminalString,start,end) &&
          (end == endPos || (*end == '>'  || *end == ' '  ||
                             *end == '\t' || *end == '\n' ||
                             *end == '\r' || *end == '\b'))) {
        gtOffset = end;
        // Note that aIgnoreComments is only not set for <script>. We don't
        // want to execute scripts that aren't in the form of: <script\s.*>
        if ((end == endPos && aIgnoreComments) ||
            FindCharInReadable(PRUnichar(kGreaterThan), gtOffset, endPos)) {
          found = PR_TRUE;
          theTermStrPos = start;
        }
        break;
      }
      ltOffset.advance(1);
    }

    if (found && theTermStrPos != endPos) {
      // Comment handling is skipped in strict mode, on the last-chance
      // iteration (plan B below) and when comments are to be ignored.
      if(!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) &&
         !theLastIteration && !aIgnoreComments) {
        nsScannerIterator endComment(ltOffset);
        endComment.advance(5);

        if ((theStartCommentPos == endPos) &&
            FindInReadable(NS_LITERAL_STRING("<!--"), theCurrOffset, endComment)) {
          theStartCommentPos = theCurrOffset;
        }

        if (theStartCommentPos != endPos) {
          // Search for --> between <!-- and </TERMINALSTRING>.
          theCurrOffset = theStartCommentPos;
          nsScannerIterator terminal(theTermStrPos);
          if (!RFindInReadable(NS_LITERAL_STRING("-->"),
                               theCurrOffset, terminal)) {
            // If you're here it means that we have a bogus terminal string.
            // Even though it is bogus, the position of the terminal string
            // could be helpful in case we hit the rock bottom.
            if (theAltTermStrPos == endPos) {
              // But we only want to remember the first bogus terminal string.
              theAltTermStrPos = theTermStrPos;
            }

            // We did not find '-->' so keep searching for terminal string.
            theCurrOffset = theTermStrPos;
            theCurrOffset.advance(termStrLen);
            continue;
          }
        }
      }

      // Everything up to the terminal string is the token's text; leave the
      // scanner on the '<' of the terminal string.
      aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos);
      aScanner.SetPosition(ltOffset);

      // We found </SCRIPT> or </STYLE>...permit flushing -> Ref: Bug 22485
      aFlushTokens=PR_TRUE;
      done = PR_TRUE;
    }
    else {
      // We end up here if:
      // a) when the buffer runs out of data.
      // b) when the terminal string is not found.
      if(!aScanner.IsIncremental()) {
        if(theAltTermStrPos != endPos && aConservativeConsume) {
          // If you're here it means..we hit the rock bottom and therefore switch to plan B.
          // Restart the search at the first bogus terminal string; with
          // theLastIteration set the comment check above is skipped.
          theCurrOffset = theAltTermStrPos;
          theLastIteration = PR_TRUE;
        }
        else if (!aConservativeConsume) {
          // Not conservative: take everything up to the end of the buffer.
          done = PR_TRUE; // Do this to fix Bug. 35456
          result = kFakeEndTag;
          aScanner.BindSubstring(mTextValue, theStartOffset, endPos);
          aScanner.SetPosition(endPos);
        }
        else {
          done = PR_TRUE;
          result = kFakeEndTag;
          // We need to bind our value to a non-empty string.
          // NOTE(review): the range bound here is empty (start == end) and
          // the scanner position is not moved, so nothing is consumed; the
          // comment above presumably means mTextValue must be bound to a
          // valid range -- confirm.
          aScanner.BindSubstring(mTextValue, theStartOffset, theStartOffset);
        }
      }
      else {
        // Incremental scanner: report kEOF so the caller can retry once more
        // data is available (presumably -- the caller is not visible here).
        result=kEOF;
      }
    }
  }

  return result;
}

/*
 *  Consume as much clear text from scanner as possible. Reducing entities.
 *  The scanner is left on the < of the perceived end tag.
 *
 *  @param   aDiscardFirstNewline -- if set, a newline ("\r", "\n" or "\r\n")
 *                                   at the very start of the content is
 *                                   dropped, except in view-source.
 *  @param   aConservativeConsume -- controls our handling of content with no
 *                                   terminating string.
 *  @param   aScanner -- controller of underlying input source
 *  @param   aEndTagName -- the terminal tag name.
 *  @param   aFlag -- dtd modes and such.
 *  @param   aFound -- set to PR_TRUE if we found the terminal tag, or if a
 *                     non-incremental scanner ran out of data (in which case
 *                     kFakeEndTag is returned); set to PR_FALSE on other
 *                     failures of the text-consuming step.
 *  @return  error result
 */
nsresult CTextToken::ConsumeParsedCharacterData(PRBool aDiscardFirstNewline,
                                                PRBool aConservativeConsume,
                                                nsScanner& aScanner,
                                                const nsAString& aEndTagName,
                                                PRInt32 aFlag,
                                                PRBool& aFound)
{
  // This function is fairly straightforward except if there is no terminating
  // string. If there is, we simply loop through all of the entities, reducing
  // them as necessary and skipping over non-terminal strings starting with <.
  // If there is *no* terminal string, then we examine aConservativeConsume.
  // If we want to be conservative, we backtrack to the first place in the
  // document that looked like the end of PCDATA (i.e., the first tag). This
  // is for compatibility and so we don't regress bug 42945. If we are not
  // conservative, then we consume everything, all the way up to the end of
  // the document.

  static const PRUnichar terminalChars[] = {
    PRUnichar('&'), PRUnichar('<'),
    PRUnichar(0)
  };
  static const nsReadEndCondition theEndCondition(terminalChars);

  // altEndPos/truncPos remember the first '<' seen: its scanner position and
  // the length of the content accumulated up to it. altEndPos == endPos means
  // "no '<' seen yet".
  nsScannerIterator currPos, endPos, altEndPos;
  PRUint32 truncPos = 0;
  aScanner.CurrentPosition(currPos);
  aScanner.EndReading(endPos);

  altEndPos = endPos;

  nsScannerSharedSubstring theContent;
  PRUnichar ch = 0;

  NS_NAMED_LITERAL_STRING(commentStart, "<!--");
  NS_NAMED_LITERAL_STRING(ltslash, "</");
  const nsString theTerminalString = ltslash + aEndTagName;
  PRUint32 termStrLen = theTerminalString.Length();
  PRUint32 commentStartLen = commentStart.Length();

  nsresult result = NS_OK;

  // Note that if we're already at the end of the document, the ConsumeUntil
  // will fail, and we'll do the right thing.
  do {
    // Read up to the next '&' or '<', reducing entities and keeping newlines.
    result = ConsumeUntil(theContent, mNewlineCount, aScanner,
                          theEndCondition, PR_TRUE, aFlag);

    if (aDiscardFirstNewline &&
        (NS_SUCCEEDED(result) || !aScanner.IsIncremental()) &&
        !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
      // Check if the very first character is a newline, and if so discard it.
      // Note that we don't want to discard it in view source!
      // Also note that this has to happen here (as opposed to before the
      // ConsumeUntil) because we have to expand any entities.
      // XXX It would be nice to be able to do this without calling
      // writable()!
      const nsSubstring &firstChunk = theContent.str();
      if (!firstChunk.IsEmpty()) {
        // Number of leading characters to drop (0, 1 or 2).
        PRUint32 where = 0;
        PRUnichar newline = firstChunk.First();

        if (newline == kCR || newline == kNewLine) {
          ++where;

          if (firstChunk.Length() > 1) {
            if (newline == kCR && firstChunk.CharAt(1) == kNewLine) {
              // Handle \r\n = 1 newline.
              ++where;
            }
            // Note: \n\r = 2 newlines.
          }
        }

        if (where != 0) {
          theContent.writable() = Substring(firstChunk, where);
        }
      }
    }
    // Only the first pass through the loop may discard a newline.
    aDiscardFirstNewline = PR_FALSE;

    if (NS_FAILED(result)) {
      if (kEOF == result && !aScanner.IsIncremental()) {
        aFound = PR_TRUE; // this is as good as it gets.
        result = kFakeEndTag;

        if (aConservativeConsume && altEndPos != endPos) {
          // We ran out of room looking for a </title>. Go back to the first
          // place that looked like a tag and use that as our stopping point.
          theContent.writable().Truncate(truncPos);
          aScanner.SetPosition(altEndPos, PR_FALSE, PR_TRUE);
        }
        // else we take everything we consumed.
        mTextValue.Rebind(theContent.str());
      }
      else {
        aFound = PR_FALSE;
      }

      return result;
    }

    // Remember where the terminating character sits before consuming it.
    aScanner.CurrentPosition(currPos);
    aScanner.GetChar(ch); // this character must be '&' or '<'

    if (ch == kLessThan && altEndPos == endPos) {
      // Keep this position in case we need it for later.
      altEndPos = currPos;
      truncPos = theContent.str().Length();
    }

    if (Distance(currPos, endPos) >= termStrLen) {
      nsScannerIterator start(currPos), end(currPos);
      end.advance(termStrLen);

      if (CaseInsensitiveFindInReadable(theTerminalString,start,end)) {
        // Unlike ConsumeCharacterData, a match that runs up to the very end
        // of the buffer is not accepted here: a '>' or whitespace character
        // must follow the tag name.
        if (end != endPos && (*end == '>'  || *end == ' '  ||
                              *end == '\t' || *end == '\n' ||
                              *end == '\r' || *end == '\b')) {
          aFound = PR_TRUE;
          mTextValue.Rebind(theContent.str());

          // Note: This SetPosition() is actually going backwards from the
          // scanner's mCurrentPosition (so we pass aReverse == PR_TRUE). This
          // is because we call GetChar() above after we get the current
          // position.
          aScanner.SetPosition(currPos, PR_FALSE, PR_TRUE);
          break;
        }
      }
    }
    // IE only consumes <!-- --> as comments in PCDATA.
    if (Distance(currPos, endPos) >= commentStartLen) {
      nsScannerIterator start(currPos), end(currPos);
      end.advance(commentStartLen);

      if (CaseInsensitiveFindInReadable(commentStart,start,end)) {
        CCommentToken consumer; // stack allocated.

        // CCommentToken expects us to be on the '-'
        aScanner.SetPosition(currPos.advance(2));

        // In quirks mode we consume too many things as comments, so pretend
        // that we're not by modifying aFlag.
        result = consumer.Consume(*currPos, aScanner,
	  (aFlag & ~NS_IPARSER_FLAG_QUIRKS_MODE) | NS_IPARSER_FLAG_STRICT_MODE);
        if (kEOF == result) {
          return kEOF; // this can only happen if we're really out of space.
        }
        else if (kNotAComment == result) {
          // Fall through and consume this as text.
          aScanner.CurrentPosition(currPos);
          aScanner.SetPosition(currPos.advance(1));
        }
        else {
          // A real comment: copy its source text into the content verbatim
          // and carry over the newlines it contained.
          consumer.AppendSourceTo(theContent.writable());
          mNewlineCount += consumer.GetNewlineCount();
          continue;
        }
      }
    }

    // This is the value returned if the loop condition below ends the loop;
    // otherwise it is overwritten by the next ConsumeUntil.
    result = kEOF;
    // We did not find the terminal string yet so
    // include the character that stopped consumption.
    theContent.writable().Append(ch);
  } while (currPos != endPos);

  return result;
}

/*
 *  Copy the token's text into aStr via CopyUnicodeTo.
 *
 *  @param   aStr -- destination string
 */
void CTextToken::CopyTo(nsAString& aStr)
{
  nsScannerIterator first, last;

  mTextValue.BeginReading(first);
  mTextValue.EndReading(last);

  CopyUnicodeTo(first, last, aStr);
}

/*
 *  Return the token's text as a string.
 *
 *  @return  mTextValue.AsString()
 */
const nsSubstring& CTextToken::GetStringValue(void)
{
  const nsSubstring& text = mTextValue.AsString();
  return text;
}

/*
 *  Bind the token's text to the range [aStart, aEnd) of the scanner's
 *  buffer.
 *
 *  @param   aScanner -- scanner that owns the range; must not be null since
 *                       it is dereferenced unconditionally
 *  @param   aStart -- start of the range
 *  @param   aEnd -- end of the range
 */
void CTextToken::Bind(nsScanner* aScanner, nsScannerIterator& aStart, nsScannerIterator& aEnd)
{
  (*aScanner).BindSubstring(mTextValue, aStart, aEnd);
}

void CTextToken::Bind(const nsAString& aStr)
{
  mTextValue.Rebind(aStr);
}

/*
 *  Tag-based constructor.
 *
 *  @update  vidur 11/12/98
 *  @param   aTag -- tag id forwarded to the CHTMLToken base class
 */
CCDATASectionToken::CCDATASectionToken(eHTMLTags aTag)
  : CHTMLToken(aTag)
{
}


/*
 *  String-based constructor. The token is created with eHTMLTag_unknown
 *  as its tag.
 *
 *  @update  vidur 11/12/98
 *  @param   aName -- string assigned to mTextValue
 */
CCDATASectionToken::CCDATASectionToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
  mTextValue.Assign(aName);
}

/*
 *  Identify this token's type.
 *
 *  @update  vidur 11/12/98
 *  @return  eToken_cdatasection
 */
PRInt32 CCDATASectionToken::GetTokenType(void)
{
  const PRInt32 tokenType = eToken_cdatasection;
  return tokenType;
}

/*
 *  Consume as much marked-section text from the scanner as possible,
 *  accumulating it in mTextValue.
 *
 *  Line endings are normalized while reading: "\r\n" and a lone "\r" are
 *  stored as "\n", "\r\r" is stored as "\n\n", and mNewlineCount is bumped
 *  once for every newline stored.
 *
 *  @update  rgess 12/15/99: had to handle case: "<![ ! IE 5]>", in addition to "<![..[..]]>".
 *  @param   aChar -- last char consumed from stream; reused here as scratch
 *                    storage for the characters peeked/read from the scanner
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag -- parser flags; only NS_IPARSER_FLAG_VIEW_SOURCE is
 *                    examined in this method
 *  @return  NS_OK on success. A kEOF from a non-incremental scanner is also
 *           turned into NS_OK (with mInError set); any other scanner result
 *           is returned unchanged.
 */
nsresult CCDATASectionToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
  // Reading stops at line-ending characters (so they can be normalized) and
  // at ']' (a possible section terminator).
  static const PRUnichar theTerminalsChars[] =
  { PRUnichar('\r'), PRUnichar('\n'), PRUnichar(']'), PRUnichar(0) };
  static const nsReadEndCondition theEndCondition(theTerminalsChars);
  nsresult  result=NS_OK;
  PRBool    done=PR_FALSE;

  while((NS_OK==result) && (!done)) {
    // Append everything up to the next terminal character to mTextValue.
    result=aScanner.ReadUntil(mTextValue,theEndCondition,PR_FALSE);
    if(NS_OK==result) {
      // Look at the character that stopped the read without consuming it.
      result=aScanner.Peek(aChar);
      if((kCR==aChar) && (NS_OK==result)) {
        result=aScanner.GetChar(aChar); //strip off the \r
        result=aScanner.Peek(aChar);    //then see what's next.
        if(NS_OK==result) {
          switch(aChar) {
            case kCR:
              // "\r\r": two line breaks.
              result=aScanner.GetChar(aChar); //strip off the \r
              mTextValue.AppendLiteral("\n\n");
              mNewlineCount += 2;
              break;
            case kNewLine:
               //which means we saw \r\n, which becomes \n
              result=aScanner.GetChar(aChar); //strip off the \n
                  //now fall through on purpose...
            default:
              // Either "\r\n" (the \n was just consumed) or a lone "\r"
              // followed by some other character (left in the scanner).
              mTextValue.AppendLiteral("\n");
              mNewlineCount++;
              break;
          } //switch
        } //if
      }
      else if (kNewLine == aChar) {
        // A bare "\n" is copied through as-is.
        result=aScanner.GetChar(aChar);
        mTextValue.Append(aChar);
        ++mNewlineCount;
      }
      else if (kRightSquareBracket == aChar) {
        // canClose records whether we saw the "]]" that a proper "]]>"
        // terminator requires.
        PRBool canClose = PR_FALSE;
        result=aScanner.GetChar(aChar); //strip off the ]
        mTextValue.Append(aChar);
        result=aScanner.Peek(aChar);    //then see what's next.
        if((NS_OK==result) && (kRightSquareBracket==aChar)) {
          result=aScanner.GetChar(aChar); //strip off the second ]
          mTextValue.Append(aChar);
          canClose = PR_TRUE;
        }
        // The goal here is to not lose data from the page when encountering
        // markup like: <![endif]-->.  This means that in normal parsing, we
        // allow ']' to end the marked section and just drop everything between
        // it and the '>'.  In view-source mode, we cannot drop things on the
        // floor like that.  In fact, to make view-source of XML with script in
        // CDATA sections at all bearable, we need to somewhat enforce the ']]>'
        // terminator for marked sections.  So make the tokenization somewhat
        // different when in view-source _and_ dealing with a CDATA section.
        PRBool inCDATA = (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) &&
          StringBeginsWith(mTextValue, NS_LITERAL_STRING("[CDATA["));
        if (inCDATA) {
          // Strict path: just look at the character after the bracket(s).
          result = aScanner.Peek(aChar);
        } else {
          // Lenient path: discard everything up to the next '>'.
          nsAutoString dummy; // skip any bad data
          result=aScanner.ReadUntil(dummy,kGreaterThan,PR_FALSE);
        }
        // The section ends here unless we are on the strict path and did not
        // see a complete "]]>"; in that case keep looping and treat the
        // bracket(s) as ordinary content.
        if (NS_OK==result &&
            (!inCDATA || (canClose && kGreaterThan == aChar))) {
          result=aScanner.GetChar(aChar); //strip off the >
          done=PR_TRUE;
        }
      }
      else done=PR_TRUE;
    }
  }

  if (kEOF == result && !aScanner.IsIncremental()) {
    // We ran out of space looking for the end of this CDATA section.
    // In order to not completely lose the entire section, treat everything
    // until the end of the document as part of the CDATA section and let
    // the DTD handle it.
    mInError = PR_TRUE;
    result = NS_OK;
  }

  return result;
}

// Returns a reference to mTextValue, the text accumulated for this section.
const nsSubstring& CCDATASectionToken::GetStringValue(void)
{
  const nsSubstring& sectionText = mTextValue;
  return sectionText;
}


/*
 *  Default constructor. The token is created with eHTMLTag_markupDecl as
 *  its tag; it takes no arguments.
 */
CMarkupDeclToken::CMarkupDeclToken()
  : CHTMLToken(eHTMLTag_markupDecl)
{
}


/*
 *  String-based constructor. The token is created with eHTMLTag_markupDecl
 *  as its tag.
 *
 *  @param   aName -- string that mTextValue is rebound to
 */
CMarkupDeclToken::CMarkupDeclToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_markupDecl)
{
  mTextValue.Rebind(aName);
}


/*
 *  Identify this token's type.
 *
 *  @return  eToken_markupDecl
 */
PRInt32 CMarkupDeclToken::GetTokenType(void)
{
  const PRInt32 tokenType = eToken_markupDecl;
  return tokenType;
}

/*
 *  Consume as much declaration from scanner as possible.
 *  Declaration is a markup declaration of ELEMENT, ATTLIST, ENTITY or
 *  NOTATION, which can span multiple lines and ends in >.
 *
 *  A '>' that appears between a pair of matching quote characters (' or ")
 *  does not end the declaration. Newlines are counted in mNewlineCount, and
 *  a "\r" that is not followed by "\n" is replaced in the scanner buffer by
 *  "\n". When the loop finishes, mTextValue is bound to the range running
 *  from the scanner position on entry to |end|.
 *
 *  @param   aChar -- last char consumed from stream; reused here as scratch
 *                    storage for the character peeked from the scanner
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag -- not used by this method
 *  @return  error result. On kEOF mInError is set, and the kEOF itself is
 *           replaced by NS_OK when the scanner is not incremental.
 */
nsresult CMarkupDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
  // Characters that need individual handling: line endings (for counting and
  // normalization), quotes (to track quoted strings) and the closing '>'.
  static const PRUnichar theTerminalsChars[] =
    { PRUnichar('\n'), PRUnichar('\r'), PRUnichar('\''), PRUnichar('"'),
      PRUnichar('>'),
      PRUnichar(0) };
  static const nsReadEndCondition theEndCondition(theTerminalsChars);
  nsresult  result=NS_OK;
  PRBool    done=PR_FALSE;
  // The quote character we are currently inside of, or 0 when not in quotes.
  PRUnichar quote=0;

  // |origin| is where the declaration text begins; |start|/|end| delimit the
  // piece read on each pass through the loop.
  nsScannerIterator origin, start, end;
  aScanner.CurrentPosition(origin);
  start = origin;

  while((NS_OK==result) && (!done)) {
    // Resume scanning right after the terminal character handled last time.
    aScanner.SetPosition(start);
    result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);
    if(NS_OK==result) {
      result=aScanner.Peek(aChar);

      if(NS_OK==result) {
        PRUnichar theNextChar=0;
        if ((kCR==aChar) || (kNewLine==aChar)) {
          result=aScanner.GetChar(aChar); //strip off the char
          result=aScanner.Peek(theNextChar);    //then see what's next.
        }
        switch(aChar) {
          case kCR:
            // result=aScanner.GetChar(aChar);
            if(kLF==theNextChar) {
              // If the "\r" is followed by a "\n", don't replace it and
              // let it be ignored by the layout system
              end.advance(2);
              result=aScanner.GetChar(theNextChar);
            }
            else {
              // If it standalone, replace the "\r" with a "\n" so that
              // it will be considered by the layout system
              aScanner.ReplaceCharacter(end, kLF);
              ++end;
            }
            ++mNewlineCount;
            break;
          case kLF:
            ++end;
            ++mNewlineCount;
            break;
          case '\'':
          case '"':
            // Step past the quote, then open a quoted string or close the
            // one we are in. A quote of the other kind inside a quoted
            // string is just content.
            ++end;
            if (quote) {
              if (quote == aChar) {
                quote = 0;
              }
            } else {
              quote = aChar;
            }
            break;
          case kGreaterThan:
            if (quote) {
              // A '>' inside a quoted string is content, not the terminator.
              ++end;
            } else {
              // |end| stays on the '>' so that it is left out of the range
              // bound to mTextValue below.
              start = end;
              ++start;  // Note that start is wrong after this, we just avoid temp var
              aScanner.SetPosition(start); // Skip the >
              done=PR_TRUE;
            }
            break;
          default:
            NS_ABORT_IF_FALSE(0,"should not happen, switch is missing cases?");
            break;
        } //switch
        start = end;
      }
      else done=PR_TRUE;
    } // if read until !ok
  } // while

  // Bind whatever was collected, even if we stopped because of an error.
  aScanner.BindSubstring(mTextValue, origin, end);

  if (kEOF == result) {
    mInError = PR_TRUE;
    if (!aScanner.IsIncremental()) {
      // Hide this EOF.
      result = NS_OK;
    }
  }

  return result;
}

// Returns the result of mTextValue.AsString().
const nsSubstring& CMarkupDeclToken::GetStringValue(void)
{
  const nsSubstring& declText = mTextValue.AsString();
  return declText;
}


/*
 *  Default constructor. The token is created with eHTMLTag_comment as its
 *  tag; it takes no arguments.
 *
 *  @update  gess 3/25/98
 */
CCommentToken::CCommentToken()
  : CHTMLToken(eHTMLTag_comment)
{
}


/*
 *  String-based constructor. The token is created with eHTMLTag_comment as
 *  its tag.
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string that mComment is rebound to
 */
CCommentToken::CCommentToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_comment)
{
  mComment.Rebind(aName);
}

// Appends mCommentDecl (the whole comment declaration as bound by the
// Consume*Comment methods) to anOutputString.
void CCommentToken::AppendSourceTo(nsAString& anOutputString)
{
  AppendUnicodeTo(this->mCommentDecl, anOutputString);
}

/*
 *  Scan forward from aCurrent looking for the '>' that closes a comment.
 *  The scan gives up when it reaches aEnd or as soon as it has stepped over
 *  two consecutive '-' characters.
 *
 *  @param   aCurrent -- position to start scanning from
 *  @param   aEnd -- position at which to stop scanning
 *  @param   aGt -- set to the position of the '>' when one is found;
 *                  left untouched otherwise
 *  @return  PR_TRUE if a '>' was found, PR_FALSE otherwise
 */
static PRBool IsCommentEnd(
  const nsScannerIterator& aCurrent,
  const nsScannerIterator& aEnd,
  nsScannerIterator& aGt)
{
  PRInt32 consecutiveDashes = 0;

  for (nsScannerIterator pos = aCurrent;
       (pos != aEnd) && (consecutiveDashes != 2);
       ++pos) {
    if (*pos == kGreaterThan) {
      aGt = pos;
      return PR_TRUE;
    }

    // Any character other than '-' resets the run of dashes.
    if (*pos == PRUnichar('-')) {
      consecutiveDashes += 1;
    } else {
      consecutiveDashes = 0;
    }
  }

  return PR_FALSE;
}

/*
 *  Consume a comment using the strict rules: the comment must open with
 *  "<!--" and is closed by a "--" that IsCommentEnd() accepts as being
 *  followed by '>'. The scanner is expected to sit just past the "<!".
 *
 *  On success mComment is bound to the comment data, mCommentDecl is bound
 *  to the range that starts two characters before the entry position (the
 *  "<!") and runs through the closing '>', and the scanner is positioned
 *  after that '>'.
 *
 *  @param   aScanner -- controller of underlying input source
 *  @return  NS_OK when a comment (or a "<!...>" declaration that does not
 *           start with "--") was consumed; kEOF when no terminator was found
 *           and the scanner is incremental; kNotAComment otherwise, after
 *           the scanner has been moved back to the "<!".
 */
nsresult CCommentToken::ConsumeStrictComment(nsScanner& aScanner)
{
  // <!--[... -- ... -- ...]*-->
  /*********************************************************
    NOTE: This algorithm does a fine job of handling comments
          when they're formatted per spec, but if they're not
          we don't handle them well.
   *********************************************************/
  nsScannerIterator end, current, gt, lt;
  aScanner.EndReading(end);
  aScanner.CurrentPosition(current);

  // beginData == end is used below as the marker for "no opening '--' seen".
  nsScannerIterator beginData = end;

  lt = current;
  lt.advance(-2); // <!

  // Regular comment must start with <!--
  if (current != end && *current == kMinus &&
      ++current != end && *current == kMinus &&
      ++current != end) {
    nsScannerIterator currentEnd = end;
    PRBool balancedComment = PR_FALSE;
    static NS_NAMED_LITERAL_STRING(dashes,"--");
    beginData = current;

    // FindInReadable narrows [current, currentEnd) to the match it finds, so
    // after each hit we step over the "--" and reset currentEnd.
    while (FindInReadable(dashes, current, currentEnd)) {
      current.advance(2);

      balancedComment = !balancedComment; // We need to match '--' with '--'

      if (balancedComment && IsCommentEnd(current, end, gt)) {
        // done
        // Back up to the start of the closing "--" so that it is left out of
        // the comment data.
        current.advance(-2);
        // Note: it's ok if beginData == current, (we'll copy an empty string)
        // and we need to bind mComment anyway.
        aScanner.BindSubstring(mComment, beginData, current);
        aScanner.BindSubstring(mCommentDecl, lt, ++gt);
        aScanner.SetPosition(gt);
        return NS_OK;
      } else {
        // Continue after the last '--'
        currentEnd = end;
      }
    }
  }

  // If beginData == end, we did not find opening '--'
  if (beginData == end) {
    // This might have been empty comment: <!>
    // Or it could have been something completely bogus like: <!This is foobar>
    // Handle both cases below
    // |current| may have been advanced by the check above, so re-read it.
    aScanner.CurrentPosition(current);
    beginData = current;
    if (FindCharInReadable('>', current, end)) {
      aScanner.BindSubstring(mComment, beginData, current);
      aScanner.BindSubstring(mCommentDecl, lt, ++current);
      aScanner.SetPosition(current);
      return NS_OK;
    }
  }

  if (aScanner.IsIncremental()) {
    // We got here because we saw the beginning of a comment,
    // but not yet the end, and we are still loading the page. In that
    // case the return value here will cause us to unwind,
    // wait for more content, and try again.
    // XXX For performance reasons we should cache where we were, and
    //     continue from there for next call
    return kEOF;
  }

  // There was no terminating string, parse this comment as text.
  aScanner.SetPosition(lt, PR_FALSE, PR_TRUE);
  return kNotAComment;
}

/*
 *  Consume a comment using the lenient "quirks" rules. The scanner is
 *  expected to sit just past the "<!".
 *
 *  Long form ("<!--" ...): the comment ends at the first '>' that is
 *  preceded by "--" or "--!", or that immediately follows the opening "--".
 *  If no such '>' exists and the scanner is not incremental, the comment
 *  ends at the first '>' seen, or at the end of the input if there was none.
 *
 *  Short form (anything else after "<!"): the comment ends at the first '>';
 *  a trailing "-", "--", "!", "-!" or "--!" before it is left out of the
 *  comment data.
 *
 *  On success mComment is bound to the comment data, mCommentDecl to the
 *  whole declaration starting at the "<!", and the scanner is positioned
 *  after the declaration.
 *
 *  @param   aScanner -- controller of underlying input source
 *  @return  NS_OK when a comment was consumed; kEOF when more data is needed
 *           and the scanner is incremental; kNotAComment when a short-form
 *           comment has no '>' and the scanner is not incremental (the
 *           scanner is then moved back to the "<!").
 */
nsresult CCommentToken::ConsumeQuirksComment(nsScanner& aScanner)
{
  // <![-[-]] ... [[-]-|--!]>
  /*********************************************************
    NOTE: This algorithm does a fine job of handling comments
          commonly used, but it doesn't really consume them
          per spec (But then, neither does IE or Nav).
   *********************************************************/
  nsScannerIterator end, current;
  aScanner.EndReading(end);
  aScanner.CurrentPosition(current);
  // beginData:         start of the comment data (moved past leading '-').
  // beginLastMinus:    position of the last '-' of the opening delimiter;
  //                    the backward scans below never step onto or past it.
  // bestAltCommentEnd: first '>' seen, used if no proper terminator exists.
  nsScannerIterator beginData = current,
                    beginLastMinus = end,
                    bestAltCommentEnd = end,
                    lt = current;
  lt.advance(-2); // <!

  // When we get here, we have always already consumed <!
  // Skip over possible leading minuses
  if (current != end && *current == kMinus) {
    beginLastMinus = current;
    ++current;
    ++beginData;
    if (current != end && *current == kMinus) { // <!--
      beginLastMinus = current;
      ++current;
      ++beginData;
      // Long form comment

      nsScannerIterator currentEnd = end, gt = end;

      // Find the end of the comment
      while (FindCharInReadable(kGreaterThan, current, currentEnd)) {
        gt = current;
        if (bestAltCommentEnd == end) {
          bestAltCommentEnd = gt;
        }
        // Walk backwards from the '>' to see which terminator precedes it.
        --current;
        PRBool goodComment = PR_FALSE;
        if (current != beginLastMinus && *current == kMinus) { // ->
          --current;
          if (current != beginLastMinus && *current == kMinus) { // -->
            goodComment = PR_TRUE;
            --current;
          }
        } else if (current != beginLastMinus && *current == '!') {
          --current;
          if (current != beginLastMinus && *current == kMinus) {
            --current;
            if (current != beginLastMinus && *current == kMinus) { // --!>
              --current;
              goodComment = PR_TRUE;
            }
          }
        } else if (current == beginLastMinus) {
          // The '>' directly follows the opening "--" (i.e. "<!-->").
          goodComment = PR_TRUE;
        }

        if (goodComment) {
          // done
          // |current| sits on the last character of the comment data, so
          // step forward once to get the end of the range to bind.
          aScanner.BindSubstring(mComment, beginData, ++current);
          aScanner.BindSubstring(mCommentDecl, lt, ++gt);
          aScanner.SetPosition(gt);
          return NS_OK;
        } else {
          // try again starting after the last '>'
          current = ++gt;
          currentEnd = end;
        }
      } //while

      if (aScanner.IsIncremental()) {
        // We got here because we saw the beginning of a comment,
        // but not yet the end, and we are still loading the page. In that
        // case the return value here will cause us to unwind,
        // wait for more content, and try again.
        // XXX For performance reasons we should cache where we were, and
        //     continue from there for next call
        return kEOF;
      }

      // If you're here, then we're in a special state.
      // The problem at hand is that we've hit the end of the document without finding the normal endcomment delimiter "-->".
      // In this case, the first thing we try is to see if we found an alternate endcomment delimiter ">".
      // If so, rewind just past that, and use everything up to that point as your comment.
      // If not, the document has no end comment and should be treated as one big comment.
      gt = bestAltCommentEnd;
      aScanner.BindSubstring(mComment, beginData, gt);
      if (gt != end) {
        // Include the '>' itself in the declaration.
        ++gt;
      }
      aScanner.BindSubstring(mCommentDecl, lt, gt);
      aScanner.SetPosition(gt);
      return NS_OK;
    }
  }

  // This could be short form of comment
  // Find the end of the comment
  current = beginData;
  if (FindCharInReadable(kGreaterThan, current, end)) {
    nsScannerIterator gt = current;
    // Walk backwards over a trailing "-", "--", "!", "-!" or "--!" so that
    // it is left out of the comment data, never moving before beginData.
    if (current != beginData) {
      --current;
      if (current != beginData && *current == kMinus) { // ->
        --current;
        if (current != beginData && *current == kMinus) { // -->
          --current;
        }
      } else if (current != beginData && *current == '!') { // !>
        --current;
        if (current != beginData && *current == kMinus) { // -!>
          --current;
          if (current != beginData && *current == kMinus) { // --!>
            --current;
          }
        }
      }
    }

    if (current != gt) {
      aScanner.BindSubstring(mComment, beginData, ++current);
    }
    else {
      // Bind mComment to an empty string (note that if current == gt,
      // then current == beginData). We reach this for <!>
      aScanner.BindSubstring(mComment, beginData, current);
    }
    aScanner.BindSubstring(mCommentDecl, lt, ++gt);
    aScanner.SetPosition(gt);
    return NS_OK;
  }

  if (!aScanner.IsIncremental()) {
    // This isn't a comment at all, go back to the < and consume as text.
    aScanner.SetPosition(lt, PR_FALSE, PR_TRUE);
    return kNotAComment;
  }

  // Wait for more data...
  return kEOF;
}

/*
 *  Consume the identifier portion of the comment.
 *  Note that we've already eaten the "<!" portion.
 *
 *  Dispatches to ConsumeStrictComment() when NS_IPARSER_FLAG_STRICT_MODE is
 *  set in aFlag and to ConsumeQuirksComment() otherwise. When the chosen
 *  routine reports success, mNewlineCount is set to the number of kNewLine
 *  characters in mCommentDecl, or to -1 in view-source mode.
 *
 *  @update  gess 16June2000
 *  @param   aChar -- last char consumed from stream (not used here)
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag -- parser flags (strict mode / view-source are examined)
 *  @return  the result of the comment routine that was called
 */
nsresult CCommentToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
  const PRBool useStrictRules = (aFlag & NS_IPARSER_FLAG_STRICT_MODE) != 0;

  //Enabling strict comment parsing for Bug 53011 and  2749 contradicts!!!!
  const nsresult rv = useStrictRules ? ConsumeStrictComment(aScanner)
                                     : ConsumeQuirksComment(aScanner);

  if (NS_SUCCEEDED(rv)) {
    if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
      mNewlineCount = -1;
    } else {
      mNewlineCount = mCommentDecl.CountChar(kNewLine);
    }
  }

  return rv;
}

// Returns the result of mComment.AsString().
const nsSubstring& CCommentToken::GetStringValue(void)
{
  const nsSubstring& commentText = mComment.AsString();
  return commentText;
}

/*
 *  Identify this token's type.
 *
 *  @update  gess 3/25/98
 *  @return  eToken_comment
 */
PRInt32 CCommentToken::GetTokenType(void)
{
  const PRInt32 tokenType = eToken_comment;
  return tokenType;
}

/*
 *  Default constructor. The token is created with eHTMLTag_newline as its
 *  tag; it takes no arguments.
 *
 *  @update  gess 3/25/98
 */
CNewlineToken::CNewlineToken()
  : CHTMLToken(eHTMLTag_newline)
{
}

/*
 *  Identify this token's type.
 *
 *  @update  gess 3/25/98
 *  @return  eToken_newline
 */
PRInt32 CNewlineToken::GetTokenType(void)
{
  const PRInt32 tokenType = eToken_newline;
  return tokenType;
}


static nsScannerSubstring* gNewlineStr;
void CNewlineToken::AllocNewline()
{
  gNewlineStr = new nsScannerSubstring(NS_LITERAL_STRING("\n"));
}

// Releases the string held in gNewlineStr (if any) and resets the pointer
// to nsnull.
void CNewlineToken::FreeNewline()
{
  if (!gNewlineStr) {
    return;
  }

  delete gNewlineStr;
  gNewlineStr = nsnull;
}

/**
 *  Retrieve the string value of a newline token, which is the string held
 *  in gNewlineStr. gNewlineStr is dereferenced without a null check.
 *
 *  @update gess 3/25/98
 *  @return the result of gNewlineStr->AsString()
 */
const nsSubstring& CNewlineToken::GetStringValue(void)
{
  const nsSubstring& newlineText = gNewlineStr->AsString();
  return newlineText;
}

/*
 *  Consume the second half of a two-character line break, if there is one.
 *
 *  aChar is the line-break character that has already been read. If the
 *  next character in the scanner pairs with it (kCR after kNewLine, or
 *  kNewLine after kCR) that character is consumed as well. mNewlineCount is
 *  always set to 1.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag -- not used by this method
 *  @return  error result; a kEOF from a non-incremental scanner is reported
 *           as NS_OK
 */
nsresult CNewlineToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {

/*******************************************************************

  Here's what the HTML spec says about newlines:

  "A line break is defined to be a carriage return (&#x000D;),
   a line feed (&#x000A;), or a carriage return/line feed pair.
   All line breaks constitute white space."

 *******************************************************************/

  PRUnichar nextChar;
  nsresult rv = aScanner.Peek(nextChar);

  if (NS_OK == rv) {
    // nextChar is only meaningful when the peek succeeded.
    const PRBool sawLFCR = (kNewLine == aChar) && (kCR == nextChar);
    const PRBool sawCRLF = (kCR == aChar) && (kNewLine == nextChar);

    if (sawLFCR || sawCRLF) {
      // Swallow the partner character so the pair counts as one line break.
      rv = aScanner.GetChar(nextChar);
    }
  }

  if (kEOF == rv && !aScanner.IsIncremental()) {
    // Make sure we don't lose information about this trailing newline.
    rv = NS_OK;
  }

  mNewlineCount = 1;
  return rv;
}

/*
 *  Default constructor. The token is created with eHTMLTag_unknown as its
 *  tag and mHasEqualWithoutValue cleared; it takes no arguments.
 *
 *  @update  gess 3/25/98
 */
CAttributeToken::CAttributeToken()
  : CHTMLToken(eHTMLTag_unknown)
{
#ifdef DEBUG
  mLastAttribute = PR_FALSE;
#endif
  mHasEqualWithoutValue = PR_FALSE;
}

/*
 *  String-based constructor. The token is created with eHTMLTag_unknown as
 *  its tag and mHasEqualWithoutValue cleared.
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string copied into mTextValue
 */
CAttributeToken::CAttributeToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
#ifdef DEBUG
  mLastAttribute = PR_FALSE;
#endif
  mHasEqualWithoutValue = PR_FALSE;
  mTextValue.writable().Assign(aName);
}

/*
 *  Construct a token from a key/value pair. The token is created with
 *  eHTMLTag_unknown as its tag and mHasEqualWithoutValue cleared.
 *
 *  @update  gess 3/25/98
 *  @param   aKey -- string that mTextKey is rebound to
 *  @param   aName -- string copied into mTextValue
 */
CAttributeToken::CAttributeToken(const nsAString& aKey, const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
#ifdef DEBUG
  mLastAttribute = PR_FALSE;
#endif
  mHasEqualWithoutValue = PR_FALSE;

  mTextValue.writable().Assign(aName);
  mTextKey.Rebind(aKey);
}

/*
 *  Identify this token's type.
 *
 *  @update  gess 3/25/98
 *  @return  eToken_attribute
 */
PRInt32 CAttributeToken::GetTokenType(void)
{
  const PRInt32 tokenType = eToken_attribute;
  return tokenType;
}

/*
 *  Removes non-alpha-non-digit characters from the end of a KEY
 *
 *  @update harishd 07/15/99
 *  @param
 *  @return
 */
void CAttributeToken::SanitizeKey() {
  PRInt32   length=mTextKey.Length();
  if(length > 0) {
    nsScannerIterator iter, begin, end;
    mTextKey.BeginReading(begin);
    mTextKey.EndReading(end);
    iter = end;

    // Look for the first legal character starting from
    // the end of the string
    do {
      --iter;
    } while (!nsCRT::IsAsciiAlpha(*iter) &&
             !nsCRT::IsAsciiDigit(*iter) &&
             (iter != begin));

    // If there were any illegal characters, just copy out the
    // legal part
    if (iter != --end) {
      nsAutoString buf;
      CopyUnicodeTo(begin, ++iter, buf);
      mTextKey.Rebind(buf);
    }
  }

  return;
}

// Returns the result of mTextValue.str(), i.e. the attribute's value text.
const nsSubstring& CAttributeToken::GetStringValue(void)
{
  const nsSubstring& valueText = mTextValue.str();
  return valueText;
}

/*
 *  Replace the contents of anOutputString with this attribute's source
 *  text: the string is truncated and then filled by AppendSourceTo().
 *
 *  @update  rickg  6June2000
 *  @param   anOutputString will receive the result
 */
void CAttributeToken::GetSource(nsString& anOutputString)
{
  anOutputString.Truncate();
  this->AppendSourceTo(anOutputString);
}

/*
 *  Append this attribute's source text to anOutputString: the key, then an
 *  "=" if there is a value or mHasEqualWithoutValue is set, then the value.
 *
 *  @update  rickg  6June2000
 *  @param   anOutputString -- string the result is appended to
 */
void CAttributeToken::AppendSourceTo(nsAString& anOutputString)
{
  AppendUnicodeTo(mTextKey, anOutputString);

  const PRBool needsEqualSign =
    mTextValue.str().Length() || mHasEqualWithoutValue;
  if (needsEqualSign) {
    anOutputString.AppendLiteral("=");
  }

  anOutputString.Append(mTextValue.str());
  // anOutputString.AppendLiteral(";");
}

/*
 *  This general purpose method is used when you want to
 *  consume a known quoted string.
 *
 *  The text up to the closing quote is appended to aString via
 *  ConsumeUntil(), and the closing quote itself is then read from the
 *  scanner. If that runs into kEOF on a non-incremental scanner, the text
 *  appended so far is discarded, the scanner is moved back to where it was
 *  on entry, and the value is re-read as if it were unquoted (stopping at
 *  kAttributeTerminalChars).
 *
 *  @param   aChar -- the opening quote character (kQuote or kApostrophe)
 *  @param   aString -- string the consumed text is appended to
 *  @param   aNewlineCount -- newline counter passed through to ConsumeUntil
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag - contains information such as |dtd mode|view mode|doctype|etc...
 *  @return  error result. When the fallback succeeds in view-source mode,
 *           NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL is returned.
 */
static
nsresult ConsumeQuotedString(PRUnichar aChar,
                             nsScannerSharedSubstring& aString,
                             PRInt32& aNewlineCount,
                             nsScanner& aScanner,
                             PRInt32 aFlag)
{
  NS_ASSERTION(aChar==kQuote || aChar==kApostrophe,"char is neither quote nor apostrophe");

  // Remember how long the string was on entry so that a failed attempt can
  // be rolled back if this turns out to be an unterminated string literal.
  const PRUint32 lengthOnEntry = aString.str().Length();

  static const PRUnichar theTerminalCharsQuote[] = {
    PRUnichar(kQuote), PRUnichar('&'), PRUnichar(kCR),
    PRUnichar(kNewLine), PRUnichar(0) };
  static const PRUnichar theTerminalCharsApostrophe[] = {
    PRUnichar(kApostrophe), PRUnichar('&'), PRUnichar(kCR),
    PRUnichar(kNewLine), PRUnichar(0) };
  static const nsReadEndCondition
    theTerminateConditionQuote(theTerminalCharsQuote);
  static const nsReadEndCondition
    theTerminateConditionApostrophe(theTerminalCharsApostrophe);

  // Stop at the same kind of quote that opened the string; anything that is
  // not an apostrophe is treated as a double quote.
  const nsReadEndCondition& stopCondition =
    (aChar==kApostrophe) ? theTerminateConditionApostrophe
                         : theTerminateConditionQuote;

  nsScannerIterator positionOnEntry;
  aScanner.CurrentPosition(positionOnEntry);

  nsresult rv = ConsumeUntil(aString,aNewlineCount,aScanner,
                             stopCondition,PR_TRUE,aFlag);

  if (NS_SUCCEEDED(rv)) {
    rv = aScanner.GetChar(aChar); // aChar should be " or '
  }

  // Ref: Bug 35806
  // A back up measure when disaster strikes...
  // Ex <table> <tr d="><td>hello</td></tr></table>
  const PRBool needsFallback =
    !aString.str().IsEmpty() && aString.str().Last()!=aChar &&
    !aScanner.IsIncremental() && rv==kEOF;
  if (!needsFallback) {
    return rv;
  }

  static const nsReadEndCondition
    theAttributeTerminator(kAttributeTerminalChars);
  aString.writable().Truncate(lengthOnEntry);
  aScanner.SetPosition(positionOnEntry, PR_FALSE, PR_TRUE);
  rv = ConsumeUntil(aString,aNewlineCount,aScanner,
                    theAttributeTerminator,PR_FALSE,aFlag);
  if (NS_SUCCEEDED(rv) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    // Remember that this string literal was unterminated.
    rv = NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL;
  }

  return rv;
}

/*
 * This method is meant to be used by view-source to consume invalid attributes.
 * For the purposes of this method, an invalid attribute is an attribute that
 * starts with either ' or ". We consume every consecutive occurrence of that
 * character and the whitespace that follows.
 *
 * @param aScanner -- the scanner we're reading our data from.
 * @param aChar -- the character we're skipping
 * @param aCurrent -- the current position that we're looking at; it is
 *                    advanced past the skipped characters and then receives
 *                    the end position reported by ReadWhitespace.
 * @param aNewlineCount -- a count of the newlines we've consumed.
 * @return the result of aScanner.ReadWhitespace().
 */
static
nsresult ConsumeInvalidAttribute(nsScanner& aScanner,
                                 PRUnichar aChar,
                                 nsScannerIterator& aCurrent,
                                 PRInt32& aNewlineCount) {
  NS_ASSERTION(aChar=='\'' || aChar=='"', "aChar must be a quote or apostrophe");

  nsScannerIterator inputEnd;
  aScanner.EndReading(inputEnd);

  // Step over the whole run of quote characters.
  for (; aCurrent != inputEnd && *aCurrent == aChar; ++aCurrent) {
  }

  aScanner.SetPosition(aCurrent);

  // The start of the whitespace is not needed by the caller; only its end
  // is reported back through aCurrent.
  nsScannerIterator whitespaceStart;
  const nsresult rv =
    aScanner.ReadWhitespace(whitespaceStart, aCurrent, aNewlineCount);
  return rv;
}

/*
 *  Consume the key and value portions of the attribute.
 *
 *  Outside view-source mode whitespace is skipped and mTextKey is bound to
 *  the bare attribute name. In view-source mode (NS_IPARSER_FLAG_VIEW_SOURCE
 *  set in aFlag) the whitespace around the name is made part of mTextKey,
 *  and the whitespace and quote characters around the value are appended to
 *  mTextValue.
 *
 *  mInError is set for malformed input handled here: an '=' with no value,
 *  an unterminated quoted value, or a stray quote where an '=' was expected.
 *
 *  @update  rickg 03.23.2000
 *  @param   aChar -- last char consumed from stream; reused here as scratch
 *                    storage for the characters peeked/read from the scanner
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag - contains information such as |dtd mode|view mode|doctype|etc...
 *  @return  NS_OK on success; NS_ERROR_HTMLPARSER_BADATTRIBUTE when the
 *           attribute carries no information (empty key, empty value and no
 *           newlines, or an empty key at the end of a non-incremental
 *           document); otherwise the error result of the failing scanner
 *           call. The result is the same in DEBUG and non-DEBUG builds.
 */
nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {

  nsresult result;

  // Bounds of the whitespace that precedes/follows the attribute name; only
  // filled in when in view-source mode.
  nsScannerIterator wsstart, wsend;

  if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
    result = aScanner.ReadWhitespace(wsstart, wsend, mNewlineCount);
    if (kEOF == result && wsstart != wsend) {
      // Do this here so if this is the final token in the document, we don't
      // lose the whitespace.
      aScanner.BindSubstring(mTextKey, wsstart, wsend);
    }
  }
  else {
    result = aScanner.SkipWhitespace(mNewlineCount);
  }

  if (NS_OK==result) {
    // Characters that end an attribute name.
    static const PRUnichar theTerminalsChars[] =
    { PRUnichar(' '), PRUnichar('"'),
      PRUnichar('='), PRUnichar('\n'),
      PRUnichar('\r'), PRUnichar('\t'),
      PRUnichar('>'), PRUnichar('<'),
      PRUnichar('\b'), PRUnichar('\''),
      PRUnichar(0) };
    static const nsReadEndCondition theEndCondition(theTerminalsChars);

    nsScannerIterator start, end;
    result=aScanner.ReadUntil(start,end,theEndCondition,PR_FALSE);

    if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
      aScanner.BindSubstring(mTextKey, start, end);
    }
    else if (kEOF == result && wsstart != end) {
      //Capture all of the text (from the beginning of the whitespace to the
      //end of the document).
      aScanner.BindSubstring(mTextKey, wsstart, end);
    }

    //now it's time to Consume the (optional) value...
    if (NS_OK==result) {
      if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
        // The key covers the leading whitespace, the name and the trailing
        // whitespace.
        result = aScanner.ReadWhitespace(start, wsend, mNewlineCount);
        aScanner.BindSubstring(mTextKey, wsstart, wsend);
      }
      else {
        result = aScanner.SkipWhitespace(mNewlineCount);
      }

      if (NS_OK==result) {
        result=aScanner.Peek(aChar);       //Skip ahead until you find an equal sign or a '>'...
        if (NS_OK==result) {
          if (kEqual==aChar){
            result=aScanner.GetChar(aChar);  //skip the equal sign...
            if (NS_OK==result) {
              if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
                PRBool haveCR;
                result = aScanner.ReadWhitespace(mTextValue, mNewlineCount,
                                                 haveCR);
              }
              else {
                result = aScanner.SkipWhitespace(mNewlineCount);
              }

              if (NS_OK==result) {
                result=aScanner.Peek(aChar);  //and grab the next char.
                if (NS_OK==result) {
                  if ((kQuote==aChar) || (kApostrophe==aChar)) {
                    // Quoted value. The Peek above succeeded, so the result
                    // of this GetChar is not checked.
                    aScanner.GetChar(aChar);
                    if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
                      mTextValue.writable().Append(aChar);
                    }

                    result=ConsumeQuotedString(aChar,mTextValue,mNewlineCount,
                                               aScanner,aFlag);
                    if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
                      // View-source keeps the closing quote as well.
                      mTextValue.writable().Append(aChar);
                    } else if (result == NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL) {
                      result = NS_OK;
                      mInError = PR_TRUE;
                    }
                    // According to spec. we ( who? ) should ignore linefeeds. But look,
                    // even the carriage return was getting stripped ( wonder why! ) -
                    // Ref. to bug 15204.  Okay, so the spec. told us to ignore linefeeds,
                    // bug then what about bug 47535 ? Should we preserve everything then?
                    // Well, let's make it so! Commenting out the next two lines..
                    /*if(!aRetain)
                      mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds...
                    */
                  }
                  else if (kGreaterThan==aChar){
                    // "name=>": an equal sign with nothing after it.
                    mHasEqualWithoutValue=PR_TRUE;
                    mInError=PR_TRUE;
                  }
                  else {
                    // Unquoted value: read up to the next attribute
                    // terminator.
                    static const nsReadEndCondition
                      theAttributeTerminator(kAttributeTerminalChars);
                    result=ConsumeUntil(mTextValue,
                                        mNewlineCount,
                                        aScanner,
                                        theAttributeTerminator,
                                        PR_FALSE,
                                        aFlag);
                  }
                }//if
                if (NS_OK==result) {
                  if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
                    PRBool haveCR;
                    result = aScanner.ReadWhitespace(mTextValue, mNewlineCount,
                                                     haveCR);
                  }
                  else {
                    result = aScanner.SkipWhitespace(mNewlineCount);
                  }
                }
              }//if
              else {
                //We saw an equal sign but ran out of room looking for a value.
                mHasEqualWithoutValue=PR_TRUE;
                mInError=PR_TRUE;
              }
            }//if
          }//if
          else {
            //This is where we have to handle fairly busted content.
            //If you're here, it means we saw an attribute name, but couldn't find
            //the following equal sign.  <tag NAME=....

            //Doing this right in all cases is <i>REALLY</i> ugly.
            //My best guess is to grab the next non-ws char. We know it's not '=',
            //so let's see what it is. If it's a '"', then assume we're reading
            //from the middle of the value. Try stripping the quote and continuing...
            if (kQuote==aChar || kApostrophe==aChar){
              mInError=PR_TRUE;

              if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
                result=aScanner.SkipOver(aChar); //strip quote.
                if (NS_SUCCEEDED(result)) {
                  result=aScanner.SkipWhitespace(mNewlineCount);
                }
              } else {
                //We want to collect whitespace here so that following
                //attributes can have the right line number (and for
                //parity with the non-view-source code above).
                result=ConsumeInvalidAttribute(aScanner,aChar,wsend,mNewlineCount);

                aScanner.BindSubstring(mTextKey, wsstart, wsend);
                aScanner.SetPosition(wsend);
              }
            }
          }
        }//if
      } //if
    }//if (consume optional value)

    if (NS_OK==result) {
      if (mTextValue.str().Length() == 0 && mTextKey.Length() == 0 &&
          mNewlineCount == 0) {
        //This attribute contains no useful information for us, so there is no
        //use in keeping it around. Attributes that are otherwise empty, but
        //have newlines in them are passed on the the DTD so it can get line
        //numbering right.
        return NS_ERROR_HTMLPARSER_BADATTRIBUTE;
      }

#ifdef DEBUG
      // Diagnostic only: note whether this attribute is followed by '>' or
      // by the end of the input. The status of this peek is kept in its own
      // variable so that |result| (and therefore the value returned to the
      // caller) is the same in DEBUG and non-DEBUG builds.
      nsresult peekResult = aScanner.Peek(aChar);
      mLastAttribute = (kGreaterThan == aChar || kEOF == peekResult);
#endif
    }
  }//if

  if (kEOF == result && !aScanner.IsIncremental()) {
    // This is our run-of-the mill "don't lose content at the end of a
    // document" with a slight twist: we don't want to bother returning an
    // empty attribute key, even if this is the end of the document.
    if (mTextKey.Length() == 0) {
      result = NS_ERROR_HTMLPARSER_BADATTRIBUTE;
    }
    else {
      result = NS_OK;
    }
  }

  return result;
}

/*
 *  Rebinds this attribute's key (mTextKey) to the given string.
 *
 *  @param   aKey -- string the key is rebound to
 */
void CAttributeToken::SetKey(const nsAString& aKey)
{
  mTextKey.Rebind(aKey);
}

/*
 *  Binds this attribute's key (mTextKey) to the part of the scanner's
 *  input delimited by the two iterators, using nsScanner::BindSubstring.
 *
 *  @param   aScanner -- scanner that performs the binding; dereferenced
 *                       without a null check, so it must not be null
 *  @param   aStart -- iterator marking the start of the key
 *  @param   aEnd -- iterator marking the end of the key
 */
void CAttributeToken::BindKey(nsScanner* aScanner,
                              nsScannerIterator& aStart,
                              nsScannerIterator& aEnd)
{
  aScanner->BindSubstring(mTextKey, aStart, aEnd);
}

/*
 *  Default constructor. Creates a whitespace token tagged with
 *  eHTMLTag_whitespace; takes no arguments and leaves the text untouched.
 *
 *  @update  gess 3/25/98
 */
CWhitespaceToken::CWhitespaceToken() : CHTMLToken(eHTMLTag_whitespace) {
}


/*
 *  Constructs a whitespace token (eHTMLTag_whitespace) whose text is
 *  initialized from the given string.
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string value assigned to the token's text
 */
CWhitespaceToken::CWhitespaceToken(const nsAString& aName) : CHTMLToken(eHTMLTag_whitespace) {
  mTextValue.writable().Assign(aName);
}

/*
 *  Reports the token type of this class.
 *
 *  @update  gess 3/25/98
 *  @return  always eToken_whitespace
 */
PRInt32 CWhitespaceToken::GetTokenType(void) {
  return eToken_whitespace;
}

/*
 *  This general purpose method is used when you want to
 *  consume an arbitrary sequence of whitespace.
 *
 *  The scanner is first backed up by one character so that the
 *  whitespace character that triggered this call becomes part of the
 *  token text, then the whitespace run is read into mTextValue and the
 *  newlines are counted in mNewlineCount.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag -- parser flags; not consulted by this method
 *  @return  error result; kEOF is reported as NS_OK when the scanner is
 *           not incremental, so trailing whitespace is kept
 */
nsresult CWhitespaceToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
  // If possible, we'd like to just be a dependent substring starting at
  // |aChar|.  The scanner has already been advanced, so we need to
  // back it up to facilitate this.

  nsScannerIterator start;
  aScanner.CurrentPosition(start);
  aScanner.SetPosition(--start, PR_FALSE, PR_TRUE);

  // Start from a known value: the EOF recovery below turns a failed read
  // into NS_OK, after which haveCR is tested. Without the initializer that
  // test could read an indeterminate value if ReadWhitespace returned
  // before writing its out-parameter.
  PRBool haveCR = PR_FALSE;

  nsresult result = aScanner.ReadWhitespace(mTextValue, mNewlineCount, haveCR);

  if (result == kEOF && !aScanner.IsIncremental()) {
    // Oops, we ran off the end, make sure we don't lose the trailing
    // whitespace!
    result = NS_OK;
  }

  if (NS_OK == result && haveCR) {
    // Carriage returns are removed from the stored text.
    mTextValue.writable().StripChar(kCR);
  }
  return result;
}

/*
 *  Gives read access to the whitespace text held by this token.
 *
 *  @return  reference to the token's text (mTextValue.str())
 */
const nsSubstring& CWhitespaceToken::GetStringValue(void)
{
  return mTextValue.str();
}

/*
 *  Default constructor. Creates an entity token tagged with
 *  eHTMLTag_entity; takes no arguments.
 *
 *  @update  gess 3/25/98
 */
CEntityToken::CEntityToken() : CHTMLToken(eHTMLTag_entity) {
}

/*
 *  Constructs an entity token (eHTMLTag_entity) whose text is
 *  initialized from the given string.
 *
 *  In VERBOSE_DEBUG builds the entity table is additionally checked with
 *  VerifyEntityTable() and a diagnostic is printed when the check fails.
 *
 *  @update  gess 3/25/98
 *  @param   aName -- string value assigned to the token's text
 */
CEntityToken::CEntityToken(const nsAString& aName) : CHTMLToken(eHTMLTag_entity) {
  mTextValue.Assign(aName);
#ifdef VERBOSE_DEBUG
  if(!VerifyEntityTable())  {
    cout<<"Entity table is invalid!" << endl;
  }
#endif
}


/*
 *  Consume an entity from the scanner into this token's text by
 *  delegating to ConsumeEntity.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- character ConsumeEntity dispatches on ('{', '#' or
 *                    the first character of an entity name)
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag -- parser flags; not consulted by this method
 *  @return  whatever ConsumeEntity returns
 */
nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
  // The entity text is accumulated directly in mTextValue.
  return ConsumeEntity(aChar, mTextValue, aScanner);
}

/*
 *  Reports the token type of this class.
 *
 *  @update  gess 3/25/98
 *  @return  always eToken_entity
 */
PRInt32 CEntityToken::GetTokenType(void) {
  return eToken_entity;
}

/*
 *  This general purpose method is used when you want to
 *  consume an entity &xxxx;. Keep in mind that entities
 *  are <i>not</i> reduced inline.
 *
 *  Three forms are recognized, selected by aChar:
 *    '{'    -- a script entity; characters are appended to aString until
 *              the left and right brace counts match.
 *    '#'    -- a numeric reference; aString is assigned "#" (or "#x"/"#X")
 *              and the number is then read in base 10 (or 16).
 *    other  -- a named entity, accepted only when the next character is an
 *              ASCII letter, '_' or ':'.
 *  If a ';' follows the entity it is consumed and appended to aString.
 *
 *  @update  gess 3/25/98
 *  @param   aChar -- character that selects the entity form (see above)
 *  @param   aString -- receives the entity text (without the '&')
 *  @param   aScanner -- controller of underlying input source
 *  @return  NS_HTMLTOKENS_NOT_AN_ENTITY when the input does not start a
 *           recognized entity; otherwise the result of the last scanner
 *           operation performed
 */
nsresult
CEntityToken::ConsumeEntity(PRUnichar aChar,
                            nsString& aString,
                            nsScanner& aScanner) {
  nsresult rv = NS_OK;

  if (kLeftBrace == aChar) {
    // Script entity: copy everything up to the balancing right brace.
    aScanner.GetChar(aChar); // Consume &

    PRInt32 openBraces  = 0;
    PRInt32 closeBraces = 0;

    for (;;) {
      rv = aScanner.GetChar(aChar);
      if (NS_FAILED(rv)) {
        return rv;
      }

      aString.Append(aChar);
      if (kRightBrace == aChar) {
        ++closeBraces;
      }
      else if (kLeftBrace == aChar) {
        ++openBraces;
      }

      if (openBraces == closeBraces) {
        break;
      }
    }
  }
  else if (kHashsign == aChar) {
    // Numeric reference: look past "&#" before consuming anything.
    PRUnichar lookahead = 0;
    rv = aScanner.Peek(lookahead, 2);

    if (NS_FAILED(rv)) {
      if (kEOF == rv && !aScanner.IsIncremental()) {
        // This is the last buffer and nothing follows "&#", so this
        // cannot be an entity. Bug 188278.
        return NS_HTMLTOKENS_NOT_AN_ENTITY;
      }
      return rv;
    }

    const PRBool isHex = (lookahead == 'x' || lookahead == 'X');
    if (!isHex && !nsCRT::IsAsciiDigit(lookahead)) {
      return NS_HTMLTOKENS_NOT_AN_ENTITY;
    }

    aScanner.GetChar(aChar);       // Consume &
    aScanner.GetChar(aChar);       // Consume #
    if (isHex) {
      aScanner.GetChar(lookahead); // Consume x
    }

    aString.Assign(aChar);
    if (isHex) {
      aString.Append(lookahead);
    }
    rv = aScanner.ReadNumber(aString, isHex ? 16 : 10);
  }
  else {
    // Named entity: the name must start with a letter, '_' or ':'.
    PRUnichar lookahead = 0;
    rv = aScanner.Peek(lookahead, 1);

    if (NS_FAILED(rv)) {
      return rv;
    }

    const PRBool startsName = nsCRT::IsAsciiAlpha(lookahead) ||
                              lookahead == '_' ||
                              lookahead == ':';
    if (!startsName) {
      return NS_HTMLTOKENS_NOT_AN_ENTITY;
    }

    aScanner.GetChar(aChar); // Consume &
    rv = aScanner.ReadEntityIdentifier(aString);
  }

  if (NS_FAILED(rv)) {
    return rv;
  }

  // See whether the entity is terminated by a semicolon.
  rv = aScanner.Peek(aChar);
  if (NS_FAILED(rv)) {
    return rv;
  }

  if (kSemicolon == aChar) {
    // consume semicolon that stopped the scan
    aString.Append(aChar);
    rv = aScanner.GetChar(aChar);
  }

  return rv;
}

#define PA_REMAP_128_TO_160_ILLEGAL_NCR 1

#if defined(PA_REMAP_128_TO_160_ILLEGAL_NCR)
/**
 * Map some illegal but commonly used numeric entities into their
 * appropriate unicode value.
 *
 * The table has one entry for each value from 0x80 to 0x9f and is
 * indexed by (value - 0x80). The entries appear to follow the
 * windows-1252 meaning of those code positions; positions without an
 * assigned character hold NOT_USED.
 */
#define NOT_USED 0xfffd

static const PRUint16 PA_HackTable[] = {
  0x20ac,    /* 0x80: EURO SIGN */
  NOT_USED,  /* 0x81 */
  0x201a,    /* 0x82: SINGLE LOW-9 QUOTATION MARK */
  0x0192,    /* 0x83: LATIN SMALL LETTER F WITH HOOK */
  0x201e,    /* 0x84: DOUBLE LOW-9 QUOTATION MARK */
  0x2026,    /* 0x85: HORIZONTAL ELLIPSIS */
  0x2020,    /* 0x86: DAGGER */
  0x2021,    /* 0x87: DOUBLE DAGGER */
  0x02c6,    /* 0x88: MODIFIER LETTER CIRCUMFLEX ACCENT */
  0x2030,    /* 0x89: PER MILLE SIGN */
  0x0160,    /* 0x8a: LATIN CAPITAL LETTER S WITH CARON */
  0x2039,    /* 0x8b: SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
  0x0152,    /* 0x8c: LATIN CAPITAL LIGATURE OE */
  NOT_USED,  /* 0x8d */
  0x017d,    /* 0x8e: LATIN CAPITAL LETTER Z WITH CARON */
  NOT_USED,  /* 0x8f */
  NOT_USED,  /* 0x90 */
  0x2018,    /* 0x91: LEFT SINGLE QUOTATION MARK */
  0x2019,    /* 0x92: RIGHT SINGLE QUOTATION MARK */
  0x201c,    /* 0x93: LEFT DOUBLE QUOTATION MARK */
  0x201d,    /* 0x94: RIGHT DOUBLE QUOTATION MARK */
  0x2022,    /* 0x95: BULLET */
  0x2013,    /* 0x96: EN DASH */
  0x2014,    /* 0x97: EM DASH */
  0x02dc,    /* 0x98: SMALL TILDE */
  0x2122,    /* 0x99: TRADE MARK SIGN */
  0x0161,    /* 0x9a: LATIN SMALL LETTER S WITH CARON */
  0x203a,    /* 0x9b: SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
  0x0153,    /* 0x9c: LATIN SMALL LIGATURE OE */
  NOT_USED,  /* 0x9d */
  0x017e,    /* 0x9e: LATIN SMALL LETTER Z WITH CARON */
  0x0178     /* 0x9f: LATIN CAPITAL LETTER Y WITH DIAERESIS */
};
#endif /* PA_REMAP_128_TO_160_ILLEGAL_NCR */

/**
 * Appends the character named by a numeric character reference to
 * aString, encoded as UTF-16.
 *
 * Values from 0x80 to 0x9f are first remapped through PA_HackTable (when
 * PA_REMAP_128_TO_160_ILLEGAL_NCR is defined). Values that cannot be
 * represented as well-formed UTF-16 -- negative numbers, surrogate code
 * points (0xD800..0xDFFF) and anything above 0x10FFFF -- are replaced by
 * U+FFFD REPLACEMENT CHARACTER instead of being emitted as a lone or
 * bogus surrogate.
 *
 * @param aString   string the character is appended to
 * @param aNCRValue numeric value of the character reference
 */
static void AppendNCR(nsSubstring& aString, PRInt32 aNCRValue)
{
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
  /* for some illegal, but popular usage */
  if ((aNCRValue >= 0x0080) && (aNCRValue <= 0x009f)) {
    aNCRValue = PA_HackTable[aNCRValue - 0x0080];
  }
#endif

  /* Reject anything that is not a Unicode scalar value. Without this
     check an out-of-range value would be fed to the surrogate macros and
     a surrogate code point would be appended unpaired. */
  if (aNCRValue < 0 || aNCRValue > 0x10FFFF ||
      (aNCRValue >= 0xD800 && aNCRValue <= 0xDFFF)) {
    aNCRValue = 0xFFFD;
  }

  if (IS_IN_BMP(aNCRValue)) {
    aString.Append(PRUnichar(aNCRValue));
  }
  else {
    /* Supplementary-plane characters need a surrogate pair. */
    aString.Append(PRUnichar(H_SURROGATE(aNCRValue)));
    aString.Append(PRUnichar(L_SURROGATE(aNCRValue)));
  }
}

/*
 *  This method converts this entity into its underlying
 *  unicode equivalent.
 *
 *  Numeric references (text starting with '#') are parsed with
 *  ToInteger and, when parsing succeeds, appended to aString via
 *  AppendNCR. Named entities are looked up with
 *  nsHTMLEntities::EntityToUnicode and, when the lookup yields a value
 *  greater than -1, assigned to aString (replacing its content).
 *
 *  @update  gess 3/25/98
 *  @param   aString will hold the resulting string value
 *  @return  0 when the entity text has fewer than two characters;
 *           otherwise the parsed number or the lookup result
 */
PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
  // Too short to be either "#N" or a named entity.
  if (mTextValue.Length() <= 1) {
    return 0;
  }

  if (kHashsign != mTextValue.CharAt(0)) {
    const PRInt32 codePoint = nsHTMLEntities::EntityToUnicode(mTextValue);
    if (codePoint > -1) {
      //we found a named entity...
      aString.Assign(PRUnichar(codePoint));
    }
    return codePoint;
  }

  PRInt32 parseError = 0;
  const PRInt32 ncrValue = mTextValue.ToInteger(&parseError, kAutoDetect);
  if (0 == parseError) {
    AppendNCR(aString, ncrValue);
  }
  return ncrValue;
}


/*
 *  Gives read access to the entity text held by this token.
 *
 *  @return  reference to mTextValue
 */
const nsSubstring& CEntityToken::GetStringValue(void)
{
  return mTextValue;
}

/*
 *  Appends the source form of this entity -- an ampersand followed by
 *  the stored entity text -- to the given string. Existing content of
 *  the string is kept. No ';' is added here.
 *
 *  @update  gess 3/25/98
 *  @param   anOutputString will receive the result
 *  @return  nada
 */
void CEntityToken::GetSource(nsString& anOutputString){
  anOutputString.AppendLiteral("&");
  anOutputString.Append(mTextValue);
}

/*
 *  Appends the source form of this entity -- an ampersand followed by
 *  the stored entity text -- to the given string. No ';' is added here.
 *
 *  @update  harishd 03/23/00
 *  @param   anOutputString -- string the result is appended to
 *  @return  nada
 */
void CEntityToken::AppendSourceTo(nsAString& anOutputString){
  anOutputString.AppendLiteral("&");
  anOutputString.Append(mTextValue);
}

/**
 * Looks up the name string for a tag id.
 *
 * @update	gess4/25/98
 * @param   aTag -- tag id, interpreted as an nsHTMLTag
 * @return  the string from nsHTMLTags::GetStringValue when there is one;
 *          otherwise sUserdefined for ids at or above
 *          eHTMLTag_userdefined, and 0 for everything else
 */
const PRUnichar* GetTagName(PRInt32 aTag)
{
  const PRUnichar *name = nsHTMLTags::GetStringValue(static_cast<nsHTMLTag>(aTag));

  // Tags the table has no string for fall back to the shared
  // "user defined" name when they are in the user-defined range.
  if (!name && aTag >= eHTMLTag_userdefined) {
    name = sUserdefined;
  }

  return name;
}


/**
 *  Default constructor. Creates a processing-instruction token tagged
 *  with eHTMLTag_instruction.
 *
 *  @update  gess 9/23/98
 */
CInstructionToken::CInstructionToken() : CHTMLToken(eHTMLTag_instruction) {
}

/**
 *  Constructs a processing-instruction token whose text is initialized
 *  from the given string.
 *
 *  NOTE(review): this overload tags the token with eHTMLTag_unknown,
 *  whereas the default constructor uses eHTMLTag_instruction -- confirm
 *  that the difference is intentional.
 *
 *  @update  gess 9/23/98
 *  @param   aString -- string value assigned to the token's text
 */
CInstructionToken::CInstructionToken(const nsAString& aString) : CHTMLToken(eHTMLTag_unknown) {
  mTextValue.Assign(aString);
}

/**
 *  Consumes a processing instruction. The token text starts out as "<?"
 *  and input is appended up to and including the terminating '>'.
 *
 *  In HTML the first '>' ends the instruction. When aFlag contains
 *  NS_IPARSER_FLAG_XML, a '>' only ends it if the text read so far ends
 *  with '?'; otherwise the '>' is kept as content and reading continues.
 *
 *  @update  gess 9/23/98
 *  @param   aChar -- scratch character; overwritten with each '>' read
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag -- parser flags; only NS_IPARSER_FLAG_XML is examined
 *  @return  error result; kEOF on a non-incremental scanner is reported
 *           as NS_OK with mInError set
 */
nsresult CInstructionToken::Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aFlag){
  mTextValue.AssignLiteral("<?");

  PRBool reachedEnd = PR_FALSE;
  nsresult rv = NS_OK;

  do {
    //Note, this call does *not* consume the >.
    rv = aScanner.ReadUntil(mTextValue, kGreaterThan, PR_FALSE);
    if (NS_FAILED(rv)) {
      break;
    }

    //In HTML, PIs end with a '>', in XML, they end with a '?>'. Cover both
    //cases here.
    if (!(aFlag & NS_IPARSER_FLAG_XML) || kQuestionMark == mTextValue.Last()) {
      //This really is the end of the PI.
      reachedEnd = PR_TRUE;
    }

    //The '>' becomes part of the text whether or not it ended the PI.
    aScanner.GetChar(aChar);
    mTextValue.Append(aChar);
  } while (NS_OK == rv && !reachedEnd);

  if (kEOF == rv && !aScanner.IsIncremental()) {
    //Hide the EOF result because there is no more text coming.
    mInError = PR_TRUE;
    rv = NS_OK;
  }

  return rv;
}

/**
 *  Reports the token type of this class.
 *
 *  @update  gess 9/23/98
 *  @return  always eToken_instruction
 */
PRInt32 CInstructionToken::GetTokenType(void){
  return eToken_instruction;
}

/**
 *  Gives read access to the instruction text held by this token.
 *
 *  @return  reference to mTextValue
 */
const nsSubstring& CInstructionToken::GetStringValue(void)
{
  return mTextValue;
}

// Doctype decl token

/**
 *  Constructs a doctype declaration token with the given tag id.
 *
 *  @param   aTag -- tag id passed on to the CHTMLToken base
 */
CDoctypeDeclToken::CDoctypeDeclToken(eHTMLTags aTag)
  : CHTMLToken(aTag) {
}

/**
 *  Constructs a doctype declaration token with the given tag id and
 *  initial text.
 *
 *  @param   aString -- string used to initialize mTextValue
 *  @param   aTag -- tag id passed on to the CHTMLToken base
 */
CDoctypeDeclToken::CDoctypeDeclToken(const nsAString& aString,eHTMLTags aTag)
  : CHTMLToken(aTag), mTextValue(aString) {
}

/**
 *  This method consumes a doctype element.
 *
 *  The declaration is scanned up to the first '>' or '<' rather than
 *  being parsed in detail, because quotes inside it are unreliable. A
 *  terminating '>' is included in the token; a '<' is left in the input
 *  and the token is flagged as being in error. The stored text starts
 *  two characters before the scan position so that it includes the
 *  leading "<!".
 *
 *  @update  gess 9/23/98
 *  @param   aChar -- not used by this method
 *  @param   aScanner -- controller of underlying input source
 *  @param   aFlag -- not used by this method
 *  @return  error result; a failed scan on a non-incremental scanner is
 *           reported as NS_OK with mInError set
 */
nsresult CDoctypeDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {

  static const PRUnichar terminalChars[] =
  { PRUnichar('>'), PRUnichar('<'),
    PRUnichar(0)
  };
  static const nsReadEndCondition theEndCondition(terminalChars);

  nsScannerIterator declStart, declEnd;

  aScanner.CurrentPosition(declStart);
  aScanner.EndReading(declEnd);

  nsresult rv = aScanner.ReadUntil(declStart, declEnd, theEndCondition, PR_FALSE);

  if (NS_FAILED(rv)) {
    if (aScanner.IsIncremental()) {
      // More data may still arrive; report the failure unchanged.
      return rv;
    }

    // We have reached the document end but haven't
    // found either a '<' or a '>'. Therefore use
    // whatever we have.
    mInError = PR_TRUE;
    rv = NS_OK;
  }
  else {
    PRUnichar terminator;
    aScanner.Peek(terminator);
    if (kGreaterThan == terminator) {
      // Include '>' but not '<' since '<'
      // could belong to another tag.
      aScanner.GetChar(terminator);
      declEnd.advance(1);
    } else {
      NS_ASSERTION(kLessThan == terminator,
                   "Make sure this doctype decl. is really in error.");
      mInError = PR_TRUE;
    }
  }

  declStart.advance(-2); // Make sure to consume <!
  CopyUnicodeTo(declStart, declEnd, mTextValue);

  return rv;
}

/**
 *  Reports the token type of this class.
 *
 *  @return  always eToken_doctypeDecl
 */
PRInt32 CDoctypeDeclToken::GetTokenType(void) {
  return eToken_doctypeDecl;
}

/**
 *  Gives read access to the declaration text held by this token.
 *
 *  @return  reference to mTextValue
 */
const nsSubstring& CDoctypeDeclToken::GetStringValue(void)
{
  return mTextValue;
}

/**
 *  Replaces the declaration text held by this token.
 *
 *  @param   aStr -- string assigned to mTextValue
 */
void CDoctypeDeclToken::SetStringValue(const nsAString& aStr)
{
  mTextValue.Assign(aStr);
}