Mozilla/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 sw=2 et tw=78: */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Blake Kaplan <mrbkap@gmail.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include <ctype.h>
#include <time.h>
#include <stdio.h>
#include "nsScanner.h"
#include "nsToken.h"
#include "nsIAtom.h"
#include "nsHTMLTokens.h"
#include "prtypes.h"
#include "nsDebug.h"
#include "nsHTMLTags.h"
#include "nsHTMLEntities.h"
#include "nsCRT.h"
#include "nsReadableUtils.h"
#include "nsUnicharUtils.h"
#include "nsScanner.h"


static const PRUnichar sUserdefined[] = {'u', 's', 'e', 'r', 'd', 'e', 'f',
                                         'i', 'n', 'e', 'd', 0};

static const PRUnichar kAttributeTerminalChars[] = {
  PRUnichar('&'), PRUnichar('\b'), PRUnichar('\t'),
  PRUnichar('\n'), PRUnichar('\r'), PRUnichar(' '),
  PRUnichar('>'),
  PRUnichar(0)
};

static void AppendNCR(nsSubstring& aString, PRInt32 aNCRValue);
/**
 * Consumes an entity from aScanner and expands it into aString.
 *
 * @param   aString The target string to append the entity to.
 * @param   aScanner Controller of underlying input source
 * @param   aIECompatible Controls whether we respect entities with values >
 *                        255 and no terminating semicolon.
 * @param   aFlag If NS_IPARSER_FLAG_VIEW_SOURCE do not reduce entities...
 * @return  error result
 */
static nsresult
ConsumeEntity(nsScannerSharedSubstring& aString,
              nsScanner& aScanner,
              PRBool aIECompatible,
              PRInt32 aFlag)
{
  nsresult result = NS_OK;

  PRUnichar ch;
  result = aScanner.Peek(ch, 1);

  if (NS_SUCCEEDED(result)) {
    PRUnichar amp = 0;
    PRInt32 theNCRValue = 0;
    nsAutoString entity;

    if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
      result = CEntityToken::ConsumeEntity(ch, entity, aScanner);
      if (NS_SUCCEEDED(result)) {
        theNCRValue = nsHTMLEntities::EntityToUnicode(entity);
        PRUnichar theTermChar = entity.Last();
        // If an entity value is greater than 255 then:
        // Nav 4.x does not treat it as an entity,
        // IE treats it as an entity if terminated with a semicolon.
        // Resembling IE!!

        nsSubstring &writable = aString.writable();
        if (theNCRValue < 0 ||
            (aIECompatible && theNCRValue > 255 && theTermChar != ';')) {
          // Looks like we're not dealing with an entity
          writable.Append(kAmpersand);
          writable.Append(entity);
        } else {
          // A valid entity so reduce it.
          writable.Append(PRUnichar(theNCRValue));
        }
      }
    } else if (ch == kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
      result = CEntityToken::ConsumeEntity(ch, entity, aScanner);
      if (NS_SUCCEEDED(result)) {
        nsSubstring &writable = aString.writable();
        if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
          // Looked like an entity but it's not
          aScanner.GetChar(amp);
          writable.Append(amp);
          result = NS_OK;
        } else {
          PRInt32 err;
          theNCRValue = entity.ToInteger(&err, kAutoDetect);
          AppendNCR(writable, theNCRValue);
        }
      }
    } else {
      // What we thought as entity is not really an entity...
      aScanner.GetChar(amp);
      aString.writable().Append(amp);
    }
  }

  return result;
}

/*
 *  This general purpose method is used when you want to
 *  consume attributed text value.
 *  Note: It also reduces entities.
 *
 *  @param   aNewlineCount -- the newline count to increment when hitting newlines
 *  @param   aScanner -- controller of underlying input source
 *  @param   aTerminalChars -- characters that stop consuming attribute.
 *  @param   aAllowNewlines -- whether to allow newlines in the value.
 *                             XXX it would be nice to roll this info into
 *                             aTerminalChars somehow....
 *  @param   aIECompatEntities IE treats entities with values > 255 as
 *                             entities only if they're terminated with a
 *                             semicolon. This is true to follow that behavior
 *                             and false to treat all values as entities.
 *  @param   aFlag - contains information such as |dtd mode|view mode|doctype|etc...
 *  @return  error result
 */
static nsresult
ConsumeUntil(nsScannerSharedSubstring& aString,
             PRInt32& aNewlineCount,
             nsScanner& aScanner,
             const nsReadEndCondition& aEndCondition,
             PRBool aAllowNewlines,
             PRBool aIECompatEntities,
             PRInt32 aFlag)
{
  nsresult result = NS_OK;
  PRBool   done = PR_FALSE;

  do {
    result = aScanner.ReadUntil(aString, aEndCondition, PR_FALSE);
    if (NS_SUCCEEDED(result)) {
      PRUnichar ch;
      aScanner.Peek(ch);
      if (ch == kAmpersand) {
        result = ConsumeEntity(aString, aScanner, aIECompatEntities, aFlag);
      } else if (ch == kCR && aAllowNewlines) {
        aScanner.GetChar(ch);
        result = aScanner.Peek(ch);
        if (NS_SUCCEEDED(result)) {
          nsSubstring &writable = aString.writable();
          if (ch == kNewLine) {
            writable.AppendLiteral("\r\n");
            aScanner.GetChar(ch);
          } else {
            writable.Append(PRUnichar('\r'));
          }
          ++aNewlineCount;
        }
      } else if (ch == kNewLine && aAllowNewlines) {
        aScanner.GetChar(ch);
        aString.writable().Append(PRUnichar('\n'));
        ++aNewlineCount;
      } else {
        done = PR_TRUE;
      }
    }
  } while (NS_SUCCEEDED(result) && !done);

  return result;
}

/**************************************************************
  And now for the token classes...
 **************************************************************/

/**
 * Constructor from tag id
 */
CHTMLToken::CHTMLToken(eHTMLTags aTag)
  : CToken(aTag)
{
}


CHTMLToken::~CHTMLToken()
{
}

/*
 * Constructor from tag id
 */
CStartToken::CStartToken(eHTMLTags aTag)
  : CHTMLToken(aTag)
{
  mEmpty = PR_FALSE;
  mContainerInfo = eFormUnknown;
#ifdef DEBUG
  mAttributed = PR_FALSE;
#endif
}

CStartToken::CStartToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
  mEmpty = PR_FALSE;
  mContainerInfo = eFormUnknown;
  mTextValue.Assign(aName);
#ifdef DEBUG
  mAttributed = PR_FALSE;
#endif
}

CStartToken::CStartToken(const nsAString& aName, eHTMLTags aTag)
  : CHTMLToken(aTag)
{
  mEmpty = PR_FALSE;
  mContainerInfo = eFormUnknown;
  mTextValue.Assign(aName);
#ifdef DEBUG
  mAttributed = PR_FALSE;
#endif
}

/*
 * This method returns the typeid (the tag type) for this token.
 */
PRInt32
CStartToken::GetTypeID()
{
  if (eHTMLTag_unknown == mTypeID) {
    mTypeID = nsHTMLTags::LookupTag(mTextValue);
  }
  return mTypeID;
}

PRInt32
CStartToken::GetTokenType()
{
  return eToken_start;
}

void
CStartToken::SetEmpty(PRBool aValue)
{
  mEmpty = aValue;
}

PRBool
CStartToken::IsEmpty()
{
  return mEmpty;
}

/*
 * Consume the identifier portion of the start tag
 */
nsresult
CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  // If you're here, we've already Consumed the < char, and are
  // ready to Consume the rest of the open tag identifier.
  // Stop consuming as soon as you see a space or a '>'.
  // NOTE: We don't Consume the tag attributes here, nor do we eat the ">"

  nsresult result = NS_OK;
  nsScannerSharedSubstring tagIdent;

  if (aFlag & NS_IPARSER_FLAG_HTML) {
    result = aScanner.ReadTagIdentifier(tagIdent);
    mTypeID = (PRInt32)nsHTMLTags::LookupTag(tagIdent.str());
    // Save the original tag string if this is user-defined or if we
    // are viewing source
    if (eHTMLTag_userdefined == mTypeID ||
        (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
      mTextValue = tagIdent.str();
    }
  } else {
    result = aScanner.ReadTagIdentifier(tagIdent);
    mTextValue = tagIdent.str();
    mTypeID = nsHTMLTags::LookupTag(mTextValue);
  }

  if (NS_SUCCEEDED(result) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    result = aScanner.SkipWhitespace(mNewlineCount);
  }

  if (kEOF == result && !aScanner.IsIncremental()) {
    // Take what we can get.
    result = NS_OK;
  }

  return result;
}

const nsSubstring&
CStartToken::GetStringValue()
{
  if (eHTMLTag_unknown < mTypeID && mTypeID < eHTMLTag_text) {
    if (!mTextValue.Length()) {
      mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
    }
  }
  return mTextValue;
}

void
CStartToken::GetSource(nsString& anOutputString)
{
  anOutputString.Truncate();
  AppendSourceTo(anOutputString);
}

void
CStartToken::AppendSourceTo(nsAString& anOutputString)
{
  anOutputString.Append(PRUnichar('<'));
  /*
   * Watch out for Bug 15204
   */
  if (!mTextValue.IsEmpty()) {
    anOutputString.Append(mTextValue);
  } else {
    anOutputString.Append(GetTagName(mTypeID));
  }

  anOutputString.Append(PRUnichar('>'));
}

CEndToken::CEndToken(eHTMLTags aTag)
  : CHTMLToken(aTag)
{
}

CEndToken::CEndToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
  mTextValue.Assign(aName);
}

CEndToken::CEndToken(const nsAString& aName, eHTMLTags aTag)
  : CHTMLToken(aTag)
{
  mTextValue.Assign(aName);
}

nsresult
CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  nsresult result = NS_OK;
  nsScannerSharedSubstring tagIdent;

  if (aFlag & NS_IPARSER_FLAG_HTML) {
    result = aScanner.ReadTagIdentifier(tagIdent);

    mTypeID = (PRInt32)nsHTMLTags::LookupTag(tagIdent.str());
    // Save the original tag string if this is user-defined or if we
    // are viewing source
    if (eHTMLTag_userdefined == mTypeID ||
        (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
      mTextValue = tagIdent.str();
    }
  } else {
    result = aScanner.ReadTagIdentifier(tagIdent);
    mTextValue = tagIdent.str();
    mTypeID = nsHTMLTags::LookupTag(mTextValue);
  }

  if (NS_SUCCEEDED(result) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    result = aScanner.SkipWhitespace(mNewlineCount);
  }

  if (kEOF == result && !aScanner.IsIncremental()) {
    // Take what we can get.
    result = NS_OK;
  }

  return result;
}


/*
 *  Asks the token to determine the <i>HTMLTag type</i> of
 *  the token. This turns around and looks up the tag name
 *  in the tag dictionary.
 */
PRInt32
CEndToken::GetTypeID()
{
  if (eHTMLTag_unknown == mTypeID) {
    mTypeID = nsHTMLTags::LookupTag(mTextValue);
    switch (mTypeID) {
      case eHTMLTag_dir:
      case eHTMLTag_menu:
        mTypeID = eHTMLTag_ul;
        break;

      default:
        break;
    }
  }

  return mTypeID;
}

PRInt32
CEndToken::GetTokenType()
{
  return eToken_end;
}

const nsSubstring&
CEndToken::GetStringValue()
{
  if (eHTMLTag_unknown < mTypeID && mTypeID < eHTMLTag_text) {
    if (!mTextValue.Length()) {
      mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
    }
  }
  return mTextValue;
}

void
CEndToken::GetSource(nsString& anOutputString)
{
  anOutputString.Truncate();
  AppendSourceTo(anOutputString);
}

void
CEndToken::AppendSourceTo(nsAString& anOutputString)
{
  anOutputString.AppendLiteral("</");
  if (!mTextValue.IsEmpty()) {
    anOutputString.Append(mTextValue);
  } else {
    anOutputString.Append(GetTagName(mTypeID));
  }

  anOutputString.Append(PRUnichar('>'));
}

CTextToken::CTextToken()
  : CHTMLToken(eHTMLTag_text)
{
}

CTextToken::CTextToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_text)
{
  mTextValue.Rebind(aName);
}

PRInt32
CTextToken::GetTokenType()
{
  return eToken_text;
}

PRInt32
CTextToken::GetTextLength()
{
  return mTextValue.Length();
}

nsresult
CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  static const PRUnichar theTerminalsChars[] =
    { PRUnichar('\n'), PRUnichar('\r'), PRUnichar('&'), PRUnichar('<'),
      PRUnichar(0) };
  static const nsReadEndCondition theEndCondition(theTerminalsChars);
  nsresult  result = NS_OK;
  PRBool    done = PR_FALSE;
  nsScannerIterator origin, start, end;

  // Start scanning after the first character, because we know it to
  // be part of this text token (we wouldn't have come here if it weren't)
  aScanner.CurrentPosition(origin);
  start = origin;
  aScanner.EndReading(end);

  NS_ASSERTION(start != end, "Calling CTextToken::Consume when already at the "
                             "end of a document is a bad idea.");

  aScanner.SetPosition(++start);

  while (NS_OK == result && !done) {
    result = aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);
    if (NS_OK == result) {
      result = aScanner.Peek(aChar);

      if (NS_OK == result && (kCR == aChar || kNewLine == aChar)) {
        switch (aChar) {
          case kCR:
          {
            // It's a carriage return. See if this is part of a CR-LF pair (in
            // which case we need to treat it as one newline). If we're at the
            // edge of a packet, then leave the CR on the scanner, since it
            // could still be part of a CR-LF pair. Otherwise, it isn't.
            PRUnichar theNextChar;
            result = aScanner.Peek(theNextChar, 1);

            if (result == kEOF && aScanner.IsIncremental()) {
              break;
            }

            if (NS_SUCCEEDED(result)) {
              // Actually get the carriage return.
              aScanner.GetChar(aChar);
            }

            if (kLF == theNextChar) {
              // If the "\r" is followed by a "\n", don't replace it and let
              // it be ignored by the layout system.
              end.advance(2);
              aScanner.GetChar(theNextChar);
            } else {
              // If it is standalone, replace the "\r" with a "\n" so that it
              // will be considered by the layout system.
              aScanner.ReplaceCharacter(end, kLF);
              ++end;
            }
            ++mNewlineCount;
            break;
          }
          case kLF:
            aScanner.GetChar(aChar);
            ++end;
            ++mNewlineCount;
            break;
        }
      } else {
        done = PR_TRUE;
      }
    }
  }

  // Note: This function is only called from nsHTMLTokenizer::ConsumeText. If
  // we return an error result from the final buffer, then it is responsible
  // for turning it into an NS_OK result.
  aScanner.BindSubstring(mTextValue, origin, end);

  return result;
}

/*
 *  Consume as much clear text from scanner as possible.
 *  The scanner is left on the < of the perceived end tag.
 *
 *  @param   aChar -- last char consumed from stream
 *  @param   aConservativeConsume -- controls our handling of content with no
 *                                   terminating string.
 *  @param   aIgnoreComments -- whether or not we should take comments into
 *                              account in looking for the end tag.
 *  @param   aScanner -- controller of underlying input source
 *  @param   aEndTagname -- the terminal tag name.
 *  @param   aFlag -- dtd modes and such.
 *  @param   aFlushTokens -- PR_TRUE if we found the terminal tag.
 *  @return  error result
 */
nsresult
CTextToken::ConsumeCharacterData(PRBool aIgnoreComments,
                                 nsScanner& aScanner,
                                 const nsAString& aEndTagName,
                                 PRInt32 aFlag,
                                 PRBool& aFlushTokens)
{
  nsresult result = NS_OK;
  nsScannerIterator theStartOffset, theCurrOffset, theTermStrPos,
                    theStartCommentPos, theAltTermStrPos, endPos;
  PRBool        done = PR_FALSE;
  PRBool        theLastIteration = PR_FALSE;

  aScanner.CurrentPosition(theStartOffset);
  theCurrOffset = theStartOffset;
  aScanner.EndReading(endPos);
  theTermStrPos = theStartCommentPos = theAltTermStrPos = endPos;

  // ALGORITHM: *** The performance is based on correctness of the document ***
  // 1. Look for a '<' character.  This could be
  //      a) Start of a comment (<!--),
  //      b) Start of the terminal string, or
  //      c) a start of a tag.
  //    We are interested in a) and b). c) is ignored because in CDATA we
  //    don't care for tags.
  //    NOTE: Technically speaking in CDATA we should ignore the comments too!
  //    But for compatibility we don't.
  // 2. Having the offset, for '<', search for the terminal string from there
  //    on and record its offset.
  // 3. From the same '<' offset also search for start of a comment '<!--'.
  //    If found search for end comment '-->' between the terminal string and
  //    '<!--'.  If you did not find the end comment, then we have a malformed
  //    document, i.e., this section has a prematured terminal string Ex.
  //    <SCRIPT><!-- document.write('</SCRIPT>') //--> </SCRIPT>. But record
  //    terminal string's offset if this is the first premature terminal
  //    string, and update the current offset to the terminal string
  //    (prematured) offset and goto step 1.
  // 4. Amen...If you found a terminal string and '-->'. Otherwise goto step 1.
  // 5. If the end of the document is reached and if we still don't have the
  //    condition in step 4. then assume that the prematured terminal string
  //    is the actual terminal string and goto step 1. This will be our last
  //    iteration. If there is no premature terminal string and we're being
  //    conservative in our consumption (aConservativeConsume), then don't
  //    consume anything from the scanner. Otherwise, we consume all the way
  //    until the end.

  NS_NAMED_LITERAL_STRING(ltslash, "</");
  const nsString theTerminalString = ltslash + aEndTagName;

  PRUint32 termStrLen = theTerminalString.Length();
  while (result == NS_OK && !done) {
    PRBool found = PR_FALSE;
    nsScannerIterator gtOffset, ltOffset = theCurrOffset;
    while (FindCharInReadable(PRUnichar(kLessThan), ltOffset, endPos) &&
           ((PRUint32)ltOffset.size_forward() >= termStrLen ||
            Distance(ltOffset, endPos) >= termStrLen)) {
      // Make a copy of the (presumed) end tag and
      // do a case-insensitive comparison

      nsScannerIterator start(ltOffset), end(ltOffset);
      end.advance(termStrLen);

      if (CaseInsensitiveFindInReadable(theTerminalString, start, end) &&
          (end == endPos || (*end == '>'  || *end == ' '  ||
                             *end == '\t' || *end == '\n' ||
                             *end == '\r' || *end == '\b'))) {
        gtOffset = end;
        // Note that aIgnoreComments is only not set for <script>. We don't
        // want to execute scripts that aren't in the form of: <script\s.*>
        if ((end == endPos && aIgnoreComments) ||
            FindCharInReadable(PRUnichar(kGreaterThan), gtOffset, endPos)) {
          found = PR_TRUE;
          theTermStrPos = start;
        }
        break;
      }
      ltOffset.advance(1);
    }

    if (found && theTermStrPos != endPos) {
      if (!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) &&
          !theLastIteration && !aIgnoreComments) {
        nsScannerIterator endComment(ltOffset);
        endComment.advance(5);

        if ((theStartCommentPos == endPos) &&
            FindInReadable(NS_LITERAL_STRING("<!--"), theCurrOffset,
                           endComment)) {
          theStartCommentPos = theCurrOffset;
        }

        if (theStartCommentPos != endPos) {
          // Search for --> between <!-- and </TERMINALSTRING>.
          theCurrOffset = theStartCommentPos;
          nsScannerIterator terminal(theTermStrPos);
          if (!RFindInReadable(NS_LITERAL_STRING("-->"),
                               theCurrOffset, terminal)) {
            // If you're here it means that we have a bogus terminal string.
            // Even though it is bogus, the position of the terminal string
            // could be helpful in case we hit the rock bottom.
            if (theAltTermStrPos == endPos) {
              // But we only want to remember the first bogus terminal string.
              theAltTermStrPos = theTermStrPos;
            }

            // We did not find '-->' so keep searching for terminal string.
            theCurrOffset = theTermStrPos;
            theCurrOffset.advance(termStrLen);
            continue;
          }
        }
      }

      aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos);
      aScanner.SetPosition(ltOffset);

      // We found </SCRIPT> or </STYLE>...permit flushing -> Ref: Bug 22485
      aFlushTokens = PR_TRUE;
      done = PR_TRUE;
    } else {
      // We end up here if:
      // a) when the buffer runs out ot data.
      // b) when the terminal string is not found.
      if (!aScanner.IsIncremental()) {
        if (theAltTermStrPos != endPos) {
          // If you're here it means that we hit the rock bottom and therefore
          // switch to plan B, since we have an alternative terminating string.
          theCurrOffset = theAltTermStrPos;
          theLastIteration = PR_TRUE;
        } else {
          // Oops, We fell all the way down to the end of the document.
          done = PR_TRUE; // Do this to fix Bug. 35456
          result = kFakeEndTag;
          aScanner.BindSubstring(mTextValue, theStartOffset, endPos);
          aScanner.SetPosition(endPos);
        }
      } else {
        result = kEOF;
      }
    }
  }

  if (result == NS_OK) {
    mNewlineCount = mTextValue.CountChar(kNewLine);
  }

  return result;
}

/*
 *  Consume as much clear text from scanner as possible. Reducing entities.
 *  The scanner is left on the < of the perceived end tag.
 *
 *  @param   aChar -- last char consumed from stream
 *  @param   aConservativeConsume -- controls our handling of content with no
 *                                   terminating string.
 *  @param   aScanner -- controller of underlying input source
 *  @param   aEndTagname -- the terminal tag name.
 *  @param   aFlag -- dtd modes and such.
 *  @param   aFlushTokens -- PR_TRUE if we found the terminal tag.
 *  @return  error result
 */
nsresult
CTextToken::ConsumeParsedCharacterData(PRBool aDiscardFirstNewline,
                                       PRBool aConservativeConsume,
                                       nsScanner& aScanner,
                                       const nsAString& aEndTagName,
                                       PRInt32 aFlag,
                                       PRBool& aFound)
{
  // This function is fairly straightforward except if there is no terminating
  // string. If there is, we simply loop through all of the entities, reducing
  // them as necessary and skipping over non-terminal strings starting with <.
  // If there is *no* terminal string, then we examine aConservativeConsume.
  // If we want to be conservative, we backtrack to the first place in the
  // document that looked like the end of PCDATA (i.e., the first tag). This
  // is for compatibility and so we don't regress bug 42945. If we are not
  // conservative, then we consume everything, all the way up to the end of
  // the document.

  static const PRUnichar terminalChars[] = {
    PRUnichar('\r'), PRUnichar('\n'), PRUnichar('&'), PRUnichar('<'),
    PRUnichar(0)
  };
  static const nsReadEndCondition theEndCondition(terminalChars);

  nsScannerIterator currPos, endPos, altEndPos;
  PRUint32 truncPos = 0;
  aScanner.CurrentPosition(currPos);
  aScanner.EndReading(endPos);

  altEndPos = endPos;

  nsScannerSharedSubstring theContent;
  PRUnichar ch = 0;

  NS_NAMED_LITERAL_STRING(commentStart, "<!--");
  NS_NAMED_LITERAL_STRING(ltslash, "</");
  const nsString theTerminalString = ltslash + aEndTagName;
  PRUint32 termStrLen = theTerminalString.Length();
  PRUint32 commentStartLen = commentStart.Length();

  nsresult result = NS_OK;

  // Note that if we're already at the end of the document, the ConsumeUntil
  // will fail, and we'll do the right thing.
  do {
    result = ConsumeUntil(theContent, mNewlineCount, aScanner,
                          theEndCondition, PR_TRUE, PR_FALSE, aFlag);

    if (aDiscardFirstNewline &&
        (NS_SUCCEEDED(result) || !aScanner.IsIncremental()) &&
        !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
      // Check if the very first character is a newline, and if so discard it.
      // Note that we don't want to discard it in view source!
      // Also note that this has to happen here (as opposed to before the
      // ConsumeUntil) because we have to expand any entities.
      // XXX It would be nice to be able to do this without calling
      // writable()!
      const nsSubstring &firstChunk = theContent.str();
      if (!firstChunk.IsEmpty()) {
        PRUint32 where = 0;
        PRUnichar newline = firstChunk.First();

        if (newline == kCR || newline == kNewLine) {
          ++where;

          if (firstChunk.Length() > 1) {
            if (newline == kCR && firstChunk.CharAt(1) == kNewLine) {
              // Handle \r\n = 1 newline.
              ++where;
            }
            // Note: \n\r = 2 newlines.
          }
        }

        if (where != 0) {
          theContent.writable() = Substring(firstChunk, where);
        }
      }
    }
    aDiscardFirstNewline = PR_FALSE;

    if (NS_FAILED(result)) {
      if (kEOF == result && !aScanner.IsIncremental()) {
        aFound = PR_TRUE; // this is as good as it gets.
        result = kFakeEndTag;

        if (aConservativeConsume && altEndPos != endPos) {
          // We ran out of room looking for a </title>. Go back to the first
          // place that looked like a tag and use that as our stopping point.
          theContent.writable().Truncate(truncPos);
          aScanner.SetPosition(altEndPos, PR_FALSE, PR_TRUE);
        }
        // else we take everything we consumed.
        mTextValue.Rebind(theContent.str());
      } else {
        aFound = PR_FALSE;
      }

      return result;
    }

    aScanner.CurrentPosition(currPos);
    aScanner.GetChar(ch); // this character must be '&' or '<'

    if (ch == kLessThan && altEndPos == endPos) {
      // Keep this position in case we need it for later.
      altEndPos = currPos;
      truncPos = theContent.str().Length();
    }

    if (Distance(currPos, endPos) >= termStrLen) {
      nsScannerIterator start(currPos), end(currPos);
      end.advance(termStrLen);

      if (CaseInsensitiveFindInReadable(theTerminalString, start, end)) {
        if (end != endPos && (*end == '>'  || *end == ' '  ||
                              *end == '\t' || *end == '\n' ||
                              *end == '\r' || *end == '\b')) {
          aFound = PR_TRUE;
          mTextValue.Rebind(theContent.str());

          // Note: This SetPosition() is actually going backwards from the
          // scanner's mCurrentPosition (so we pass aReverse == PR_TRUE). This
          // is because we call GetChar() above after we get the current
          // position.
          aScanner.SetPosition(currPos, PR_FALSE, PR_TRUE);
          break;
        }
      }
    }
    // IE only consumes <!-- --> as comments in PCDATA.
    if (Distance(currPos, endPos) >= commentStartLen) {
      nsScannerIterator start(currPos), end(currPos);
      end.advance(commentStartLen);

      if (CaseInsensitiveFindInReadable(commentStart, start, end)) {
        CCommentToken consumer; // stack allocated.

        // CCommentToken expects us to be on the '-'
        aScanner.SetPosition(currPos.advance(2));

        // In quirks mode we consume too many things as comments, so pretend
        // that we're not by modifying aFlag.
        result = consumer.Consume(*currPos, aScanner,
                                  (aFlag & ~NS_IPARSER_FLAG_QUIRKS_MODE) |
                                   NS_IPARSER_FLAG_STRICT_MODE);
        if (kEOF == result) {
          // This can only happen if we're really out of space.
          return kEOF;
        } else if (kNotAComment == result) {
          // Fall through and consume this as text.
          aScanner.CurrentPosition(currPos);
          aScanner.SetPosition(currPos.advance(1));
        } else {
          consumer.AppendSourceTo(theContent.writable());
          mNewlineCount += consumer.GetNewlineCount();
          continue;
        }
      }
    }

    result = kEOF;
    // We did not find the terminal string yet so
    // include the character that stopped consumption.
    theContent.writable().Append(ch);
  } while (currPos != endPos);

  return result;
}

void
CTextToken::CopyTo(nsAString& aStr)
{
  nsScannerIterator start, end;
  mTextValue.BeginReading(start);
  mTextValue.EndReading(end);
  CopyUnicodeTo(start, end, aStr);
}

const nsSubstring& CTextToken::GetStringValue()
{
  return mTextValue.AsString();
}

void
CTextToken::Bind(nsScanner* aScanner, nsScannerIterator& aStart,
                 nsScannerIterator& aEnd)
{
  aScanner->BindSubstring(mTextValue, aStart, aEnd);
}

void
CTextToken::Bind(const nsAString& aStr)
{
  mTextValue.Rebind(aStr);
}

CCDATASectionToken::CCDATASectionToken(eHTMLTags aTag)
  : CHTMLToken(aTag)
{
}

CCDATASectionToken::CCDATASectionToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
  mTextValue.Assign(aName);
}

PRInt32
CCDATASectionToken::GetTokenType()
{
  return eToken_cdatasection;
}

/*
 *  Consume as much marked test from scanner as possible.
 *  Note: This has to handle case: "<![ ! IE 5]>", in addition to "<![..[..]]>"
 *
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult
CCDATASectionToken::Consume(PRUnichar aChar, nsScanner& aScanner,
                            PRInt32 aFlag)
{
  static const PRUnichar theTerminalsChars[] =
  { PRUnichar('\r'), PRUnichar('\n'), PRUnichar(']'), PRUnichar(0) };
  static const nsReadEndCondition theEndCondition(theTerminalsChars);
  nsresult  result = NS_OK;
  PRBool    done = PR_FALSE;

  while (NS_OK == result && !done) {
    result = aScanner.ReadUntil(mTextValue, theEndCondition, PR_FALSE);
    if (NS_OK == result) {
      result = aScanner.Peek(aChar);
      if (kCR == aChar && NS_OK == result) {
        result = aScanner.GetChar(aChar); // Strip off the \r
        result = aScanner.Peek(aChar);    // Then see what's next.
        if (NS_OK == result) {
          switch(aChar) {
            case kCR:
              result = aScanner.GetChar(aChar); // Strip off the \r
              mTextValue.AppendLiteral("\n\n");
              mNewlineCount += 2;
              break;

            case kNewLine:
              // Which means we saw \r\n, which becomes \n
              result = aScanner.GetChar(aChar); // Strip off the \n

              // Fall through...
            default:
              mTextValue.AppendLiteral("\n");
              mNewlineCount++;
              break;
          }
        }
      } else if (kNewLine == aChar) {
        result = aScanner.GetChar(aChar);
        mTextValue.Append(aChar);
        ++mNewlineCount;
      } else if (kRightSquareBracket == aChar) {
        PRBool canClose = PR_FALSE;
        result = aScanner.GetChar(aChar); // Strip off the ]
        mTextValue.Append(aChar);
        result = aScanner.Peek(aChar);    // Then see what's next.
        if (NS_OK == result && kRightSquareBracket == aChar) {
          result = aScanner.GetChar(aChar); // Strip off the second ]
          mTextValue.Append(aChar);
          canClose = PR_TRUE;
        }

        // The goal here is to not lose data from the page when encountering
        // markup like: <![endif]-->.  This means that in normal parsing, we
        // allow ']' to end the marked section and just drop everything between
        // it an the '>'.  In view-source mode, we cannot drop things on the
        // floor like that.  In fact, to make view-source of XML with script in
        // CDATA sections at all bearable, we need to somewhat enforce the ']]>'
        // terminator for marked sections.  So make the tokenization somewhat
        // different when in view-source _and_ dealing with a CDATA section.
        // XXX We should remember this StringBeginsWith test.
        PRBool inCDATA = (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) &&
          StringBeginsWith(mTextValue, NS_LITERAL_STRING("[CDATA["));
        if (inCDATA) {
          // Consume all right square brackets to catch cases such as:
          // <![CDATA[foo]]]>
          while (true) {
            result = aScanner.Peek(aChar);
            if (result != NS_OK || aChar != kRightSquareBracket) {
              break;
            }

            mTextValue.Append(aChar);
            aScanner.GetChar(aChar);
          }
        } else {
          nsAutoString dummy; // Skip any bad data
          result = aScanner.ReadUntil(dummy, kGreaterThan, PR_FALSE);
        }
        if (NS_OK == result &&
            (!inCDATA || (canClose && kGreaterThan == aChar))) {
          result = aScanner.GetChar(aChar); // Strip off the >
          done = PR_TRUE;
        }
      } else {
        done = PR_TRUE;
      }
    }
  }

  if (kEOF == result && !aScanner.IsIncremental()) {
    // We ran out of space looking for the end of this CDATA section.
    // In order to not completely lose the entire section, treat everything
    // until the end of the document as part of the CDATA section and let
    // the DTD handle it.
    mInError = PR_TRUE;
    result = NS_OK;
  }

  return result;
}

const nsSubstring&
CCDATASectionToken::GetStringValue()
{
  return mTextValue;
}


CMarkupDeclToken::CMarkupDeclToken()
  : CHTMLToken(eHTMLTag_markupDecl)
{
}

CMarkupDeclToken::CMarkupDeclToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_markupDecl)
{
  mTextValue.Rebind(aName);
}

PRInt32
CMarkupDeclToken::GetTokenType()
{
  return eToken_markupDecl;
}

/*
 *  Consume as much declaration from scanner as possible.
 *  Declaration is a markup declaration of ELEMENT, ATTLIST, ENTITY or
 *  NOTATION, which can span multiple lines and ends in >.
 *
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult
CMarkupDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,
                          PRInt32 aFlag)
{
  static const PRUnichar theTerminalsChars[] =
    { PRUnichar('\n'), PRUnichar('\r'), PRUnichar('\''), PRUnichar('"'),
      PRUnichar('>'),
      PRUnichar(0) };
  static const nsReadEndCondition theEndCondition(theTerminalsChars);
  nsresult  result = NS_OK;
  PRBool    done = PR_FALSE;
  PRUnichar quote = 0;

  nsScannerIterator origin, start, end;
  aScanner.CurrentPosition(origin);
  start = origin;

  while (NS_OK == result && !done) {
    aScanner.SetPosition(start);
    result = aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);
    if (NS_OK == result) {
      result = aScanner.Peek(aChar);

      if (NS_OK == result) {
        PRUnichar theNextChar = 0;
        if (kCR == aChar || kNewLine == aChar) {
          result = aScanner.GetChar(aChar); // Strip off the char
          result = aScanner.Peek(theNextChar); // Then see what's next.
        }
        switch(aChar) {
          case kCR:
            // result = aScanner.GetChar(aChar);
            if (kLF == theNextChar) {
              // If the "\r" is followed by a "\n", don't replace it and
              // let it be ignored by the layout system
              end.advance(2);
              result = aScanner.GetChar(theNextChar);
            } else {
              // If it standalone, replace the "\r" with a "\n" so that
              // it will be considered by the layout system
              aScanner.ReplaceCharacter(end, kLF);
              ++end;
            }
            ++mNewlineCount;
            break;
          case kLF:
            ++end;
            ++mNewlineCount;
            break;
          case '\'':
          case '"':
            ++end;
            if (quote) {
              if (quote == aChar) {
                quote = 0;
              }
            } else {
              quote = aChar;
            }
            break;
          case kGreaterThan:
            if (quote) {
              ++end;
            } else {
              start = end;
              // Note that start is wrong after this, we just avoid temp var
              ++start;
              aScanner.SetPosition(start); // Skip the >
              done = PR_TRUE;
            }
            break;
          default:
            NS_ABORT_IF_FALSE(0, "should not happen, switch is missing cases?");
            break;
        }
        start = end;
      } else {
        done = PR_TRUE;
      }
    }
  }
  aScanner.BindSubstring(mTextValue, origin, end);

  if (kEOF == result) {
    mInError = PR_TRUE;
    if (!aScanner.IsIncremental()) {
      // Hide this EOF.
      result = NS_OK;
    }
  }

  return result;
}

const nsSubstring&
CMarkupDeclToken::GetStringValue()
{
  return mTextValue.AsString();
}


CCommentToken::CCommentToken()
  : CHTMLToken(eHTMLTag_comment)
{
}

CCommentToken::CCommentToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_comment)
{
  mComment.Rebind(aName);
}

void
CCommentToken::AppendSourceTo(nsAString& anOutputString)
{
  AppendUnicodeTo(mCommentDecl, anOutputString);
}

static PRBool
IsCommentEnd(const nsScannerIterator& aCurrent, const nsScannerIterator& aEnd,
             nsScannerIterator& aGt)
{
  nsScannerIterator current = aCurrent;
  PRInt32 dashes = 0;

  while (current != aEnd && dashes != 2) {
    if (*current == kGreaterThan) {
      aGt = current;
      return PR_TRUE;
    }
    if (*current == PRUnichar('-')) {
      ++dashes;
    } else {
      dashes = 0;
    }
    ++current;
  }

  return PR_FALSE;
}

nsresult
CCommentToken::ConsumeStrictComment(nsScanner& aScanner)
{
  // <!--[... -- ... -- ...]*-->
  /*********************************************************
    NOTE: This algorithm does a fine job of handling comments
          when they're formatted per spec, but if they're not
          we don't handle them well.
   *********************************************************/
  nsScannerIterator end, current, gt, lt;
  aScanner.EndReading(end);
  aScanner.CurrentPosition(current);

  nsScannerIterator beginData = end;

  lt = current;
  lt.advance(-2); // <!

  current.advance(-1);

  // Regular comment must start with <!--
  if (*current == kExclamation &&
      ++current != end && *current == kMinus &&
      ++current != end && *current == kMinus &&
      ++current != end) {
    nsScannerIterator currentEnd = end;
    PRBool balancedComment = PR_FALSE;
    NS_NAMED_LITERAL_STRING(dashes, "--");
    beginData = current;

    while (FindInReadable(dashes, current, currentEnd)) {
      current.advance(2);

      balancedComment = !balancedComment; // We need to match '--' with '--'

      if (balancedComment && IsCommentEnd(current, end, gt)) {
        // done
        current.advance(-2);
        // Note: it's ok if beginData == current, (we'll copy an empty string)
        // and we need to bind mComment anyway.
        aScanner.BindSubstring(mComment, beginData, current);
        aScanner.BindSubstring(mCommentDecl, lt, ++gt);
        aScanner.SetPosition(gt);
        return NS_OK;
      }

      // Continue after the last '--'
      currentEnd = end;
    }
  }

  // If beginData == end, we did not find opening '--'
  if (beginData == end) {
    // This might have been empty comment: <!>
    // Or it could have been something completely bogus like: <!This is foobar>
    // Handle both cases below
    aScanner.CurrentPosition(current);
    beginData = current;
    if (FindCharInReadable('>', current, end)) {
      aScanner.BindSubstring(mComment, beginData, current);
      aScanner.BindSubstring(mCommentDecl, lt, ++current);
      aScanner.SetPosition(current);
      return NS_OK;
    }
  }

  if (aScanner.IsIncremental()) {
    // We got here because we saw the beginning of a comment,
    // but not yet the end, and we are still loading the page. In that
    // case the return value here will cause us to unwind,
    // wait for more content, and try again.
    // XXX For performance reasons we should cache where we were, and
    //     continue from there for next call
    return kEOF;
  }

  // There was no terminating string, parse this comment as text.
  aScanner.SetPosition(lt, PR_FALSE, PR_TRUE);
  return kNotAComment;
}

nsresult
CCommentToken::ConsumeQuirksComment(nsScanner& aScanner)
{
  // <![-[-]] ... [[-]-|--!]>
  /*********************************************************
    NOTE: This algorithm does a fine job of handling comments
          commonly used, but it doesn't really consume them
          per spec (But then, neither does IE or Nav).
   *********************************************************/
  nsScannerIterator end, current;
  aScanner.EndReading(end);
  aScanner.CurrentPosition(current);
  nsScannerIterator beginData = current,
                    beginLastMinus = end,
                    bestAltCommentEnd = end,
                    lt = current;
  lt.advance(-2); // <!

  // When we get here, we have always already consumed <!
  // Skip over possible leading minuses
  if (current != end && *current == kMinus) {
    beginLastMinus = current;
    ++current;
    ++beginData;
    if (current != end && *current == kMinus) { // <!--
      beginLastMinus = current;
      ++current;
      ++beginData;
      // Long form comment

      nsScannerIterator currentEnd = end, gt = end;

      // Find the end of the comment
      while (FindCharInReadable(kGreaterThan, current, currentEnd)) {
        gt = current;
        if (bestAltCommentEnd == end) {
          bestAltCommentEnd = gt;
        }
        --current;
        PRBool goodComment = PR_FALSE;
        if (current != beginLastMinus && *current == kMinus) { // ->
          --current;
          if (current != beginLastMinus && *current == kMinus) { // -->
            goodComment = PR_TRUE;
            --current;
          }
        } else if (current != beginLastMinus && *current == '!') {
          --current;
          if (current != beginLastMinus && *current == kMinus) {
            --current;
            if (current != beginLastMinus && *current == kMinus) { // --!>
              --current;
              goodComment = PR_TRUE;
            }
          }
        } else if (current == beginLastMinus) {
          goodComment = PR_TRUE;
        }

        if (goodComment) {
          // done
          aScanner.BindSubstring(mComment, beginData, ++current);
          aScanner.BindSubstring(mCommentDecl, lt, ++gt);
          aScanner.SetPosition(gt);
          return NS_OK;
        } else {
          // try again starting after the last '>'
          current = ++gt;
          currentEnd = end;
        }
      }

      if (aScanner.IsIncremental()) {
        // We got here because we saw the beginning of a comment,
        // but not yet the end, and we are still loading the page. In that
        // case the return value here will cause us to unwind,
        // wait for more content, and try again.
        // XXX For performance reasons we should cache where we were, and
        //     continue from there for next call
        return kEOF;
      }

      // If you're here, then we're in a special state.
      // The problem at hand is that we've hit the end of the document without
      // finding the normal endcomment delimiter "-->".  In this case, the
      // first thing we try is to see if we found an alternate endcomment
      // delimiter ">".  If so, rewind just pass that, and use everything up
      // to that point as your comment.  If not, the document has no end
      // comment and should be treated as one big comment.
      gt = bestAltCommentEnd;
      aScanner.BindSubstring(mComment, beginData, gt);
      if (gt != end) {
        ++gt;
      }
      aScanner.BindSubstring(mCommentDecl, lt, gt);
      aScanner.SetPosition(gt);
      return NS_OK;
    }
  }

  // This could be short form of comment
  // Find the end of the comment
  current = beginData;
  if (FindCharInReadable(kGreaterThan, current, end)) {
    nsScannerIterator gt = current;
    if (current != beginData) {
      --current;
      if (current != beginData && *current == kMinus) { // ->
        --current;
        if (current != beginData && *current == kMinus) { // -->
          --current;
        }
      } else if (current != beginData && *current == '!') { // !>
        --current;
        if (current != beginData && *current == kMinus) { // -!>
          --current;
          if (current != beginData && *current == kMinus) { // --!>
            --current;
          }
        }
      }
    }

    if (current != gt) {
      aScanner.BindSubstring(mComment, beginData, ++current);
    } else {
      // Bind mComment to an empty string (note that if current == gt,
      // then current == beginData). We reach this for <!>
      aScanner.BindSubstring(mComment, beginData, current);
    }
    aScanner.BindSubstring(mCommentDecl, lt, ++gt);
    aScanner.SetPosition(gt);
    return NS_OK;
  }

  if (!aScanner.IsIncremental()) {
    // This isn't a comment at all, go back to the < and consume as text.
    aScanner.SetPosition(lt, PR_FALSE, PR_TRUE);
    return kNotAComment;
  }

  // Wait for more data...
  return kEOF;
}

/*
 *  Consume the identifier portion of the comment.
 *  Note that we've already eaten the "<!" portion.
 *
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult
CCommentToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  nsresult result = PR_TRUE;

  if (aFlag & NS_IPARSER_FLAG_STRICT_MODE) {
    // Enabling strict comment parsing for Bug 53011 and 2749 contradicts!
    result = ConsumeStrictComment(aScanner);
  } else {
    result = ConsumeQuirksComment(aScanner);
  }

  if (NS_SUCCEEDED(result)) {
    mNewlineCount = mCommentDecl.CountChar(kNewLine);
  }

  return result;
}

const nsSubstring&
CCommentToken::GetStringValue()
{
  return mComment.AsString();
}

PRInt32
CCommentToken::GetTokenType()
{
  return eToken_comment;
}

CNewlineToken::CNewlineToken()
  : CHTMLToken(eHTMLTag_newline)
{
}

PRInt32
CNewlineToken::GetTokenType()
{
  return eToken_newline;
}

static nsScannerSubstring* gNewlineStr;
void
CNewlineToken::AllocNewline()
{
  gNewlineStr = new nsScannerSubstring(NS_LITERAL_STRING("\n"));
}

void
CNewlineToken::FreeNewline()
{
  if (gNewlineStr) {
    delete gNewlineStr;
    gNewlineStr = nsnull;
  }
}

/**
 *  This method retrieves the value of this internal string.
 *
 *  @return nsString reference to internal string value
 */
const nsSubstring&
CNewlineToken::GetStringValue()
{
  return gNewlineStr->AsString();
}

/*
 * Consume one newline (cr/lf pair).
 *
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult
CNewlineToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  /*
   * Here's what the HTML spec says about newlines:
   *
   * "A line break is defined to be a carriage return (&#x000D;),
   * a line feed (&#x000A;), or a carriage return/line feed pair.
   * All line breaks constitute white space."
   */

  nsresult rv = NS_OK;
  if (aChar == kCR) {
    PRUnichar theChar;
    rv = aScanner.Peek(theChar);
    if (theChar == kNewLine) {
      rv = aScanner.GetChar(theChar);
    } else if (rv == kEOF && !aScanner.IsIncremental()) {
      // Make sure we don't lose information about this trailing newline.
      rv = NS_OK;
    }
  }

  mNewlineCount = 1;
  return rv;
}

CAttributeToken::CAttributeToken()
  : CHTMLToken(eHTMLTag_unknown)
{
  mHasEqualWithoutValue = PR_FALSE;
}

/*
 * String based constructor
 */
CAttributeToken::CAttributeToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
  mTextValue.writable().Assign(aName);
  mHasEqualWithoutValue = PR_FALSE;
}

/*
 *  construct initializing data to key value pair
 */
CAttributeToken::CAttributeToken(const nsAString& aKey, const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
  mTextValue.writable().Assign(aName);
  mTextKey.Rebind(aKey);
  mHasEqualWithoutValue = PR_FALSE;
}

PRInt32
CAttributeToken::GetTokenType()
{
  return eToken_attribute;
}

const nsSubstring&
CAttributeToken::GetStringValue()
{
  return mTextValue.str();
}

void
CAttributeToken::GetSource(nsString& anOutputString)
{
  anOutputString.Truncate();
  AppendSourceTo(anOutputString);
}

void
CAttributeToken::AppendSourceTo(nsAString& anOutputString)
{
  AppendUnicodeTo(mTextKey, anOutputString);
  if (mTextValue.str().Length() || mHasEqualWithoutValue) {
    anOutputString.AppendLiteral("=");
  }
  anOutputString.Append(mTextValue.str());
  // anOutputString.AppendLiteral(";");
}

/*
 * This general purpose method is used when you want to
 * consume a known quoted string.
 */
static nsresult
ConsumeQuotedString(PRUnichar aChar,
                    nsScannerSharedSubstring& aString,
                    PRInt32& aNewlineCount,
                    nsScanner& aScanner,
                    PRInt32 aFlag)
{
  NS_ASSERTION(aChar == kQuote || aChar == kApostrophe,
               "char is neither quote nor apostrophe");
  // Hold onto this in case this is an unterminated string literal
  PRUint32 origLen = aString.str().Length();

  static const PRUnichar theTerminalCharsQuote[] = {
    PRUnichar(kQuote), PRUnichar('&'), PRUnichar(kCR),
    PRUnichar(kNewLine), PRUnichar(0) };
  static const PRUnichar theTerminalCharsApostrophe[] = {
    PRUnichar(kApostrophe), PRUnichar('&'), PRUnichar(kCR),
    PRUnichar(kNewLine), PRUnichar(0) };
  static const nsReadEndCondition
    theTerminateConditionQuote(theTerminalCharsQuote);
  static const nsReadEndCondition
    theTerminateConditionApostrophe(theTerminalCharsApostrophe);

  // Assume Quote to init to something
  const nsReadEndCondition *terminateCondition = &theTerminateConditionQuote;
  if (aChar == kApostrophe) {
    terminateCondition = &theTerminateConditionApostrophe;
  }

  nsresult result = NS_OK;
  nsScannerIterator theOffset;
  aScanner.CurrentPosition(theOffset);

  result = ConsumeUntil(aString, aNewlineCount, aScanner,
                      *terminateCondition, PR_TRUE, PR_TRUE, aFlag);

  if (NS_SUCCEEDED(result)) {
    result = aScanner.GetChar(aChar); // aChar should be " or '
  }

  // Ref: Bug 35806
  // A back up measure when disaster strikes...
  // Ex <table> <tr d="><td>hello</td></tr></table>
  if (!aString.str().IsEmpty() && aString.str().Last() != aChar &&
      !aScanner.IsIncremental() && result == kEOF) {
    static const nsReadEndCondition
      theAttributeTerminator(kAttributeTerminalChars);
    aString.writable().Truncate(origLen);
    aScanner.SetPosition(theOffset, PR_FALSE, PR_TRUE);
    result = ConsumeUntil(aString, aNewlineCount, aScanner,
                          theAttributeTerminator, PR_FALSE, PR_TRUE, aFlag);
    if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
      // Remember that this string literal was unterminated.
      result = NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL;
    }
  }
  return result;
}

/*
 * This method is meant to be used by view-source to consume invalid attributes.
 * For the purposes of this method, an invalid attribute is an attribute that
 * starts with either ', ", or /. We consume all ', ", or / and the following
 * whitespace.
 *
 * @param aScanner -- the scanner we're reading our data from.
 * @param aChar -- the character we're skipping
 * @param aCurrent -- the current position that we're looking at.
 * @param aNewlineCount -- a count of the newlines we've consumed.
 * @return error result.
 */
static nsresult
ConsumeInvalidAttribute(nsScanner& aScanner,
                        PRUnichar aChar,
                        nsScannerIterator& aCurrent,
                        PRInt32& aNewlineCount)
{
  NS_ASSERTION(aChar == kApostrophe || aChar == kQuote || aChar == kForwardSlash,
               "aChar must be a quote or apostrophe");
  nsScannerIterator end, wsbeg;
  aScanner.EndReading(end);

  while (aCurrent != end && *aCurrent == aChar) {
    ++aCurrent;
  }

  aScanner.SetPosition(aCurrent);
  return aScanner.ReadWhitespace(wsbeg, aCurrent, aNewlineCount);
}

/*
 * Consume the key and value portions of the attribute.
 */
nsresult
CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  nsresult result;
  nsScannerIterator wsstart, wsend;

  if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
    result = aScanner.ReadWhitespace(wsstart, wsend, mNewlineCount);
    if (kEOF == result && wsstart != wsend) {
      // Do this here so if this is the final token in the document, we don't
      // lose the whitespace.
      aScanner.BindSubstring(mTextKey, wsstart, wsend);
    }
  } else {
    result = aScanner.SkipWhitespace(mNewlineCount);
  }

  if (NS_OK == result) {
    static const PRUnichar theTerminalsChars[] =
    { PRUnichar(' '), PRUnichar('"'),
      PRUnichar('='), PRUnichar('\n'),
      PRUnichar('\r'), PRUnichar('\t'),
      PRUnichar('>'), PRUnichar('<'),
      PRUnichar('\b'), PRUnichar('\''),
      PRUnichar('/'), PRUnichar(0) };
    static const nsReadEndCondition theEndCondition(theTerminalsChars);

    nsScannerIterator start, end;
    result = aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);

    if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
      aScanner.BindSubstring(mTextKey, start, end);
    } else if (kEOF == result && wsstart != end) {
      // Capture all of the text (from the beginning of the whitespace to the
      // end of the document).
      aScanner.BindSubstring(mTextKey, wsstart, end);
    }

    // Now it's time to Consume the (optional) value...
    if (NS_OK == result) {
      if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
        result = aScanner.ReadWhitespace(start, wsend, mNewlineCount);
        aScanner.BindSubstring(mTextKey, wsstart, wsend);
      } else {
        result = aScanner.SkipWhitespace(mNewlineCount);
      }

      if (NS_OK == result) {
        // Skip ahead until you find an equal sign or a '>'...
        result = aScanner.Peek(aChar);
        if (NS_OK == result) {
          if (kEqual == aChar) {
            result = aScanner.GetChar(aChar);  // Skip the equal sign...
            if (NS_OK == result) {
              if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
                PRBool haveCR;
                result = aScanner.ReadWhitespace(mTextValue, mNewlineCount,
                                                 haveCR);
              } else {
                result = aScanner.SkipWhitespace(mNewlineCount);
              }

              if (NS_OK == result) {
                result = aScanner.Peek(aChar);  // And grab the next char.
                if (NS_OK == result) {
                  if (kQuote == aChar || kApostrophe == aChar) {
                    aScanner.GetChar(aChar);
                    if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
                      mTextValue.writable().Append(aChar);
                    }

                    result = ConsumeQuotedString(aChar, mTextValue,
                                                 mNewlineCount, aScanner,
                                                 aFlag);
                    if (NS_SUCCEEDED(result) &&
                        (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
                      mTextValue.writable().Append(aChar);
                    } else if (result ==
                                NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL) {
                      result = NS_OK;
                      mInError = PR_TRUE;
                    }
                    // According to spec. we ( who? ) should ignore linefeeds.
                    // But look, even the carriage return was getting stripped
                    // ( wonder why! ) - Ref. to bug 15204.  Okay, so the
                    // spec. told us to ignore linefeeds, bug then what about
                    // bug 47535 ? Should we preserve everything then?  Well,
                    // let's make it so!
                  } else if (kGreaterThan == aChar) {
                    mHasEqualWithoutValue = PR_TRUE;
                    mInError = PR_TRUE;
                  } else {
                    static const nsReadEndCondition
                      theAttributeTerminator(kAttributeTerminalChars);
                    result =
                      ConsumeUntil(mTextValue,
                                   mNewlineCount,
                                   aScanner,
                                   theAttributeTerminator,
                                   PR_FALSE,
                                   PR_TRUE,
                                   aFlag);
                  }
                }
                if (NS_OK == result) {
                  if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
                    PRBool haveCR;
                    result = aScanner.ReadWhitespace(mTextValue, mNewlineCount,
                                                     haveCR);
                  } else {
                    result = aScanner.SkipWhitespace(mNewlineCount);
                  }
                }
              } else {
                // We saw an equal sign but ran out of room looking for a value.
                mHasEqualWithoutValue = PR_TRUE;
                mInError = PR_TRUE;
              }
            }
          } else {
            // This is where we have to handle fairly busted content.
            // If you're here, it means we saw an attribute name, but couldn't
            // find the following equal sign.  <tag NAME....

            // Doing this right in all cases is <i>REALLY</i> ugly.
            // My best guess is to grab the next non-ws char. We know it's not
            // '=', so let's see what it is. If it's a '"', then assume we're
            // reading from the middle of the value. Try stripping the quote
            // and continuing...  Note that this code also strips forward
            // slashes to handle cases like <tag NAME/>
            if (kQuote == aChar || kApostrophe == aChar ||
                kForwardSlash == aChar) {
              // In XML, a trailing slash isn't an error.
              if (kForwardSlash != aChar || !(aFlag & NS_IPARSER_FLAG_XML)) {
                mInError = PR_TRUE;
              }

              if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
                result = aScanner.SkipOver(aChar); // Strip quote or slash.
                if (NS_SUCCEEDED(result)) {
                  result = aScanner.SkipWhitespace(mNewlineCount);
                }
              } else {
                // We want to collect whitespace here so that following
                // attributes can have the right line number (and for
                // parity with the non-view-source code above).
                result = ConsumeInvalidAttribute(aScanner, aChar,
                                                 wsend, mNewlineCount);

                aScanner.BindSubstring(mTextKey, wsstart, wsend);
                aScanner.SetPosition(wsend);
              }
            }
          }
        }
      }
    }

    if (NS_OK == result) {
      if (mTextValue.str().Length() == 0 && mTextKey.Length() == 0 &&
          mNewlineCount == 0 && !mHasEqualWithoutValue) {
        // This attribute contains no useful information for us, so there is no
        // use in keeping it around. Attributes that are otherwise empty, but
        // have newlines in them are passed on the the DTD so it can get line
        // numbering right.
        return NS_ERROR_HTMLPARSER_BADATTRIBUTE;
      }
    }
  }

  if (kEOF == result && !aScanner.IsIncremental()) {
    // This is our run-of-the mill "don't lose content at the end of a
    // document" with a slight twist: we don't want to bother returning an
    // empty attribute key, even if this is the end of the document.
    if (mTextKey.Length() == 0) {
      result = NS_ERROR_HTMLPARSER_BADATTRIBUTE;
    } else {
      result = NS_OK;
    }
  }

  return result;
}

void
CAttributeToken::SetKey(const nsAString& aKey)
{
  mTextKey.Rebind(aKey);
}

void
CAttributeToken::BindKey(nsScanner* aScanner,
                         nsScannerIterator& aStart,
                         nsScannerIterator& aEnd)
{
  aScanner->BindSubstring(mTextKey, aStart, aEnd);
}

CWhitespaceToken::CWhitespaceToken()
  : CHTMLToken(eHTMLTag_whitespace)
{
}

CWhitespaceToken::CWhitespaceToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_whitespace)
{
  mTextValue.writable().Assign(aName);
}

PRInt32 CWhitespaceToken::GetTokenType()
{
  return eToken_whitespace;
}

/*
 * This general purpose method is used when you want to
 * consume an aribrary sequence of whitespace.
 *
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult
CWhitespaceToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  // If possible, we'd like to just be a dependent substring starting at
  // |aChar|.  The scanner has already been advanced, so we need to
  // back it up to facilitate this.

  nsScannerIterator start;
  aScanner.CurrentPosition(start);
  aScanner.SetPosition(--start, PR_FALSE, PR_TRUE);

  PRBool haveCR;

  nsresult result = aScanner.ReadWhitespace(mTextValue, mNewlineCount, haveCR);

  if (result == kEOF && !aScanner.IsIncremental()) {
    // Oops, we ran off the end, make sure we don't lose the trailing
    // whitespace!
    result = NS_OK;
  }

  if (NS_OK == result && haveCR) {
    mTextValue.writable().StripChar(kCR);
  }
  return result;
}

const nsSubstring&
CWhitespaceToken::GetStringValue()
{
  return mTextValue.str();
}

CEntityToken::CEntityToken()
  : CHTMLToken(eHTMLTag_entity)
{
}

CEntityToken::CEntityToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_entity)
{
  mTextValue.Assign(aName);
}


/*
 *  Consume the rest of the entity. We've already eaten the "&".
 *
 *  @param   aChar -- last char consumed from stream
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result
 */
nsresult
CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  nsresult result = ConsumeEntity(aChar, mTextValue, aScanner);
  return result;
}

PRInt32
CEntityToken::GetTokenType()
{
  return eToken_entity;
}

/*
 * This general purpose method is used when you want to
 * consume an entity &xxxx;. Keep in mind that entities
 * are <i>not</i> reduced inline.
 *
 * @param   aChar -- last char consumed from stream
 * @param   aScanner -- controller of underlying input source
 * @return  error result
 */
nsresult
CEntityToken::ConsumeEntity(PRUnichar aChar,
                            nsString& aString,
                            nsScanner& aScanner)
{
  nsresult result = NS_OK;
  if (kLeftBrace == aChar) {
    // You're consuming a script entity...
    aScanner.GetChar(aChar); // Consume &

    PRInt32 rightBraceCount = 0;
    PRInt32 leftBraceCount  = 0;

    do {
      result = aScanner.GetChar(aChar);

      if (NS_FAILED(result)) {
        return result;
      }

      aString.Append(aChar);
      if (aChar == kRightBrace) {
        ++rightBraceCount;
      } else if (aChar == kLeftBrace) {
        ++leftBraceCount;
      }
    } while (leftBraceCount != rightBraceCount);
  } else {
    PRUnichar theChar = 0;
    if (kHashsign == aChar) {
      result = aScanner.Peek(theChar, 2);

      if (NS_FAILED(result)) {
        if (kEOF == result && !aScanner.IsIncremental()) {
          // If this is the last buffer then we are certainly
          // not dealing with an entity. That's, there are
          // no more characters after &#. Bug 188278.
          return NS_HTMLTOKENS_NOT_AN_ENTITY;
        }
        return result;
      }

      if (nsCRT::IsAsciiDigit(theChar)) {
        aScanner.GetChar(aChar); // Consume &
        aScanner.GetChar(aChar); // Consume #
        aString.Assign(aChar);
        result = aScanner.ReadNumber(aString, 10);
      } else if (theChar == 'x' || theChar == 'X') {
        aScanner.GetChar(aChar);   // Consume &
        aScanner.GetChar(aChar);   // Consume #
        aScanner.GetChar(theChar); // Consume x
        aString.Assign(aChar);
        aString.Append(theChar);
        result = aScanner.ReadNumber(aString, 16);
      } else {
        return NS_HTMLTOKENS_NOT_AN_ENTITY;
      }
    } else {
      result = aScanner.Peek(theChar, 1);

      if (NS_FAILED(result)) {
        return result;
      }

      if (nsCRT::IsAsciiAlpha(theChar) ||
        theChar == '_' ||
        theChar == ':') {
        aScanner.GetChar(aChar); // Consume &
        result = aScanner.ReadEntityIdentifier(aString);
      } else {
        return NS_HTMLTOKENS_NOT_AN_ENTITY;
      }
    }
  }

  if (NS_FAILED(result)) {
    return result;
  }

  result = aScanner.Peek(aChar);

  if (NS_FAILED(result)) {
    return result;
  }

  if (aChar == kSemicolon) {
    // Consume semicolon that stopped the scan
    aString.Append(aChar);
    result = aScanner.GetChar(aChar);
  }

  return result;
}

/**
 * Map some illegal but commonly used numeric entities into their
 * appropriate unicode value.
 */
#define NOT_USED 0xfffd

static const PRUint16 PA_HackTable[] = {
	0x20ac,  /* EURO SIGN */
	NOT_USED,
	0x201a,  /* SINGLE LOW-9 QUOTATION MARK */
	0x0192,  /* LATIN SMALL LETTER F WITH HOOK */
	0x201e,  /* DOUBLE LOW-9 QUOTATION MARK */
	0x2026,  /* HORIZONTAL ELLIPSIS */
	0x2020,  /* DAGGER */
	0x2021,  /* DOUBLE DAGGER */
	0x02c6,  /* MODIFIER LETTER CIRCUMFLEX ACCENT */
	0x2030,  /* PER MILLE SIGN */
	0x0160,  /* LATIN CAPITAL LETTER S WITH CARON */
	0x2039,  /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
	0x0152,  /* LATIN CAPITAL LIGATURE OE */
	NOT_USED,
	0x017D,  /* LATIN CAPITAL LETTER Z WITH CARON */
	NOT_USED,
	NOT_USED,
	0x2018,  /* LEFT SINGLE QUOTATION MARK */
	0x2019,  /* RIGHT SINGLE QUOTATION MARK */
	0x201c,  /* LEFT DOUBLE QUOTATION MARK */
	0x201d,  /* RIGHT DOUBLE QUOTATION MARK */
	0x2022,  /* BULLET */
	0x2013,  /* EN DASH */
	0x2014,  /* EM DASH */
	0x02dc,  /* SMALL TILDE */
	0x2122,  /* TRADE MARK SIGN */
	0x0161,  /* LATIN SMALL LETTER S WITH CARON */
	0x203a,  /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
	0x0153,  /* LATIN SMALL LIGATURE OE */
	NOT_USED,
	0x017E,  /* LATIN SMALL LETTER Z WITH CARON */
	0x0178   /* LATIN CAPITAL LETTER Y WITH DIAERESIS */
};

static void
AppendNCR(nsSubstring& aString, PRInt32 aNCRValue)
{
  /* For some illegal, but popular usage */
  if (aNCRValue >= 0x0080 && aNCRValue <= 0x009f) {
    aNCRValue = PA_HackTable[aNCRValue - 0x0080];
  }

  AppendUCS4ToUTF16(ENSURE_VALID_CHAR(aNCRValue), aString);
}

/*
 * This method converts this entity into its underlying
 * unicode equivalent.
 *
 *  @param   aString will hold the resulting string value
 *  @return  numeric (unichar) value
 */
PRInt32
CEntityToken::TranslateToUnicodeStr(nsString& aString)
{
  PRInt32 value = 0;

  if (mTextValue.Length() > 1) {
    PRUnichar theChar0 = mTextValue.CharAt(0);

    if (kHashsign == theChar0) {
      PRInt32 err = 0;

      value = mTextValue.ToInteger(&err, kAutoDetect);

      if (0 == err) {
        AppendNCR(aString, value);
      }
    } else {
      value = nsHTMLEntities::EntityToUnicode(mTextValue);
      if (-1 < value) {
        // We found a named entity...
        aString.Assign(PRUnichar(value));
      }
    }
  }

  return value;
}


const
nsSubstring& CEntityToken::GetStringValue()
{
  return mTextValue;
}

void
CEntityToken::GetSource(nsString& anOutputString)
{
  anOutputString.AppendLiteral("&");
  anOutputString += mTextValue;
  // Any possible ; is part of our text value.
}

void
CEntityToken::AppendSourceTo(nsAString& anOutputString)
{
  anOutputString.AppendLiteral("&");
  anOutputString += mTextValue;
  // Any possible ; is part of our text value.
}

const PRUnichar*
GetTagName(PRInt32 aTag)
{
  const PRUnichar *result = nsHTMLTags::GetStringValue((nsHTMLTag) aTag);

  if (result) {
    return result;
  }

  if (aTag >= eHTMLTag_userdefined) {
    return sUserdefined;
  }

  return 0;
}


CInstructionToken::CInstructionToken()
  : CHTMLToken(eHTMLTag_instruction)
{
}

CInstructionToken::CInstructionToken(const nsAString& aString)
  : CHTMLToken(eHTMLTag_unknown)
{
  mTextValue.Assign(aString);
}

nsresult
CInstructionToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  mTextValue.AssignLiteral("<?");
  nsresult result = NS_OK;
  PRBool done = PR_FALSE;

  while (NS_OK == result && !done) {
    // Note, this call does *not* consume the >.
    result = aScanner.ReadUntil(mTextValue, kGreaterThan, PR_FALSE);
    if (NS_SUCCEEDED(result)) {
      // In HTML, PIs end with a '>', in XML, they end with a '?>'. Cover both
      // cases here.
      if (!(aFlag & NS_IPARSER_FLAG_XML) ||
          kQuestionMark == mTextValue.Last()) {
        // This really is the end of the PI.
        done = PR_TRUE;
      }
      // Need to append this character no matter what.
      aScanner.GetChar(aChar);
      mTextValue.Append(aChar);
    }
  }

  if (kEOF == result && !aScanner.IsIncremental()) {
    // Hide the EOF result because there is no more text coming.
    mInError = PR_TRUE;
    result = NS_OK;
  }

  return result;
}

PRInt32
CInstructionToken::GetTokenType()
{
  return eToken_instruction;
}

const nsSubstring&
CInstructionToken::GetStringValue()
{
  return mTextValue;
}

// Doctype decl token

CDoctypeDeclToken::CDoctypeDeclToken(eHTMLTags aTag)
  : CHTMLToken(aTag)
{
}

CDoctypeDeclToken::CDoctypeDeclToken(const nsAString& aString, eHTMLTags aTag)
  : CHTMLToken(aTag), mTextValue(aString)
{
}

/**
 *  This method consumes a doctype element.
 *  Note: I'm rewriting this method to seek to the first <, since quotes can
 *  really screw us up.
 *  XXX Maybe this should do better in XML or strict mode?
 */
nsresult
CDoctypeDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  static const PRUnichar terminalChars[] =
  { PRUnichar('>'), PRUnichar('<'),
    PRUnichar(0)
  };
  static const nsReadEndCondition theEndCondition(terminalChars);

  nsScannerIterator start, end;

  aScanner.CurrentPosition(start);
  aScanner.EndReading(end);

  nsresult result = aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);

  if (NS_SUCCEEDED(result)) {
    PRUnichar ch;
    aScanner.Peek(ch);
    if (ch == kGreaterThan) {
      // Include '>' but not '<' since '<'
      // could belong to another tag.
      aScanner.GetChar(ch);
      end.advance(1);
    } else {
      NS_ASSERTION(kLessThan == ch,
                   "Make sure this doctype decl. is really in error.");
      mInError = PR_TRUE;
    }
  } else if (!aScanner.IsIncremental()) {
    // We have reached the document end but haven't
    // found either a '<' or a '>'. Therefore use
    // whatever we have.
    mInError = PR_TRUE;
    result = NS_OK;
  }

  if (NS_SUCCEEDED(result)) {
    start.advance(-2); // Make sure to consume <!
    CopyUnicodeTo(start, end, mTextValue);
  }

  return result;
}

PRInt32
CDoctypeDeclToken::GetTokenType()
{
  return eToken_doctypeDecl;
}

const nsSubstring&
CDoctypeDeclToken::GetStringValue()
{
  return mTextValue;
}

void
CDoctypeDeclToken::SetStringValue(const nsAString& aStr)
{
  mTextValue.Assign(aStr);
}