Mozilla/mozilla/parser/htmlparser/src/nsHTMLTokens.cpp

2626 lines
77 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 sw=2 et tw=78: */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Blake Kaplan <mrbkap@gmail.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include <ctype.h>
#include <time.h>
#include <stdio.h>
#include "nsScanner.h"
#include "nsToken.h"
#include "nsIAtom.h"
#include "nsHTMLTokens.h"
#include "prtypes.h"
#include "nsDebug.h"
#include "nsHTMLTags.h"
#include "nsHTMLEntities.h"
#include "nsCRT.h"
#include "nsReadableUtils.h"
#include "nsUnicharUtils.h"
#include "nsScanner.h"
// Zero-terminated PRUnichar spelling of the string "userdefined".
static const PRUnichar sUserdefined[] = {'u', 's', 'e', 'r', 'd', 'e', 'f',
'i', 'n', 'e', 'd', 0};
// Zero-terminated character set: '&', backspace, tab, LF, CR, space and '>'.
// NOTE(review): judging by the name, presumably the characters that end an
// unquoted attribute value -- confirm at the use site (not in this part of
// the file).
static const PRUnichar kAttributeTerminalChars[] = {
PRUnichar('&'), PRUnichar('\b'), PRUnichar('\t'),
PRUnichar('\n'), PRUnichar('\r'), PRUnichar(' '),
PRUnichar('>'),
PRUnichar(0)
};
// Forward declaration; ConsumeEntity() below calls this with the integer
// value parsed from a numeric character reference.
static void AppendNCR(nsSubstring& aString, PRInt32 aNCRValue);
/*
 * Handle the '&' the scanner is currently positioned on: either reduce the
 * entity that follows it and append the resulting character to aString, or
 * append the raw text when it turns out not to be an entity.
 *
 * @param aString -- receives the reduced character or the literal text
 * @param aScanner -- controller of underlying input source
 * @param aFlag -- if NS_IPARSER_FLAG_VIEW_SOURCE is set, entities are not
 *                 reduced and only the '&' itself is consumed
 * @return error result
 */
static
nsresult ConsumeEntity(nsScannerSharedSubstring& aString,
                       nsScanner& aScanner,
                       PRInt32 aFlag)
{
  // Look one character past the '&' to decide what kind of entity this is.
  PRUnichar nextChar;
  nsresult rv = aScanner.Peek(nextChar, 1);
  if (NS_FAILED(rv)) {
    return rv;
  }

  PRUnichar consumed = 0;
  nsAutoString entityText;

  if (nsCRT::IsAsciiAlpha(nextChar) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    // Named entity.
    rv = CEntityToken::ConsumeEntity(nextChar, entityText, aScanner);
    if (NS_SUCCEEDED(rv)) {
      PRInt32 codePoint = nsHTMLEntities::EntityToUnicode(entityText);
      PRUnichar lastChar = entityText.Last();
      nsSubstring &out = aString.writable();
      // If an entity value is greater than 255 then:
      //   Nav 4.x does not treat it as an entity,
      //   IE treats it as an entity if terminated with a semicolon.
      // We follow IE here.
      const PRBool isEntity =
        codePoint >= 0 && (codePoint <= 255 || lastChar == ';');
      if (isEntity) {
        // A valid entity, so reduce it.
        out.Append(PRUnichar(codePoint));
      }
      else {
        // Not an entity after all; emit the text as it was written.
        out.Append(kAmpersand);
        out.Append(entityText);
      }
    }
  }
  else if (nextChar == kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
    // Numeric character reference.
    rv = CEntityToken::ConsumeEntity(nextChar, entityText, aScanner);
    if (NS_SUCCEEDED(rv)) {
      nsSubstring &out = aString.writable();
      if (rv == NS_HTMLTOKENS_NOT_AN_ENTITY) {
        // Looked like an entity but it's not: consume one character and
        // copy it through.
        aScanner.GetChar(consumed);
        out.Append(consumed);
        rv = NS_OK; // just being safe..
      }
      else {
        PRInt32 err;
        PRInt32 ncrValue = entityText.ToInteger(&err, kAutoDetect);
        AppendNCR(out, ncrValue);
      }
    }
  }
  else {
    // What we thought was an entity is not really one; copy one character
    // through unchanged.
    aScanner.GetChar(consumed);
    aString.writable().Append(consumed);
  }

  return rv;
}
/*
 * General purpose helper used to consume attribute-like text values.
 * Text is read up to the next character matched by aEndCondition; entities
 * are reduced (via ConsumeEntity) and, when allowed, newlines are copied
 * into the output and counted. Consumption stops at the first stop
 * character that is neither '&' nor an allowed newline.
 *
 * @param aString -- receives the consumed text
 * @param aNewlineCount -- the newline count to increment when hitting newlines
 * @param aScanner -- controller of underlying input source
 * @param aEndCondition -- characters that stop consuming.
 * @param aAllowNewlines -- whether to allow newlines in the value.
 *                          XXX it would be nice to roll this info into
 *                          aEndCondition somehow....
 * @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
 * @return error result
 */
static
nsresult ConsumeUntil(nsScannerSharedSubstring& aString,
                      PRInt32& aNewlineCount,
                      nsScanner& aScanner,
                      const nsReadEndCondition& aEndCondition,
                      PRBool aAllowNewlines,
                      PRInt32 aFlag)
{
  nsresult result = NS_OK;
  PRBool done = PR_FALSE;

  do {
    result = aScanner.ReadUntil(aString,aEndCondition,PR_FALSE);
    if(NS_SUCCEEDED(result)) {
      // Zero-initialized: the status of this Peek is not checked, so if it
      // ever fails we must not compare an indeterminate value below. With
      // 0 we simply fall into the final branch and stop.
      PRUnichar ch = 0;
      aScanner.Peek(ch);
      if(ch == kAmpersand) {
        result = ConsumeEntity(aString,aScanner,aFlag);
      }
      else if(ch == kCR && aAllowNewlines) {
        aScanner.GetChar(ch);
        result = aScanner.Peek(ch);
        if (NS_SUCCEEDED(result)) {
          nsSubstring &writable = aString.writable();
          if(ch == kNewLine) {
            // "\r\n" counts as a single newline.
            writable.AppendLiteral("\r\n");
            aScanner.GetChar(ch);
          }
          else {
            writable.Append(PRUnichar('\r'));
          }
          ++aNewlineCount;
        }
      }
      else if(ch == kNewLine && aAllowNewlines) {
        aScanner.GetChar(ch);
        aString.writable().Append(PRUnichar('\n'));
        ++aNewlineCount;
      }
      else {
        done = PR_TRUE;
      }
    }
  } while (NS_SUCCEEDED(result) && !done);

  return result;
}
/**************************************************************
And now for the token classes...
**************************************************************/
/*
 * Constructor from tag id; the id is forwarded unchanged to the CToken
 * base class.
 *
 * @param aTag -- tag id for this token
 */
CHTMLToken::CHTMLToken(eHTMLTags aTag)
  : CToken(aTag)
{
}
/* Destructor; the body does no work of its own. */
CHTMLToken::~CHTMLToken()
{
}
/*
 * Constructor from tag id. The token starts out non-empty-flagged
 * (mEmpty == PR_FALSE) with unknown container info.
 *
 * @param aTag -- tag id for this start token
 */
CStartToken::CStartToken(eHTMLTags aTag)
  : CHTMLToken(aTag)
{
  mContainerInfo = eFormUnknown;
  mEmpty = PR_FALSE;
#if defined(DEBUG)
  mAttributed = PR_FALSE;
#endif
}
/*
 * Constructor from tag name. The tag id is left as eHTMLTag_unknown and
 * the name is copied into the token's text value.
 *
 * @param aName -- tag name to store
 */
CStartToken::CStartToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
  mTextValue.Assign(aName);
  mContainerInfo = eFormUnknown;
  mEmpty = PR_FALSE;
#if defined(DEBUG)
  mAttributed = PR_FALSE;
#endif
}
/*
 * Constructor from both tag name and tag id.
 *
 * @param aName -- tag name to copy into the token's text value
 * @param aTag -- tag id for this start token
 */
CStartToken::CStartToken(const nsAString& aName, eHTMLTags aTag)
  : CHTMLToken(aTag)
{
  mTextValue.Assign(aName);
  mContainerInfo = eFormUnknown;
  mEmpty = PR_FALSE;
#if defined(DEBUG)
  mAttributed = PR_FALSE;
#endif
}
/*
 * Returns the type id (the tag type) for this token. If the stored id is
 * still eHTMLTag_unknown, it is looked up from the token's text value and
 * cached first.
 *
 * @return tag id of this start tag
 */
PRInt32 CStartToken::GetTypeID()
{
  if (mTypeID != eHTMLTag_unknown) {
    return mTypeID;
  }
  mTypeID = nsHTMLTags::LookupTag(mTextValue);
  return mTypeID;
}
/*
 * Identifies this token as a start token.
 *
 * @return eToken_start
 */
PRInt32 CStartToken::GetTokenType()
{
  return eToken_start;
}
/*
 * Stores the "empty" flag for this start token.
 *
 * @param aValue -- new value of the flag
 */
void CStartToken::SetEmpty(PRBool aValue)
{
  mEmpty = aValue;
}
/*
 * Reports the "empty" flag previously stored by the constructor or
 * SetEmpty().
 *
 * @return current value of the flag
 */
PRBool CStartToken::IsEmpty()
{
  return mEmpty;
}
/*
 * Consume the identifier portion of the start tag.
 *
 * By the time we get here the '<' has already been consumed. We read the
 * tag identifier and (outside view-source) the whitespace after it.
 * NOTE: the tag attributes are not consumed here, nor is the '>'.
 *
 * @param aChar -- last char consumed from stream (not used here)
 * @param aScanner -- controller of underlying input source
 * @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
 * @return error result
 */
nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  const PRBool viewingSource = (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) != 0;

  // Both the HTML and the non-HTML path start by reading the identifier.
  nsScannerSharedSubstring tagIdent;
  nsresult result = aScanner.ReadTagIdentifier(tagIdent);

  if (aFlag & NS_IPARSER_FLAG_HTML) {
    mTypeID = (PRInt32)nsHTMLTags::LookupTag(tagIdent.str());
    // Only keep the original tag string if this is user-defined or if we
    // are viewing source.
    if (eHTMLTag_userdefined == mTypeID || viewingSource) {
      mTextValue = tagIdent.str();
    }
  }
  else {
    // Always keep the tag string, and look the id up from it.
    mTextValue = tagIdent.str();
    mTypeID = nsHTMLTags::LookupTag(mTextValue);
  }

  if (NS_SUCCEEDED(result) && !viewingSource) {
    result = aScanner.SkipWhitespace(mNewlineCount);
  }

  if (kEOF == result && !aScanner.IsIncremental()) {
    // No more data will arrive, so take what we can get.
    result = NS_OK;
  }

  return result;
}
/*
 * Returns the tag name of this token. For tag ids strictly between
 * eHTMLTag_unknown and eHTMLTag_text, an empty text value is first filled
 * in from the tag table.
 *
 * @return the token's text value
 */
const nsSubstring& CStartToken::GetStringValue()
{
  const PRBool isKnownTag =
    (mTypeID > eHTMLTag_unknown) && (mTypeID < eHTMLTag_text);
  if (isKnownTag && mTextValue.Length() == 0) {
    mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
  }
  return mTextValue;
}
/*
 * Replaces the contents of anOutputString with the source form of this
 * start tag (see AppendSourceTo).
 *
 * @param anOutputString -- will receive the result
 */
void CStartToken::GetSource(nsString& anOutputString)
{
  // Start from an empty string, then reuse the appending variant.
  anOutputString.Truncate();
  this->AppendSourceTo(anOutputString);
}
/*
 * Appends "<name>" to the output string, where name is the stored text
 * value if there is one and the tag-table name for the type id otherwise.
 *
 * @param anOutputString -- the source text is appended to this string
 */
void CStartToken::AppendSourceTo(nsAString& anOutputString)
{
  anOutputString.Append(PRUnichar('<'));

  /*
   * Watch out for Bug 15204
   */
  if (mTextValue.IsEmpty()) {
    anOutputString.Append(GetTagName(mTypeID));
  }
  else {
    anOutputString.Append(mTextValue);
  }

  anOutputString.Append(PRUnichar('>'));
}
/*
 * Constructor from tag id.
 *
 * @param aTag -- tag id for this end token
 */
CEndToken::CEndToken(eHTMLTags aTag)
  : CHTMLToken(aTag)
{
}
/*
 * Constructor from tag name; the tag id is left as eHTMLTag_unknown.
 *
 * @param aName -- tag name to copy into the token's text value
 */
CEndToken::CEndToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
  mTextValue.Assign(aName);
}
/*
 * Constructor from both tag name and tag id.
 *
 * @param aName -- tag name to copy into the token's text value
 * @param aTag -- tag id for this end token
 */
CEndToken::CEndToken(const nsAString& aName, eHTMLTags aTag)
  : CHTMLToken(aTag)
{
  mTextValue.Assign(aName);
}
/*
 * Consume the identifier portion of the end tag, plus (outside
 * view-source) the whitespace that follows it.
 *
 * @param aChar -- last char consumed from stream (not used here)
 * @param aScanner -- controller of underlying input source
 * @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
 * @return error result
 */
nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  const PRBool viewingSource = (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) != 0;

  // Both the HTML and the non-HTML path start by reading the identifier.
  nsScannerSharedSubstring tagIdent;
  nsresult result = aScanner.ReadTagIdentifier(tagIdent);

  if (aFlag & NS_IPARSER_FLAG_HTML) {
    mTypeID = (PRInt32)nsHTMLTags::LookupTag(tagIdent.str());
    // Only keep the original tag string if this is user-defined or if we
    // are viewing source.
    if (eHTMLTag_userdefined == mTypeID || viewingSource) {
      mTextValue = tagIdent.str();
    }
  }
  else {
    // Always keep the tag string, and look the id up from it.
    mTextValue = tagIdent.str();
    mTypeID = nsHTMLTags::LookupTag(mTextValue);
  }

  if (NS_SUCCEEDED(result) && !viewingSource) {
    result = aScanner.SkipWhitespace(mNewlineCount);
  }

  if (kEOF == result && !aScanner.IsIncremental()) {
    // No more data will arrive, so take what we can get.
    result = NS_OK;
  }

  return result;
}
/*
 * Asks the token to determine the <i>HTMLTag type</i> of the token.
 * If the stored id is still eHTMLTag_unknown, the tag name is looked up in
 * the tag dictionary and the result cached; a lookup result of "dir" or
 * "menu" is stored as "ul" instead.
 *
 * @return eHTMLTag id of this endtag
 */
PRInt32 CEndToken::GetTypeID()
{
  if (mTypeID != eHTMLTag_unknown) {
    return mTypeID;
  }

  mTypeID = nsHTMLTags::LookupTag(mTextValue);
  if (mTypeID == eHTMLTag_dir || mTypeID == eHTMLTag_menu) {
    mTypeID = eHTMLTag_ul;
  }
  return mTypeID;
}
/*
 * Identifies this token as an end token.
 *
 * @return eToken_end
 */
PRInt32 CEndToken::GetTokenType()
{
  return eToken_end;
}
/*
 * Returns the tag name of this token. For tag ids strictly between
 * eHTMLTag_unknown and eHTMLTag_text, an empty text value is first filled
 * in from the tag table.
 *
 * @return the token's text value
 */
const nsSubstring& CEndToken::GetStringValue()
{
  const PRBool isKnownTag =
    (mTypeID > eHTMLTag_unknown) && (mTypeID < eHTMLTag_text);
  if (isKnownTag && mTextValue.Length() == 0) {
    mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
  }
  return mTextValue;
}
/*
 * Replaces the contents of anOutputString with the source form of this
 * end tag (see AppendSourceTo).
 *
 * @param anOutputString -- will receive the result
 */
void CEndToken::GetSource(nsString& anOutputString)
{
  // Start from an empty string, then reuse the appending variant.
  anOutputString.Truncate();
  this->AppendSourceTo(anOutputString);
}
/*
 * Appends "</name>" to the output string, where name is the stored text
 * value if there is one and the tag-table name for the type id otherwise.
 *
 * @param anOutputString -- the source text is appended to this string
 */
void CEndToken::AppendSourceTo(nsAString& anOutputString)
{
  anOutputString.AppendLiteral("</");

  if (mTextValue.IsEmpty()) {
    anOutputString.Append(GetTagName(mTypeID));
  }
  else {
    anOutputString.Append(mTextValue);
  }

  anOutputString.Append(PRUnichar('>'));
}
/*
 * Default constructor; creates a text token (tag id eHTMLTag_text) with no
 * text bound to it here.
 */
CTextToken::CTextToken()
  : CHTMLToken(eHTMLTag_text)
{
}
/*
 * String based constructor; creates a text token (tag id eHTMLTag_text)
 * whose text value is rebound to the given string.
 *
 * @param aName -- string to init the token text with
 */
CTextToken::CTextToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_text)
{
  mTextValue.Rebind(aName);
}
/*
 * Identifies this token as a text token.
 *
 * @return eToken_text
 */
PRInt32 CTextToken::GetTokenType()
{
  return eToken_text;
}
/*
 * @return the length of the text currently bound to this token
 */
PRInt32 CTextToken::GetTextLength()
{
  const PRInt32 textLength = mTextValue.Length();
  return textLength;
}
/*
 * Consume as much clear text from scanner as possible.
 *
 * Scanning stops at the first '&' or '<' after the initial character.
 * Newlines are kept in the token: a lone "\r" is rewritten in the scanner
 * buffer to "\n", "\r\n" is kept as-is, and every newline sequence bumps
 * mNewlineCount once. The token text is bound directly to the scanner
 * range [origin, end).
 *
 * @param aChar -- last char consumed from stream (reused as scratch)
 * @param aScanner -- controller of underlying input source
 * @param aFlag -- not used by this method
 * @return error result
 */
nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag) {
  static const PRUnichar theTerminalsChars[] =
    { PRUnichar('\n'), PRUnichar('\r'), PRUnichar('&'), PRUnichar('<'),
      PRUnichar(0) };
  static const nsReadEndCondition theEndCondition(theTerminalsChars);
  nsresult result=NS_OK;
  PRBool done=PR_FALSE;
  nsScannerIterator origin, start, end;

  // Start scanning after the first character, because we know it to
  // be part of this text token (we wouldn't have come here if it weren't)
  aScanner.CurrentPosition(origin);
  start = origin;
  aScanner.EndReading(end);
  NS_ASSERTION(start != end, "Calling CTextToken::Consume when already at the "
                             "end of a document is a bad idea.");
  aScanner.SetPosition(++start);

  while((NS_OK==result) && (!done)) {
    result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);
    if(NS_OK==result) {
      result=aScanner.Peek(aChar);
      if(((kCR==aChar) || (kNewLine==aChar)) && (NS_OK==result)) {
        result=aScanner.GetChar(aChar); //strip off the char
        // Zero-initialized (as in CMarkupDeclToken::Consume): the Peek
        // below may fail at the end of the buffer, and theNextChar is
        // still compared against kLF afterwards.
        PRUnichar theNextChar = 0;
        result=aScanner.Peek(theNextChar); //then see what's next.
        switch(aChar) {
          case kCR:
            if(kLF==theNextChar) {
              // If the "\r" is followed by a "\n", don't replace it and
              // let it be ignored by the layout system
              end.advance(2);
              result=aScanner.GetChar(theNextChar);
            }
            else {
              // If it standalone, replace the "\r" with a "\n" so that
              // it will be considered by the layout system
              aScanner.ReplaceCharacter(end, kLF);
              ++end;
            }
            ++mNewlineCount;
            break;
          case kLF:
            ++end;
            ++mNewlineCount;
            break;
        } //switch
      }
      else done=PR_TRUE;
    }
  }

  aScanner.BindSubstring(mTextValue, origin, end);
  return result;
}
/*
 * Consume as much clear text from scanner as possible.
 * The scanner is left on the < of the perceived end tag.
 *
 * @param aConservativeConsume -- controls our handling of content with no
 * terminating string.
 * @param aIgnoreComments -- whether or not we should take comments into
 * account in looking for the end tag.
 * @param aScanner -- controller of underlying input source
 * @param aEndTagName -- the terminal tag name (without the leading "</").
 * @param aFlag -- dtd modes and such.
 * @param aFlushTokens -- set to PR_TRUE if we found the terminal tag; left
 * untouched otherwise.
 * @return NS_OK if the terminal tag was found; kFakeEndTag if it was not
 * found and the scanner is not incremental; kEOF if it was not found and
 * the scanner is incremental.
 */
nsresult CTextToken::ConsumeCharacterData(PRBool aConservativeConsume,
PRBool aIgnoreComments,
nsScanner& aScanner,
const nsAString& aEndTagName,
PRInt32 aFlag,
PRBool& aFlushTokens) {
nsresult result=NS_OK;
nsScannerIterator theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos;
PRBool done=PR_FALSE;
PRBool theLastIteration=PR_FALSE;
aScanner.CurrentPosition(theStartOffset);
theCurrOffset = theStartOffset;
aScanner.EndReading(endPos);
// endPos doubles as the "not found yet" marker for these three positions.
theTermStrPos = theStartCommentPos = theAltTermStrPos = endPos;
// ALGORITHM: *** The performance is based on correctness of the document ***
// 1. Look for a '<' character. This could be
// a) Start of a comment (<!--), b) Start of the terminal string, or c) a start of a tag.
// We are interested in a) and b). c) is ignored because in CDATA we don't care for tags.
// NOTE: Technically speaking in CDATA we should ignore the comments too!! But for compatibility
// we don't.
// 2. Having the offset, for '<', search for the terminal string from there on and record its offset.
// 3. From the same '<' offset also search for start of a comment '<!--'. If found search for
// end comment '-->' between the terminal string and '<!--'. If you did not find the end
// comment, then we have a malformed document, i.e., this section has a premature terminal string
// Ex. <SCRIPT><!-- document.write('</SCRIPT>') //--> </SCRIPT>. But record terminal string's
// offset if this is the first premature terminal string, and update the current offset to the terminal
// string (premature) offset and goto step 1.
// 4. Amen...If you found a terminal string and '-->'. Otherwise goto step 1.
// 5. If the end of the document is reached and if we still don't have the condition in step 4. then
// assume that the premature terminal string is the actual terminal string and goto step 1. This
// will be our last iteration. If there is no premature terminal string and we're being
// conservative in our consumption (aConservativeConsume), then don't consume anything
// from the scanner. Otherwise, we consume all the way until the end (for <xmp>).
NS_NAMED_LITERAL_STRING(ltslash, "</");
const nsString theTerminalString = ltslash + aEndTagName;
PRUint32 termStrLen=theTerminalString.Length();
while((result == NS_OK) && !done) {
PRBool found = PR_FALSE;
nsScannerIterator gtOffset,ltOffset = theCurrOffset;
// Walk every '<' that still has at least termStrLen characters after it.
while (FindCharInReadable(PRUnichar(kLessThan), ltOffset, endPos) &&
((PRUint32)ltOffset.size_forward() >= termStrLen ||
Distance(ltOffset, endPos) >= termStrLen)) {
// Make a copy of the (presumed) end tag and
// do a case-insensitive comparison
nsScannerIterator start(ltOffset), end(ltOffset);
end.advance(termStrLen);
// The terminal string must be followed by the end of the buffer, '>',
// or one of the whitespace-like characters listed here.
if (CaseInsensitiveFindInReadable(theTerminalString,start,end) &&
(end == endPos || (*end == '>' || *end == ' ' ||
*end == '\t' || *end == '\n' ||
*end == '\r' || *end == '\b'))) {
gtOffset = end;
// Note that aIgnoreComments is only not set for <script>. We don't
// want to execute scripts that aren't in the form of: <script\s.*>
if ((end == endPos && aIgnoreComments) ||
FindCharInReadable(PRUnichar(kGreaterThan), gtOffset, endPos)) {
found = PR_TRUE;
theTermStrPos = start;
}
break;
}
ltOffset.advance(1);
}
if (found && theTermStrPos != endPos) {
if(!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) &&
!theLastIteration && !aIgnoreComments) {
nsScannerIterator endComment(ltOffset);
endComment.advance(5);
if ((theStartCommentPos == endPos) &&
FindInReadable(NS_LITERAL_STRING("<!--"), theCurrOffset, endComment)) {
theStartCommentPos = theCurrOffset;
}
if (theStartCommentPos != endPos) {
// Search for --> between <!-- and </TERMINALSTRING>.
theCurrOffset = theStartCommentPos;
nsScannerIterator terminal(theTermStrPos);
if (!RFindInReadable(NS_LITERAL_STRING("-->"),
theCurrOffset, terminal)) {
// If you're here it means that we have a bogus terminal string.
// Even though it is bogus, the position of the terminal string
// could be helpful in case we hit the rock bottom.
if (theAltTermStrPos == endPos) {
// But we only want to remember the first bogus terminal string.
theAltTermStrPos = theTermStrPos;
}
// We did not find '-->' so keep searching for terminal string.
theCurrOffset = theTermStrPos;
theCurrOffset.advance(termStrLen);
continue;
}
}
}
// The token text is everything from the start up to the terminal string.
aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos);
aScanner.SetPosition(ltOffset);
// We found </SCRIPT> or </STYLE>...permit flushing -> Ref: Bug 22485
aFlushTokens=PR_TRUE;
done = PR_TRUE;
}
else {
// We end up here if:
// a) when the buffer runs out of data.
// b) when the terminal string is not found.
if(!aScanner.IsIncremental()) {
if(theAltTermStrPos != endPos && aConservativeConsume) {
// If you're here it means..we hit the rock bottom and therefore switch to plan B.
theCurrOffset = theAltTermStrPos;
theLastIteration = PR_TRUE;
}
else if (!aConservativeConsume) {
// Not conservative: take everything up to the end of the buffer.
done = PR_TRUE; // Do this to fix Bug. 35456
result = kFakeEndTag;
aScanner.BindSubstring(mTextValue, theStartOffset, endPos);
aScanner.SetPosition(endPos);
}
else {
done = PR_TRUE;
result = kFakeEndTag;
// We need to bind our value to a non-empty string.
// NOTE(review): the range bound here starts and ends at theStartOffset,
// which looks like an empty range -- confirm what BindSubstring does
// with it.
aScanner.BindSubstring(mTextValue, theStartOffset, theStartOffset);
}
}
else {
// Incremental scanner: report EOF to the caller.
result=kEOF;
}
}
}
return result;
}
/*
 * Consume as much clear text from scanner as possible. Reducing entities.
 * The scanner is left on the < of the perceived end tag.
 *
 * @param aDiscardFirstNewline -- if PR_TRUE (and not in view-source), a
 * leading "\r", "\n" or "\r\n" in the consumed content is dropped.
 * @param aConservativeConsume -- controls our handling of content with no
 * terminating string.
 * @param aScanner -- controller of underlying input source
 * @param aEndTagName -- the terminal tag name (without the leading "</").
 * @param aFlag -- dtd modes and such.
 * @param aFound -- set to PR_TRUE if we found the terminal tag or ran out
 * of data on a non-incremental scanner; set to PR_FALSE on other failures.
 * @return error result
 */
nsresult CTextToken::ConsumeParsedCharacterData(PRBool aDiscardFirstNewline,
PRBool aConservativeConsume,
nsScanner& aScanner,
const nsAString& aEndTagName,
PRInt32 aFlag,
PRBool& aFound)
{
// This function is fairly straightforward except if there is no terminating
// string. If there is, we simply loop through all of the entities, reducing
// them as necessary and skipping over non-terminal strings starting with <.
// If there is *no* terminal string, then we examine aConservativeConsume.
// If we want to be conservative, we backtrack to the first place in the
// document that looked like the end of PCDATA (i.e., the first tag). This
// is for compatibility and so we don't regress bug 42945. If we are not
// conservative, then we consume everything, all the way up to the end of
// the document.
static const PRUnichar terminalChars[] = {
PRUnichar('&'), PRUnichar('<'),
PRUnichar(0)
};
static const nsReadEndCondition theEndCondition(terminalChars);
nsScannerIterator currPos, endPos, altEndPos;
// Length of theContent at the moment altEndPos was recorded.
PRUint32 truncPos = 0;
aScanner.CurrentPosition(currPos);
aScanner.EndReading(endPos);
// altEndPos == endPos means "no fallback stopping point recorded yet".
altEndPos = endPos;
nsScannerSharedSubstring theContent;
PRUnichar ch = 0;
NS_NAMED_LITERAL_STRING(commentStart, "<!--");
NS_NAMED_LITERAL_STRING(ltslash, "</");
const nsString theTerminalString = ltslash + aEndTagName;
PRUint32 termStrLen = theTerminalString.Length();
PRUint32 commentStartLen = commentStart.Length();
nsresult result = NS_OK;
// Note that if we're already at the end of the document, the ConsumeUntil
// will fail, and we'll do the right thing.
do {
result = ConsumeUntil(theContent, mNewlineCount, aScanner,
theEndCondition, PR_TRUE, aFlag);
if (aDiscardFirstNewline &&
(NS_SUCCEEDED(result) || !aScanner.IsIncremental()) &&
!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
// Check if the very first character is a newline, and if so discard it.
// Note that we don't want to discard it in view source!
// Also note that this has to happen here (as opposed to before the
// ConsumeUntil) because we have to expand any entities.
// XXX It would be nice to be able to do this without calling
// writable()!
const nsSubstring &firstChunk = theContent.str();
if (!firstChunk.IsEmpty()) {
// Number of leading characters to drop (0, 1 or 2).
PRUint32 where = 0;
PRUnichar newline = firstChunk.First();
if (newline == kCR || newline == kNewLine) {
++where;
if (firstChunk.Length() > 1) {
if (newline == kCR && firstChunk.CharAt(1) == kNewLine) {
// Handle \r\n = 1 newline.
++where;
}
// Note: \n\r = 2 newlines.
}
}
if (where != 0) {
theContent.writable() = Substring(firstChunk, where);
}
}
}
// Only the first pass through the loop may discard a newline.
aDiscardFirstNewline = PR_FALSE;
if (NS_FAILED(result)) {
if (kEOF == result && !aScanner.IsIncremental()) {
aFound = PR_TRUE; // this is as good as it gets.
result = kFakeEndTag;
if (aConservativeConsume && altEndPos != endPos) {
// We ran out of room looking for a </title>. Go back to the first
// place that looked like a tag and use that as our stopping point.
theContent.writable().Truncate(truncPos);
aScanner.SetPosition(altEndPos, PR_FALSE, PR_TRUE);
}
// else we take everything we consumed.
mTextValue.Rebind(theContent.str());
}
else {
aFound = PR_FALSE;
}
return result;
}
aScanner.CurrentPosition(currPos);
aScanner.GetChar(ch); // this character must be '&' or '<'
if (ch == kLessThan && altEndPos == endPos) {
// Keep this position in case we need it for later.
altEndPos = currPos;
truncPos = theContent.str().Length();
}
// Does the text at currPos spell the terminal string ("</" + tag name)?
if (Distance(currPos, endPos) >= termStrLen) {
nsScannerIterator start(currPos), end(currPos);
end.advance(termStrLen);
if (CaseInsensitiveFindInReadable(theTerminalString,start,end)) {
if (end != endPos && (*end == '>' || *end == ' ' ||
*end == '\t' || *end == '\n' ||
*end == '\r' || *end == '\b')) {
aFound = PR_TRUE;
mTextValue.Rebind(theContent.str());
// Note: This SetPosition() is actually going backwards from the
// scanner's mCurrentPosition (so we pass aReverse == PR_TRUE). This
// is because we call GetChar() above after we get the current
// position.
aScanner.SetPosition(currPos, PR_FALSE, PR_TRUE);
break;
}
}
}
// IE only consumes <!-- --> as comments in PCDATA.
if (Distance(currPos, endPos) >= commentStartLen) {
nsScannerIterator start(currPos), end(currPos);
end.advance(commentStartLen);
if (CaseInsensitiveFindInReadable(commentStart,start,end)) {
CCommentToken consumer; // stack allocated.
// CCommentToken expects us to be on the '-'
aScanner.SetPosition(currPos.advance(2));
// In quirks mode we consume too many things as comments, so pretend
// that we're not by modifying aFlag.
result = consumer.Consume(*currPos, aScanner,
(aFlag & ~NS_IPARSER_FLAG_QUIRKS_MODE) | NS_IPARSER_FLAG_STRICT_MODE);
if (kEOF == result) {
return kEOF; // this can only happen if we're really out of space.
}
else if (kNotAComment == result) {
// Fall through and consume this as text.
aScanner.CurrentPosition(currPos);
aScanner.SetPosition(currPos.advance(1));
}
else {
// A real comment: copy its source text into the content and go around
// again without appending ch.
consumer.AppendSourceTo(theContent.writable());
mNewlineCount += consumer.GetNewlineCount();
continue;
}
}
}
// If the loop condition below ends the loop, kEOF is what we return;
// otherwise the next ConsumeUntil call overwrites it.
result = kEOF;
// We did not find the terminal string yet so
// include the character that stopped consumption.
theContent.writable().Append(ch);
} while (currPos != endPos);
return result;
}
/*
 * Copies the text bound to this token into aStr.
 *
 * @param aStr -- receives the copy
 */
void CTextToken::CopyTo(nsAString& aStr)
{
  nsScannerIterator first;
  nsScannerIterator last;

  mTextValue.BeginReading(first);
  mTextValue.EndReading(last);

  CopyUnicodeTo(first, last, aStr);
}
/*
 * @return the token's text, as exposed by the bound substring's AsString()
 */
const nsSubstring& CTextToken::GetStringValue()
{
  return mTextValue.AsString();
}
/*
 * Binds this token's text to the scanner range [aStart, aEnd).
 *
 * @param aScanner -- scanner that performs the binding; must not be null
 * @param aStart -- start of the range
 * @param aEnd -- end of the range
 */
void CTextToken::Bind(nsScanner* aScanner,
                      nsScannerIterator& aStart,
                      nsScannerIterator& aEnd)
{
  aScanner->BindSubstring(mTextValue, aStart, aEnd);
}
/*
 * Rebinds this token's text to the given string.
 *
 * @param aStr -- the new text
 */
void CTextToken::Bind(const nsAString& aStr)
{
  mTextValue.Rebind(aStr);
}
/*
 * Constructor from tag id.
 *
 * @param aTag -- tag id for this token
 */
CCDATASectionToken::CCDATASectionToken(eHTMLTags aTag)
  : CHTMLToken(aTag)
{
}
/*
 * String based constructor; the tag id is left as eHTMLTag_unknown.
 *
 * @param aName -- string to copy into the token's text value
 */
CCDATASectionToken::CCDATASectionToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
  mTextValue.Assign(aName);
}
/*
 * Identifies this token as a CDATA section token.
 *
 * @return eToken_cdatasection
 */
PRInt32 CCDATASectionToken::GetTokenType()
{
  return eToken_cdatasection;
}
/*
 * Consume as much marked text from scanner as possible.
 *
 * Newlines are normalized to "\n" in the token text ("\r\n" and a lone
 * "\r" each become one "\n") and counted in mNewlineCount.
 *
 * @update rgess 12/15/99: had to handle case: "<![ ! IE 5]>", in addition to "<![..[..]]>".
 * @param aChar -- last char consumed from stream (reused as scratch)
 * @param aScanner -- controller of underlying input source
 * @param aFlag -- only NS_IPARSER_FLAG_VIEW_SOURCE is examined here
 * @return error result; a kEOF on a non-incremental scanner is turned into
 * NS_OK with mInError set
 */
nsresult CCDATASectionToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
static const PRUnichar theTerminalsChars[] =
{ PRUnichar('\r'), PRUnichar('\n'), PRUnichar(']'), PRUnichar(0) };
static const nsReadEndCondition theEndCondition(theTerminalsChars);
nsresult result=NS_OK;
PRBool done=PR_FALSE;
while((NS_OK==result) && (!done)) {
result=aScanner.ReadUntil(mTextValue,theEndCondition,PR_FALSE);
if(NS_OK==result) {
result=aScanner.Peek(aChar);
if((kCR==aChar) && (NS_OK==result)) {
result=aScanner.GetChar(aChar); //strip off the \r
result=aScanner.Peek(aChar); //then see what's next.
if(NS_OK==result) {
switch(aChar) {
case kCR:
// Two carriage returns in a row become two newlines.
result=aScanner.GetChar(aChar); //strip off the \r
mTextValue.AppendLiteral("\n\n");
mNewlineCount += 2;
break;
case kNewLine:
//which means we saw \r\n, which becomes \n
result=aScanner.GetChar(aChar); //strip off the \n
//now fall through on purpose...
default:
mTextValue.AppendLiteral("\n");
mNewlineCount++;
break;
} //switch
} //if
}
else if (kNewLine == aChar) {
result=aScanner.GetChar(aChar);
mTextValue.Append(aChar);
++mNewlineCount;
}
else if (kRightSquareBracket == aChar) {
// canClose records whether we saw "]]" rather than a single ']'.
PRBool canClose = PR_FALSE;
result=aScanner.GetChar(aChar); //strip off the ]
mTextValue.Append(aChar);
result=aScanner.Peek(aChar); //then see what's next.
if((NS_OK==result) && (kRightSquareBracket==aChar)) {
result=aScanner.GetChar(aChar); //strip off the second ]
mTextValue.Append(aChar);
canClose = PR_TRUE;
}
// The goal here is to not lose data from the page when encountering
// markup like: <![endif]-->. This means that in normal parsing, we
// allow ']' to end the marked section and just drop everything between
// it and the '>'. In view-source mode, we cannot drop things on the
// floor like that. In fact, to make view-source of XML with script in
// CDATA sections at all bearable, we need to somewhat enforce the ']]>'
// terminator for marked sections. So make the tokenization somewhat
// different when in view-source _and_ dealing with a CDATA section.
PRBool inCDATA = (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) &&
StringBeginsWith(mTextValue, NS_LITERAL_STRING("[CDATA["));
if (inCDATA) {
result = aScanner.Peek(aChar);
} else {
nsAutoString dummy; // skip any bad data
result=aScanner.ReadUntil(dummy,kGreaterThan,PR_FALSE);
}
// Outside the view-source CDATA case any ']' closes the section; inside
// it only "]]" immediately followed by '>' does.
if (NS_OK==result &&
(!inCDATA || (canClose && kGreaterThan == aChar))) {
result=aScanner.GetChar(aChar); //strip off the >
done=PR_TRUE;
}
}
else done=PR_TRUE;
}
}
if (kEOF == result && !aScanner.IsIncremental()) {
// We ran out of space looking for the end of this CDATA section.
// In order to not completely lose the entire section, treat everything
// until the end of the document as part of the CDATA section and let
// the DTD handle it.
mInError = PR_TRUE;
result = NS_OK;
}
return result;
}
/*
 * @return the text accumulated for this CDATA section
 */
const nsSubstring& CCDATASectionToken::GetStringValue()
{
  return mTextValue;
}
/*
 * Default constructor; creates a markup declaration token (tag id
 * eHTMLTag_markupDecl) with no text bound to it here.
 */
CMarkupDeclToken::CMarkupDeclToken()
  : CHTMLToken(eHTMLTag_markupDecl)
{
}
/*
 * String based constructor; creates a markup declaration token whose text
 * value is rebound to the given string.
 *
 * @param aName -- string to init the token text with
 */
CMarkupDeclToken::CMarkupDeclToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_markupDecl)
{
  mTextValue.Rebind(aName);
}
/*
 * Identifies this token as a markup declaration token.
 *
 * @return eToken_markupDecl
 */
PRInt32 CMarkupDeclToken::GetTokenType()
{
  return eToken_markupDecl;
}
/*
 * Consume as much declaration from scanner as possible.
 * Declaration is a markup declaration of ELEMENT, ATTLIST, ENTITY or
 * NOTATION, which can span multiple lines and ends in >.
 * A '>' inside a single- or double-quoted string does not end the
 * declaration. The token text is bound to the scanner range up to, but not
 * including, the closing '>'.
 *
 * @param aChar -- last char consumed from stream (reused as scratch)
 * @param aScanner -- controller of underlying input source
 * @param aFlag -- not used by this method
 * @return error result; on kEOF mInError is set, and for a non-incremental
 * scanner the kEOF is reported as NS_OK
 */
nsresult CMarkupDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
static const PRUnichar theTerminalsChars[] =
{ PRUnichar('\n'), PRUnichar('\r'), PRUnichar('\''), PRUnichar('"'),
PRUnichar('>'),
PRUnichar(0) };
static const nsReadEndCondition theEndCondition(theTerminalsChars);
nsresult result=NS_OK;
PRBool done=PR_FALSE;
// The quote character we are currently inside, or 0 when outside quotes.
PRUnichar quote=0;
nsScannerIterator origin, start, end;
aScanner.CurrentPosition(origin);
start = origin;
while((NS_OK==result) && (!done)) {
aScanner.SetPosition(start);
result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);
if(NS_OK==result) {
result=aScanner.Peek(aChar);
if(NS_OK==result) {
PRUnichar theNextChar=0;
if ((kCR==aChar) || (kNewLine==aChar)) {
result=aScanner.GetChar(aChar); //strip off the char
result=aScanner.Peek(theNextChar); //then see what's next.
}
switch(aChar) {
case kCR:
// result=aScanner.GetChar(aChar);
if(kLF==theNextChar) {
// If the "\r" is followed by a "\n", don't replace it and
// let it be ignored by the layout system
end.advance(2);
result=aScanner.GetChar(theNextChar);
}
else {
// If it standalone, replace the "\r" with a "\n" so that
// it will be considered by the layout system
aScanner.ReplaceCharacter(end, kLF);
++end;
}
++mNewlineCount;
break;
case kLF:
++end;
++mNewlineCount;
break;
case '\'':
case '"':
++end;
// A quote either closes the matching open quote, is ignored inside the
// other kind of quote, or opens a new quoted string.
if (quote) {
if (quote == aChar) {
quote = 0;
}
} else {
quote = aChar;
}
break;
case kGreaterThan:
if (quote) {
// A '>' inside quotes is just part of the text.
++end;
} else {
start = end;
++start; // Note that start is wrong after this, we just avoid temp var
aScanner.SetPosition(start); // Skip the >
done=PR_TRUE;
}
break;
default:
NS_ABORT_IF_FALSE(0,"should not happen, switch is missing cases?");
break;
} //switch
// Resume the next scan from the end of what has been accepted so far.
start = end;
}
else done=PR_TRUE;
} // if read until !ok
} // while
aScanner.BindSubstring(mTextValue, origin, end);
if (kEOF == result) {
mInError = PR_TRUE;
if (!aScanner.IsIncremental()) {
// Hide this EOF.
result = NS_OK;
}
}
return result;
}
/*
 * Accessor for the declaration text bound to this token.
 *
 * @return the bound text, exposed through AsString()
 */
const nsSubstring&
CMarkupDeclToken::GetStringValue(void)
{
  return mTextValue.AsString();
}
/*
 * Default constructor.
 * Creates a comment token tagged eHTMLTag_comment.
 *
 * @update gess 3/25/98
 */
CCommentToken::CCommentToken()
  : CHTMLToken(eHTMLTag_comment)
{
}
/*
 * String based constructor.
 *
 * @update gess 3/25/98
 * @param aName -- string the comment text (mComment) is rebound to
 */
CCommentToken::CCommentToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_comment)
{
  mComment.Rebind(aName);
}
/*
 * Appends the complete comment declaration (mCommentDecl) to the given
 * string.
 *
 * @param anOutputString -- string that receives the appended source
 */
void
CCommentToken::AppendSourceTo(nsAString& anOutputString)
{
  AppendUnicodeTo(mCommentDecl, anOutputString);
}
/*
 * Scans forward from aCurrent looking for a '>' that closes a comment.
 * The scan gives up at aEnd or as soon as two '-' characters have been
 * seen in a row.
 *
 * @param aCurrent -- position at which to start looking
 * @param aEnd -- position at which to stop looking
 * @param aGt -- set to the position of the '>' when one is found
 * @return PR_TRUE if a '>' was found, PR_FALSE otherwise
 */
static PRBool IsCommentEnd(const nsScannerIterator& aCurrent,
                           const nsScannerIterator& aEnd,
                           nsScannerIterator& aGt)
{
  PRInt32 consecutiveDashes = 0;

  for (nsScannerIterator pos = aCurrent;
       pos != aEnd && consecutiveDashes != 2;
       ++pos) {
    const PRUnichar ch = *pos;
    if (kGreaterThan == ch) {
      aGt = pos;
      return PR_TRUE;
    }
    // Any character other than '-' breaks a run of dashes.
    consecutiveDashes = (PRUnichar('-') == ch) ? consecutiveDashes + 1 : 0;
  }

  return PR_FALSE;
}
/*
 * Consume a comment using the strict rules. On entry "<!" has already
 * been consumed.
 *
 * On success mComment is bound to the comment text, mCommentDecl to the
 * whole declaration from "<!" through ">", and the scanner is positioned
 * just after the ">".
 *
 * @param aScanner -- controller of underlying input source
 * @return NS_OK when a complete comment (or a "<!...>" declaration without
 * leading dashes) was consumed; kEOF when no end was found and the scanner
 * is incremental; kNotAComment, with the scanner rewound to the "<",
 * otherwise
 */
nsresult CCommentToken::ConsumeStrictComment(nsScanner& aScanner)
{
// <!--[... -- ... -- ...]*-->
/*********************************************************
NOTE: This algorithm does a fine job of handling comments
when they're formatted per spec, but if they're not
we don't handle them well.
*********************************************************/
nsScannerIterator end, current, gt, lt;
aScanner.EndReading(end);
aScanner.CurrentPosition(current);
// beginData == end doubles as the "no opening '--' seen" marker below.
nsScannerIterator beginData = end;
lt = current;
lt.advance(-2); // <!
// Regular comment must start with <!--
if (current != end && *current == kMinus &&
++current != end && *current == kMinus &&
++current != end) {
nsScannerIterator currentEnd = end;
PRBool balancedComment = PR_FALSE;
static NS_NAMED_LITERAL_STRING(dashes,"--");
beginData = current;
// Walk over each "--" after the opener; every second one is a candidate
// for the end of the comment.
while (FindInReadable(dashes, current, currentEnd)) {
// Step past the "--" that was just found.
current.advance(2);
balancedComment = !balancedComment; // We need to match '--' with '--'
if (balancedComment && IsCommentEnd(current, end, gt)) {
// done
// Back up onto the closing "--" so it is excluded from the comment text.
current.advance(-2);
// Note: it's ok if beginData == current, (we'll copy an empty string)
// and we need to bind mComment anyway.
aScanner.BindSubstring(mComment, beginData, current);
aScanner.BindSubstring(mCommentDecl, lt, ++gt);
aScanner.SetPosition(gt);
return NS_OK;
} else {
// Continue after the last '--'
currentEnd = end;
}
}
}
// If beginData == end, we did not find opening '--'
if (beginData == end) {
// This might have been empty comment: <!>
// Or it could have been something completely bogus like: <!This is foobar>
// Handle both cases below
aScanner.CurrentPosition(current);
beginData = current;
if (FindCharInReadable('>', current, end)) {
aScanner.BindSubstring(mComment, beginData, current);
aScanner.BindSubstring(mCommentDecl, lt, ++current);
aScanner.SetPosition(current);
return NS_OK;
}
}
if (aScanner.IsIncremental()) {
// We got here because we saw the beginning of a comment,
// but not yet the end, and we are still loading the page. In that
// case the return value here will cause us to unwind,
// wait for more content, and try again.
// XXX For performance reasons we should cache where we were, and
// continue from there for next call
return kEOF;
}
// There was no terminating string, parse this comment as text.
aScanner.SetPosition(lt, PR_FALSE, PR_TRUE);
return kNotAComment;
}
/*
 * Consume a comment using the lenient ("quirks") rules. On entry "<!" has
 * already been consumed.
 *
 * Two forms are handled. The long form starts with "<!--" and normally
 * ends at the first ">" that is preceded by "--" or "--!", or that
 * directly follows the opening dashes. The short form is anything else
 * starting with "<!" and ends at the first ">".
 *
 * On success mComment is bound to the comment text, mCommentDecl to the
 * whole declaration starting at "<!", and the scanner is positioned after
 * the declaration.
 *
 * @param aScanner -- controller of underlying input source
 * @return NS_OK when a comment was consumed; kEOF when more data is needed
 * and the scanner is incremental; kNotAComment, with the scanner rewound to
 * the "<", for a short form without any ">" in a non-incremental scanner
 */
nsresult CCommentToken::ConsumeQuirksComment(nsScanner& aScanner)
{
// <![-[-]] ... [[-]-|--!]>
/*********************************************************
NOTE: This algorithm does a fine job of handling comments
commonly used, but it doesn't really consume them
per spec (But then, neither does IE or Nav).
*********************************************************/
nsScannerIterator end, current;
aScanner.EndReading(end);
aScanner.CurrentPosition(current);
// beginData: start of the comment text (moved past leading minuses below).
// beginLastMinus: position of the last leading minus that was skipped.
// bestAltCommentEnd: the first '>' seen, kept as a fallback terminator.
// lt: position of the "<!" that opened the declaration.
nsScannerIterator beginData = current,
beginLastMinus = end,
bestAltCommentEnd = end,
lt = current;
lt.advance(-2); // <!
// When we get here, we have always already consumed <!
// Skip over possible leading minuses
if (current != end && *current == kMinus) {
beginLastMinus = current;
++current;
++beginData;
if (current != end && *current == kMinus) { // <!--
beginLastMinus = current;
++current;
++beginData;
// Long form comment
nsScannerIterator currentEnd = end, gt = end;
// Find the end of the comment
while (FindCharInReadable(kGreaterThan, current, currentEnd)) {
gt = current;
// Remember only the first '>' as the alternate end.
if (bestAltCommentEnd == end) {
bestAltCommentEnd = gt;
}
// Walk backwards from the '>' to see what precedes it, never stepping
// back onto the opening dashes.
--current;
PRBool goodComment = PR_FALSE;
if (current != beginLastMinus && *current == kMinus) { // ->
--current;
if (current != beginLastMinus && *current == kMinus) { // -->
goodComment = PR_TRUE;
--current;
}
} else if (current != beginLastMinus && *current == '!') {
--current;
if (current != beginLastMinus && *current == kMinus) {
--current;
if (current != beginLastMinus && *current == kMinus) { // --!>
--current;
goodComment = PR_TRUE;
}
}
} else if (current == beginLastMinus) {
// The '>' directly follows the opening dashes ("<!-->"): accept it as
// an empty comment.
goodComment = PR_TRUE;
}
if (goodComment) {
// done
// current sits just before the terminator; ++current is the end of the
// comment text.
aScanner.BindSubstring(mComment, beginData, ++current);
aScanner.BindSubstring(mCommentDecl, lt, ++gt);
aScanner.SetPosition(gt);
return NS_OK;
} else {
// try again starting after the last '>'
current = ++gt;
currentEnd = end;
}
} //while
if (aScanner.IsIncremental()) {
// We got here because we saw the beginning of a comment,
// but not yet the end, and we are still loading the page. In that
// case the return value here will cause us to unwind,
// wait for more content, and try again.
// XXX For performance reasons we should cache where we were, and
// continue from there for next call
return kEOF;
}
// If you're here, then we're in a special state.
// The problem at hand is that we've hit the end of the document without finding the normal endcomment delimiter "-->".
// In this case, the first thing we try is to see if we found an alternate endcomment delimiter ">".
// If so, rewind just past that, and use everything up to that point as your comment.
// If not, the document has no end comment and should be treated as one big comment.
gt = bestAltCommentEnd;
aScanner.BindSubstring(mComment, beginData, gt);
// Include the '>' in the declaration only when one was actually found.
if (gt != end) {
++gt;
}
aScanner.BindSubstring(mCommentDecl, lt, gt);
aScanner.SetPosition(gt);
return NS_OK;
}
}
// This could be short form of comment
// Find the end of the comment
current = beginData;
if (FindCharInReadable(kGreaterThan, current, end)) {
nsScannerIterator gt = current;
// Strip a trailing "-", "--", "!", "-!" or "--!" in front of the '>' from
// the comment text, without stepping back past beginData.
if (current != beginData) {
--current;
if (current != beginData && *current == kMinus) { // ->
--current;
if (current != beginData && *current == kMinus) { // -->
--current;
}
} else if (current != beginData && *current == '!') { // !>
--current;
if (current != beginData && *current == kMinus) { // -!>
--current;
if (current != beginData && *current == kMinus) { // --!>
--current;
}
}
}
}
if (current != gt) {
aScanner.BindSubstring(mComment, beginData, ++current);
}
else {
// Bind mComment to an empty string (note that if current == gt,
// then current == beginData). We reach this for <!>
aScanner.BindSubstring(mComment, beginData, current);
}
aScanner.BindSubstring(mCommentDecl, lt, ++gt);
aScanner.SetPosition(gt);
return NS_OK;
}
if (!aScanner.IsIncremental()) {
// This isn't a comment at all, go back to the < and consume as text.
aScanner.SetPosition(lt, PR_FALSE, PR_TRUE);
return kNotAComment;
}
// Wait for more data...
return kEOF;
}
/*
 * Consume the identifier portion of the comment.
 * Note that we've already eaten the "<!" portion.
 *
 * Dispatches to ConsumeStrictComment() when NS_IPARSER_FLAG_STRICT_MODE is
 * set in aFlag and to ConsumeQuirksComment() otherwise. When that succeeds,
 * mNewlineCount is set to the number of newlines in the comment
 * declaration, or to -1 when NS_IPARSER_FLAG_VIEW_SOURCE is set.
 *
 * @update gess 16June2000
 * @param aChar -- last char consumed from stream (unused)
 * @param aScanner -- controller of underlying input source
 * @param aFlag -- parser flags (strict mode, view source, ...)
 * @return error result of the selected comment consumer
 */
nsresult CCommentToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  // Note: enabling strict comment parsing for bug 53011 contradicts bug 2749.
  // The result is taken straight from the selected consumer; it used to be
  // pre-initialised with the boolean PR_TRUE, which is not an nsresult.
  nsresult result = (aFlag & NS_IPARSER_FLAG_STRICT_MODE)
                      ? ConsumeStrictComment(aScanner)
                      : ConsumeQuirksComment(aScanner);

  if (NS_SUCCEEDED(result)) {
    if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
      mNewlineCount = -1;
    } else {
      mNewlineCount = mCommentDecl.CountChar(kNewLine);
    }
  }

  return result;
}
/*
 * Accessor for the comment text (mComment, not the whole declaration).
 *
 * @return the bound comment text, exposed through AsString()
 */
const nsSubstring&
CCommentToken::GetStringValue(void)
{
  return mComment.AsString();
}
/*
 * Identifies this token as a comment.
 *
 * @update gess 3/25/98
 * @return eToken_comment
 */
PRInt32
CCommentToken::GetTokenType(void)
{
  return eToken_comment;
}
/*
 * Default constructor.
 * Creates a newline token tagged eHTMLTag_newline.
 *
 * @update gess 3/25/98
 */
CNewlineToken::CNewlineToken()
  : CHTMLToken(eHTMLTag_newline)
{
}
/*
 * Identifies this token as a newline.
 *
 * @update gess 3/25/98
 * @return eToken_newline
 */
PRInt32
CNewlineToken::GetTokenType(void)
{
  return eToken_newline;
}
// The single "\n" string returned by CNewlineToken::GetStringValue().
// Created by AllocNewline() and destroyed by FreeNewline().
static nsScannerSubstring* gNewlineStr = nsnull;

/*
 * Allocates the shared newline string.
 */
void
CNewlineToken::AllocNewline()
{
  gNewlineStr = new nsScannerSubstring(NS_LITERAL_STRING("\n"));
}
/*
 * Destroys the shared newline string and clears the pointer to it.
 */
void
CNewlineToken::FreeNewline()
{
  // Deleting a null pointer is a no-op, so no explicit check is needed.
  delete gNewlineStr;
  gNewlineStr = nsnull;
}
/**
 * Retrieves the string value of a newline token, which is the shared
 * newline string rather than per-token text.
 *
 * @update gess 3/25/98
 * @return reference to the shared newline string
 */
const nsSubstring&
CNewlineToken::GetStringValue(void)
{
  return gNewlineStr->AsString();
}
/*
 * Consume the second half of a two-character line break, if present.
 * The first character (aChar) has already been read; when the next
 * character completes an LF/CR or CR/LF pair it is consumed as well.
 * mNewlineCount is always set to 1.
 *
 * @update gess 3/25/98
 * @param aChar -- last char consumed from stream
 * @param aScanner -- controller of underlying input source
 * @param aFlag -- not read by this method
 * @return error result; kEOF is reported as NS_OK when the scanner is not
 *         incremental
 */
nsresult CNewlineToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  /*******************************************************************
    Here's what the HTML spec says about newlines:
    "A line break is defined to be a carriage return (&#x000D;),
     a line feed (&#x000A;), or a carriage return/line feed pair.
     All line breaks constitute white space."
   *******************************************************************/
  PRUnichar nextChar;
  nsresult rv = aScanner.Peek(nextChar);

  if (NS_OK == rv) {
    const PRBool lfThenCr = (kNewLine == aChar) && (kCR == nextChar);
    const PRBool crThenLf = (kCR == aChar) && (kNewLine == nextChar);
    if (lfThenCr || crThenLf) {
      // The pair forms one line break: swallow its second character.
      rv = aScanner.GetChar(nextChar);
    }
  }

  if (kEOF == rv && !aScanner.IsIncremental()) {
    // Make sure we don't lose information about this trailing newline.
    rv = NS_OK;
  }

  mNewlineCount = 1;
  return rv;
}
/*
 * Default constructor.
 * Creates an attribute token tagged eHTMLTag_unknown with no pending
 * "equal sign without value" state.
 *
 * @update gess 3/25/98
 */
CAttributeToken::CAttributeToken()
  : CHTMLToken(eHTMLTag_unknown)
{
#ifdef DEBUG
  mLastAttribute = PR_FALSE;
#endif
  mHasEqualWithoutValue = PR_FALSE;
}
/*
 * String based constructor.
 *
 * @update gess 3/25/98
 * @param aName -- string copied into the attribute's value
 */
CAttributeToken::CAttributeToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
#ifdef DEBUG
  mLastAttribute = PR_FALSE;
#endif
  mHasEqualWithoutValue = PR_FALSE;
  mTextValue.writable().Assign(aName);
}
/*
 * Construct an attribute from a key/value pair.
 *
 * @update gess 3/25/98
 * @param aKey -- string the attribute's key is rebound to
 * @param aName -- string copied into the attribute's value
 */
CAttributeToken::CAttributeToken(const nsAString& aKey, const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
#ifdef DEBUG
  mLastAttribute = PR_FALSE;
#endif
  mHasEqualWithoutValue = PR_FALSE;
  mTextValue.writable().Assign(aName);
  mTextKey.Rebind(aKey);
}
/*
 * Identifies this token as an attribute.
 *
 * @update gess 3/25/98
 * @return eToken_attribute
 */
PRInt32
CAttributeToken::GetTokenType(void)
{
  return eToken_attribute;
}
/*
* Removes non-alpha-non-digit characters from the end of a KEY
*
* @update harishd 07/15/99
* @param
* @return
*/
void CAttributeToken::SanitizeKey() {
PRInt32 length=mTextKey.Length();
if(length > 0) {
nsScannerIterator iter, begin, end;
mTextKey.BeginReading(begin);
mTextKey.EndReading(end);
iter = end;
// Look for the first legal character starting from
// the end of the string
do {
--iter;
} while (!nsCRT::IsAsciiAlpha(*iter) &&
!nsCRT::IsAsciiDigit(*iter) &&
(iter != begin));
// If there were any illegal characters, just copy out the
// legal part
if (iter != --end) {
nsAutoString buf;
CopyUnicodeTo(begin, ++iter, buf);
mTextKey.Rebind(buf);
}
}
return;
}
/*
 * Accessor for the attribute's value (not its key).
 *
 * @return reference to the attribute value string
 */
const nsSubstring&
CAttributeToken::GetStringValue(void)
{
  return mTextValue.str();
}
/*
 * Replaces the contents of the given string with this attribute's source
 * text, as produced by AppendSourceTo().
 *
 * @update rickg 6June2000
 * @param anOutputString -- will receive the result
 */
void
CAttributeToken::GetSource(nsString& anOutputString)
{
  anOutputString.Truncate();
  AppendSourceTo(anOutputString);
}
/*
 * Appends this attribute's source text to the given string: the key,
 * followed by "=" when there is a value or when an equal sign was seen
 * without a value, followed by the value.
 *
 * @update rickg 6June2000
 * @param anOutputString -- string the result is appended to
 */
void
CAttributeToken::AppendSourceTo(nsAString& anOutputString)
{
  AppendUnicodeTo(mTextKey, anOutputString);

  const PRBool needsEqualSign =
    (mTextValue.str().Length() != 0) || mHasEqualWithoutValue;
  if (needsEqualSign) {
    anOutputString.AppendLiteral("=");
  }

  anOutputString.Append(mTextValue.str());
}
/*
 * This general purpose method is used when you want to
 * consume a known quoted string. The opening quote has already been read
 * by the caller.
 *
 * Text is appended to aString until the matching quote, which is then read
 * off the scanner. If the end of a non-incremental document is reached
 * first, everything appended here is discarded, the scanner is rewound and
 * the value is re-read as if it were unquoted (up to the next attribute
 * terminator).
 *
 * @param aChar -- the quote character that opened the string (" or ')
 * @param aString -- string that receives the consumed text
 * @param aNewlineCount -- newline counter handed to ConsumeUntil
 * @param aScanner -- controller of underlying input source
 * @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
 * @return error result; in view-source mode a successful fallback read is
 * reported as NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL
 */
static
nsresult ConsumeQuotedString(PRUnichar aChar,
nsScannerSharedSubstring& aString,
PRInt32& aNewlineCount,
nsScanner& aScanner,
PRInt32 aFlag)
{
NS_ASSERTION(aChar==kQuote || aChar==kApostrophe,"char is neither quote nor apostrophe");
// hold onto this in case this is an unterminated string literal
PRUint32 origLen = aString.str().Length();
// One terminator set per quote style: the closing quote itself, '&', CR
// and newline.
static const PRUnichar theTerminalCharsQuote[] = {
PRUnichar(kQuote), PRUnichar('&'), PRUnichar(kCR),
PRUnichar(kNewLine), PRUnichar(0) };
static const PRUnichar theTerminalCharsApostrophe[] = {
PRUnichar(kApostrophe), PRUnichar('&'), PRUnichar(kCR),
PRUnichar(kNewLine), PRUnichar(0) };
static const nsReadEndCondition
theTerminateConditionQuote(theTerminalCharsQuote);
static const nsReadEndCondition
theTerminateConditionApostrophe(theTerminalCharsApostrophe);
// Assume Quote to init to something
const nsReadEndCondition *terminateCondition = &theTerminateConditionQuote;
if (aChar==kApostrophe)
terminateCondition = &theTerminateConditionApostrophe;
nsresult result=NS_OK;
// Remember where the quoted text starts so the fallback below can rewind.
nsScannerIterator theOffset;
aScanner.CurrentPosition(theOffset);
result=ConsumeUntil(aString,aNewlineCount,aScanner,
*terminateCondition,PR_TRUE,aFlag);
if(NS_SUCCEEDED(result)) {
result = aScanner.GetChar(aChar); // aChar should be " or '
}
// Ref: Bug 35806
// A back up measure when disaster strikes...
// Ex <table> <tr d="><td>hello</td></tr></table>
if(!aString.str().IsEmpty() && aString.str().Last()!=aChar &&
!aScanner.IsIncremental() && result==kEOF) {
static const nsReadEndCondition
theAttributeTerminator(kAttributeTerminalChars);
// Throw away what was appended above and re-read from the saved position,
// this time stopping at the first attribute terminator.
aString.writable().Truncate(origLen);
aScanner.SetPosition(theOffset, PR_FALSE, PR_TRUE);
result=ConsumeUntil(aString,aNewlineCount,aScanner,
theAttributeTerminator,PR_FALSE,aFlag);
if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
// Remember that this string literal was unterminated.
result = NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL;
}
}
return result;
}
/*
 * This method is meant to be used by view-source to consume invalid
 * attributes. For the purposes of this method, an invalid attribute is an
 * attribute that starts with either ' or ". All consecutive occurrences of
 * that character and the whitespace following them are consumed.
 *
 * @param aScanner -- the scanner we're reading our data from.
 * @param aChar -- the character we're skipping
 * @param aCurrent -- the current position that we're looking at; on return
 *                    it holds the end position reported by ReadWhitespace.
 * @param aNewlineCount -- a count of the newlines we've consumed.
 * @return error result of reading the trailing whitespace.
 */
static
nsresult ConsumeInvalidAttribute(nsScanner& aScanner,
                                 PRUnichar aChar,
                                 nsScannerIterator& aCurrent,
                                 PRInt32& aNewlineCount)
{
  NS_ASSERTION(aChar=='\'' || aChar=='"', "aChar must be a quote or apostrophe");

  nsScannerIterator docEnd, whitespaceStart;
  aScanner.EndReading(docEnd);

  // Step over the whole run of quote characters.
  for (; aCurrent != docEnd && *aCurrent == aChar; ++aCurrent) {
  }

  aScanner.SetPosition(aCurrent);
  return aScanner.ReadWhitespace(whitespaceStart, aCurrent, aNewlineCount);
}
/*
 * Consume the key and value portions of the attribute.
 *
 * In view-source mode (NS_IPARSER_FLAG_VIEW_SOURCE) the whitespace around
 * the key and value and the quotes around the value are kept as part of
 * the key/value text; otherwise they are skipped.
 *
 * @update rickg 03.23.2000
 * @param aChar -- last char consumed from stream; reused as scratch storage
 * @param aScanner -- controller of underlying input source
 * @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
 * @return error result; NS_ERROR_HTMLPARSER_BADATTRIBUTE when the attribute
 * carries no key, no value and no newlines, or when a non-incremental
 * document ended before any key was read
 */
nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
nsresult result;
// wsstart/wsend delimit the leading whitespace (view-source mode only).
nsScannerIterator wsstart, wsend;
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(wsstart, wsend, mNewlineCount);
if (kEOF == result && wsstart != wsend) {
// Do this here so if this is the final token in the document, we don't
// lose the whitespace.
aScanner.BindSubstring(mTextKey, wsstart, wsend);
}
}
else {
result = aScanner.SkipWhitespace(mNewlineCount);
}
if (NS_OK==result) {
// Characters that end an attribute name.
static const PRUnichar theTerminalsChars[] =
{ PRUnichar(' '), PRUnichar('"'),
PRUnichar('='), PRUnichar('\n'),
PRUnichar('\r'), PRUnichar('\t'),
PRUnichar('>'), PRUnichar('<'),
PRUnichar('\b'), PRUnichar('\''),
PRUnichar(0) };
static const nsReadEndCondition theEndCondition(theTerminalsChars);
nsScannerIterator start, end;
result=aScanner.ReadUntil(start,end,theEndCondition,PR_FALSE);
if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
aScanner.BindSubstring(mTextKey, start, end);
}
else if (kEOF == result && wsstart != end) {
//Capture all of the text (from the beginning of the whitespace to the
//end of the document).
aScanner.BindSubstring(mTextKey, wsstart, end);
}
//now it's time to Consume the (optional) value...
if (NS_OK==result) {
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
// In view-source mode the key spans from the leading whitespace through
// the whitespace that follows the name.
result = aScanner.ReadWhitespace(start, wsend, mNewlineCount);
aScanner.BindSubstring(mTextKey, wsstart, wsend);
}
else {
result = aScanner.SkipWhitespace(mNewlineCount);
}
if (NS_OK==result) {
result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'...
if (NS_OK==result) {
if (kEqual==aChar){
result=aScanner.GetChar(aChar); //skip the equal sign...
if (NS_OK==result) {
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
// haveCR is required by ReadWhitespace but not used afterwards.
PRBool haveCR;
result = aScanner.ReadWhitespace(mTextValue, mNewlineCount,
haveCR);
}
else {
result = aScanner.SkipWhitespace(mNewlineCount);
}
if (NS_OK==result) {
result=aScanner.Peek(aChar); //and grab the next char.
if (NS_OK==result) {
if ((kQuote==aChar) || (kApostrophe==aChar)) {
// Quoted value: read the opening quote, then everything up to its match.
aScanner.GetChar(aChar);
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
mTextValue.writable().Append(aChar);
}
result=ConsumeQuotedString(aChar,mTextValue,mNewlineCount,
aScanner,aFlag);
if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
mTextValue.writable().Append(aChar);
} else if (result == NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL) {
// The literal was unterminated: keep what was read but flag the token.
result = NS_OK;
mInError = PR_TRUE;
}
// According to spec. we ( who? ) should ignore linefeeds. But look,
// even the carriage return was getting stripped ( wonder why! ) -
// Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds,
// but then what about bug 47535 ? Should we preserve everything then?
// Well, let's make it so! Commenting out the next two lines..
/*if(!aRetain)
mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds...
*/
}
else if (kGreaterThan==aChar){
// "key=>": an equal sign with no value before the end of the tag.
mHasEqualWithoutValue=PR_TRUE;
mInError=PR_TRUE;
}
else {
// Unquoted value: read up to the next attribute terminator.
static const nsReadEndCondition
theAttributeTerminator(kAttributeTerminalChars);
result=ConsumeUntil(mTextValue,
mNewlineCount,
aScanner,
theAttributeTerminator,
PR_FALSE,
aFlag);
}
}//if
if (NS_OK==result) {
// Deal with the whitespace that follows the value.
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
PRBool haveCR;
result = aScanner.ReadWhitespace(mTextValue, mNewlineCount,
haveCR);
}
else {
result = aScanner.SkipWhitespace(mNewlineCount);
}
}
}//if
else {
//We saw an equal sign but ran out of room looking for a value.
mHasEqualWithoutValue=PR_TRUE;
mInError=PR_TRUE;
}
}//if
}//if
else {
//This is where we have to handle fairly busted content.
//If you're here, it means we saw an attribute name, but couldn't find
//the following equal sign. <tag NAME=....
//Doing this right in all cases is <i>REALLY</i> ugly.
//My best guess is to grab the next non-ws char. We know it's not '=',
//so let's see what it is. If it's a '"', then assume we're reading
//from the middle of the value. Try stripping the quote and continuing...
if (kQuote==aChar || kApostrophe==aChar){
mInError=PR_TRUE;
if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
result=aScanner.SkipOver(aChar); //strip quote.
if (NS_SUCCEEDED(result)) {
result=aScanner.SkipWhitespace(mNewlineCount);
}
} else {
//We want to collect whitespace here so that following
//attributes can have the right line number (and for
//parity with the non-view-source code above).
result=ConsumeInvalidAttribute(aScanner,aChar,wsend,mNewlineCount);
aScanner.BindSubstring(mTextKey, wsstart, wsend);
aScanner.SetPosition(wsend);
}
}
}
}//if
} //if
}//if (consume optional value)
if (NS_OK==result) {
if (mTextValue.str().Length() == 0 && mTextKey.Length() == 0 &&
mNewlineCount == 0) {
//This attribute contains no useful information for us, so there is no
//use in keeping it around. Attributes that are otherwise empty, but
//have newlines in them are passed on to the DTD so it can get line
//numbering right.
return NS_ERROR_HTMLPARSER_BADATTRIBUTE;
}
#ifdef DEBUG
// NOTE(review): this overwrites |result| in DEBUG builds only, so a failing
// Peek here would make debug and release builds return different values --
// confirm that Peek cannot fail at this point.
result = aScanner.Peek(aChar);
mLastAttribute = (kGreaterThan == aChar || kEOF == result);
#endif
}
}//if
if (kEOF == result && !aScanner.IsIncremental()) {
// This is our run-of-the mill "don't lose content at the end of a
// document" with a slight twist: we don't want to bother returning an
// empty attribute key, even if this is the end of the document.
if (mTextKey.Length() == 0) {
result = NS_ERROR_HTMLPARSER_BADATTRIBUTE;
}
else {
result = NS_OK;
}
}
return result;
}
/*
 * Replaces the attribute's key.
 *
 * @param aKey -- string the key is rebound to
 */
void
CAttributeToken::SetKey(const nsAString& aKey)
{
  mTextKey.Rebind(aKey);
}
/*
 * Binds the attribute's key to a range of the scanner's text.
 *
 * @param aScanner -- scanner that performs the binding
 * @param aStart -- start of the key
 * @param aEnd -- end of the key
 */
void
CAttributeToken::BindKey(nsScanner* aScanner,
                         nsScannerIterator& aStart,
                         nsScannerIterator& aEnd)
{
  aScanner->BindSubstring(mTextKey, aStart, aEnd);
}
/*
 * Default constructor.
 * Creates a whitespace token tagged eHTMLTag_whitespace.
 *
 * @update gess 3/25/98
 */
CWhitespaceToken::CWhitespaceToken()
  : CHTMLToken(eHTMLTag_whitespace)
{
}
/*
 * String based constructor.
 *
 * @update gess 3/25/98
 * @param aName -- string copied into the token's text value
 */
CWhitespaceToken::CWhitespaceToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_whitespace)
{
  mTextValue.writable().Assign(aName);
}
/*
 * Identifies this token as whitespace.
 *
 * @update gess 3/25/98
 * @return eToken_whitespace
 */
PRInt32
CWhitespaceToken::GetTokenType(void)
{
  return eToken_whitespace;
}
/*
 * This general purpose method is used when you want to
 * consume an arbitrary sequence of whitespace.
 *
 * The scanner is backed up by one character so that the whitespace
 * character already read by the caller becomes part of the token, then all
 * following whitespace is read into mTextValue. Carriage returns are
 * stripped from the result when ReadWhitespace reports that it saw any.
 *
 * @update gess 3/25/98
 * @param aChar -- last char consumed from stream
 * @param aScanner -- controller of underlying input source
 * @param aFlag -- not read by this method
 * @return error result; kEOF is reported as NS_OK when the scanner is not
 *         incremental
 */
nsresult CWhitespaceToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  // If possible, we'd like to just be a dependent substring starting at
  // |aChar|. The scanner has already been advanced, so we need to
  // back it up to facilitate this.
  nsScannerIterator start;
  aScanner.CurrentPosition(start);
  aScanner.SetPosition(--start, PR_FALSE, PR_TRUE);

  // Initialised so that the check below never reads an indeterminate value
  // if ReadWhitespace bails out before setting it (a kEOF failure is turned
  // into NS_OK just below, so the flag is still consulted in that case).
  PRBool haveCR = PR_FALSE;
  nsresult result = aScanner.ReadWhitespace(mTextValue, mNewlineCount, haveCR);

  if (result == kEOF && !aScanner.IsIncremental()) {
    // Oops, we ran off the end, make sure we don't lose the trailing
    // whitespace!
    result = NS_OK;
  }

  if (NS_OK == result && haveCR) {
    mTextValue.writable().StripChar(kCR);
  }

  return result;
}
/*
 * Accessor for the whitespace text held by this token.
 *
 * @return reference to the token's string value
 */
const nsSubstring&
CWhitespaceToken::GetStringValue(void)
{
  return mTextValue.str();
}
/*
 * Default constructor.
 * Creates an entity token tagged eHTMLTag_entity.
 *
 * @update gess 3/25/98
 */
CEntityToken::CEntityToken()
  : CHTMLToken(eHTMLTag_entity)
{
}
/*
 * String based constructor.
 * When VERBOSE_DEBUG is defined the entity table is verified as well and a
 * message is printed if that check fails.
 *
 * @update gess 3/25/98
 * @param aName -- string copied into the token's text value
 */
CEntityToken::CEntityToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_entity)
{
  mTextValue.Assign(aName);

#ifdef VERBOSE_DEBUG
  const PRBool tableIsValid = VerifyEntityTable();
  if (!tableIsValid) {
    cout<<"Entity table is invalid!" << endl;
  }
#endif
}
/*
 * Consume the rest of the entity into this token's text value by
 * delegating to ConsumeEntity().
 *
 * NOTE(review): this comment used to say that the "&" has already been
 * eaten, but ConsumeEntity()'s own comments indicate it reads the "&"
 * itself -- confirm which is correct.
 *
 * @update gess 3/25/98
 * @param aChar -- last char consumed from stream
 * @param aScanner -- controller of underlying input source
 * @param aFlag -- not read by this method
 * @return error result of ConsumeEntity()
 */
nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  return ConsumeEntity(aChar, mTextValue, aScanner);
}
/*
 * Identifies this token as an entity.
 *
 * @update gess 3/25/98
 * @return eToken_entity
 */
PRInt32
CEntityToken::GetTokenType(void)
{
  return eToken_entity;
}
/*
 * This general purpose method is used when you want to
 * consume an entity &xxxx;. Keep in mind that entities
 * are <i>not</i> reduced inline.
 *
 * Three forms are recognised, selected by aChar:
 * - a script entity starting with '{', read until the braces balance;
 * - a numeric entity starting with '#', decimal or (with x/X) hexadecimal;
 * - a named entity whose first character is an ASCII letter, '_' or ':'.
 * A ';' directly following the entity is appended to aString and consumed.
 *
 * @update gess 3/25/98
 * @param aChar -- the character that selects the entity form; reused as
 * scratch storage
 * @param aString -- receives the entity text (without the leading '&')
 * @param aScanner -- controller of underlying input source
 * @return error result; NS_HTMLTOKENS_NOT_AN_ENTITY when the text does not
 * start a recognised entity
 */
nsresult
CEntityToken::ConsumeEntity(PRUnichar aChar,
nsString& aString,
nsScanner& aScanner) {
nsresult result=NS_OK;
if(kLeftBrace==aChar) {
//you're consuming a script entity...
aScanner.GetChar(aChar); // Consume &
// Keep reading until as many '}' as '{' have been seen.
PRInt32 rightBraceCount = 0;
PRInt32 leftBraceCount = 0;
do {
result=aScanner.GetChar(aChar);
if (NS_FAILED(result)) {
return result;
}
aString.Append(aChar);
if(aChar==kRightBrace)
++rightBraceCount;
else if(aChar==kLeftBrace)
++leftBraceCount;
} while(leftBraceCount!=rightBraceCount);
} //if
else {
PRUnichar theChar=0;
if (kHashsign==aChar) {
// Peek at offset 2, i.e. past the '&' and '#' that are consumed below.
result = aScanner.Peek(theChar,2);
if (NS_FAILED(result)) {
if (kEOF == result && !aScanner.IsIncremental()) {
// If this is the last buffer then we are certainly
// not dealing with an entity. That's, there are
// no more characters after &#. Bug 188278.
return NS_HTMLTOKENS_NOT_AN_ENTITY;
}
return result;
}
if (nsCRT::IsAsciiDigit(theChar)) {
aScanner.GetChar(aChar); // Consume &
aScanner.GetChar(aChar); // Consume #
// aString starts with the '#' and then receives the decimal digits.
aString.Assign(aChar);
result=aScanner.ReadNumber(aString,10);
}
else if (theChar == 'x' || theChar == 'X') {
aScanner.GetChar(aChar); // Consume &
aScanner.GetChar(aChar); // Consume #
aScanner.GetChar(theChar); // Consume x
// aString starts with "#x" (or "#X") and then receives the hex digits.
aString.Assign(aChar);
aString.Append(theChar);
result=aScanner.ReadNumber(aString,16);
}
else {
return NS_HTMLTOKENS_NOT_AN_ENTITY;
}
}
else {
// Peek at offset 1, i.e. past the '&' that is consumed below.
result = aScanner.Peek(theChar,1);
if (NS_FAILED(result)) {
return result;
}
if(nsCRT::IsAsciiAlpha(theChar) ||
theChar == '_' ||
theChar == ':') {
aScanner.GetChar(aChar); // Consume &
result=aScanner.ReadEntityIdentifier(aString);
}
else {
return NS_HTMLTOKENS_NOT_AN_ENTITY;
}
}
}
if (NS_FAILED(result)) {
return result;
}
// See whether the entity is terminated by a ';'.
result=aScanner.Peek(aChar);
if (NS_FAILED(result)) {
return result;
}
if (aChar == kSemicolon) {
// consume semicolon that stopped the scan
aString.Append(aChar);
result=aScanner.GetChar(aChar);
}
return result;
}
// Enables remapping of numeric character references in the range
// 0x80-0x9F (see AppendNCR below).
#define PA_REMAP_128_TO_160_ILLEGAL_NCR 1
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
/**
 * Map some illegal but commonly used numeric entities into their
 * appropriate unicode value.
 *
 * The table has 32 entries and is indexed by (value - 0x80), so it covers
 * the values 0x80 through 0x9F. Slots without a replacement hold NOT_USED.
 * NOTE(review): the replacements look like the Windows-1252 assignments
 * for these byte values -- confirm.
 */
#define NOT_USED 0xfffd
static const PRUint16 PA_HackTable[] = {
0x20ac, /* EURO SIGN */
NOT_USED,
0x201a, /* SINGLE LOW-9 QUOTATION MARK */
0x0192, /* LATIN SMALL LETTER F WITH HOOK */
0x201e, /* DOUBLE LOW-9 QUOTATION MARK */
0x2026, /* HORIZONTAL ELLIPSIS */
0x2020, /* DAGGER */
0x2021, /* DOUBLE DAGGER */
0x02c6, /* MODIFIER LETTER CIRCUMFLEX ACCENT */
0x2030, /* PER MILLE SIGN */
0x0160, /* LATIN CAPITAL LETTER S WITH CARON */
0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
0x0152, /* LATIN CAPITAL LIGATURE OE */
NOT_USED,
0x017D, /* LATIN CAPITAL LETTER Z WITH CARON */
NOT_USED,
NOT_USED,
0x2018, /* LEFT SINGLE QUOTATION MARK */
0x2019, /* RIGHT SINGLE QUOTATION MARK */
0x201c, /* LEFT DOUBLE QUOTATION MARK */
0x201d, /* RIGHT DOUBLE QUOTATION MARK */
0x2022, /* BULLET */
0x2013, /* EN DASH */
0x2014, /* EM DASH */
0x02dc, /* SMALL TILDE */
0x2122, /* TRADE MARK SIGN */
0x0161, /* LATIN SMALL LETTER S WITH CARON */
0x203a, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
0x0153, /* LATIN SMALL LIGATURE OE */
NOT_USED,
0x017E, /* LATIN SMALL LETTER Z WITH CARON */
0x0178 /* LATIN CAPITAL LETTER Y WITH DIAERESIS */
};
#endif /* PA_REMAP_128_TO_160_ILLEGAL_NCR */
/*
 * Appends the character for a numeric character reference to a string.
 *
 * Values in the range 0x80-0x9F are first remapped through PA_HackTable
 * (when PA_REMAP_128_TO_160_ILLEGAL_NCR is defined). Values that are not
 * valid Unicode scalar values -- negative, above 0x10FFFF, or inside the
 * surrogate range 0xD800-0xDFFF -- are replaced by U+FFFD instead of being
 * emitted as a lone surrogate or a bogus surrogate pair. Valid values are
 * appended as one UTF-16 code unit, or as a surrogate pair when outside
 * the BMP.
 *
 * @param aString -- string the character is appended to
 * @param aNCRValue -- numeric value of the character reference
 */
static void AppendNCR(nsSubstring& aString, PRInt32 aNCRValue)
{
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
  /* for some illegal, but popular usage */
  if ((aNCRValue >= 0x0080) && (aNCRValue <= 0x009f)) {
    aNCRValue = PA_HackTable[aNCRValue - 0x0080];
  }
#endif

  const PRBool isSurrogate = (aNCRValue >= 0xD800) && (aNCRValue <= 0xDFFF);
  if (aNCRValue < 0 || aNCRValue > 0x10FFFF || isSurrogate) {
    // Not representable as well-formed UTF-16: use REPLACEMENT CHARACTER.
    aString.Append(PRUnichar(0xFFFD));
    return;
  }

  if (IS_IN_BMP(aNCRValue)) {
    aString.Append(PRUnichar(aNCRValue));
  } else {
    aString.Append(PRUnichar(H_SURROGATE(aNCRValue)));
    aString.Append(PRUnichar(L_SURROGATE(aNCRValue)));
  }
}
/*
 * This method converts this entity into its underlying
 * unicode equivalent.
 *
 * A numeric entity (text starting with '#') is converted with ToInteger()
 * and, if that succeeds, its character is appended to aString. A named
 * entity is looked up in nsHTMLEntities and, if found, aString is assigned
 * its character. Entities whose text is not longer than one character are
 * ignored.
 *
 * @update gess 3/25/98
 * @param aString will hold the resulting string value
 * @return numeric (unichar) value: the conversion or lookup result, or 0
 *         when the entity text is too short
 */
PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString)
{
  if (mTextValue.Length() <= 1) {
    return 0;
  }

  if (kHashsign == mTextValue.CharAt(0)) {
    PRInt32 errorCode = 0;
    const PRInt32 numericValue = mTextValue.ToInteger(&errorCode, kAutoDetect);
    if (0 == errorCode) {
      AppendNCR(aString, numericValue);
    }
    return numericValue;
  }

  const PRInt32 namedValue = nsHTMLEntities::EntityToUnicode(mTextValue);
  if (namedValue > -1) {
    //we found a named entity...
    aString.Assign(PRUnichar(namedValue));
  }
  return namedValue;
}
/*
 * Accessor for the entity text held by this token.
 *
 * @return reference to the token's internal string value
 */
const nsSubstring&
CEntityToken::GetStringValue(void)
{
  return mTextValue;
}
/*
 * Appends "&" followed by the entity text to the given string. No ";" is
 * added here.
 *
 * NOTE(review): unlike CAttributeToken::GetSource(), this does not clear
 * anOutputString first -- confirm that appending is intended.
 *
 * @update gess 3/25/98
 * @param anOutputString will receive the result
 */
void
CEntityToken::GetSource(nsString& anOutputString)
{
  anOutputString.AppendLiteral("&");
  anOutputString.Append(mTextValue);
}
/*
 * Appends "&" followed by the entity text to the given string. No ";" is
 * added here.
 *
 * @update harishd 03/23/00
 * @param anOutputString -- string the result is appended to
 */
void
CEntityToken::AppendSourceTo(nsAString& anOutputString)
{
  anOutputString.AppendLiteral("&");
  anOutputString.Append(mTextValue);
}
/**
 * Looks up the name of a tag.
 *
 * @update gess4/25/98
 * @param aTag -- tag id, interpreted as an nsHTMLTag
 * @return the name reported by nsHTMLTags::GetStringValue(); if there is
 *         none, sUserdefined for ids at or above eHTMLTag_userdefined and
 *         0 otherwise
 */
const PRUnichar* GetTagName(PRInt32 aTag)
{
  const PRUnichar* knownName = nsHTMLTags::GetStringValue((nsHTMLTag) aTag);

  if (!knownName && aTag >= eHTMLTag_userdefined) {
    return sUserdefined;
  }

  // Either a real name, or 0 when the tag is unknown.
  return knownName;
}
/**
 * Default constructor.
 * Creates a processing instruction token tagged eHTMLTag_instruction.
 *
 * @update gess 9/23/98
 */
CInstructionToken::CInstructionToken()
  : CHTMLToken(eHTMLTag_instruction)
{
}
/**
 * String based constructor.
 *
 * The token is tagged eHTMLTag_instruction, the same tag the default
 * constructor uses. It used to be tagged eHTMLTag_unknown here, so the two
 * constructors produced differently tagged tokens.
 *
 * @update gess 9/23/98
 * @param aString -- string copied into the token's text value
 */
CInstructionToken::CInstructionToken(const nsAString& aString)
  : CHTMLToken(eHTMLTag_instruction)
{
  mTextValue.Assign(aString);
}
/**
 * Consumes a processing instruction. The token text is restarted as "<?"
 * and everything up to and including the terminating '>' is appended.
 * Without NS_IPARSER_FLAG_XML the first '>' ends the instruction; with it,
 * only a '>' that directly follows a '?' does.
 *
 * @update gess 9/23/98
 * @param aChar -- last char consumed from stream; reused as scratch storage
 * @param aScanner -- controller of underlying input source
 * @param aFlag -- parser flags; only NS_IPARSER_FLAG_XML is examined
 * @return error result; kEOF is reported as NS_OK (with mInError set) when
 *         the scanner is not incremental
 */
nsresult CInstructionToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  mTextValue.AssignLiteral("<?");

  nsresult rv = NS_OK;
  for (PRBool finished = PR_FALSE; NS_OK == rv && !finished; ) {
    //Note, this call does *not* consume the >.
    rv = aScanner.ReadUntil(mTextValue, kGreaterThan, PR_FALSE);
    if (NS_FAILED(rv)) {
      // The loop condition ends the loop for any result other than NS_OK.
      continue;
    }

    //In HTML, PIs end with a '>', in XML, they end with a '?>'. Cover both
    //cases here.
    if (!(aFlag & NS_IPARSER_FLAG_XML) || kQuestionMark == mTextValue.Last()) {
      //This really is the end of the PI.
      finished = PR_TRUE;
    }

    //Need to append this character no matter what.
    aScanner.GetChar(aChar);
    mTextValue.Append(aChar);
  }

  if (kEOF == rv && !aScanner.IsIncremental()) {
    //Hide the EOF result because there is no more text coming.
    mInError = PR_TRUE;
    rv = NS_OK;
  }

  return rv;
}
/**
 * Reports the token type of this class.
 *
 * @update  gess 9/23/98
 * @return  eToken_instruction
 */
PRInt32 CInstructionToken::GetTokenType()
{
  return eToken_instruction;
}
/**
 * Accessor for the text held by this instruction token.
 *
 * @return  a reference to mTextValue
 */
const nsSubstring& CInstructionToken::GetStringValue()
{
  return mTextValue;
}
// Doctype decl token
/**
 * Constructs a doctype declaration token with no initial text.
 *
 * @param   aTag  tag id forwarded to the CHTMLToken base
 */
CDoctypeDeclToken::CDoctypeDeclToken(eHTMLTags aTag)
  : CHTMLToken(aTag)
{
}
/**
 * Constructs a doctype declaration token from existing text.
 *
 * @param   aString  text used to initialize mTextValue
 * @param   aTag     tag id forwarded to the CHTMLToken base
 */
CDoctypeDeclToken::CDoctypeDeclToken(const nsAString& aString, eHTMLTags aTag)
  : CHTMLToken(aTag),
    mTextValue(aString)
{
}
/**
 * Consumes a doctype declaration from the scanner into mTextValue.
 *
 * The scanner is advanced to the first '>' or '<'. A '>' is consumed and
 * included in the token; a '<' is left in the scanner (it could belong to
 * another tag) and the token is flagged with mInError. If neither is found
 * and the scanner is not incremental, whatever text remains is used and the
 * token is likewise flagged with mInError.
 *
 * The copied text starts two characters before the scanner position on
 * entry, so that the leading "<!" is part of the token.
 *
 * @update  gess 9/23/98
 * @param   aChar     not used by this method
 * @param   aScanner  scanner the text is read from
 * @param   aFlag     not used by this method
 * @return  the result of the scan, except that a failure is replaced by
 *          NS_OK when the scanner is not incremental
 */
nsresult CDoctypeDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner, PRInt32 aFlag)
{
  static const PRUnichar terminalChars[] =
    { PRUnichar('>'), PRUnichar('<'),
      PRUnichar(0)
    };
  static const nsReadEndCondition theEndCondition(terminalChars);

  nsScannerIterator start, end;
  aScanner.CurrentPosition(start);
  aScanner.EndReading(end);

  nsresult result = aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);

  if (NS_SUCCEEDED(result)) {
    // Initialized so that a failing Peek() cannot leave us comparing an
    // indeterminate value; in that case we take the error branch below.
    PRUnichar ch = 0;
    aScanner.Peek(ch);
    if (ch == kGreaterThan) {
      // Include '>' but not '<' since '<'
      // could belong to another tag.
      aScanner.GetChar(ch);
      end.advance(1);
    } else {
      NS_ASSERTION(kLessThan == ch,
                   "Make sure this doctype decl. is really in error.");
      mInError = PR_TRUE;
    }
  }
  else if (!aScanner.IsIncremental()) {
    // We have reached the document end but haven't
    // found either a '<' or a '>'. Therefore use
    // whatever we have.
    mInError = PR_TRUE;
    result = NS_OK;
  }

  if (NS_SUCCEEDED(result)) {
    start.advance(-2); // Make sure to consume <!
    CopyUnicodeTo(start, end, mTextValue);
  }

  return result;
}
/**
 * Reports the token type of this class.
 *
 * @return  eToken_doctypeDecl
 */
PRInt32 CDoctypeDeclToken::GetTokenType()
{
  return eToken_doctypeDecl;
}
/**
 * Accessor for the text held by this doctype declaration token.
 *
 * @return  a reference to mTextValue
 */
const nsSubstring& CDoctypeDeclToken::GetStringValue()
{
  return mTextValue;
}
/**
 * Replaces the text held by this doctype declaration token.
 *
 * @param   aStr  text assigned to mTextValue
 */
void CDoctypeDeclToken::SetStringValue(const nsAString& aStr)
{
  mTextValue.Assign(aStr);
}