1254 lines
42 KiB
C++
1254 lines
42 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* vim: set sw=2 ts=2 et tw=78: */
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is mozilla.org code.
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Netscape Communications Corporation.
|
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Blake Kaplan <mrbkap@gmail.com>
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either of the GNU General Public License Version 2 or later (the "GPL"),
|
|
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
|
|
/**
|
|
* @file nsHTMLTokenizer.cpp
|
|
* This is an implementation of the nsITokenizer interface.
|
|
* This file contains the implementation of a tokenizer to tokenize an HTML
|
|
* document. It attempts to do so, making tradeoffs between compatibility with
|
|
* older parsers and the SGML specification. Note that most of the real
|
|
* "tokenization" takes place in nsHTMLTokens.cpp.
|
|
*/
|
|
|
|
#include "nsIAtom.h"
|
|
#include "nsHTMLTokenizer.h"
|
|
#include "nsScanner.h"
|
|
#include "nsElementTable.h"
|
|
#include "CParserContext.h"
|
|
#include "nsReadableUtils.h"
|
|
#include "nsUnicharUtils.h"
|
|
|
|
/************************************************************************
|
|
And now for the main class -- nsHTMLTokenizer...
|
|
************************************************************************/
|
|
|
|
// Interface IDs consulted by QueryInterface() below to implement the
// COM-style identity checks for this class (pre-XPCOM-macro era code).
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kITokenizerIID, NS_ITOKENIZER_IID);
static NS_DEFINE_IID(kClassIID, NS_HTMLTOKENIZER_IID);
|
|
|
|
/**
|
|
* This method gets called as part of our COM-like interfaces.
|
|
* Its purpose is to create an interface to parser object
|
|
* of some type.
|
|
*
|
|
* @param aIID id of object to discover
|
|
* @param aInstancePtr ptr to newly discovered interface
|
|
* @return NS_xxx result code
|
|
*/
|
|
nsresult nsHTMLTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
  // A null out-parameter is a caller error; refuse up front.
  if (NULL == aInstancePtr) {
    return NS_ERROR_NULL_POINTER;
  }

  // Map the requested IID onto the matching view of this object.
  if (aIID.Equals(kISupportsIID)) {
    *aInstancePtr = (nsISupports*)(this);      // The IUnknown-style root.
  } else if (aIID.Equals(kITokenizerIID)) {
    *aInstancePtr = (nsITokenizer*)(this);     // The nsITokenizer base class.
  } else if (aIID.Equals(kClassIID)) {
    *aInstancePtr = (nsHTMLTokenizer*)(this);  // This concrete class.
  } else {
    // Unknown interface: clear the out-param and report failure.
    *aInstancePtr = 0;
    return NS_NOINTERFACE;
  }

  // Every successful QI hands out an owning reference.
  NS_ADDREF_THIS();
  return NS_OK;
}
|
|
|
|
/**
|
|
* This method is defined in nsHTMLTokenizer.h. It is used to
|
|
* cause the COM-like construction of an HTMLTokenizer.
|
|
*
|
|
* @param aInstancePtrResult** ptr to newly instantiated parser
|
|
* @param aFlag Parser flags the tokenizer should be aware of
|
|
* @param aDocType The doctype of the current document.
|
|
* @param aCommand The current command (view-source, fragment, etc).
|
|
* @return NS_xxx error result
|
|
*/
|
|
|
|
nsresult NS_NewHTMLTokenizer(nsITokenizer** aInstancePtrResult,
                             PRInt32 aFlag,
                             eParserDocType aDocType,
                             eParserCommands aCommand,
                             PRInt32 aFlags)
{
  NS_PRECONDITION(nsnull != aInstancePtrResult, "null ptr");
  if (nsnull == aInstancePtrResult) {
    return NS_ERROR_NULL_POINTER;
  }

  // Construct the tokenizer; report OOM explicitly if allocation fails.
  nsHTMLTokenizer* theTokenizer =
    new nsHTMLTokenizer(aFlag, aDocType, aCommand, aFlags);
  if (nsnull == theTokenizer) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  // Hand back an AddRef'ed interface pointer via QueryInterface.
  return theTokenizer->QueryInterface(kClassIID, (void**)aInstancePtrResult);
}
|
|
|
|
|
|
// Standard macro-generated reference-count implementations (AddRef/Release).
NS_IMPL_ADDREF(nsHTMLTokenizer)
NS_IMPL_RELEASE(nsHTMLTokenizer)
|
|
|
|
|
|
/**
|
|
* Default constructor
|
|
*
|
|
* @param aParseMode The current mode the document is in (quirks, etc.)
|
|
* @param aDocType The document type of the current document
|
|
* @param aCommand What we are trying to do (view-source, parse a fragment, etc.)
|
|
*/
|
|
nsHTMLTokenizer::nsHTMLTokenizer(PRInt32 aParseMode,
                                 eParserDocType aDocType,
                                 eParserCommands aCommand,
                                 PRUint16 aFlags) :
  nsITokenizer(), mTokenDeque(0), mFlags(aFlags)
{
  // Fold the DTD parse mode into our flag word.
  switch (aParseMode) {
    case eDTDMode_full_standards:
    case eDTDMode_almost_standards:
      mFlags |= NS_IPARSER_FLAG_STRICT_MODE;
      break;
    case eDTDMode_quirks:
      mFlags |= NS_IPARSER_FLAG_QUIRKS_MODE;
      break;
    case eDTDMode_autodetect:
      mFlags |= NS_IPARSER_FLAG_AUTO_DETECT_MODE;
      break;
    default:
      mFlags |= NS_IPARSER_FLAG_UNKNOWN_MODE;
      break;
  }

  // Fold the document flavor in as well; other doctypes set no flavor bit.
  if (ePlainText == aDocType) {
    mFlags |= NS_IPARSER_FLAG_PLAIN_TEXT;
  } else if (eXML == aDocType) {
    mFlags |= NS_IPARSER_FLAG_XML;
  } else if (eHTML_Quirks == aDocType ||
             eHTML3_Quirks == aDocType ||
             eHTML_Strict == aDocType) {
    mFlags |= NS_IPARSER_FLAG_HTML;
  }

  // Record whether we're tokenizing for view-source or normal viewing.
  mFlags |= (eViewSource == aCommand) ? NS_IPARSER_FLAG_VIEW_SOURCE
                                      : NS_IPARSER_FLAG_VIEW_NORMAL;

  NS_ASSERTION(!(mFlags & NS_IPARSER_FLAG_XML) ||
               (mFlags & NS_IPARSER_FLAG_VIEW_SOURCE),
               "Why isn't this XML document going through our XML parser?");

  // The allocator arrives later, via WillTokenize().
  mTokenAllocator = nsnull;
  mTokenScanPos = 0;
}
|
|
|
|
|
|
/**
|
|
* The destructor ensures that we don't leak any left over tokens.
|
|
*/
|
|
nsHTMLTokenizer::~nsHTMLTokenizer()
{
  // Free any tokens still queued so they don't leak.
  if (mTokenDeque.GetSize()) {
    // Guard against a null allocator: mTokenAllocator starts out nsnull in
    // the constructor and is only supplied by WillTokenize(). If tokens were
    // pushed through the public PushToken*() methods before tokenization
    // ever started, the old code crashed dereferencing a null allocator
    // here. In that (unexpected) case we prefer leaking the leftovers to
    // crashing in a destructor.
    NS_ASSERTION(mTokenAllocator,
                 "Leftover tokens in the deque but no allocator to free them");
    if (mTokenAllocator) {
      CTokenDeallocator theDeallocator(mTokenAllocator->GetArenaPool());
      mTokenDeque.ForEach(theDeallocator);
    }
  }
}
|
|
|
|
|
|
/*******************************************************************
|
|
Here begins the real working methods for the tokenizer.
|
|
*******************************************************************/
|
|
|
|
/**
|
|
* Adds a token onto the end of the deque if aResult is a successful result.
|
|
* Otherwise, this function frees aToken and sets it to nsnull.
|
|
*
|
|
* @param aToken The token that wants to be added.
|
|
* @param aResult The error code that will be used to determine if we actually
|
|
* want to push this token.
|
|
* @param aDeque The deque we want to push aToken onto.
|
|
* @param aTokenAllocator The allocator we use to free aToken in case aResult
|
|
* is not a success code.
|
|
*/
|
|
/* static */
|
|
void nsHTMLTokenizer::AddToken(CToken*& aToken,
|
|
nsresult aResult,
|
|
nsDeque* aDeque,
|
|
nsTokenAllocator* aTokenAllocator)
|
|
{
|
|
if(aToken && aDeque) {
|
|
if(NS_SUCCEEDED(aResult)) {
|
|
aDeque->Push(aToken);
|
|
}
|
|
else {
|
|
IF_FREE(aToken, aTokenAllocator);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Retrieve a pointer to the global token recycler...
|
|
*
|
|
* @return Pointer to recycler (or null)
|
|
*/
|
|
nsTokenAllocator* nsHTMLTokenizer::GetTokenAllocator(void)
{
  // Null until WillTokenize() has supplied an allocator.
  return mTokenAllocator;
}
|
|
|
|
|
|
/**
|
|
* This method provides access to the topmost token in the tokenDeque.
|
|
* The token is not really removed from the list.
|
|
*
|
|
* @return Pointer to token
|
|
*/
|
|
CToken* nsHTMLTokenizer::PeekToken()
{
  // Non-destructive look at the front of the queue; null when empty.
  return (CToken*)mTokenDeque.PeekFront();
}
|
|
|
|
|
|
/**
|
|
* This method provides access to the topmost token in the tokenDeque.
|
|
* The token is really removed from the list; if the list is empty we return 0.
|
|
*
|
|
* @return Pointer to token or NULL
|
|
*/
|
|
CToken* nsHTMLTokenizer::PopToken()
{
  // PopFront() already yields null on an empty deque, so no extra
  // bookkeeping is required here.
  return (CToken*)mTokenDeque.PopFront();
}
|
|
|
|
|
|
/**
|
|
* Pushes a token onto the front of our deque such that the next call to
|
|
* PopToken() or PeekToken() will return that token.
|
|
*
|
|
* @param theToken The next token to be processed
|
|
* @return theToken
|
|
*/
|
|
CToken* nsHTMLTokenizer::PushTokenFront(CToken* theToken)
{
  // The next PeekToken()/PopToken() call will now see theToken first.
  mTokenDeque.PushFront(theToken);
  return theToken;
}
|
|
|
|
/**
|
|
* Pushes a token onto the deque.
|
|
*
|
|
* @param theToken the new token.
|
|
* @return theToken
|
|
*/
|
|
CToken* nsHTMLTokenizer::PushToken(CToken* theToken)
{
  // Append to the back of the queue.
  mTokenDeque.Push(theToken);
  return theToken;
}
|
|
|
|
/**
|
|
* Returns the size of the deque.
|
|
*
|
|
* @return The number of remaining tokens.
|
|
*/
|
|
PRInt32 nsHTMLTokenizer::GetCount(void)
{
  // Number of tokens currently queued.
  return mTokenDeque.GetSize();
}
|
|
|
|
/**
|
|
* Allows access to an arbitrary token in the deque. The accessed token is left
|
|
* in the deque.
|
|
*
|
|
* @param anIndex The index of the target token. Token 0 would be the same as
|
|
* the result of a call to PeekToken()
|
|
* @return The requested token.
|
|
*/
|
|
CToken* nsHTMLTokenizer::GetTokenAt(PRInt32 anIndex)
{
  // Random access into the deque; index 0 is the token PeekToken() would
  // return. ObjectAt() leaves the token in place.
  return (CToken*)mTokenDeque.ObjectAt(anIndex);
}
|
|
|
|
/**
|
|
* This method is part of the "sandwich" that occurs when we want to tokenize
|
|
* a document. This prepares us to be able to tokenize properly.
|
|
*
|
|
* @param aIsFinalChunk Whether this is the last chunk of data that we will
|
|
* get to see.
|
|
* @param aTokenAllocator The token allocator to use for this document.
|
|
* @return Our success in setting up.
|
|
*/
|
|
nsresult nsHTMLTokenizer::WillTokenize(PRBool aIsFinalChunk,
                                       nsTokenAllocator* aTokenAllocator)
{
  // Remember the allocator; the Consume*() helpers and the destructor
  // rely on it for creating and recycling tokens.
  mTokenAllocator=aTokenAllocator;
  mIsFinalChunk=aIsFinalChunk;
  // Cause ScanDocStructure to search from here for new tokens...
  mTokenScanPos=mTokenDeque.GetSize();
  return NS_OK;
}
|
|
|
|
/**
|
|
* Pushes all of the tokens in aDeque onto the front of our deque so they
|
|
* get processed before any other tokens.
|
|
*
|
|
* @param aDeque The deque with the tokens in it.
|
|
*/
|
|
void nsHTMLTokenizer::PrependTokens(nsDeque& aDeque)
{
  // Drain aDeque entirely, moving every token to the front of our own
  // deque so they get processed before anything already queued.
  PRInt32 theCount = aDeque.GetSize();

  for (PRInt32 theIndex = 0; theIndex < theCount; ++theIndex) {
    // Pop() takes from the back of aDeque (as opposed to PopFront), and each
    // popped token is pushed onto our front.
    PushTokenFront((CToken*)aDeque.Pop());
  }
}
|
|
|
|
/**
|
|
* Copies the state flags from aTokenizer into this tokenizer. This is used
|
|
* to pass information around between the main tokenizer and tokenizers
|
|
* created for document.write() calls.
|
|
*
|
|
* @param aTokenizer The tokenizer with more information in it.
|
|
* @return NS_OK
|
|
*/
|
|
nsresult nsHTMLTokenizer::CopyState(nsITokenizer* aTokenizer)
{
  // Adopt the other tokenizer's flag word wholesale; a null source is a
  // silent no-op.
  nsHTMLTokenizer* theOther = (nsHTMLTokenizer*)aTokenizer;
  if (theOther) {
    mFlags = theOther->mFlags;
  }

  return NS_OK;
}
|
|
|
|
/**
|
|
* This is a utilty method for ScanDocStructure, which finds a given
|
|
* tag in the stack. The return value is meant to be used with
|
|
* nsDeque::ObjectAt() on aTagStack.
|
|
*
|
|
* @param aTag -- the ID of the tag we're seeking
|
|
* @param aTagStack -- the stack to be searched
|
|
* @return index position of tag in stack if found, otherwise kNotFound
|
|
*/
|
|
static PRInt32 FindLastIndexOfTag(eHTMLTags aTag, nsDeque& aTagStack)
{
  // Walk the stack from the top (highest index) down, returning the first
  // (i.e. most recently pushed) token whose type matches aTag.
  for (PRInt32 thePos = aTagStack.GetSize() - 1; thePos >= 0; --thePos) {
    CHTMLToken* theToken = (CHTMLToken*)aTagStack.ObjectAt(thePos);
    if (theToken && aTag == (eHTMLTags)theToken->GetTypeID()) {
      return thePos;
    }
  }

  return kNotFound;
}
|
|
|
|
/**
|
|
* This method scans the sequence of tokens to determine whether or not the
|
|
* tag structure of the document is well formed. In well formed cases, we can
|
|
* skip doing residual style handling and allow inlines to contain block-level
|
|
* elements.
|
|
*
|
|
* @param aFinalChunk Is unused.
|
|
* @return Success (currently, this function cannot fail).
|
|
*/
|
|
nsresult nsHTMLTokenizer::ScanDocStructure(PRBool aFinalChunk)
{
  nsresult result = NS_OK;
  if (!mTokenDeque.GetSize())
    return result;

  // mTokenScanPos was positioned by WillTokenize() at the first token of
  // this chunk; resume the review from there.
  CHTMLToken* theToken = (CHTMLToken*)mTokenDeque.ObjectAt(mTokenScanPos);

  // Start by finding the first start tag that hasn't been reviewed.
  // (An unreviewed start tag still carries eFormUnknown container info.)
  while(mTokenScanPos > 0) {
    if(theToken) {
      eHTMLTokenTypes theType = eHTMLTokenTypes(theToken->GetTokenType());
      if(eToken_start == theType) {
        if(eFormUnknown == theToken->GetContainerInfo()) {
          break;
        }
      }
    }
    theToken = (CHTMLToken*)mTokenDeque.ObjectAt(--mTokenScanPos);
  }

  // Now that we know where to start, let's walk through the
  // tokens to see which are well-formed. Stop when you run out
  // of fresh tokens.

  // theStack mirrors the currently-open container tags; tempStack is
  // scratch space used while unwinding past mismatched end tags.
  nsDeque theStack(0);
  nsDeque tempStack(0);
  PRInt32 theStackDepth = 0;
  // Don't bother if we get ridiculously deep.
  static const PRInt32 theMaxStackDepth = 200;

  while(theToken && theStackDepth < theMaxStackDepth) {
    eHTMLTokenTypes theType = eHTMLTokenTypes(theToken->GetTokenType());
    eHTMLTags theTag = (eHTMLTags)theToken->GetTypeID();

    if(nsHTMLElement::IsContainer(theTag)) { // Bug 54117
      PRBool theTagIsBlock = gHTMLElements[theTag].IsMemberOf(kBlockEntity);
      PRBool theTagIsInline = (theTagIsBlock) ?
                              PR_FALSE :
                              gHTMLElements[theTag].IsMemberOf(kInlineEntity);

      // Only block-entity, inline-entity and <table> tags take part in the
      // well-formedness review; everything else is skipped over.
      if(theTagIsBlock || theTagIsInline || eHTMLTag_table == theTag) {
        switch(theType) {
          case eToken_start:
            {
              if (gHTMLElements[theTag].ShouldVerifyHierarchy()) {
                PRInt32 earlyPos = FindLastIndexOfTag(theTag, theStack);
                if (earlyPos != kNotFound) {
                  // Uh-oh, we've found a tag that is not allowed to nest at
                  // all. Mark the previous one and all of its children as
                  // malformed to increase our chances of doing RS handling
                  // on all of them. We want to do this for cases such as:
                  // <a><div><a></a></div></a>.
                  // Note that we have to iterate through all of the children
                  // of the original malformed tag to protect against:
                  // <a><font><div><a></a></div></font></a>, so that the <font>
                  // is allowed to contain the <div>.
                  // XXX What about <a><span><a>, where the second <a> closes
                  // the <span>?
                  nsDequeIterator it(theStack, earlyPos), end(theStack.End());
                  while (it < end) {
                    CHTMLToken *theMalformedToken =
                        NS_STATIC_CAST(CHTMLToken*, it++);

                    theMalformedToken->SetContainerInfo(eMalformed);
                  }
                }
              }

              // Every reviewed start tag is provisionally open; its end tag
              // (if any) decides whether it ends up well-formed.
              theStack.Push(theToken);
              ++theStackDepth;
            }
            break;
          case eToken_end:
            {
              CHTMLToken *theLastToken = NS_STATIC_CAST(CHTMLToken*, theStack.Peek());
              if(theLastToken) {
                if(theTag == theLastToken->GetTypeID()) {
                  theStack.Pop(); // Yank it for real
                  theStackDepth--;
                  theLastToken->SetContainerInfo(eWellFormed);
                }
                else {
                  // This token wasn't what we expected it to be! We need to
                  // go searching for its real start tag on our stack. Each
                  // tag in between the end tag and start tag must be malformed

                  if(FindLastIndexOfTag(theTag, theStack) != kNotFound) {
                    // Find theTarget in the stack, marking each (malformed!)
                    // tag in our way.
                    theStack.Pop(); // pop off theLastToken for real.
                    do {
                      theLastToken->SetContainerInfo(eMalformed);
                      tempStack.Push(theLastToken);
                      theLastToken = NS_STATIC_CAST(CHTMLToken*, theStack.Pop());
                    } while(theLastToken && theTag != theLastToken->GetTypeID());
                    // XXX The above test can confuse two different userdefined
                    // tags.

                    NS_ASSERTION(theLastToken,
                                 "FindLastIndexOfTag lied to us!"
                                 " We couldn't find theTag on theStack");
                    theLastToken->SetContainerInfo(eMalformed);

                    // Great, now push all of the other tokens back onto the
                    // stack to preserve the general structure of the document.
                    // Note that we don't push the target token back onto the
                    // the stack (since it was just closed).
                    while(tempStack.GetSize() != 0) {
                      theStack.Push(tempStack.Pop());
                    }
                  } // else ignore a bogus end tag.
                }
              } // if (theLastToken)
            }
            break;
          default:
            break;
        }
      }
    }

    theToken = (CHTMLToken*)mTokenDeque.ObjectAt(++mTokenScanPos);
  }

  // NOTE(review): tokens left on theStack (unclosed containers) simply keep
  // their current container info; only matched/mismatched pairs were marked
  // above. Tokens themselves stay owned by mTokenDeque throughout.
  return result;
}
|
|
|
|
/**
|
|
* This method is called after we're done tokenizing a chunk of data.
|
|
*
|
|
* @param aFinalChunk Tells us if this was the last chunk of data.
|
|
* @return Error result.
|
|
*/
|
|
nsresult nsHTMLTokenizer::DidTokenize(PRBool aFinalChunk)
{
  // Post-pass: review the freshly queued tokens for well-formedness.
  return ScanDocStructure(aFinalChunk);
}
|
|
|
|
/**
|
|
* This method is repeatedly called by the tokenizer.
|
|
* Each time, we determine the kind of token we're about to
|
|
* read, and then we call the appropriate method to handle
|
|
* that token type.
|
|
*
|
|
* @param aScanner The source of our input.
|
|
* @param aFlushTokens An OUT parameter to tell the caller whether it should
|
|
* process our queued tokens up to now (e.g., when we
|
|
* reach a <script>).
|
|
* @return Success or error
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeToken(nsScanner& aScanner, PRBool& aFlushTokens)
{
  PRUnichar theChar;
  CToken* theToken = 0;

  // Look at (but don't consume) the next character to decide which
  // Consume*() helper should handle it.
  nsresult result = aScanner.Peek(theChar);

  switch(result) {
    case kEOF:
      // Tell our caller that we finished.
      return result;

    case NS_OK:
    default:

      // Tags and entities are only recognized outside plain-text mode
      // (the flag is set when ConsumeStartTag sees <plaintext>).
      if (!(mFlags & NS_IPARSER_FLAG_PLAIN_TEXT)) {
        if (kLessThan == theChar) {
          return ConsumeTag(theChar, theToken, aScanner, aFlushTokens);
        }
        else if (kAmpersand == theChar) {
          return ConsumeEntity(theChar, theToken, aScanner);
        }
      }

      if ((kCR == theChar) || (kLF == theChar)) {
        return ConsumeNewline(theChar, theToken, aScanner);
      }
      else {
        if (!nsCRT::IsAsciiSpace(theChar)) {
          // Fix: compare against the character value 0, not nsnull (which is
          // a null-*pointer* constant) as the old code did. Same behavior,
          // but no pointer/integer comparison.
          if (theChar != PRUnichar(0)) {
            result = ConsumeText(theToken, aScanner);
          }
          else {
            aScanner.GetChar(theChar); // skip the embedded null char. Fix bug 64098.
          }
          break;
        }
        result = ConsumeWhitespace(theChar, theToken, aScanner);
      }
      break;
  }

  return result;
}
|
|
|
|
|
|
/**
|
|
* This method is called just after a "<" has been consumed
|
|
* and we know we're at the start of some kind of tagged
|
|
* element. We don't know yet if it's a tag or a comment.
|
|
*
|
|
* @param aChar is the last char read
|
|
* @param aToken is the out arg holding our new token (the function allocates
|
|
* the return token using mTokenAllocator).
|
|
* @param aScanner represents our input source
|
|
* @param aFlushTokens is an OUT parameter use to tell consumers to flush
|
|
* the current tokens after processing the current one.
|
|
* @return error code.
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeTag(PRUnichar aChar,
                                     CToken*& aToken,
                                     nsScanner& aScanner,
                                     PRBool& aFlushTokens)
{
  PRUnichar theNextChar, oldChar;
  // aChar arrived as the '<'; after this Peek it holds the character that
  // follows it (the '<' itself has not been consumed yet).
  nsresult result=aScanner.Peek(aChar,1);

  if(NS_OK==result) {

    switch(aChar) {
      case kForwardSlash:
        // "</" -- peek one further to decide end tag vs. comment.
        result=aScanner.Peek(theNextChar, 2);

        if(NS_OK==result) {
          // Get the original "<" (we've already seen it with a Peek)
          aScanner.GetChar(oldChar);

          // XML allows non ASCII tag names, consume this as an end tag. This
          // is needed to make XML view source work
          PRBool isXML=(mFlags & NS_IPARSER_FLAG_XML);
          if(nsCRT::IsAsciiAlpha(theNextChar)||(kGreaterThan==theNextChar)||
             (isXML && (! nsCRT::IsAscii(theNextChar)))) {
            result=ConsumeEndTag(aChar,aToken,aScanner);
          }
          else result=ConsumeComment(aChar,aToken,aScanner);
        }

        break;

      case kExclamation:
        // "<!" -- either a comment ("<!-", "<!>") or special markup such as
        // a doctype declaration / marked section.
        result=aScanner.Peek(theNextChar, 2);

        if(NS_OK==result) {
          // Get the original "<" (we've already seen it with a Peek)
          aScanner.GetChar(oldChar);

          if((kMinus==theNextChar) || (kGreaterThan==theNextChar)) {
            result=ConsumeComment(aChar,aToken,aScanner);
          }
          else
            result=ConsumeSpecialMarkup(aChar,aToken,aScanner);
        }
        break;

      case kQuestionMark: // It must be an XML processing instruction...
        // Get the original "<" (we've already seen it with a Peek)
        aScanner.GetChar(oldChar);
        result=ConsumeProcessingInstruction(aChar,aToken,aScanner);
        break;

      default:
        // XML allows non ASCII tag names, consume this as a start tag.
        PRBool isXML=(mFlags & NS_IPARSER_FLAG_XML);
        if(nsCRT::IsAsciiAlpha(aChar) ||
           (isXML && (! nsCRT::IsAscii(aChar)))) {
          // Get the original "<" (we've already seen it with a Peek)
          aScanner.GetChar(oldChar);
          result=ConsumeStartTag(aChar,aToken,aScanner,aFlushTokens);
        }
        else {
          // We are not dealing with a tag. So, don't consume the original
          // char and leave the decision to ConsumeText().
          result=ConsumeText(aToken,aScanner);
        }
    }
  }

  // Last ditch attempt to make sure we don't lose data.
  if (kEOF == result && !aScanner.IsIncremental()) {
    // Whoops, we don't want to lose any data! Consume the rest as text.
    // This normally happens for either a trailing < or </
    result = ConsumeText(aToken,aScanner);
  }

  return result;
}
|
|
|
|
/**
|
|
* This method is called just after we've consumed a start or end
|
|
* tag, and we now have to consume its attributes.
|
|
*
|
|
* @param aChar is the last char read
|
|
* @param aToken is the start or end tag that "owns" these attributes.
|
|
* @param aScanner represents our input source
|
|
* @return Error result.
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeAttributes(PRUnichar aChar,
                                            CToken* aToken,
                                            nsScanner& aScanner)
{
  PRBool done=PR_FALSE;
  nsresult result=NS_OK;
  PRInt16 theAttrCount=0;

  nsTokenAllocator* theAllocator=this->GetTokenAllocator();

  // Loop, consuming one attribute token per iteration, until we hit the
  // closing '>', a stray '<', or an error.
  while((!done) && (result==NS_OK)) {
    CAttributeToken* theToken =
      NS_STATIC_CAST(CAttributeToken*,
                     theAllocator->CreateTokenOfType(eToken_attribute,
                                                     eHTMLTag_unknown));
    if(theToken){
      // Tell the new token to finish consuming text...
      result=theToken->Consume(aChar,aScanner,mFlags);

      // Much as I hate to do this, here's some special case code.
      // This handles the case of empty-tags in XML. Our last
      // attribute token will come through with a text value of ""
      // and a textkey of "/". We should destroy it.
      if(NS_SUCCEEDED(result)) {
        PRBool isUsableAttr = PR_TRUE;
        const nsSubstring& key=theToken->GetKey();
        const nsAString& text=theToken->GetValue();

        if(!key.IsEmpty() && kForwardSlash==key.First() && text.IsEmpty()) {
          if(!(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE)) {
            // We only care about these in view-source.
            isUsableAttr = PR_FALSE;
          }
        }
        if(isUsableAttr) {
          ++theAttrCount;
          // Queue the attribute token; ownership passes to mTokenDeque.
          AddToken((CToken*&)theToken,result,&mTokenDeque,theAllocator);
        }
        else {
          IF_FREE(theToken, mTokenAllocator);
        }
      }
      else {
        // Consume failed: recycle the partial attribute token.
        IF_FREE(theToken, mTokenAllocator);
        // Bad attributes are not a reason to set empty.
        if(NS_ERROR_HTMLPARSER_BADATTRIBUTE==result) {
          result=NS_OK;
        } else {
          aToken->SetEmpty(PR_TRUE);
        }
      }
    }

#ifdef DEBUG
    if(NS_SUCCEEDED(result)){
      PRInt32 newline = 0;
      result = aScanner.SkipWhitespace(newline);
      NS_ASSERTION(newline == 0, "CAttribute::Consume() failed to collect all the newlines!");
    }
#endif
    // Decide whether the attribute list has ended.
    if (NS_SUCCEEDED(result)) {
      result = aScanner.Peek(aChar);
      if (NS_SUCCEEDED(result)) {
        if (aChar == kGreaterThan) { // You just ate the '>'
          aScanner.GetChar(aChar); // Skip the '>'
          done = PR_TRUE;
        }
        else if(aChar == kLessThan) {
          // A '<' before the tag closed: flag the owning tag as in error
          // and stop (the '<' is left for the next ConsumeToken pass).
          aToken->SetInError(PR_TRUE);
          done = PR_TRUE;
        }
      }
    }
  } // End while

  if (NS_FAILED(result)) {
    aToken->SetInError(PR_TRUE);

    // On a non-incremental scanner there is no more data coming, so a
    // failure here is not fatal -- keep what we have.
    if (!aScanner.IsIncremental()) {
      result = NS_OK;
    }
  }

  aToken->SetAttributeCount(theAttrCount);
  return result;
}
|
|
|
|
/**
|
|
* This method consumes a start tag and all of its attributes.
|
|
*
|
|
* @param aChar The last character read from the scanner.
|
|
* @param aToken The OUT parameter that holds our resulting token. (allocated
|
|
* by the function using mTokenAllocator
|
|
* @param aScanner Our source of data
|
|
* @param aFlushTokens is an OUT parameter use to tell consumers to flush
|
|
* the current tokens after processing the current one.
|
|
* @return Error result.
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,
                                          CToken*& aToken,
                                          nsScanner& aScanner,
                                          PRBool& aFlushTokens)
{
  // Remember this for later in case you have to unwind...
  PRInt32 theDequeSize=mTokenDeque.GetSize();
  nsresult result=NS_OK;

  nsTokenAllocator* theAllocator=this->GetTokenAllocator();
  aToken=theAllocator->CreateTokenOfType(eToken_start,eHTMLTag_unknown);

  if(aToken) {
    // Tell the new token to finish consuming text...
    result= aToken->Consume(aChar,aScanner,mFlags);

    if(NS_SUCCEEDED(result)) {
      // Ownership of aToken passes to the deque here.
      AddToken(aToken,result,&mTokenDeque,theAllocator);

      eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID();

      // Good. Now, let's see if the next char is ">".
      // If so, we have a complete tag, otherwise, we have attributes.
      result = aScanner.Peek(aChar);
      if (NS_FAILED(result)) {
        aToken->SetInError(PR_TRUE);

        // Don't return early here so we can create a text and end token for
        // the special <iframe>, <script> and similar tags down below.
        result = NS_OK;
      }
      else {
        if(kGreaterThan != aChar) { // Look for a '>'
          result = ConsumeAttributes(aChar, aToken, aScanner);
        }
        else {
          aScanner.GetChar(aChar);
        }
      }

      /* Now that that's over with, we have one more problem to solve.
         In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
         consume all the content itself.
         But XML doesn't treat these tags differently, so we shouldn't if the
         document is XML.
       */
      if(NS_SUCCEEDED(result) && !(mFlags & NS_IPARSER_FLAG_XML)) {
        PRBool isCDATA = gHTMLElements[theTag].CanContainType(kCDATA);
        PRBool isPCDATA = eHTMLTag_textarea == theTag ||
                          eHTMLTag_title == theTag;

        // XXX This is an evil hack, we should be able to handle these properly
        // in the DTD.
        if ((eHTMLTag_iframe == theTag && (mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) ||
            (eHTMLTag_noframes == theTag && (mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) ||
            (eHTMLTag_noscript == theTag && (mFlags & NS_IPARSER_FLAG_SCRIPT_ENABLED)) ||
            (eHTMLTag_noembed == theTag)) {
          isCDATA = PR_TRUE;
        }

        // Plaintext contains CDATA, but it's special, so we handle it
        // differently than the other CDATA elements
        if (eHTMLTag_plaintext == theTag) {
          isCDATA = PR_FALSE;

          // Note: We check in ConsumeToken() for this flag, and if we see it
          // we only construct text tokens (which is what we want).
          mFlags |= NS_IPARSER_FLAG_PLAIN_TEXT;
        }


        if (isCDATA || isPCDATA) {
          PRBool done = PR_FALSE;
          nsDependentString endTagName(nsHTMLTags::GetStringValue(theTag));

          // One text token holds the element's entire (P)CDATA content.
          CToken* text =
            theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text);
          CTextToken* textToken = NS_STATIC_CAST(CTextToken*,text);

          if (isCDATA) {
            // The only tags that consume conservatively are <script> and
            // <style>, the rest all consume until the end of the document.
            result = textToken->ConsumeCharacterData(theTag==eHTMLTag_script ||
                                                     theTag==eHTMLTag_style,
                                                     theTag!=eHTMLTag_script,
                                                     aScanner,
                                                     endTagName,
                                                     mFlags,
                                                     done);

            // Only flush tokens for <script>, to give ourselves more of a
            // chance of allowing inlines to contain blocks.
            aFlushTokens = done && theTag == eHTMLTag_script;
          }
          else if (isPCDATA) {
            // Title is consumed conservatively in order to not regress
            // bug 42945
            result = textToken->ConsumeParsedCharacterData(
                                                      theTag==eHTMLTag_textarea,
                                                      theTag==eHTMLTag_title,
                                                      aScanner,
                                                      endTagName,
                                                      mFlags,
                                                      done);

            // Note: we *don't* set aFlushTokens here.
          }

          // We want to do this unless result is kEOF, in which case we will
          // simply unwind our stack and wait for more data anyway.
          if (kEOF != result) {
            AddToken(text,NS_OK,&mTokenDeque,theAllocator);
            CToken* endToken = nsnull;

            if (NS_SUCCEEDED(result) && done) {
              PRUnichar theChar;
              // Get the <
              result = aScanner.GetChar(theChar);
              NS_ASSERTION(NS_SUCCEEDED(result) && theChar == kLessThan,
                           "CTextToken::Consume*Data is broken!");
#ifdef DEBUG
              // Ensure we have a /
              PRUnichar tempChar; // Don't change non-debug vars in debug-only code
              result = aScanner.Peek(tempChar);
              NS_ASSERTION(NS_SUCCEEDED(result) && tempChar == kForwardSlash,
                           "CTextToken::Consume*Data is broken!");
#endif
              result = ConsumeEndTag(PRUnichar('/'),endToken,aScanner);
            } else if (result == kFakeEndTag &&
                      !(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE)) {
              // The content consumer hit EOF without a real close tag:
              // fabricate the end token ourselves.
              result = NS_OK;
              endToken=theAllocator->CreateTokenOfType(eToken_end,theTag,endTagName);
              AddToken(endToken,result,&mTokenDeque,theAllocator);
            } else if (result == kFakeEndTag) {
              // If we are here, we are both faking having seen the end tag
              // and are in view-source.
              result = NS_OK;
            }
          }
          else {
            // Ran out of data: recycle the text token; the unwind code
            // below discards everything queued this round.
            IF_FREE(text, mTokenAllocator);
          }
        }
      }

      // This code is confusing, so pay attention.
      // If you're here, it's because we were in the midst of consuming a start
      // tag but ran out of data (not in the stream, but in this *part* of the
      // stream. For simplicity, we have to unwind our input. Therefore, we pop
      // and discard any new tokens we've cued this round. Later we can get
      // smarter about this.
      if(NS_FAILED(result)) {
        while(mTokenDeque.GetSize()>theDequeSize) {
          CToken* theToken=(CToken*)mTokenDeque.Pop();
          IF_FREE(theToken, mTokenAllocator);
        }
      }
    }
    else IF_FREE(aToken, mTokenAllocator);
  }

  return result;
}
|
|
|
|
/**
|
|
* This method consumes an end tag and any "attributes" that may come after it.
|
|
*
|
|
* @param aChar The last character read from the scanner.
|
|
* @param aToken The OUT parameter that holds our resulting token.
|
|
* @param aScanner Our source of data
|
|
* @return Error result
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,
                                        CToken*& aToken,
                                        nsScanner& aScanner)
{
  // Get the "/" (we've already seen it with a Peek)
  aScanner.GetChar(aChar);

  nsTokenAllocator* theAllocator=this->GetTokenAllocator();
  aToken=theAllocator->CreateTokenOfType(eToken_end,eHTMLTag_unknown);
  // Remember this for later in case you have to unwind...
  PRInt32 theDequeSize=mTokenDeque.GetSize();
  nsresult result=NS_OK;

  if(aToken) {
    // Tell the new token to finish consuming text...
    result= aToken->Consume(aChar,aScanner,mFlags);
    // AddToken queues aToken on success; on failure it recycles it and
    // nulls it out, so the failure path below adds nothing to the deque.
    AddToken(aToken,result,&mTokenDeque,theAllocator);
    if (NS_FAILED(result)) {
      // Note that this early-return here is safe because we have not yet
      // added any of our tokens to the queue (AddToken only adds the token if
      // result is a success), so we don't need to fall through.
      return result;
    }

    result = aScanner.Peek(aChar);
    if (NS_FAILED(result)) {
      aToken->SetInError(PR_TRUE);

      // Note: We know here that the scanner is not incremental since if
      // this peek fails, then we've already masked over a kEOF coming from
      // the Consume() call above.
      return NS_OK;
    }

    // Anything before the '>' is treated as (ignorable) attribute text.
    if(kGreaterThan != aChar) {
      result = ConsumeAttributes(aChar, aToken, aScanner);
    }
    else {
      aScanner.GetChar(aChar);
    }

    // Do the same thing as we do in ConsumeStartTag. Basically, if we've run
    // out of room in this *section* of the document, pop all of the tokens
    // we've consumed this round and wait for more data.
    if(NS_FAILED(result)) {
      while(mTokenDeque.GetSize()>theDequeSize) {
        CToken* theToken=(CToken*)mTokenDeque.Pop();
        IF_FREE(theToken, mTokenAllocator);
      }
    }
  }
  return result;
}
|
|
|
|
/**
|
|
* This method is called just after a "&" has been consumed
|
|
* and we know we're at the start of an entity.
|
|
*
|
|
* @param aChar The last character read from the scanner.
|
|
* @param aToken The OUT parameter that holds our resulting token.
|
|
* @param aScanner Our source of data
|
|
* @return Error result.
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,
|
|
CToken*& aToken,
|
|
nsScanner& aScanner)
|
|
{
|
|
PRUnichar theChar;
|
|
nsresult result=aScanner.Peek(theChar, 1);
|
|
|
|
nsTokenAllocator* theAllocator=this->GetTokenAllocator();
|
|
if (NS_SUCCEEDED(result)) {
|
|
if (nsCRT::IsAsciiAlpha(theChar) || theChar==kHashsign) {
|
|
aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity);
|
|
result=aToken->Consume(theChar,aScanner,mFlags);
|
|
|
|
if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
|
|
IF_FREE(aToken, mTokenAllocator);
|
|
}
|
|
else {
|
|
if (!aScanner.IsIncremental() && result == kEOF) {
|
|
result=NS_OK; // Use as much of the entity as you can get.
|
|
}
|
|
AddToken(aToken,result,&mTokenDeque,theAllocator);
|
|
return result;
|
|
}
|
|
}
|
|
// Oops, we're actually looking at plain text...
|
|
result = ConsumeText(aToken,aScanner);
|
|
}
|
|
else if (result == kEOF && !aScanner.IsIncremental()) {
|
|
// If the last character in the file is an &, consume it as text.
|
|
result = ConsumeText(aToken, aScanner);
|
|
if (aToken) {
|
|
aToken->SetInError(PR_TRUE);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method is called just after whitespace has been
|
|
* consumed and we know we're at the start a whitespace run.
|
|
*
|
|
* @param aChar The last character read from the scanner.
|
|
* @param aToken The OUT parameter that holds our resulting token.
|
|
* @param aScanner Our source of data
|
|
* @return Error result.
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeWhitespace(PRUnichar aChar,
|
|
CToken*& aToken,
|
|
nsScanner& aScanner)
|
|
{
|
|
// Get the whitespace character
|
|
aScanner.GetChar(aChar);
|
|
|
|
nsTokenAllocator* theAllocator=this->GetTokenAllocator();
|
|
aToken = theAllocator->CreateTokenOfType(eToken_whitespace,eHTMLTag_whitespace);
|
|
nsresult result=NS_OK;
|
|
if(aToken) {
|
|
result=aToken->Consume(aChar,aScanner,mFlags);
|
|
AddToken(aToken,result,&mTokenDeque,theAllocator);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This method is called just after a "<!" has been consumed
|
|
* and we know we're at the start of a comment.
|
|
*
|
|
* @param aChar The last character read from the scanner.
|
|
* @param aToken The OUT parameter that holds our resulting token.
|
|
* @param aScanner Our source of data
|
|
* @return Error result.
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeComment(PRUnichar aChar,
|
|
CToken*& aToken,
|
|
nsScanner& aScanner)
|
|
{
|
|
// Get the "!"
|
|
aScanner.GetChar(aChar);
|
|
|
|
nsTokenAllocator* theAllocator=this->GetTokenAllocator();
|
|
aToken = theAllocator->CreateTokenOfType(eToken_comment,eHTMLTag_comment);
|
|
nsresult result=NS_OK;
|
|
if(aToken) {
|
|
result=aToken->Consume(aChar,aScanner,mFlags);
|
|
AddToken(aToken,result,&mTokenDeque,theAllocator);
|
|
}
|
|
|
|
if (kNotAComment == result) {
|
|
// AddToken has IF_FREE()'d our token, so...
|
|
result = ConsumeText(aToken, aScanner);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
 * This method is called just after a known text char has
 * been consumed and we should read a text run. Note: we actually ignore the
 * first character of the text run so that we can consume invalid markup
 * as text.
 *
 * @param aToken The OUT parameter that holds our resulting token.
 * @param aScanner Our source of data
 * @return Error result.
 */
nsresult nsHTMLTokenizer::ConsumeText(CToken*& aToken,nsScanner& aScanner)
{
  nsresult result=NS_OK;
  nsTokenAllocator* theAllocator=this->GetTokenAllocator();
  CTextToken* theToken = (CTextToken*)theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text);
  if(theToken) {
    // Placeholder char; per the method comment, the first character of the
    // run is ignored by the text token's Consume().
    PRUnichar ch=0;
    result=theToken->Consume(ch,aScanner,mFlags);
    if(NS_FAILED(result)) {
      if(0==theToken->GetTextLength()){
        // We failed without gathering any text.
        // NOTE(review): this IF_FREE targets the caller's incoming aToken,
        // not theToken -- confirm that is intended. theToken is still passed
        // to AddToken below, which (per the comment in ConsumeComment) is
        // expected to IF_FREE it on a failure result.
        IF_FREE(aToken, mTokenAllocator);
        aToken = nsnull;
      }
      else result=NS_OK; // We got some text; treat the partial run as success.
    }
    aToken = theToken;
    AddToken(aToken,result,&mTokenDeque,theAllocator);
  }
  return result;
}
|
|
|
|
/**
|
|
* This method is called just after a "<!" has been consumed.
|
|
* NOTE: Here we might consume DOCTYPE and "special" markups.
|
|
*
|
|
* @param aChar The last character read from the scanner.
|
|
* @param aToken The OUT parameter that holds our resulting token.
|
|
* @param aScanner Our source of data
|
|
* @return Error result.
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeSpecialMarkup(PRUnichar aChar,
|
|
CToken*& aToken,
|
|
nsScanner& aScanner)
|
|
{
|
|
// Get the "!"
|
|
aScanner.GetChar(aChar);
|
|
|
|
nsresult result=NS_OK;
|
|
nsAutoString theBufCopy;
|
|
aScanner.Peek(theBufCopy, 20);
|
|
ToUpperCase(theBufCopy);
|
|
PRInt32 theIndex=theBufCopy.Find("DOCTYPE");
|
|
nsTokenAllocator* theAllocator=this->GetTokenAllocator();
|
|
|
|
if(theIndex==kNotFound) {
|
|
if('['==theBufCopy.CharAt(0)) {
|
|
aToken = theAllocator->CreateTokenOfType(eToken_cdatasection,eHTMLTag_comment);
|
|
} else if (StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ELEMENT")) ||
|
|
StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ATTLIST")) ||
|
|
StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ENTITY")) ||
|
|
StringBeginsWith(theBufCopy, NS_LITERAL_STRING("NOTATION"))) {
|
|
aToken = theAllocator->CreateTokenOfType(eToken_markupDecl,eHTMLTag_markupDecl);
|
|
} else {
|
|
aToken = theAllocator->CreateTokenOfType(eToken_comment,eHTMLTag_comment);
|
|
}
|
|
}
|
|
else
|
|
aToken = theAllocator->CreateTokenOfType(eToken_doctypeDecl,eHTMLTag_doctypeDecl);
|
|
|
|
if(aToken) {
|
|
result=aToken->Consume(aChar,aScanner,mFlags);
|
|
AddToken(aToken,result,&mTokenDeque,theAllocator);
|
|
}
|
|
|
|
if (result == kNotAComment) {
|
|
result = ConsumeText(aToken, aScanner);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This method is called just after a newline has been consumed.
|
|
*
|
|
* @param aChar The last character read from the scanner.
|
|
* @param aToken The OUT parameter that holds our resulting token.
|
|
* @param aScanner Our source of data
|
|
* @return Error result.
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeNewline(PRUnichar aChar,
|
|
CToken*& aToken,
|
|
nsScanner& aScanner)
|
|
{
|
|
// Get the newline character
|
|
aScanner.GetChar(aChar);
|
|
|
|
nsTokenAllocator* theAllocator=this->GetTokenAllocator();
|
|
aToken=theAllocator->CreateTokenOfType(eToken_newline,eHTMLTag_newline);
|
|
nsresult result=NS_OK;
|
|
if(aToken) {
|
|
result=aToken->Consume(aChar,aScanner,mFlags);
|
|
AddToken(aToken,result,&mTokenDeque,theAllocator);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method is called just after a <? has been consumed.
|
|
*
|
|
* @param aChar The last character read from the scanner.
|
|
* @param aToken The OUT parameter that holds our resulting token.
|
|
* @param aScanner Our source of data
|
|
* @return Error result.
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeProcessingInstruction(PRUnichar aChar,
|
|
CToken*& aToken,
|
|
nsScanner& aScanner)
|
|
{
|
|
// Get the "?"
|
|
aScanner.GetChar(aChar);
|
|
|
|
nsTokenAllocator* theAllocator=this->GetTokenAllocator();
|
|
aToken=theAllocator->CreateTokenOfType(eToken_instruction,eHTMLTag_unknown);
|
|
nsresult result=NS_OK;
|
|
if(aToken) {
|
|
result=aToken->Consume(aChar,aScanner,mFlags);
|
|
AddToken(aToken,result,&mTokenDeque,theAllocator);
|
|
}
|
|
return result;
|
|
}
|