676 lines
20 KiB
C++
676 lines
20 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/*
|
|
* The contents of this file are subject to the Netscape Public License
|
|
* Version 1.0 (the "NPL"); you may not use this file except in
|
|
* compliance with the NPL. You may obtain a copy of the NPL at
|
|
* http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
|
* for the specific language governing rights and limitations under the
|
|
* NPL.
|
|
*
|
|
* The Initial Developer of this code under the NPL is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
|
* Reserved.
|
|
*/
|
|
|
|
|
|
/**
|
|
* MODULE NOTES:
|
|
* @update gess 4/1/98
|
|
*
|
|
*/
|
|
|
|
#include "nsHTMLTokenizer.h"
|
|
#include "nsParser.h"
|
|
#include "nsScanner.h"
|
|
#include "nsDTDUtils.h"
|
|
#include "nsElementTable.h"
|
|
#include "nsHTMLEntities.h"
|
|
|
|
/************************************************************************
|
|
And now for the main class -- nsHTMLTokenizer...
|
|
************************************************************************/
|
|
|
|
// Interface ids that QueryInterface() compares incoming requests against.
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
|
static NS_DEFINE_IID(kITokenizerIID, NS_ITOKENIZER_IID);
|
|
static NS_DEFINE_IID(kClassIID, NS_HTMLTOKENIZER_IID);
|
|
// File-wide token recycler; stays 0 until GetTokenRecycler() creates it.
static CTokenRecycler* gTokenRecycler=0;
|
|
|
|
/**
|
|
* This method gets called as part of our COM-like interfaces.
|
|
* Its purpose is to create an interface to parser object
|
|
* of some type.
|
|
*
|
|
* @update gess 4/8/98
|
|
* @param nsIID id of object to discover
|
|
* @param aInstancePtr ptr to newly discovered interface
|
|
* @return NS_xxx result code
|
|
*/
|
|
nsresult nsHTMLTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr)
|
|
{
|
|
if (NULL == aInstancePtr) {
|
|
return NS_ERROR_NULL_POINTER;
|
|
}
|
|
|
|
if(aIID.Equals(kISupportsIID)) { //do IUnknown...
|
|
*aInstancePtr = (nsIDTD*)(this);
|
|
}
|
|
else if(aIID.Equals(kITokenizerIID)) { //do IParser base class...
|
|
*aInstancePtr = (nsIDTD*)(this);
|
|
}
|
|
else if(aIID.Equals(kClassIID)) { //do this class...
|
|
*aInstancePtr = (nsHTMLTokenizer*)(this);
|
|
}
|
|
else {
|
|
*aInstancePtr=0;
|
|
return NS_NOINTERFACE;
|
|
}
|
|
NS_ADDREF_THIS();
|
|
return NS_OK;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method is defined in nsIParser. It is used to
|
|
* cause the COM-like construction of an nsParser.
|
|
*
|
|
* @update gess 4/8/98
|
|
* @param nsIParser** ptr to newly instantiated parser
|
|
* @return NS_xxx error result
|
|
*/
|
|
NS_HTMLPARS nsresult NS_NewHTMLTokenizer(nsIDTD** aInstancePtrResult) {
|
|
nsHTMLTokenizer* it = new nsHTMLTokenizer();
|
|
|
|
if (it == 0) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
return it->QueryInterface(kClassIID, (void **) aInstancePtrResult);
|
|
}
|
|
|
|
|
|
// NOTE(review): presumably these macros expand to this class's AddRef and
// Release implementations -- confirm against the macro definitions.
NS_IMPL_ADDREF(nsHTMLTokenizer)
|
|
NS_IMPL_RELEASE(nsHTMLTokenizer)
|
|
|
|
|
|
/**
|
|
* Default constructor
|
|
*
|
|
* @update gess 4/9/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsHTMLTokenizer::nsHTMLTokenizer() : nsITokenizer(), mTokenDeque(new CTokenDeallocator()) {
|
|
NS_INIT_REFCNT();
|
|
mDoXMLEmptyTags=PR_FALSE;
|
|
}
|
|
|
|
/**
|
|
* Default constructor
|
|
*
|
|
* @update gess 4/9/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsHTMLTokenizer::~nsHTMLTokenizer(){
|
|
}
|
|
|
|
|
|
/*******************************************************************
|
|
Here begin the real working methods for the tokenizer.
|
|
*******************************************************************/
|
|
|
|
/**
 *  Queues a freshly consumed token, or disposes of it if consuming failed.
 *
 *  @param   aToken   token to queue; may be null, in which case nothing
 *                    happens. Deleted and reset to 0 when aResult is a
 *                    failure code.
 *  @param   aResult  result of consuming aToken
 *  @param   aDeque   deque the token is pushed onto when aResult succeeded
 */
void nsHTMLTokenizer::AddToken(CToken*& aToken,nsresult aResult,nsDeque& aDeque) {
  if(!aToken) {
    return;   //nothing to queue or clean up
  }

  if(NS_SUCCEEDED(aResult)) {
    aDeque.Push(aToken);
    return;
  }

  //Consuming failed: the token is of no use to anybody.
  delete aToken;
  aToken=0;
}
|
|
|
|
/**
|
|
* Retrieve a ptr to the global token recycler...
|
|
* @update gess8/4/98
|
|
* @return ptr to recycler (or null)
|
|
*/
|
|
nsITokenRecycler* nsHTMLTokenizer::GetTokenRecycler(void) {
|
|
if (! gTokenRecycler) {
|
|
gTokenRecycler=new CTokenRecycler();
|
|
}
|
|
return gTokenRecycler;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method provides access to the topmost token in the tokenDeque.
|
|
* The token is not really removed from the list.
|
|
* @update gess8/2/98
|
|
* @return ptr to token
|
|
*/
|
|
CToken* nsHTMLTokenizer::PeekToken() {
|
|
return (CToken*)mTokenDeque.Peek();
|
|
}
|
|
|
|
|
|
/**
|
|
* This method provides access to the topmost token in the tokenDeque.
|
|
* The token is really removed from the list; if the list is empty we return 0.
|
|
* @update gess8/2/98
|
|
* @return ptr to token or NULL
|
|
*/
|
|
CToken* nsHTMLTokenizer::PopToken() {
|
|
return (CToken*)mTokenDeque.PopFront();
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
* @update gess8/2/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CToken* nsHTMLTokenizer::PushTokenFront(CToken* theToken) {
|
|
mTokenDeque.PushFront(theToken);
|
|
return theToken;
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @update gess8/2/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CToken* nsHTMLTokenizer::PushToken(CToken* theToken) {
|
|
mTokenDeque.Push(theToken);
|
|
return theToken;
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @update gess12/29/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 nsHTMLTokenizer::GetCount(void) {
|
|
return mTokenDeque.GetSize();
|
|
}
|
|
|
|
/**
 *  Looks up the token stored at the given position of the tokenDeque.
 *
 *  @param   anIndex  position handed to nsDeque::ObjectAt()
 *  @return  ptr to the token found there, as returned by ObjectAt()
 */
CToken* nsHTMLTokenizer::GetTokenAt(PRInt32 anIndex){
  CToken* theFound=(CToken*)mTokenDeque.ObjectAt(anIndex);
  return theFound;
}
|
|
|
|
/**
|
|
* This method repeatedly called by the tokenizer.
|
|
* Each time, we determine the kind of token were about to
|
|
* read, and then we call the appropriate method to handle
|
|
* that token type.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @param anErrorCode: arg that will hold error condition
|
|
* @return new token or null
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeToken(nsScanner& aScanner) {
|
|
|
|
nsresult result=NS_OK;
|
|
if(NS_OK==result){
|
|
PRUnichar theChar;
|
|
result=aScanner.GetChar(theChar);
|
|
CToken* theToken=0;
|
|
switch(result) {
|
|
case kEOF:
|
|
//We convert from eof to complete here, because we never really tried to get data.
|
|
//All we did was try to see if data was available, which it wasn't.
|
|
//It's important to return process complete, so that controlling logic can know that
|
|
//everything went well, but we're done with token processing.
|
|
break;
|
|
|
|
case NS_OK:
|
|
default:
|
|
switch(theChar) {
|
|
case kLessThan:
|
|
result=ConsumeTag(theChar,theToken,aScanner);
|
|
break;
|
|
|
|
case kAmpersand:
|
|
result=ConsumeEntity(theChar,theToken,aScanner);
|
|
break;
|
|
|
|
case kCR: case kLF:
|
|
result=ConsumeNewline(theChar,theToken,aScanner);
|
|
break;
|
|
|
|
case kNotFound:
|
|
break;
|
|
|
|
default:
|
|
if(!nsString::IsSpace(theChar)) {
|
|
nsAutoString temp(theChar);
|
|
result=ConsumeText(temp,theToken,aScanner);
|
|
break;
|
|
}
|
|
result=ConsumeWhitespace(theChar,theToken,aScanner);
|
|
break;
|
|
} //switch
|
|
break;
|
|
} //switch
|
|
} //if
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method is called just after a "<" has been consumed
|
|
* and we know we're at the start of some kind of tagged
|
|
* element. We don't know yet if it's a tag or a comment.
|
|
*
|
|
* @update gess 5/12/98
|
|
* @param aChar is the last char read
|
|
* @param aScanner is represents our input source
|
|
* @param aToken is the out arg holding our new token
|
|
* @return error code.
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) {
|
|
|
|
nsresult result=aScanner.GetChar(aChar);
|
|
|
|
if(NS_OK==result) {
|
|
|
|
switch(aChar) {
|
|
case kForwardSlash:
|
|
PRUnichar ch;
|
|
result=aScanner.Peek(ch);
|
|
if(NS_OK==result) {
|
|
if(nsString::IsAlpha(ch)) {
|
|
result=ConsumeEndTag(aChar,aToken,aScanner);
|
|
}
|
|
else result=ConsumeComment(aChar,aToken,aScanner);
|
|
}//if
|
|
break;
|
|
|
|
case kExclamation:
|
|
result=ConsumeComment(aChar,aToken,aScanner);
|
|
break;
|
|
|
|
case kQuestionMark: //it must be an XML processing instruction...
|
|
result=ConsumeProcessingInstruction(aChar,aToken,aScanner);
|
|
break;
|
|
|
|
default:
|
|
if(nsString::IsAlpha(aChar))
|
|
result=ConsumeStartTag(aChar,aToken,aScanner);
|
|
else if(kEOF!=aChar) {
|
|
nsAutoString temp("<");
|
|
result=ConsumeText(temp,aToken,aScanner);
|
|
}
|
|
} //switch
|
|
|
|
} //if
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This method is called just after we've consumed a start
|
|
* tag, and we now have to consume its attributes.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @return
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeAttributes(PRUnichar aChar,CStartToken* aToken,nsScanner& aScanner) {
|
|
PRBool done=PR_FALSE;
|
|
nsresult result=NS_OK;
|
|
PRInt16 theAttrCount=0;
|
|
|
|
CTokenRecycler* theRecycler=(CTokenRecycler*)GetTokenRecycler();
|
|
|
|
while((!done) && (result==NS_OK)) {
|
|
CToken* theToken= (CAttributeToken*)theRecycler->CreateTokenOfType(eToken_attribute,eHTMLTag_unknown);
|
|
if(theToken){
|
|
result=theToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
|
|
|
//Much as I hate to do this, here's some special case code.
|
|
//This handles the case of empty-tags in XML. Our last
|
|
//attribute token will come through with a text value of ""
|
|
//and a textkey of "/". We should destroy it, and tell the
|
|
//start token it was empty.
|
|
if(NS_SUCCEEDED(result)) {
|
|
nsString& key=((CAttributeToken*)theToken)->GetKey();
|
|
nsString& text=theToken->GetStringValueXXX();
|
|
if((mDoXMLEmptyTags) && (key[0]==kForwardSlash) && (0==text.Length())){
|
|
//tada! our special case! Treat it like an empty start tag...
|
|
aToken->SetEmpty(PR_TRUE);
|
|
theRecycler->RecycleToken(theToken);
|
|
}
|
|
else {
|
|
theAttrCount++;
|
|
AddToken(theToken,result,mTokenDeque);
|
|
}
|
|
}
|
|
else { //if(NS_ERROR_HTMLPARSER_BADATTRIBUTE==result){
|
|
aToken->SetEmpty(PR_TRUE);
|
|
theRecycler->RecycleToken(theToken);
|
|
result=NS_OK;
|
|
}
|
|
}//if
|
|
|
|
if(NS_SUCCEEDED(result)){
|
|
result=aScanner.SkipWhitespace();
|
|
if(NS_SUCCEEDED(result)) {
|
|
result=aScanner.Peek(aChar);
|
|
if(NS_SUCCEEDED(result) && (aChar==kGreaterThan)) { //you just ate the '>'
|
|
aScanner.GetChar(aChar); //skip the '>'
|
|
done=PR_TRUE;
|
|
}//if
|
|
}
|
|
}//if
|
|
}//while
|
|
|
|
aToken->SetAttributeCount(theAttrCount);
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This is a special case method. It's job is to consume
|
|
* all of the given tag up to an including the end tag.
|
|
*
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @param anErrorCode: arg that will hold error condition
|
|
* @return new token or null
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeContentToEndTag(PRUnichar aChar,
|
|
eHTMLTags aChildTag,
|
|
nsScanner& aScanner,
|
|
CToken*& aToken){
|
|
|
|
//In the case that we just read the given tag, we should go and
|
|
//consume all the input until we find a matching end tag.
|
|
|
|
nsAutoString endTag("</");
|
|
endTag.Append(NS_EnumToTag(aChildTag));
|
|
endTag.Append(">");
|
|
|
|
CTokenRecycler* theRecycler=(CTokenRecycler*)GetTokenRecycler();
|
|
aToken=theRecycler->CreateTokenOfType(eToken_skippedcontent,aChildTag,endTag);
|
|
return aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @update gess12/28/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult nsHTMLTokenizer::HandleSkippedContent(nsScanner& aScanner,CToken*& aToken) {
|
|
nsresult result=NS_OK;
|
|
|
|
eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID();
|
|
if(eHTMLTag_unknown!=gHTMLElements[theTag].mSkipTarget) {
|
|
|
|
//Do special case handling for <script>, <style>, <title> or <textarea>...
|
|
CToken* skippedToken=0;
|
|
PRUnichar theChar=0;
|
|
result=ConsumeContentToEndTag(theChar,gHTMLElements[theTag].mSkipTarget,aScanner,skippedToken);
|
|
|
|
if((NS_OK==result) && skippedToken){
|
|
AddToken(skippedToken,result,mTokenDeque);
|
|
|
|
//In the case that we just read a given tag, we should go and
|
|
//consume all the tag content itself (and throw it all away).
|
|
|
|
CTokenRecycler* theRecycler=(CTokenRecycler*)GetTokenRecycler();
|
|
nsString& theTagStr=skippedToken->GetStringValueXXX();
|
|
CToken* endtoken=theRecycler->CreateTokenOfType(eToken_end,theTag,theTagStr);
|
|
if(endtoken){
|
|
nsAutoString temp;
|
|
theTagStr.Mid(temp,2,theTagStr.Length()-3);
|
|
//now strip the leading and trailing delimiters...
|
|
endtoken->Reinitialize(theTag,temp);
|
|
AddToken(endtoken,result,mTokenDeque);
|
|
}
|
|
} //if
|
|
} //if
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @update gess12/28/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) {
|
|
PRInt32 theDequeSize=mTokenDeque.GetSize(); //remember this for later in case you have to unwind...
|
|
nsresult result=NS_OK;
|
|
|
|
CTokenRecycler* theRecycler=(CTokenRecycler*)GetTokenRecycler();
|
|
aToken=theRecycler->CreateTokenOfType(eToken_start,eHTMLTag_unknown);
|
|
|
|
if(aToken) {
|
|
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
|
if(NS_SUCCEEDED(result)) {
|
|
|
|
AddToken(aToken,result,mTokenDeque);
|
|
eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID();
|
|
|
|
if(((CStartToken*)aToken)->IsAttributed()) {
|
|
result=ConsumeAttributes(aChar,(CStartToken*)aToken,aScanner);
|
|
}
|
|
|
|
//now that that's over with, we have one more problem to solve.
|
|
//In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and
|
|
//consume all the content itself.
|
|
if(NS_SUCCEEDED(result)) {
|
|
result=HandleSkippedContent(aScanner,aToken);
|
|
}
|
|
|
|
//EEEEECCCCKKKK!!!
|
|
//This code is confusing, so pay attention.
|
|
//If you're here, it's because we were in the midst of consuming a start
|
|
//tag but ran out of data (not in the stream, but in this *part* of the stream.
|
|
//For simplicity, we have to unwind our input. Therefore, we pop and discard
|
|
//any new tokens we've cued this round. Later we can get smarter about this.
|
|
if(!NS_SUCCEEDED(result)) {
|
|
while(mTokenDeque.GetSize()>theDequeSize) {
|
|
theRecycler->RecycleToken((CToken*)mTokenDeque.Pop());
|
|
}
|
|
}
|
|
} //if
|
|
else theRecycler->RecycleToken(aToken);
|
|
} //if
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @update gess12/28/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) {
|
|
|
|
CTokenRecycler* theRecycler=(CTokenRecycler*)GetTokenRecycler();
|
|
aToken=theRecycler->CreateTokenOfType(eToken_end,eHTMLTag_unknown);
|
|
nsresult result=NS_OK;
|
|
|
|
if(aToken) {
|
|
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
|
AddToken(aToken,result,mTokenDeque);
|
|
} //if
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This method is called just after a "&" has been consumed
|
|
* and we know we're at the start of an entity.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @param anErrorCode: arg that will hold error condition
|
|
* @return new token or null
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) {
|
|
PRUnichar theChar;
|
|
nsresult result=aScanner.GetChar(theChar);
|
|
|
|
CTokenRecycler* theRecycler=(CTokenRecycler*)GetTokenRecycler();
|
|
if(NS_OK==result) {
|
|
if(nsString::IsAlpha(theChar)) { //handle common enity references &xxx; or �.
|
|
aToken = theRecycler->CreateTokenOfType(eToken_entity,eHTMLTag_entity);
|
|
result = aToken->Consume(theChar,aScanner); //tell new token to finish consuming text...
|
|
}
|
|
else if(kHashsign==theChar) {
|
|
aToken = theRecycler->CreateTokenOfType(eToken_entity,eHTMLTag_entity);
|
|
result=aToken->Consume(theChar,aScanner);
|
|
}
|
|
else {
|
|
//oops, we're actually looking at plain text...
|
|
nsAutoString temp("&");
|
|
aScanner.PutBack(theChar);
|
|
return ConsumeText(temp,aToken,aScanner);
|
|
}//if
|
|
if(aToken){
|
|
char cbuf[30];
|
|
nsString& theStr=aToken->GetStringValueXXX();
|
|
theStr.ToCString(cbuf, sizeof(cbuf)-1);
|
|
if((kHashsign!=theChar) && (-1==NS_EntityToUnicode(cbuf))){
|
|
//if you're here we have a bogus entity.
|
|
//convert it into a text token.
|
|
nsAutoString temp("&");
|
|
temp.Append(theStr);
|
|
CToken* theToken=theRecycler->CreateTokenOfType(eToken_text,eHTMLTag_text,temp);
|
|
theRecycler->RecycleToken(aToken);
|
|
aToken=theToken;
|
|
}
|
|
AddToken(aToken,result,mTokenDeque);
|
|
}
|
|
}//if
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method is called just after whitespace has been
|
|
* consumed and we know we're at the start a whitespace run.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @param anErrorCode: arg that will hold error condition
|
|
* @return new token or null
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeWhitespace(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) {
|
|
CTokenRecycler* theRecycler=(CTokenRecycler*)GetTokenRecycler();
|
|
aToken = theRecycler->CreateTokenOfType(eToken_whitespace,eHTMLTag_whitespace);
|
|
nsresult result=NS_OK;
|
|
if(aToken) {
|
|
result=aToken->Consume(aChar,aScanner);
|
|
AddToken(aToken,result,mTokenDeque);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This method is called just after a "<!" has been consumed
|
|
* and we know we're at the start of a comment.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @param anErrorCode: arg that will hold error condition
|
|
* @return new token or null
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeComment(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){
|
|
CTokenRecycler* theRecycler=(CTokenRecycler*)GetTokenRecycler();
|
|
aToken = theRecycler->CreateTokenOfType(eToken_comment,eHTMLTag_comment);
|
|
nsresult result=NS_OK;
|
|
if(aToken) {
|
|
result=aToken->Consume(aChar,aScanner);
|
|
AddToken(aToken,result,mTokenDeque);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This method is called just after a known text char has
|
|
* been consumed and we should read a text run.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @param anErrorCode: arg that will hold error condition
|
|
* @return new token or null
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeText(const nsString& aString,CToken*& aToken,nsScanner& aScanner){
|
|
nsresult result=NS_OK;
|
|
CTokenRecycler* theRecycler=(CTokenRecycler*)GetTokenRecycler();
|
|
aToken=theRecycler->CreateTokenOfType(eToken_text,eHTMLTag_text,aString);
|
|
if(aToken) {
|
|
PRUnichar ch=0;
|
|
result=aToken->Consume(ch,aScanner);
|
|
if(result) {
|
|
nsString& temp=aToken->GetStringValueXXX();
|
|
if(0==temp.Length()){
|
|
delete aToken;
|
|
aToken = nsnull;
|
|
}
|
|
else result=NS_OK;
|
|
}
|
|
AddToken(aToken,result,mTokenDeque);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This method is called just after a newline has been consumed.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @param aToken is the newly created newline token that is parsing
|
|
* @return error code
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeNewline(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){
|
|
CTokenRecycler* theRecycler=(CTokenRecycler*)GetTokenRecycler();
|
|
aToken=theRecycler->CreateTokenOfType(eToken_newline,eHTMLTag_newline);
|
|
nsresult result=NS_OK;
|
|
if(aToken) {
|
|
result=aToken->Consume(aChar,aScanner);
|
|
AddToken(aToken,result,mTokenDeque);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method is called just after a ? has been consumed.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @param aToken is the newly created newline token that is parsing
|
|
* @return error code
|
|
*/
|
|
nsresult nsHTMLTokenizer::ConsumeProcessingInstruction(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){
|
|
CTokenRecycler* theRecycler=(CTokenRecycler*)GetTokenRecycler();
|
|
aToken=theRecycler->CreateTokenOfType(eToken_instruction,eHTMLTag_unknown);
|
|
nsresult result=NS_OK;
|
|
if(aToken) {
|
|
result=aToken->Consume(aChar,aScanner);
|
|
AddToken(aToken,result,mTokenDeque);
|
|
}
|
|
return result;
|
|
}
|
|
|