/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * The contents of this file are subject to the Netscape Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is Netscape
 * Communications Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All
 * Rights Reserved.
 *
 * Contributor(s):
 */

/**
 * MODULE NOTES:
 * @update  gess 4/1/98
 *
 */

#include "nsHTMLTokenizer.h"
#include "nsParser.h"
#include "nsScanner.h"
#include "nsElementTable.h"
#include "nsHTMLEntities.h"
#include "CParserContext.h"
#include "nsReadableUtils.h"

/************************************************************************
  And now for the main class -- nsHTMLTokenizer...
 ************************************************************************/

static NS_DEFINE_IID(kISupportsIID,  NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kITokenizerIID, NS_ITOKENIZER_IID);
static NS_DEFINE_IID(kClassIID,      NS_HTMLTOKENIZER_IID);

/**
 * This method gets called as part of our COM-like interfaces.
 * Its purpose is to create an interface to a parser object
 * of some type.
 *
 * @update  gess 4/8/98
 * @param   aIID          id of object to discover
 * @param   aInstancePtr  ptr to newly discovered interface
 * @return  NS_xxx result code
 */
nsresult nsHTMLTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
  if (NULL == aInstancePtr) {
    return NS_ERROR_NULL_POINTER;
  }

  if(aIID.Equals(kISupportsIID)) {        //do IUnknown...
    *aInstancePtr = (nsISupports*)(this);
  }
  else if(aIID.Equals(kITokenizerIID)) {  //do IParser base class...
    *aInstancePtr = (nsITokenizer*)(this);
  }
  else if(aIID.Equals(kClassIID)) {       //do this class...
    *aInstancePtr = (nsHTMLTokenizer*)(this);
  }
  else {
    *aInstancePtr=0;
    return NS_NOINTERFACE;
  }
  NS_ADDREF_THIS();
  return NS_OK;
}

/**
 * This method is defined in nsHTMLTokenizer.h. It is used to
 * cause the COM-like construction of an HTMLTokenizer.
 *
 * @update  gess 4/8/98
 * @param   aInstancePtrResult: ptr to the newly instantiated tokenizer
 * @return  NS_xxx error result
 */
NS_HTMLPARS nsresult NS_NewHTMLTokenizer(nsITokenizer** aInstancePtrResult,PRInt32 aMode,eParserDocType aDocType, eParserCommands aCommand)
{
  NS_PRECONDITION(nsnull != aInstancePtrResult, "null ptr");
  if (nsnull == aInstancePtrResult) {
    return NS_ERROR_NULL_POINTER;
  }
  nsHTMLTokenizer* it = new nsHTMLTokenizer(aMode,aDocType,aCommand);
  if (nsnull == it) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return it->QueryInterface(kClassIID, (void **) aInstancePtrResult);
}

NS_IMPL_ADDREF(nsHTMLTokenizer)
NS_IMPL_RELEASE(nsHTMLTokenizer)

/**
 * Default constructor
 *
 * @update  gess 4/9/98
 * @param
 * @return
 */
nsHTMLTokenizer::nsHTMLTokenizer(PRInt32 aParseMode,
                                 eParserDocType aDocType,
                                 eParserCommands aCommand) :
  nsITokenizer(), mTokenDeque(0), mParseMode(aParseMode)
{
  NS_INIT_REFCNT();
  mDoXMLEmptyTags=((eDTDMode_strict==aParseMode) || (eDTDMode_transitional==aParseMode));
  mDocType=aDocType;
  mRecordTrailingContent=PR_FALSE;
  mParserCommand=aCommand;
  mTokenAllocator=nsnull;
  mTokenScanPos=0;
}

/**
 * Destructor
 *
 * @update  gess 4/9/98
 * @param
 * @return
 */
nsHTMLTokenizer::~nsHTMLTokenizer(){
  if(mTokenDeque.GetSize()){
    CTokenDeallocator theDeallocator(mTokenAllocator->GetArenaPool());
    mTokenDeque.ForEach(theDeallocator);
  }
}

/*******************************************************************
  Here begin the real working methods for the tokenizer.
 *******************************************************************/

void nsHTMLTokenizer::AddToken(CToken*& aToken,nsresult aResult,nsDeque* aDeque,nsTokenAllocator* aTokenAllocator)
{
  if(aToken && aDeque) {
    if(NS_SUCCEEDED(aResult)) {
      aDeque->Push(aToken);
    }
    else {
      IF_FREE(aToken, aTokenAllocator);
    }
  }
}

/**
 * Retrieve a ptr to the global token recycler...
 * @update  gess8/4/98
 * @return  ptr to recycler (or null)
 */
nsTokenAllocator* nsHTMLTokenizer::GetTokenAllocator(void)
{
  return mTokenAllocator;
}

/**
 * This method provides access to the topmost token in the tokenDeque.
 * The token is not really removed from the list.
 * @update  gess8/2/98
 * @return  ptr to token
 */
CToken* nsHTMLTokenizer::PeekToken()
{
  return (CToken*)mTokenDeque.PeekFront();
}

/**
 * This method provides access to the topmost token in the tokenDeque.
 * The token is really removed from the list; if the list is empty we return 0.
 * @update  gess8/2/98
 * @return  ptr to token or NULL
 */
CToken* nsHTMLTokenizer::PopToken()
{
  CToken* result=nsnull;
  result=(CToken*)mTokenDeque.PopFront();
  return result;
}

/**
 * Pushes the given token onto the front of the token queue.
 *
 * @update  gess8/2/98
 * @param   theToken: token to be pushed
 * @return  the token that was pushed
 */
CToken* nsHTMLTokenizer::PushTokenFront(CToken* theToken)
{
  mTokenDeque.PushFront(theToken);
  return theToken;
}

/**
 * Pushes the given token onto the end of the token queue.
 *
 * @update  gess8/2/98
 * @param   theToken: token to be pushed
 * @return  the token that was pushed
 */
CToken* nsHTMLTokenizer::PushToken(CToken* theToken)
{
  mTokenDeque.Push(theToken);
  return theToken;
}

/**
 * Returns the number of tokens currently in the queue.
 *
 * @update  gess12/29/98
 * @param
 * @return  token count
 */
PRInt32 nsHTMLTokenizer::GetCount(void)
{
  return mTokenDeque.GetSize();
}

/**
 * Returns the token at the given index without removing it.
 *
 * @update  gess12/29/98
 * @param   anIndex: position of the token of interest
 * @return  ptr to token (or null)
 */
CToken* nsHTMLTokenizer::GetTokenAt(PRInt32 anIndex){
  return (CToken*)mTokenDeque.ObjectAt(anIndex);
}

/**
 * @update  gess 12/29/98
 * @update  harishd 08/04/00
 * @param
 * @return
 */
nsresult nsHTMLTokenizer::WillTokenize(PRBool aIsFinalChunk,nsTokenAllocator* aTokenAllocator)
{
  mTokenAllocator=aTokenAllocator;
  mIsFinalChunk=aIsFinalChunk;
  mTokenScanPos=mTokenDeque.GetSize()+1; //cause scanDocStructure to search from here for new tokens...
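  // (DidTokenize later calls ScanDocStructure, which picks up from this position
  //  and marks each new container token as well-formed or malformed.)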
  return NS_OK;
}

/**
 * This method prepends the tokens in aDeque onto our own token queue.
 *
 * @update  gess12/29/98
 * @param   aDeque: tokens to be prepended
 * @return
 */
void nsHTMLTokenizer::PrependTokens(nsDeque& aDeque){

  PRInt32 aCount=aDeque.GetSize();

  //last but not least, let's check the misplaced content list.
  //if we find it, then we have to push it all into the body before continuing...
  PRInt32 anIndex=0;
  for(anIndex=0;anIndex<aCount;anIndex++){
    CToken* theToken=(CToken*)aDeque.Pop();
    PushTokenFront(theToken);
  }
}

/**
 * This method finds the most recently pushed occurrence of the given
 * tag on the tag stack, searching from the top down.
 *
 * @param   aTag: tag to look for
 * @param   aTagStack: stack of open container tokens
 * @return  index of the tag in the stack, or kNotFound
 */
static PRInt32 FindLastIndexOfTag(eHTMLTags aTag,nsDeque &aTagStack) {
  PRInt32 theCount=aTagStack.GetSize();
  while(0<=--theCount) {
    CHTMLToken *theToken=(CHTMLToken*)aTagStack.ObjectAt(theCount);
    if(theToken) {
      eHTMLTags theTag=(eHTMLTags)theToken->GetTypeID();
      if(theTag==aTag) {
        return theCount;
      }
    }
  }
  return kNotFound;
}

/**
 * This method scans the sequence of tokens to determine the
 * well formedness of each tag structure. This is used to
 * disable residual-style handling in well formed cases.
 *
 * @update  gess 1Sep2000
 * @param
 * @return
 */
nsresult nsHTMLTokenizer::ScanDocStructure(PRBool aFinalChunk) {
  nsresult result=NS_OK;
  CHTMLToken *theRootToken=0;

  //*** start by finding the first start tag that hasn't been reviewed.

  while(mTokenScanPos>0) {
    theRootToken=(CHTMLToken*)mTokenDeque.ObjectAt(mTokenScanPos);
    if(theRootToken) {
      eHTMLTokenTypes theType=eHTMLTokenTypes(theRootToken->GetTokenType());
      if(eToken_start==theType) {
        if(eFormUnknown==theRootToken->GetContainerInfo()) {
          break;
        }
      }
    }
    mTokenScanPos--;
  }

  /*----------------------------------------------------------------------
   * Now that we know where to start, let's walk through the
   * tokens to see which are well-formed. Stop when you run out
   * of fresh tokens.
   *---------------------------------------------------------------------*/

  theRootToken=(CHTMLToken*)mTokenDeque.ObjectAt(mTokenScanPos); //init to root

  nsDeque     theStack(0);
  eHTMLTags   theRootTag=eHTMLTag_unknown;
  CHTMLToken *theToken=theRootToken; //init to root
  PRInt32     theStackDepth=0;

  static const PRInt32 theMaxStackDepth=200; //dont bother if we get ridiculously deep.

  while(theToken && (theStackDepth<theMaxStackDepth)) {

    eHTMLTokenTypes theType=eHTMLTokenTypes(theToken->GetTokenType());
    eHTMLTags       theTag=(eHTMLTags)theToken->GetTypeID();
    PRBool          theTagIsContainer=nsHTMLElement::IsContainer(theTag);  //bug54117...

    if(theTagIsContainer) {
      PRBool theTagIsBlock=gHTMLElements[theTag].IsMemberOf(kBlockEntity);
      PRBool theTagIsInline= (theTagIsBlock) ? PR_FALSE : gHTMLElements[theTag].IsMemberOf(kInlineEntity);

      if(theTagIsBlock || theTagIsInline || (eHTMLTag_table==theTag)) {

        switch(theType) {

          case eToken_start:
            if(0==theStack.GetSize()) {
              //track the tag on the top of the stack...
              theRootToken=theToken;
              theRootTag=theTag;
            }
            theStack.Push(theToken);
            theStackDepth++;
            break;

          case eToken_end:
            {
              CHTMLToken *theLastToken= NS_STATIC_CAST(CHTMLToken*, theStack.Peek());
              if(theLastToken) {
                if(theTag==theLastToken->GetTypeID()) {
                  theStack.Pop(); //yank it for real
                  theStackDepth--;
                  theLastToken->SetContainerInfo(eWellFormed);

                  //in addition, let's look above this container to see if we can find
                  //any tags that are already marked malformed. If so, pop them too!

                  theLastToken= NS_STATIC_CAST(CHTMLToken*, theStack.Peek());
                  while(theLastToken) {
                    if(eMalformed==theRootToken->GetContainerInfo()) {
                      theStack.Pop(); //yank the malformed token for real.
                      theLastToken= NS_STATIC_CAST(CHTMLToken*, theStack.Peek());
                      continue;
                    }
                    break;
                  }
                }
                else {
                  //the topmost token isn't what we expected, so that container must
                  //be malformed. If the tag is a block, we don't really care (but we'll
                  //mark it anyway). If it's an inline we DO care, especially if the
                  //inline tried to contain a block (that's when RS handling kicks in).
                  if(theTagIsInline) {
                    PRInt32 theIndex=FindLastIndexOfTag(theTag,theStack);
                    if(kNotFound!=theIndex) {
                      theToken=(CHTMLToken*)theStack.ObjectAt(theIndex);
                      theToken->SetContainerInfo(eMalformed);
                    }
                    //otherwise we ignore an out-of-place end tag.
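                    //(e.g. a stray </b> whose <b> was never opened, so it's not on the stack).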
                  }
                  else {
                  }
                }
              }
            }
            break;

          default:
            break;

        } //switch
      }
    }
    theToken=(CHTMLToken*)mTokenDeque.ObjectAt(++mTokenScanPos);
  }

  return result;
}

nsresult nsHTMLTokenizer::DidTokenize(PRBool aFinalChunk)
{
  return ScanDocStructure(aFinalChunk);
}

/**
 * This method is repeatedly called by the tokenizer.
 * Each time, we determine the kind of token we're about to
 * read, and then we call the appropriate method to handle
 * that token type.
 *
 * @update  gess 3/25/98
 * @param   aChar: last char read
 * @param   aScanner: see nsScanner.h
 * @param   anErrorCode: arg that will hold error condition
 * @return  new token or null
 */
nsresult nsHTMLTokenizer::ConsumeToken(nsScanner& aScanner,PRBool& aFlushTokens) {

  PRUnichar theChar;
  CToken* theToken=0;

  nsresult result=aScanner.Peek(theChar);

  switch(result) {
    case kEOF:
      //We convert from eof to complete here, because we never really tried to get data.
      //All we did was try to see if data was available, which it wasn't.
      //It's important to return process complete, so that controlling logic can know that
      //everything went well, but we're done with token processing.
      return result;

    case NS_OK:
    default:

      if(ePlainText!=mDocType) {
        if(kLessThan==theChar) {
          return ConsumeTag(theChar,theToken,aScanner,aFlushTokens);
        }
        else if(kAmpersand==theChar){
          return ConsumeEntity(theChar,theToken,aScanner);
        }
      }

      if((kCR==theChar) || (kLF==theChar)) {
        return ConsumeNewline(theChar,theToken,aScanner);
      }
      else {
        if(!nsCRT::IsAsciiSpace(theChar)) {
          if(theChar!=nsnull) {
            result=ConsumeText(theToken,aScanner);
          }
          else {
            aScanner.GetChar(theChar); // skip the embedded null char. Fix bug 64098.
          }
          break;
        }
        result=ConsumeWhitespace(theChar,theToken,aScanner);
      }
      break;
  } //switch

  return result;
}

/**
 * This method is called just after a "<" has been consumed
 * and we know we're at the start of some kind of tagged
 * element. We don't know yet if it's a tag or a comment.
 *
 * @update  gess 5/12/98
 * @param   aChar is the last char read
 * @param   aScanner is represents our input source
 * @param   aToken is the out arg holding our new token
 * @return  error code.
 */
nsresult nsHTMLTokenizer::ConsumeTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner,PRBool& aFlushTokens) {

  PRUnichar theNextChar, oldChar;
  nsresult result=aScanner.Peek(aChar,1);

  if(NS_OK==result) {

    switch(aChar) {
      case kForwardSlash:
        // Get the original "<" (we've already seen it with a Peek)
        aScanner.GetChar(oldChar);

        result=aScanner.Peek(theNextChar, 1);
        if(NS_OK==result) {
          if(nsCRT::IsAsciiAlpha(theNextChar)||(kGreaterThan==theNextChar)) {
            result=ConsumeEndTag(aChar,aToken,aScanner);
          }
          else result=ConsumeComment(aChar,aToken,aScanner);
        }//if
        break;

      case kExclamation:
        // Get the original "<" (we've already seen it with a Peek)
        aScanner.GetChar(oldChar);

        result=aScanner.Peek(theNextChar, 1);
        if(NS_OK==result) {
          if((kMinus==theNextChar) || (kGreaterThan==theNextChar)) {
            result=ConsumeComment(aChar,aToken,aScanner);
          }
          else result=ConsumeSpecialMarkup(aChar,aToken,aScanner);
        }
        break;

      case kQuestionMark: //it must be an XML processing instruction...
        // Get the original "<" (we've already seen it with a Peek)
        aScanner.GetChar(oldChar);

        result=ConsumeProcessingInstruction(aChar,aToken,aScanner);
        break;

      default:
        if(nsCRT::IsAsciiAlpha(aChar)) {
          // Get the original "<" (we've already seen it with a Peek)
          aScanner.GetChar(oldChar);
          result=ConsumeStartTag(aChar,aToken,aScanner,aFlushTokens);
        }
        else {
          // We are not dealing with a tag. So, don't consume the original
          // char and leave the decision to ConsumeText().
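          // (e.g. "5 < 6": here the "<" is followed by whitespace, so it's just text.)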
          result=ConsumeText(aToken,aScanner);
        }
    } //switch
  } //if

  return result;
}

/**
 * This method is called just after we've consumed a start
 * tag, and we now have to consume its attributes.
 *
 * @update  rickg 03.23.2000
 * @param   aChar: last char read
 * @param   aScanner: see nsScanner.h
 * @param   aLeadingWS: contains ws chars that preceded the first attribute
 * @return
 */
nsresult nsHTMLTokenizer::ConsumeAttributes(PRUnichar aChar,CStartToken* aToken,nsScanner& aScanner) {

  PRBool done=PR_FALSE;
  nsresult result=NS_OK;
  PRInt16 theAttrCount=0;

  nsTokenAllocator* theAllocator=this->GetTokenAllocator();

  while((!done) && (result==NS_OK)) {
    CAttributeToken* theToken= NS_STATIC_CAST(CAttributeToken*, theAllocator->CreateTokenOfType(eToken_attribute,eHTMLTag_unknown));
    if(theToken){
      result=theToken->Consume(aChar,aScanner,PRBool(eViewSource==mParserCommand)); //tell new token to finish consuming text...

      //Much as I hate to do this, here's some special case code.
      //This handles the case of empty-tags in XML. Our last
      //attribute token will come through with a text value of ""
      //and a textkey of "/". We should destroy it, and tell the
      //start token it was empty.
      if(NS_SUCCEEDED(result)) {
        PRBool isUsableAttr=PR_TRUE;
        const nsAReadableString& key=theToken->GetKey();
        const nsAReadableString& text=theToken->GetValue();
        // support XML like syntax to fix bugs like 44186
        if(!key.IsEmpty() && kForwardSlash==key.First() && text.IsEmpty()) {
          aToken->SetEmpty(PR_TRUE);
          isUsableAttr=!mDoXMLEmptyTags;
        }
        if(isUsableAttr) {
          theAttrCount++;
          AddToken((CToken*&)theToken,result,&mTokenDeque,theAllocator);
        }
        else {
          IF_FREE(theToken, mTokenAllocator);
        }
      }
      else { //if(NS_ERROR_HTMLPARSER_BADATTRIBUTE==result){
        aToken->SetEmpty(PR_TRUE);
        IF_FREE(theToken, mTokenAllocator);
        if(NS_ERROR_HTMLPARSER_BADATTRIBUTE==result) result=NS_OK;
      }
    }//if

    if(NS_SUCCEEDED(result)){
      result=aScanner.SkipWhitespace();
      if(NS_SUCCEEDED(result)) {
        result=aScanner.Peek(aChar);
        if(NS_SUCCEEDED(result)) {
          if(aChar==kGreaterThan) { //you just ate the '>'
            aScanner.GetChar(aChar); //skip the '>'
            done=PR_TRUE;
          }
          else if(aChar==kLessThan) {
            done=PR_TRUE;
          }
        }//if
      }
    }//if
  }//while

  aToken->SetAttributeCount(theAttrCount);
  return result;
}

/**
 * In the case that we just read the given tag, we should go and
 * consume all the input until we find a matching end tag.
 *
 * @update  gess12/28/98
 * @param
 * @return
 */
nsresult nsHTMLTokenizer::ConsumeScriptContent(nsScanner& aScanner,CToken*& aToken)
{
  nsresult result=NS_OK;
  return result;
}

/**
 * This method consumes a start tag and, when present, its attributes.
 *
 * @update  gess12/28/98
 * @param
 * @return
 */
nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner,PRBool& aFlushTokens) {

  PRInt32 theDequeSize=mTokenDeque.GetSize(); //remember this for later in case you have to unwind...
  nsresult result=NS_OK;

  nsTokenAllocator* theAllocator=this->GetTokenAllocator();
  aToken=theAllocator->CreateTokenOfType(eToken_start,eHTMLTag_unknown);

  if(aToken) {
    // Save the position after '<' for use in recording trailing contents. Ref: Bug. 15204.
    nsReadingIterator<PRUnichar> origin;
    aScanner.CurrentPosition(origin);

    PRBool isHTML=((eHTML3Text==mDocType) || (eHTML4Text==mDocType));
    result= aToken->Consume(aChar,aScanner,isHTML); //tell new token to finish consuming text...

    if(NS_SUCCEEDED(result)) {

      AddToken(aToken,result,&mTokenDeque,theAllocator);
      eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID();

      //Good. Now, let's see if the next char is ">".
      //If so, we have a complete tag, otherwise, we have attributes.
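      //(e.g. "<br>" is already complete at this point, while "<table border=1>"
      // still has attributes for ConsumeAttributes to pick up.)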
      PRBool theTagHasAttributes=PR_FALSE;
      nsReadingIterator<PRUnichar> start, end;

      if(NS_OK==result) {
        result=(eViewSource==mParserCommand) ? aScanner.ReadWhitespace(start, end) : aScanner.SkipWhitespace();
        aToken->mNewlineCount += aScanner.GetNewlinesSkipped();

        if(NS_OK==result) {
          result=aScanner.Peek(aChar);
          if(NS_OK==result) {
            if(kGreaterThan!=aChar) { //look for '>'
              //push that char back, since we apparently have attributes...
              theTagHasAttributes=PR_TRUE;
            } //if
            else {
              aScanner.GetChar(aChar);
            }
          } //if
        }//if
      }

      CStartToken* theStartToken=NS_STATIC_CAST(CStartToken*,aToken);

      if(theTagHasAttributes) {
        if (eViewSource==mParserCommand) {
          // Since we conserve whitespace in view-source mode,
          // go back to the beginning of the whitespace section
          // and let the first attribute grab it.
          aScanner.SetPosition(start, PR_FALSE, PR_TRUE);
        }
        result=ConsumeAttributes(aChar,theStartToken,aScanner);
      }

      /*  Now that that's over with, we have one more problem to solve.
          In the case that we just read a <SCRIPT> or <STYLE> tag, we should
          go and consume all of the raw content itself, up to the matching
          end tag.
       */
      if(NS_SUCCEEDED(result)) {

        if(mRecordTrailingContent) {
          // A tag such as TEXTAREA asked us to keep a copy of its raw contents;
          // start recording from the position saved just after the '<'.
          RecordTrailingContent(theStartToken,aScanner,origin);
        }

        if((eHTMLTag_script==theTag) || (eHTMLTag_style==theTag)) {
          nsAutoString endTagName;
          endTagName.AssignWithConversion(nsHTMLTags::GetStringValue(theTag)); // (assumed helper) name of the matching end tag

          CToken*     text=theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text);
          CTextToken* textToken=NS_STATIC_CAST(CTextToken*,text);
          // (assumed call) swallow the raw content up to the matching end tag
          result=textToken->ConsumeUntil(0,PR_TRUE,aScanner,endTagName,mParseMode,aFlushTokens);

          // Note: if aFlushTokens is TRUE then we have seen an </script>
          if(!theStartToken->IsEmpty() || aFlushTokens) {
            theStartToken->SetEmpty(PR_FALSE); // Setting this would make cases like <script/>...</script> work.
            CToken* endToken=theAllocator->CreateTokenOfType(eToken_end,theTag,endTagName);
            AddToken(text,result,&mTokenDeque,theAllocator);
            AddToken(endToken,result,&mTokenDeque,theAllocator);
          }
          else {
            IF_FREE(text, mTokenAllocator);
          }
        }
      }

      //EEEEECCCCKKKK!!!
      //This code is confusing, so pay attention.
      //If you're here, it's because we were in the midst of consuming a start
      //tag but ran out of data (not in the stream, but in this *part* of the stream).
      //For simplicity, we have to unwind our input. Therefore, we pop and discard
      //any new tokens we've queued this round. Later we can get smarter about this.
      if(!NS_SUCCEEDED(result)) {
        while(mTokenDeque.GetSize()>theDequeSize) {
          CToken* theToken=(CToken*)mTokenDeque.Pop();
          IF_FREE(theToken, mTokenAllocator);
        }
      }
    } //if
    else IF_FREE(aToken, mTokenAllocator);
  } //if
  return result;
}

/**
 * This method consumes an end tag; the "<" has already been consumed
 * and the "/" is still waiting in the scanner.
 *
 * @update  gess12/28/98
 * @param
 * @return
 */
nsresult nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) {

  // Get the "/" (we've already seen it with a Peek)
  aScanner.GetChar(aChar);

  nsTokenAllocator* theAllocator=this->GetTokenAllocator();
  aToken=theAllocator->CreateTokenOfType(eToken_end,eHTMLTag_unknown);

  nsresult result=NS_OK;

  if(aToken) {
    PRBool isHTML=((eHTML3Text==mDocType) || (eHTML4Text==mDocType));
    result= aToken->Consume(aChar,aScanner,isHTML); //tell new token to finish consuming text...
    AddToken(aToken,result,&mTokenDeque,theAllocator);
    if(NS_SUCCEEDED(result)) {
      eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID();
      if(((theTag==eHTMLTag_textarea)  ||
          (theTag==eHTMLTag_xmp)       ||
          (theTag==eHTMLTag_plaintext) ||
          (theTag==eHTMLTag_noscript)) && mRecordTrailingContent) {
        mRecordTrailingContent=PR_FALSE;
      }
    }
  } //if

  return result;
}

/**
 * This method is called just after a "&" has been consumed
 * and we know we're at the start of an entity.
 *
 * @update  gess 3/25/98
 * @param   aChar: last char read
 * @param   aScanner: see nsScanner.h
 * @param   anErrorCode: arg that will hold error condition
 * @return  new token or null
 */
nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) {

  PRUnichar theChar;
  nsresult result=aScanner.Peek(theChar, 1);

  nsTokenAllocator* theAllocator=this->GetTokenAllocator();

  if(NS_OK==result) {
    if(nsCRT::IsAsciiAlpha(theChar)) { //handle common entity references &xxx; or &#xxx.
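      //(named references such as &amp; or &nbsp; land here; numeric references
      // such as &#160; are handled by the kHashsign branch below.)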
// Get the "&" aScanner.GetChar(theChar); aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity); // Get the first entity character aScanner.GetChar(theChar); result = aToken->Consume(theChar,aScanner,mParseMode); //tell new token to finish consuming text... } else if(kHashsign==theChar) { // Get the "&" aScanner.GetChar(theChar); aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity); // Get the first numerical entity character aScanner.GetChar(theChar); result=aToken->Consume(theChar,aScanner,mParseMode); } else { //oops, we're actually looking at plain text... return ConsumeText(aToken,aScanner); }//if if(aToken){ if(mIsFinalChunk && (kEOF==result)) { result=NS_OK; //use as much of the entity as you can get. } AddToken(aToken,result,&mTokenDeque,theAllocator); } }//if return result; } /** * This method is called just after whitespace has been * consumed and we know we're at the start a whitespace run. * * @update gess 3/25/98 * @param aChar: last char read * @param aScanner: see nsScanner.h * @param anErrorCode: arg that will hold error condition * @return new token or null */ nsresult nsHTMLTokenizer::ConsumeWhitespace(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) { // Get the whitespace character aScanner.GetChar(aChar); nsTokenAllocator* theAllocator=this->GetTokenAllocator(); aToken = theAllocator->CreateTokenOfType(eToken_whitespace,eHTMLTag_whitespace); nsresult result=NS_OK; if(aToken) { result=aToken->Consume(aChar,aScanner,mParseMode); AddToken(aToken,result,&mTokenDeque,theAllocator); } return result; } /** * This method is called just after a "GetTokenAllocator(); aToken = theAllocator->CreateTokenOfType(eToken_comment,eHTMLTag_comment); nsresult result=NS_OK; if(aToken) { result=aToken->Consume(aChar,aScanner,mParseMode); AddToken(aToken,result,&mTokenDeque,theAllocator); } return result; } /** * This method is called just after a known text char has * been consumed and we should read a text run. 
 *
 * @update  gess 3/25/98
 * @param   aChar: last char read
 * @param   aScanner: see nsScanner.h
 * @param   anErrorCode: arg that will hold error condition
 * @return  new token or null
 */
nsresult nsHTMLTokenizer::ConsumeText(CToken*& aToken,nsScanner& aScanner){

  nsresult result=NS_OK;
  nsTokenAllocator* theAllocator=this->GetTokenAllocator();
  CTextToken* theToken = (CTextToken*)theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text);

  if(theToken) {
    PRUnichar ch=0;
    result=theToken->Consume(ch,aScanner,mParseMode);
    if(!NS_SUCCEEDED(result)) {
      if(0==theToken->GetTextLength()){
        IF_FREE(aToken, mTokenAllocator);
        aToken = nsnull;
      }
      else result=NS_OK;
    }
    aToken = theToken;
    AddToken(aToken,result,&mTokenDeque,theAllocator);
  }
  return result;
}

/**
 * This method is called just after a "<!" has been seen, when the content
 * does not look like a comment. It may instead be special markup: a doctype
 * declaration, a marked (CDATA) section, or a markup declaration such as
 * ELEMENT, ATTLIST, ENTITY or NOTATION.
 *
 * @param   aChar: last char read
 * @param   aScanner: see nsScanner.h
 * @return  new token or null
 */
nsresult nsHTMLTokenizer::ConsumeSpecialMarkup(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){

  // Get the "!" (we've already seen it with a Peek)
  aScanner.GetChar(aChar);

  nsresult result=NS_OK;

  nsAutoString theBufCopy;
  aScanner.Peek(theBufCopy,20); // (assumed call) look ahead far enough to spot "DOCTYPE"
  theBufCopy.ToUpperCase();
  PRInt32 theIndex=theBufCopy.Find("DOCTYPE");

  nsTokenAllocator* theAllocator=this->GetTokenAllocator();

  if(theIndex==kNotFound) {
    if('['==theBufCopy.CharAt(0)) {
      aToken = theAllocator->CreateTokenOfType(eToken_cdatasection,eHTMLTag_comment);
    }
    else if (theBufCopy.EqualsWithConversion("ELEMENT",PR_FALSE,7)  ||
             theBufCopy.EqualsWithConversion("ATTLIST",PR_FALSE,7)  ||
             theBufCopy.EqualsWithConversion("ENTITY",PR_FALSE,6)   ||
             theBufCopy.EqualsWithConversion("NOTATION",PR_FALSE,8)) {
      aToken = theAllocator->CreateTokenOfType(eToken_markupDecl,eHTMLTag_markupDecl);
    }
    else {
      aToken = theAllocator->CreateTokenOfType(eToken_comment,eHTMLTag_comment);
    }
  }
  else aToken = theAllocator->CreateTokenOfType(eToken_doctypeDecl,eHTMLTag_doctypeDecl);

  if(aToken) {
    result=aToken->Consume(aChar,aScanner,mParseMode);
    AddToken(aToken,result,&mTokenDeque,theAllocator);
  }
  return result;
}

/**
 * This method is called just after a newline has been consumed.
 *
 * @update  gess 3/25/98
 * @param   aChar: last char read
 * @param   aScanner: see nsScanner.h
 * @param   aToken is the newly created newline token
 * @return  error code
 */
nsresult nsHTMLTokenizer::ConsumeNewline(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){

  // Get the newline character
  aScanner.GetChar(aChar);

  nsTokenAllocator* theAllocator=this->GetTokenAllocator();
  aToken=theAllocator->CreateTokenOfType(eToken_newline,eHTMLTag_newline);

  nsresult result=NS_OK;
  if(aToken) {
    result=aToken->Consume(aChar,aScanner,mParseMode);
    AddToken(aToken,result,&mTokenDeque,theAllocator);
  }
  return result;
}

/**
 * This method is called just after a "?" has been consumed,
 * which tells us we're looking at a processing instruction.
 *
 * @update  gess 3/25/98
 * @param   aChar: last char read
 * @param   aScanner: see nsScanner.h
 * @param   aToken is the newly created instruction token
 * @return  error code
 */
nsresult nsHTMLTokenizer::ConsumeProcessingInstruction(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){

  // Get the "?"
  aScanner.GetChar(aChar);

  nsTokenAllocator* theAllocator=this->GetTokenAllocator();
  aToken=theAllocator->CreateTokenOfType(eToken_instruction,eHTMLTag_unknown);

  nsresult result=NS_OK;
  if(aToken) {
    result=aToken->Consume(aChar,aScanner,mParseMode);
    AddToken(aToken,result,&mTokenDeque,theAllocator);
  }
  return result;
}

/**
 * This method keeps a copy of contents within the start token.
 * The stored content could later be used in displaying TEXTAREA,
 * and also in view source.
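 * (ConsumeStartTag saves the position just after the "<" as the recording
 * origin, and ConsumeEndTag clears mRecordTrailingContent once the matching
 * end tag arrives.)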
 *
 * @update  harishd 11/09/99
 * @param   aStartToken: The token whose trailing contents are to be recorded
 * @param   aScanner: see nsScanner.h
 *
 */
void nsHTMLTokenizer::RecordTrailingContent(CStartToken* aStartToken,
                                            nsScanner& aScanner,
                                            nsReadingIterator<PRUnichar> aOrigin) {
  if(aStartToken) {
    nsReadingIterator<PRUnichar> theCurrentPosition;
    aScanner.CurrentPosition(theCurrentPosition);

    nsString& trailingContent = aStartToken->mTrailingContent;
    PRUint32 oldLength = trailingContent.Length();
    trailingContent.SetLength(oldLength + Distance(aOrigin, theCurrentPosition));

    nsWritingIterator<PRUnichar> beginWriting;
    trailingContent.BeginWriting(beginWriting);
    beginWriting.advance(oldLength);

    copy_string( aOrigin, theCurrentPosition, beginWriting );
  }
}