/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * The contents of this file are subject to the Netscape Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is Netscape
 * Communications Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All
 * Rights Reserved.
 *
 * Contributor(s):
 */

/**
 * MODULE NOTES:
 * @update  gess 4/1/98
 *
 */

#include "nsHTMLTokenizer.h"
#include "nsParser.h"
#include "nsScanner.h"
#include "nsElementTable.h"
#include "nsHTMLEntities.h"
#include "CParserContext.h"

/************************************************************************
  And now for the main class -- nsHTMLTokenizer...
 ************************************************************************/

static NS_DEFINE_IID(kISupportsIID,   NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kITokenizerIID,  NS_ITOKENIZER_IID);
static NS_DEFINE_IID(kClassIID,       NS_HTMLTOKENIZER_IID);

/**
 *  This method gets called as part of our COM-like interfaces.
 *  It hands back this object typed as the requested interface and
 *  adds a reference on behalf of the caller.
 *
 *  @update   gess 4/8/98
 *  @param    aIID -- id of the interface being requested
 *  @param    aInstancePtr -- receives the interface pointer; set to 0 when
 *            the interface is not supported
 *  @return   NS_OK on success, NS_ERROR_NULL_POINTER if aInstancePtr is null,
 *            NS_NOINTERFACE if aIID is not one of the three ids handled here
 */
nsresult nsHTMLTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
  if (NULL == aInstancePtr) {
    return NS_ERROR_NULL_POINTER;
  }

  if(aIID.Equals(kISupportsIID)) {          //do nsISupports...
    // Go through nsITokenizer (our base class) rather than the unrelated
    // nsIDTD type the original code named here.
    *aInstancePtr = (nsISupports*)(nsITokenizer*)(this);
  }
  else if(aIID.Equals(kITokenizerIID)) {    //do nsITokenizer base class...
    *aInstancePtr = (nsITokenizer*)(this);
  }
  else if(aIID.Equals(kClassIID)) {         //do this class...
    *aInstancePtr = (nsHTMLTokenizer*)(this);
  }
  else {
    *aInstancePtr=0;
    return NS_NOINTERFACE;
  }
  NS_ADDREF_THIS();
  return NS_OK;
}

// File-level token recycler shared by every tokenizer in this translation
// unit; created on first use by GetTokenRecycler().
static CTokenRecycler* gTokenRecycler=0;

/**
 *  Destroys the shared token recycler (if one was created) and resets the
 *  pointer so that a later GetTokenRecycler() call builds a new one.
 */
void nsHTMLTokenizer::FreeTokenRecycler(void) {
  if(gTokenRecycler) {
    delete gTokenRecycler;
    gTokenRecycler=0;
  }
}

/**
 *  This method is defined in nsHTMLTokenizer.h. It is used to
 *  cause the COM-like construction of an HTMLTokenizer.
 *
 *  @update   gess 4/8/98
 *  @param    aInstancePtrResult -- receives the newly instantiated tokenizer
 *  @param    aMode -- parse mode forwarded to the constructor
 *  @param    aDocType -- document type forwarded to the constructor
 *  @param    aCommand -- parser command forwarded to the constructor
 *  @return   NS_ERROR_NULL_POINTER if aInstancePtrResult is null,
 *            NS_ERROR_OUT_OF_MEMORY if allocation yields null, otherwise the
 *            result of QueryInterface
 */
NS_HTMLPARS nsresult NS_NewHTMLTokenizer(nsITokenizer** aInstancePtrResult,PRInt32 aMode,eParserDocType aDocType, eParserCommands aCommand) {
  NS_PRECONDITION(nsnull != aInstancePtrResult, "null ptr");
  if (nsnull == aInstancePtrResult) {
    return NS_ERROR_NULL_POINTER;
  }
  nsHTMLTokenizer* it = new nsHTMLTokenizer(aMode,aDocType,aCommand);
  if (nsnull == it) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  // Hold a temporary reference across QueryInterface: if the query fails the
  // Release below drops the count to zero and destroys the object instead of
  // leaking it; if it succeeds the caller is left holding exactly one
  // reference, as before.
  NS_ADDREF(it);
  nsresult result = it->QueryInterface(kClassIID, (void **) aInstancePtrResult);
  NS_RELEASE(it);
  return result;
}

NS_IMPL_ADDREF(nsHTMLTokenizer)
NS_IMPL_RELEASE(nsHTMLTokenizer)

/**
 *  Constructor. Records the parse configuration and starts with both the
 *  XML-empty-tag handling and trailing-content recording switched off.
 *
 *  @update   gess 4/9/98
 *  @param    aParseMode -- stored in mParseMode
 *  @param    aDocType -- stored in mDocType
 *  @param    aCommand -- stored in mParserCommand
 */
nsHTMLTokenizer::nsHTMLTokenizer(PRInt32 aParseMode,
                                 eParserDocType aDocType,
                                 eParserCommands aCommand) :
  nsITokenizer(), mTokenDeque(0), mParseMode(aParseMode)
{
  NS_INIT_REFCNT();
  mDoXMLEmptyTags=PR_FALSE;
  mDocType=aDocType;
  mRecordTrailingContent=PR_FALSE;
  mParserCommand=aCommand;
}

/**
 *  Destructor. Runs a CTokenDeallocator over every token still sitting in
 *  the token deque (presumably freeing each one -- see CTokenDeallocator).
 *
 *  @update   gess 4/9/98
 */
nsHTMLTokenizer::~nsHTMLTokenizer(){
  if(mTokenDeque.GetSize()){
    CTokenDeallocator theDeallocator;
    mTokenDeque.ForEach(theDeallocator);
  }
}

/*******************************************************************
  Here begins the real working methods for the tokenizer.
*******************************************************************/ void nsHTMLTokenizer::AddToken(CToken*& aToken,nsresult aResult,nsDeque* aDeque,CTokenRecycler* aRecycler) { if(aToken && aDeque) { if(NS_SUCCEEDED(aResult)) { aDeque->Push(aToken); } else { if(aRecycler) { aRecycler->RecycleToken(aToken); } else delete aToken; aToken=0; } } } /** * Retrieve a ptr to the global token recycler... * @update gess8/4/98 * @return ptr to recycler (or null) */ nsITokenRecycler* nsHTMLTokenizer::GetTokenRecycler(void) { //let's move to this once we eliminate the leaking of tokens... if(!gTokenRecycler) gTokenRecycler=new CTokenRecycler(); return gTokenRecycler; } /** * This method provides access to the topmost token in the tokenDeque. * The token is not really removed from the list. * @update gess8/2/98 * @return ptr to token */ CToken* nsHTMLTokenizer::PeekToken() { return (CToken*)mTokenDeque.PeekFront(); } /** * This method provides access to the topmost token in the tokenDeque. * The token is really removed from the list; if the list is empty we return 0. 
 * After removal the token's mUseCount is reset to 0.
 * @update  gess8/2/98
 * @return  ptr to token or NULL
 */
CToken* nsHTMLTokenizer::PopToken() {
  CToken* result=nsnull;
  result=(CToken*)mTokenDeque.PopFront();
  if(result) result->mUseCount=0;
  return result;
}

/**
 * Inserts the given token at the front of the token deque and sets its
 * mUseCount to 1.
 *
 * @update  gess8/2/98
 * @param   theToken -- token to insert; dereferenced unconditionally, so it
 *          must not be null
 * @return  theToken
 */
CToken* nsHTMLTokenizer::PushTokenFront(CToken* theToken) {
  mTokenDeque.PushFront(theToken);
  theToken->mUseCount=1;
  return theToken;
}

/**
 * Appends the given token to the token deque (nsDeque::Push) and sets its
 * mUseCount to 1.
 *
 * @update  gess8/2/98
 * @param   theToken -- token to append; dereferenced unconditionally, so it
 *          must not be null
 * @return  theToken
 */
CToken* nsHTMLTokenizer::PushToken(CToken* theToken) {
  mTokenDeque.Push(theToken);
  theToken->mUseCount=1;
  return theToken;
}

/**
 * Reports how many tokens are currently held in the token deque.
 *
 * @update  gess12/29/98
 * @return  size of the token deque
 */
PRInt32 nsHTMLTokenizer::GetCount(void) {
  return mTokenDeque.GetSize();
}

/**
 * Fetches the token stored at the given position in the token deque without
 * removing it.
 *
 * @update  gess12/29/98
 * @param   anIndex -- position handed to nsDeque::ObjectAt
 * @return  whatever ObjectAt yields for that position, cast to CToken*
 */
CToken* nsHTMLTokenizer::GetTokenAt(PRInt32 anIndex){
  return (CToken*)mTokenDeque.ObjectAt(anIndex);
}

/**
 * Pre-tokenization hook. This implementation does nothing.
 *
 * @param   aIsFinalChunk -- ignored here
 * @return  NS_OK always
 */
nsresult nsHTMLTokenizer::WillTokenize(PRBool aIsFinalChunk)
{
  return NS_OK;
}

/**
 * Moves tokens from the given deque into this tokenizer's token deque.
 *
 * NOTE(review): this copy of the file has lost the text between "anIndex"
 * and "CreateTokenOfType" in the for-statement below. The end of
 * PrependTokens and the beginning of the next routine (presumably
 * ConsumeAttributes, judging by the call made from ConsumeStartTag) are
 * missing. Restore them from the upstream source before building.
 *
 * @update  gess12/29/98
 * @param   aDeque -- source of the tokens
 * @return
 */
void nsHTMLTokenizer::PrependTokens(nsDeque& aDeque){
  PRInt32 aCount=aDeque.GetSize();

  //last but not least, let's check the misplaced content list.
  //if we find it, then we have to push it all into the body before continuing...
  PRInt32 anIndex=0;
  for(anIndex=0;anIndexCreateTokenOfType(eToken_attribute,eHTMLTag_unknown);
    if(theToken){
      // Whitespace collected ahead of the attribute is stored as the new
      // token's key before consumption, and the buffer is emptied.
      if(aLeadingWS.Length()) {
        nsString& theKey=((CAttributeToken*)theToken)->GetKey();
        theKey=aLeadingWS;
        aLeadingWS.Truncate(0);
      }
      result=theToken->Consume(aChar,aScanner,PRBool(eViewSource==mParserCommand));  //tell new token to finish consuming text...

      //Much as I hate to do this, here's some special case code.
      //This handles the case of empty-tags in XML. Our last
      //attribute token will come through with a text value of ""
      //and a textkey of "/". We should destroy it, and tell the
      //start token it was empty.
      if(NS_SUCCEEDED(result)) {
        nsString& key=((CAttributeToken*)theToken)->GetKey();
        nsString& text=theToken->GetStringValueXXX();
        if((mDoXMLEmptyTags) && (kForwardSlash==key.CharAt(0)) && (0==text.Length())){
          //tada! our special case! Treat it like an empty start tag...
          aToken->SetEmpty(PR_TRUE);
          theRecycler->RecycleToken(theToken);
        }
        else {
          theAttrCount++;
          AddToken(theToken,result,&mTokenDeque,theRecycler);
        }
      }
      else { //if(NS_ERROR_HTMLPARSER_BADATTRIBUTE==result){
        // Consumption failed: mark the start token empty and recycle the
        // attribute token. Only the "bad attribute" error is forgiven; any
        // other failure is left in result.
        aToken->SetEmpty(PR_TRUE);
        theRecycler->RecycleToken(theToken);
        if(NS_ERROR_HTMLPARSER_BADATTRIBUTE==result)
          result=NS_OK;
      }
    }//if

    if(NS_SUCCEEDED(result)){
      result=aScanner.SkipWhitespace();
      if(NS_SUCCEEDED(result)) {
        result=aScanner.Peek(aChar);
        if(NS_SUCCEEDED(result)) {
          if(aChar==kGreaterThan) { //you just ate the '>'
            aScanner.GetChar(aChar); //skip the '>'
            done=PR_TRUE;
          }
          else if(aChar==kLessThan) {
            // A '<' shows up before the tag was closed: stop reading
            // attributes. For tags whose element-table entry has an
            // mSkipTarget, an end token of the same type is queued as well.
            eHTMLTags theEndTag = (eHTMLTags)aToken->GetTypeID();
            if(result==NS_OK&&(gHTMLElements[theEndTag].mSkipTarget)){
              CToken* theEndToken=theRecycler->CreateTokenOfType(eToken_end,theEndTag);
              AddToken(theEndToken,NS_OK,&mTokenDeque,theRecycler);
            }
            done=PR_TRUE;
          }
        }//if
      }
    }//if
  }//while

  aToken->SetAttributeCount(theAttrCount);
  return result;
}

/**
 * In the case that we just read the given tag, we should go and
 * consume all the input until we find a matching end tag.
 *
 * NOTE: the implementation below is currently a stub; it consumes nothing
 * and touches neither argument.
 *
 * @update  gess12/28/98
 * @param   aScanner -- unused
 * @param   aToken -- unused
 * @return  NS_OK always
 */
nsresult nsHTMLTokenizer::ConsumeScriptContent(nsScanner& aScanner,CToken*& aToken) {
  nsresult result=NS_OK;

  return result;
}

/**
 *
 * @update  gess12/28/98
 * @param
 * @return
 */
nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner,PRBool& aFlushTokens) {
  PRInt32 theDequeSize=mTokenDeque.GetSize(); //remember this for later in case you have to unwind...
  nsresult result=NS_OK;

  CTokenRecycler* theRecycler=(CTokenRecycler*)GetTokenRecycler();
  aToken=theRecycler->CreateTokenOfType(eToken_start,eHTMLTag_unknown);

  if(aToken) {
    ((CStartToken*)aToken)->mOrigin=aScanner.GetOffset()-1; // Save the position after '<' for use in recording trailing contents. Ref: Bug. 15204.
    PRBool isHTML=((eHTML3Text==mDocType) || (eHTML4Text==mDocType));
    result= aToken->Consume(aChar,aScanner,isHTML);  //tell new token to finish consuming text...

    if(NS_SUCCEEDED(result)) {

      AddToken(aToken,result,&mTokenDeque,theRecycler);
      eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID();

      //Good. Now, let's see if the next char is ">".
      //If so, we have a complete tag, otherwise, we have attributes.
      mScratch.Truncate(0);
      PRBool theTagHasAttributes=PR_FALSE;
      if(NS_OK==result) {
        result=(eViewSource==mParserCommand) ? aScanner.ReadWhitespace(mScratch) : aScanner.SkipWhitespace();
        aToken->mNewlineCount += aScanner.GetNewlinesSkipped();
        if(NS_OK==result) {
          result=aScanner.GetChar(aChar);
          if(NS_OK==result) {
            if(kGreaterThan!=aChar) { //look for '>'
              //push that char back, since we apparently have attributes...
              result=aScanner.PutBack(aChar);
              theTagHasAttributes=PR_TRUE;
            } //if
          } //if
        }//if
      }

      if(theTagHasAttributes) {
        result=ConsumeAttributes(aChar,(CStartToken*)aToken,aScanner,mScratch);
      }

      /*  Now that that's over with, we have one more problem to solve.
          In the case that we just read a