/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* * The contents of this file are subject to the Netscape Public License * Version 1.0 (the "NPL"); you may not use this file except in * compliance with the NPL. You may obtain a copy of the NPL at * http://www.mozilla.org/NPL/ * * Software distributed under the NPL is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL * for the specific language governing rights and limitations under the * NPL. * * The Initial Developer of this code under the NPL is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1998 Netscape Communications Corporation. All Rights * Reserved. */ #include "nsDebug.h" #include "nsIDTDDebug.h" #include "CNavDTD.h" #include "nsHTMLTokens.h" #include "nsCRT.h" #include "nsParser.h" #include "nsIParser.h" #include "nsIHTMLContentSink.h" #include "nsScanner.h" #include "nsTokenHandler.h" #include "nsIDTDDebug.h" #include "prenv.h" //this is here for debug reasons... #include "prtypes.h" //this is here for debug reasons... #include "prio.h" #include "plstr.h" #include "nsDTDUtils.h" #include "nsTagHandler.h" #include "nsHTMLTokenizer.h" #include "nsTime.h" #include "nsIElementObserver.h" #include "nsViewSourceHTML.h" #include "nsParserNode.h" #ifdef XP_PC #include //this is here for debug reasons... 
#endif
#include "prmem.h"

#undef ENABLE_RESIDUALSTYLE
#define RICKG_DEBUG
#ifdef RICKG_DEBUG
// NOTE(review): the header name on the next line is missing from this copy of
// the file; restore it from version control before building.
#include
#endif

// Interface IDs used by QueryInterface and by the factory function in this file.
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kIDTDIID, NS_IDTD_IID);
static NS_DEFINE_IID(kClassIID, NS_INAVHTML_DTD_IID);

static const char* kNullToken = "Error: Null token given";
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
// Directory handed to the DTD debugger in CNavDTD::Verify().
static char* kVerificationDir = "c:/temp";

// CRC reporting state, set up lazily in CNavDTD::DidBuildModel():
//   0 = environment not inspected yet,
//   1 = inspected, reporting not requested,
//   2 = inspected, RICKG_CRC requested reporting.
static char gShowCRC=0;
// Token recycler of the tokenizer currently in use; assigned in CNavDTD::BuildModel().
static CTokenRecycler* gRecycler=0;

static eHTMLTags gFormElementTags[]= {
  eHTMLTag_button, eHTMLTag_fieldset, eHTMLTag_input, eHTMLTag_isindex, eHTMLTag_label,
  eHTMLTag_legend, eHTMLTag_option, eHTMLTag_optgroup, eHTMLTag_select, eHTMLTag_textarea};

static eHTMLTags gWhitespaceTags[]={
  eHTMLTag_newline, eHTMLTag_whitespace};

#include "nsElementTable.h"

/***************************************************************
  This is the ITagHandler deque deallocator, needed by the
  CTagHandlerRegister. Each call deletes the nsITagHandler that
  it is handed and returns 0.
 ***************************************************************/
class CTagHandlerDeallocator: public nsDequeFunctor{
public:
  virtual void* operator()(void* aObject) {
    nsITagHandler* tagHandler = (nsITagHandler*)aObject;
    delete tagHandler;
    return 0;
  }
};

/***************************************************************
  This functor is called for each item in the TagHandler deque.
  It compares the handler's string with the tag name given to
  Initialize() and returns the handler on a match, 0 otherwise.
 ***************************************************************/
class CTagFinder: public nsDequeFunctor{
public:
  CTagFinder(){}

  // Remembers the tag name that subsequent operator() calls compare against.
  void Initialize(const nsString &aTagName) {mTagName = aTagName;}

  virtual ~CTagFinder() {
  }

  // aObject is treated as an nsITagHandler*; its GetString() result is
  // dereferenced without a null check.
  virtual void* operator()(void* aObject) {
    nsString* theString = ((nsITagHandler*)aObject)->GetString();
    if( theString->Equals(mTagName)){
      return aObject;
    }
    return(0);
  }

  nsAutoString mTagName;
};

/***************************************************************
  This is an object that
will keep track of TagHandlers in the DTD. Uses a factory pattern ***************************************************************/ class CTagHandlerRegister { public: CTagHandlerRegister() : mTagHandlerDeque(new CTagHandlerDeallocator()) { } ~CTagHandlerRegister() { } void RegisterTagHandler(nsITagHandler *aTagHandler){ mTagHandlerDeque.Push(aTagHandler); } nsITagHandler* FindTagHandler(const nsString &aTagName){ nsITagHandler* foundHandler = nsnull; mTagFinder.Initialize(aTagName); mTagHandlerDeque.Begin(); foundHandler = (nsITagHandler*) mTagHandlerDeque.FirstThat(mTagFinder); return foundHandler; } nsDeque mTagHandlerDeque; CTagFinder mTagFinder; }; /************************************************************************ The CTagHandlerRegister for a CNavDTD. This is where special taghanders for our tags can be managed and called from Note: This can also be attached to some object so it can be refcounted and destroyed if you want this to go away when not imbedded. ************************************************************************/ //CTagHandlerRegister gTagHandlerRegister; /************************************************************************ And now for the main class -- CNavDTD... ************************************************************************/ /** * This method gets called as part of our COM-like interfaces. * Its purpose is to create an interface to parser object * of some type. * * @update gess 4/8/98 * @param nsIID id of object to discover * @param aInstancePtr ptr to newly discovered interface * @return NS_xxx result code */ nsresult CNavDTD::QueryInterface(const nsIID& aIID, void** aInstancePtr) { if (NULL == aInstancePtr) { return NS_ERROR_NULL_POINTER; } if(aIID.Equals(kISupportsIID)) { //do IUnknown... *aInstancePtr = (nsIDTD*)(this); } else if(aIID.Equals(kIDTDIID)) { //do IParser base class... *aInstancePtr = (nsIDTD*)(this); } else if(aIID.Equals(kClassIID)) { //do this class... 
*aInstancePtr = (CNavDTD*)(this); } else { *aInstancePtr=0; return NS_NOINTERFACE; } NS_ADDREF_THIS(); return NS_OK; } /** * This method is defined in nsIParser. It is used to * cause the COM-like construction of an nsParser. * * @update gess 4/8/98 * @param nsIParser** ptr to newly instantiated parser * @return NS_xxx error result */ NS_HTMLPARS nsresult NS_NewNavHTMLDTD(nsIDTD** aInstancePtrResult) { CNavDTD* it = new CNavDTD(); if (it == 0) { return NS_ERROR_OUT_OF_MEMORY; } return it->QueryInterface(kClassIID, (void **) aInstancePtrResult); } NS_IMPL_ADDREF(CNavDTD) NS_IMPL_RELEASE(CNavDTD) /** * * * @update gess 6/9/98 * @param * @return */ static PRInt32 NavDispatchTokenHandler(CToken* aToken,nsIDTD* aDTD) { PRInt32 result=0; CHTMLToken* theToken= (CHTMLToken*)(aToken); eHTMLTokenTypes theType= (eHTMLTokenTypes)theToken->GetTokenType(); CNavDTD* theDTD=(CNavDTD*)aDTD; if(aDTD) { switch(theType) { case eToken_start: case eToken_whitespace: case eToken_newline: case eToken_text: result=theDTD->HandleStartToken(aToken); break; case eToken_end: result=theDTD->HandleEndToken(aToken); break; case eToken_comment: result=theDTD->HandleCommentToken(aToken); break; case eToken_entity: result=theDTD->HandleEntityToken(aToken); break; case eToken_attribute: result=theDTD->HandleAttributeToken(aToken); break; case eToken_style: result=theDTD->HandleStyleToken(aToken); break; case eToken_instruction: result=theDTD->HandleProcessingInstructionToken(aToken); break; default: result=0; }//switch }//if return result; } /** * Register a handler. 
* * @update gess 4/2/98 * @param * @return */ CITokenHandler* CNavDTD::AddTokenHandler(CITokenHandler* aHandler) { NS_ASSERTION(0!=aHandler,"Error: Null handler"); if(aHandler) { eHTMLTokenTypes type=(eHTMLTokenTypes)aHandler->GetTokenType(); if(typePopAttributeToken())){ gRecycler->RecycleToken(theToken); } mSharedNodes.Push(aNode); } } /** * * @update gess1/8/99 * @param * @return */ const nsIID& CNavDTD::GetMostDerivedIID(void)const { return kClassIID; } /** * Default destructor * * @update gess 4/9/98 * @param * @return */ CNavDTD::~CNavDTD(){ DeleteTokenHandlers(); delete mHeadContext; delete mBodyContext; if(mTokenizer) delete (nsHTMLTokenizer*)mTokenizer; nsCParserNode* theNode=0; while((theNode=(nsCParserNode*)mSharedNodes.Pop())){ delete theNode; } NS_IF_RELEASE(mDTDDebug); } /** * Call this method if you want the DTD to construct a fresh * instance of itself. * @update gess7/23/98 * @param * @return */ nsresult CNavDTD::CreateNewInstance(nsIDTD** aInstancePtrResult){ return NS_NewNavHTMLDTD(aInstancePtrResult); } /** * Called by the parser to initiate dtd verification of the * internal context stack. * @update gess 7/23/98 * @param * @return */ PRBool CNavDTD::Verify(nsString& aURLRef,nsIParser* aParser){ PRBool result=PR_TRUE; /* * Disable some DTD debugging code in the parser that * breaks on some compilers because of some broken * streams code in prstrm.cpp. */ #if !defined(MOZ_DISABLE_DTD_DEBUG) if(!mDTDDebug){ nsresult rval = NS_NewDTDDebug(&mDTDDebug); if (NS_OK != rval) { fputs("Cannot create parser debugger.\n", stdout); result=-PR_FALSE; } else mDTDDebug->SetVerificationDirectory(kVerificationDir); } #endif if(mDTDDebug) { // mDTDDebug->Verify(this,aParser,mBodyContext->GetCount(),mBodyContext->mStack,aURLRef); } return result; } /** * This method is called to determine if the given DTD can parse * a document in a given source-type. * NOTE: Parsing always assumes that the end result will involve * storing the result in the main content model. 
* @update gess6/24/98 * @param * @return TRUE if this DTD can satisfy the request; FALSE otherwise. */ eAutoDetectResult CNavDTD::CanParse(nsString& aContentType, nsString& aCommand, nsString& aBuffer, PRInt32 aVersion) { eAutoDetectResult result=eUnknownDetect; if(!aCommand.Equals(kViewSourceCommand)) { if(PR_TRUE==aContentType.Equals(kHTMLTextContentType)) { result=ePrimaryDetect; } else { //otherwise, look into the buffer to see if you recognize anything... if(BufferContainsHTML(aBuffer)){ result=ePrimaryDetect; if(0==aContentType.Length()) aContentType=kHTMLTextContentType; } } } return result; } PRTime gStartTime; /** * * @update gess5/18/98 * @param * @return */ nsresult CNavDTD::WillBuildModel(nsString& aFilename,PRBool aNotifySink,nsString& aSourceType,nsIContentSink* aSink){ nsresult result=NS_OK; mFilename=aFilename; mHasOpenBody=PR_FALSE; mHadBody=PR_FALSE; mHadFrameset=PR_FALSE; mLineNumber=1; mHasOpenScript=PR_FALSE; if((aNotifySink) && (aSink)) { #ifdef RGESS_DEBUG gStartTime = PR_Now(); printf("Begin parsing...\n"); #endif result = aSink->WillBuildModel(); CStartToken theToken(eHTMLTag_html); HandleStartToken(&theToken); mSkipTarget=eHTMLTag_unknown; mComputedCRC32=0; mExpectedCRC32=0; } return result; } /** * This is called when it's time to read as many tokens from the tokenizer * as you can. Not all tokens may make sense, so you may not be able to * read them all (until more come in later). 
* * @update gess5/18/98 * @param aParser is the parser object that's driving this process * @return error code (almost always NS_OK) */ nsresult CNavDTD::BuildModel(nsIParser* aParser,nsITokenizer* aTokenizer,nsITokenObserver* anObserver,nsIContentSink* aSink) { nsresult result=NS_OK; if(aTokenizer) { nsITokenizer* oldTokenizer=mTokenizer; mTokenizer=aTokenizer; mParser=(nsParser*)aParser; mSink=nsnull; result=aSink->QueryInterface(kIHTMLContentSinkIID, (void **)&mSink); if(mSink) { NS_ADDREF(mSink); gRecycler=(CTokenRecycler*)mTokenizer->GetTokenRecycler(); while(NS_SUCCEEDED(result)){ CToken* theToken=mTokenizer->PopToken(); if(theToken) { result=HandleToken(theToken,aParser); } else break; }//while mTokenizer=oldTokenizer; NS_IF_RELEASE(mSink); mSink=nsnull; } } else result=NS_ERROR_HTMLPARSER_BADTOKENIZER; return result; } /** * * @update gess5/18/98 * @param * @return */ nsresult CNavDTD::DidBuildModel(nsresult anErrorCode,PRBool aNotifySink,nsIParser* aParser,nsIContentSink* aSink){ nsresult result= NS_OK; if((NS_OK==anErrorCode) && (!mHadBody) && (!mHadFrameset)) { CStartToken theToken(eHTMLTag_body); //open the body container... result=HandleStartToken(&theToken); mTokenizer->PrependTokens(mMisplacedContent); //push misplaced content result=BuildModel(aParser,mTokenizer,0,aSink); } if(aParser){ if(aNotifySink && aSink){ if((NS_OK==anErrorCode) && (mBodyContext->GetCount()>0)) { eHTMLTags theTarget; while(mBodyContext->GetCount() > 0) { theTarget = mBodyContext->Last(); if(gHTMLElements[theTarget].HasSpecialProperty(kBadContentWatch)) result = HandleSavedTokensAbove(theTarget); CloseContainersTo(theTarget,PR_FALSE); } //result = CloseContainersTo(0,eHTMLTag_unknown,PR_FALSE); } #ifdef RGESS_DEBUG PRTime theEnd= PR_Now(); PRTime creates, ustoms; LL_I2L(ustoms, 1000); LL_SUB(creates, theEnd, gStartTime); LL_DIV(creates, creates, ustoms); printf("End parse elapsed: %lldms\n",creates); #endif //let's only grab this state once! 
if(!gShowCRC) { gShowCRC=1; //this only indicates we'll not initialize again. char* theEnvString = PR_GetEnv("RICKG_CRC"); if(theEnvString){ if(('1'==theEnvString[0]) || ('Y'==theEnvString[0]) || ('y'==theEnvString[0])){ gShowCRC=2; //this indicates that the CRC flag was found in the environment. } } } if(2==gShowCRC) { if(mComputedCRC32!=mExpectedCRC32) { if(mExpectedCRC32!=0) { printf("CRC Computed: %u Expected CRC: %u\n,",mComputedCRC32,mExpectedCRC32); result = aSink->DidBuildModel(2); } else { printf("Computed CRC: %u.\n",mComputedCRC32); result = aSink->DidBuildModel(3); } } else result = aSink->DidBuildModel(0); } else result=aSink->DidBuildModel(0); if(mDTDDebug) { mDTDDebug->DumpVectorRecord(); } } } return result; } /** * This big dispatch method is used to route token handler calls to the right place. * What's wrong with it? This table, and the dispatch methods themselves need to be * moved over to the delegate. Ah, so much to do... * * @update gess 5/21/98 * @param aToken * @param aParser * @return */ nsresult CNavDTD::HandleToken(CToken* aToken,nsIParser* aParser){ nsresult result=NS_OK; if(aToken) { CHTMLToken* theToken= (CHTMLToken*)(aToken); eHTMLTokenTypes theType=eHTMLTokenTypes(theToken->GetTokenType()); eHTMLTags theTag=(eHTMLTags)theToken->GetTypeID(); PRBool execSkipContent=PR_FALSE; theToken->mRecycle=PR_TRUE; //assume every token coming into this system needs recycling. /* --------------------------------------------------------------------------------- To understand this little piece of code, you need to look below too. In essence, this code caches "skipped content" until we find a given skiptarget. Once we find the skiptarget, we take all skipped content up to that point and coallate it. Then we push those tokens back onto the tokenizer deque. --------------------------------------------------------------------------------- */ if(mSkipTarget){ //handle a preexisting target... 
if((theTag==mSkipTarget) && (eToken_end==theType)){ mSkipTarget=eHTMLTag_unknown; //stop skipping. //mTokenizer->PushTokenFront(aToken); //push the end token... execSkipContent=PR_TRUE; gRecycler->RecycleToken(aToken); theToken=(CHTMLToken*)mSkippedContent.PopFront(); // result=HandleStartToken(theToken); } else { mSkippedContent.Push(theToken); return result; } } /* --------------------------------------------------------------------------------- This section of code is used to "move" misplaced content from one location in our document model to another. (Consider what would happen if we found a

tag and text in the head.) To move content, we throw it onto the misplacedcontent deque until we can deal with it. --------------------------------------------------------------------------------- */ if(!execSkipContent) { static eHTMLTags passThru[]= {eHTMLTag_html,eHTMLTag_comment,eHTMLTag_newline,eHTMLTag_whitespace,eHTMLTag_script}; if(!FindTagInSet(theTag,passThru,sizeof(passThru)/sizeof(eHTMLTag_unknown))){ if(!gHTMLElements[eHTMLTag_html].SectionContains(theTag,PR_FALSE)) { if((!mHadBody) && (!mHadFrameset)){ if(mHasOpenHead) { //just fall through and handle current token if(!gHTMLElements[eHTMLTag_head].IsChildOfHead(theTag)){ mMisplacedContent.Push(aToken); aToken->mRecycle=PR_FALSE; return result; } } else { if(gHTMLElements[eHTMLTag_body].SectionContains(theTag,PR_TRUE)){ mTokenizer->PushTokenFront(aToken); //put this token back... mTokenizer->PrependTokens(mMisplacedContent); //push misplaced content theToken=(CHTMLToken*)gRecycler->CreateTokenOfType(eToken_start,eHTMLTag_body); //now open a body... } } } } } } if(theToken){ //Before dealing with the token normally, we need to deal with skip targets if((!execSkipContent) && (theType!=eToken_end) && (eHTMLTag_unknown==mSkipTarget) && (gHTMLElements[theTag].mSkipTarget)){ //create a new target mSkipTarget=gHTMLElements[theTag].mSkipTarget; mSkippedContent.Push(theToken); } else { CITokenHandler* theHandler=GetTokenHandler(theType); if(theHandler) { mParser=(nsParser*)aParser; result=(*theHandler)(theToken,this); if(NS_SUCCEEDED(result) || (NS_ERROR_HTMLPARSER_BLOCK==result)) { if(theToken->mRecycle) gRecycler->RecycleToken(theToken); } else if(result==NS_ERROR_HTMLPARSER_STOPPARSING) return result; else return NS_OK; /*************************************************************/ // CAUTION: Here we are forgetting to push the ATTRIBUTE Tokens. // So, before you uncomment this part please make sure // that the attribute tokens are also accounted for. 
//else if(NS_ERROR_HTMLPARSER_MISPLACED!=result) // mTokenizer->PushTokenFront(theToken); //else result=NS_OK; /***************************************************************/ if (mDTDDebug) { //mDTDDebug->Verify(this, mParser, mBodyContext->GetCount(), mBodyContext->mStack, mFilename); } } //if } } }//if return result; } /** * This method causes all tokens to be dispatched to the given tag handler. * * @update gess 3/25/98 * @param aHandler -- object to receive subsequent tokens... * @return error code (usually 0) */ nsresult CNavDTD::CaptureTokenPump(nsITagHandler* aHandler) { nsresult result=NS_OK; return result; } /** * This method releases the token-pump capture obtained in CaptureTokenPump() * * @update gess 3/25/98 * @param aHandler -- object that received tokens... * @return error code (usually 0) */ nsresult CNavDTD::ReleaseTokenPump(nsITagHandler* aHandler){ nsresult result=NS_OK; return result; } /** * This gets called after we've handled a given start tag. * It's a generic hook to let us to post processing. * @param aToken contains the tag in question * @param aChildTag is the tag itself. * @return status */ nsresult CNavDTD::DidHandleStartTag(nsCParserNode& aNode,eHTMLTags aChildTag){ nsresult result=NS_OK; switch(aChildTag){ case eHTMLTag_pre: case eHTMLTag_listing: { CToken* theNextToken=mTokenizer->PeekToken(); if(theNextToken) { eHTMLTokenTypes theType=eHTMLTokenTypes(theNextToken->GetTokenType()); if(eToken_newline==theType){ mTokenizer->PopToken(); //skip 1st newline inside PRE and LISTING }//if }//if } break; case eHTMLTag_plaintext: case eHTMLTag_xmp: //grab the skipped content and dump it out as text... { const nsString& theText=aNode.GetSkippedContent(); if(0=0;i--){ if(aTagStack[i]==aTag) return i; } return kNotFound; } #endif /** * Call this to find the index of a given child, or (if not found) * the index of its nearest synonym. 
* * @update gess 3/25/98 * @param aTagStack -- list of open tags * @param aTag -- tag to test for containership * @return index of kNotFound */ static PRInt32 GetIndexOfChildOrSynonym(nsEntryStack& aTagStack,eHTMLTags aChildTag) { PRInt32 theChildIndex=aTagStack.GetTopmostIndexOf(aChildTag); if(kNotFound==theChildIndex) { CTagList* theSynTags=gHTMLElements[aChildTag].GetSynonymousTags(); //get the list of tags that THIS tag can close if(theSynTags) { theChildIndex=theSynTags->GetTopmostIndexOf(aTagStack); } else{ theChildIndex=aTagStack.GetCount(); PRInt32 theGroup=nsHTMLElement::GetSynonymousGroups(gHTMLElements[aChildTag].mParentBits); while(-1<--theChildIndex) { eHTMLTags theTag=aTagStack[theChildIndex]; if(gHTMLElements[theTag].IsMemberOf(theGroup)) { break; } } } } return theChildIndex; } /** * This method is called to determine whether or not the child * tag is happy being OPENED in the context of the current * tag stack. This is only called if the current parent thinks * it wants to contain the given childtag. * * @param aChildTag -- tag enum of child to be opened * @param aTagStack -- ref to current tag stack in DTD. * @return PR_TRUE if child agrees to be opened here. */ static PRBool CanBeContained(eHTMLTags aChildTag,nsEntryStack& aTagStack) { /* # Interesting test cases: Result: * 1.