/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* * The contents of this file are subject to the Netscape Public License * Version 1.0 (the "NPL"); you may not use this file except in * compliance with the NPL. You may obtain a copy of the NPL at * http://www.mozilla.org/NPL/ * * Software distributed under the NPL is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL * for the specific language governing rights and limitations under the * NPL. * * The Initial Developer of this code under the NPL is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1998 Netscape Communications Corporation. All Rights * Reserved. */ #include "nsDebug.h" #include "nsIDTDDebug.h" #include "CNavDTD.h" #include "nsHTMLTokens.h" #include "nsCRT.h" #include "nsParser.h" #include "nsIParser.h" #include "nsIHTMLContentSink.h" #include "nsScanner.h" #include "nsTokenHandler.h" #include "nsIDTDDebug.h" #include "prenv.h" //this is here for debug reasons... #include "prtypes.h" //this is here for debug reasons... #include "prio.h" #include "plstr.h" #include "nsDTDUtils.h" #include "nsTagHandler.h" #include "nsViewSourceHTML.h" #include "nsHTMLTokenizer.h" #include "nsTime.h" #include "nsIElementObserver.h" #ifdef XP_PC #include //this is here for debug reasons... 
/*
 * NOTE(review): the original line structure of this region has been lost (many
 * source lines are run together) and the header names of the bare "#include"
 * directives are missing. The code is therefore left exactly as found; only
 * this comment has been added.
 *
 * What this region defines, in order:
 *  - RICKG_DEBUG is #defined (to 0) immediately before "#ifdef RICKG_DEBUG".
 *    #ifdef only tests whether the macro is defined, so the guarded section is
 *    enabled. NOTE(review): "#if RICKG_DEBUG" may have been intended -- confirm.
 *  - Interface-id constants, two error strings, kVerificationDir and gShowCRC.
 *  - Tag tables: gFormElementTags, gTableChildTags, gWhitespaceTags,
 *    gHeadChildTags and gNonPropagatedTags. gHeadChildTags lists the same
 *    table-structure tags as gTableChildTags (minus eHTMLTag_th).
 *    NOTE(review): that looks like a copy/paste left-over -- confirm.
 *  - CTagHandlerDeallocator: deque functor that deletes the nsITagHandler it is
 *    given and returns 0.
 *  - CTagFinder: deque functor; Initialize() stores the tag name to look for,
 *    operator() returns the handler whose GetString() equals that name, else 0.
 *  - CTagHandlerRegister: keeps handlers in an nsDeque constructed with a new
 *    CTagHandlerDeallocator; RegisterTagHandler() pushes a handler and
 *    FindTagHandler() returns the result of FirstThat() with the CTagFinder
 *    (nsnull/0 when no handler matches).
 */
#include #endif #include "prmem.h" #undef ENABLE_RESIDUALSTYLE #define RICKG_DEBUG 0 #ifdef RICKG_DEBUG #include #endif static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); static NS_DEFINE_IID(kIDTDIID, NS_IDTD_IID); static NS_DEFINE_IID(kClassIID, NS_INAVHTML_DTD_IID); static const char* kNullToken = "Error: Null token given"; static const char* kInvalidTagStackPos = "Error: invalid tag stack position"; static char* kVerificationDir = "c:/temp"; static char gShowCRC=0; static eHTMLTags gFormElementTags[]= { eHTMLTag_button, eHTMLTag_fieldset, eHTMLTag_input, eHTMLTag_isindex, eHTMLTag_label, eHTMLTag_legend, eHTMLTag_option, eHTMLTag_optgroup, eHTMLTag_select, eHTMLTag_textarea}; static eHTMLTags gTableChildTags[]={ eHTMLTag_caption, eHTMLTag_col, eHTMLTag_colgroup, eHTMLTag_tbody, eHTMLTag_tfoot, eHTMLTag_tr, eHTMLTag_th, eHTMLTag_thead, eHTMLTag_td}; static eHTMLTags gWhitespaceTags[]={ eHTMLTag_newline, eHTMLTag_whitespace}; static eHTMLTags gHeadChildTags[]={ eHTMLTag_caption, eHTMLTag_col, eHTMLTag_colgroup, eHTMLTag_tbody, eHTMLTag_tfoot, eHTMLTag_tr, eHTMLTag_thead, eHTMLTag_td}; static eHTMLTags gNonPropagatedTags[]={ eHTMLTag_head, eHTMLTag_html, eHTMLTag_body}; #include "nsElementTable.h" /*************************************************************** This the ITagHandler deque deallocator, needed by the CTagHandlerRegister ***************************************************************/ class CTagHandlerDeallocator: public nsDequeFunctor{ public: virtual void* operator()(void* aObject) { nsITagHandler* tagHandler = (nsITagHandler*)aObject; delete tagHandler; return 0; } }; /*************************************************************** This funtor will be called for each item in the TagHandler que to check for a Tag name, and setting the current TagHandler when it is reached ***************************************************************/ class CTagFinder: public nsDequeFunctor{ public: CTagFinder(){} void Initialize(const nsString &aTagName) 
{mTagName = aTagName;} virtual ~CTagFinder() { } virtual void* operator()(void* aObject) { nsString* theString = ((nsITagHandler*)aObject)->GetString(); if( theString->Equals(mTagName)){ return aObject; } return(0); } nsAutoString mTagName; }; /*************************************************************** This a an object that will keep track of TagHandlers in the DTD. Uses a factory pattern ***************************************************************/ class CTagHandlerRegister { public: CTagHandlerRegister() : mTagHandlerDeque(new CTagHandlerDeallocator()) { } ~CTagHandlerRegister() { } void RegisterTagHandler(nsITagHandler *aTagHandler){ mTagHandlerDeque.Push(aTagHandler); } nsITagHandler* FindTagHandler(const nsString &aTagName){ nsITagHandler* foundHandler = nsnull; mTagFinder.Initialize(aTagName); mTagHandlerDeque.Begin(); foundHandler = (nsITagHandler*) mTagHandlerDeque.FirstThat(mTagFinder); return foundHandler; } nsDeque mTagHandlerDeque; CTagFinder mTagFinder; }; /************************************************************************ The CTagHandlerRegister for a CNavDTD. This is where special taghanders for our tags can be managed and called from Note: This can also be attached to some object so it can be refcounted and destroyed if you want this to go away when not imbedded. ************************************************************************/ //CTagHandlerRegister gTagHandlerRegister; /************************************************************************ And now for the main class -- CNavDTD... ************************************************************************/ /** * This method gets called as part of our COM-like interfaces. * Its purpose is to create an interface to parser object * of some type. 
* * @update gess 4/8/98 * @param nsIID id of object to discover * @param aInstancePtr ptr to newly discovered interface * @return NS_xxx result code */ nsresult CNavDTD::QueryInterface(const nsIID& aIID, void** aInstancePtr) { if (NULL == aInstancePtr) { return NS_ERROR_NULL_POINTER; } if(aIID.Equals(kISupportsIID)) { //do IUnknown... *aInstancePtr = (nsIDTD*)(this); } else if(aIID.Equals(kIDTDIID)) { //do IParser base class... *aInstancePtr = (nsIDTD*)(this); } else if(aIID.Equals(kClassIID)) { //do this class... *aInstancePtr = (CNavDTD*)(this); } else { *aInstancePtr=0; return NS_NOINTERFACE; } NS_ADDREF_THIS(); return NS_OK; } /** * This method is defined in nsIParser. It is used to * cause the COM-like construction of an nsParser. * * @update gess 4/8/98 * @param nsIParser** ptr to newly instantiated parser * @return NS_xxx error result */ NS_HTMLPARS nsresult NS_NewNavHTMLDTD(nsIDTD** aInstancePtrResult) { CNavDTD* it = new CNavDTD(); if (it == 0) { return NS_ERROR_OUT_OF_MEMORY; } return it->QueryInterface(kClassIID, (void **) aInstancePtrResult); } NS_IMPL_ADDREF(CNavDTD) NS_IMPL_RELEASE(CNavDTD) /** * * * @update gess 6/9/98 * @param * @return */ static PRInt32 NavDispatchTokenHandler(CToken* aToken,nsIDTD* aDTD) { PRInt32 result=0; CHTMLToken* theToken= (CHTMLToken*)(aToken); eHTMLTokenTypes theType= (eHTMLTokenTypes)theToken->GetTokenType(); CNavDTD* theDTD=(CNavDTD*)aDTD; if(aDTD) { switch(theType) { case eToken_start: case eToken_whitespace: case eToken_newline: case eToken_text: result=theDTD->HandleStartToken(aToken); break; case eToken_end: result=theDTD->HandleEndToken(aToken); break; case eToken_comment: result=theDTD->HandleCommentToken(aToken); break; case eToken_entity: result=theDTD->HandleEntityToken(aToken); break; case eToken_attribute: result=theDTD->HandleAttributeToken(aToken); break; case eToken_style: result=theDTD->HandleStyleToken(aToken); break; case eToken_instruction: result=theDTD->HandleProcessingInstructionToken(aToken); break; 
default: result=0; }//switch }//if return result; } /** * Register a handler. * * @update gess 4/2/98 * @param * @return */ CITokenHandler* CNavDTD::AddTokenHandler(CITokenHandler* aHandler) { NS_ASSERTION(0!=aHandler,"Error: Null handler"); if(aHandler) { eHTMLTokenTypes type=(eHTMLTokenTypes)aHandler->GetTokenType(); if(typeSetVerificationDirectory(kVerificationDir); } if(mDTDDebug) { // mDTDDebug->Verify(this,aParser,mBodyContext->GetCount(),mBodyContext->mStack,aURLRef); } return result; } /** * This method is called to determine if the given DTD can parse * a document in a given source-type. * NOTE: Parsing always assumes that the end result will involve * storing the result in the main content model. * @update gess6/24/98 * @param * @return TRUE if this DTD can satisfy the request; FALSE otherwise. */ eAutoDetectResult CNavDTD::CanParse(nsString& aContentType, nsString& aCommand, nsString& aBuffer, PRInt32 aVersion) { eAutoDetectResult result=eUnknownDetect; if(!aCommand.Equals(kViewSourceCommand)) { if(PR_TRUE==aContentType.Equals(kHTMLTextContentType)) { result=ePrimaryDetect; } else { //otherwise, look into the buffer to see if you recognize anything... 
if(BufferContainsHTML(aBuffer)){ result=ePrimaryDetect; if(0==aContentType.Length()) aContentType=kHTMLTextContentType; } } } return result; } PRTime gStartTime; /** * * @update gess5/18/98 * @param * @return */ nsresult CNavDTD::WillBuildModel(nsString& aFilename,PRBool aNotifySink,nsString& aSourceType,nsIContentSink* aSink){ nsresult result=NS_OK; mFilename=aFilename; mHasOpenBody=PR_FALSE; mHadBodyOrFrameset=PR_FALSE; mLineNumber=1; mHasOpenScript=PR_FALSE; mSink=(nsIHTMLContentSink*)aSink; if((aNotifySink) && (mSink)) { #ifdef RGESS_DEBUG gStartTime = PR_Now(); printf("Begin parsing...\n"); #endif result = mSink->WillBuildModel(); mSkipTarget=eHTMLTag_unknown; mComputedCRC32=0; mExpectedCRC32=0; } return result; } CTokenRecycler* gRecycler=0; /** * This is called when it's time to read as many tokens from the tokenizer * as you can. Not all tokens may make sense, so you may not be able to * read them all (until more come in later). * * @update gess5/18/98 * @param aParser is the parser object that's driving this process * @return error code (almost always 0) */ nsresult CNavDTD::BuildModel(nsIParser* aParser,nsITokenizer* aTokenizer,nsITokenObserver* anObserver,nsIContentSink* aSink) { nsresult result=NS_OK; NS_ADDREF(aSink); if(aTokenizer) { nsITokenizer* oldTokenizer=mTokenizer; mTokenizer=aTokenizer; mParser=(nsParser*)aParser; mSink=(nsIHTMLContentSink*)aSink; gRecycler=(CTokenRecycler*)mTokenizer->GetTokenRecycler(); while(NS_OK==result){ CToken* theToken=mTokenizer->PopToken(); if(theToken) { result=HandleToken(theToken,aParser); } else break; }//while mTokenizer=oldTokenizer; } else result=NS_ERROR_HTMLPARSER_BADTOKENIZER; NS_IF_RELEASE(aSink); return result; } /** * * @update gess5/18/98 * @param * @return */ nsresult CNavDTD::DidBuildModel(nsresult anErrorCode,PRBool aNotifySink,nsIParser* aParser,nsIContentSink* aSink){ nsresult result= NS_OK; if((NS_OK==anErrorCode) && (!mHadBodyOrFrameset)) { CStartToken theToken(eHTMLTag_body); //open the body 
container... result=HandleStartToken(&theToken); } if(aParser){ mSink=(nsIHTMLContentSink*)aSink; if(aNotifySink && mSink){ if((NS_OK==anErrorCode) && (mBodyContext->GetCount()>0)) { while(mBodyContext->GetCount() > 0) { eHTMLTags theTarget = mBodyContext->Last(); if(gHTMLElements[theTarget].HasSpecialProperty(kBadContentWatch)) result = HandleSavedTokensAbove(theTarget); CloseContainersTo(theTarget,PR_FALSE); } //result = CloseContainersTo(0,eHTMLTag_unknown,PR_FALSE); } #ifdef RGESS_DEBUG PRTime theEnd= PR_Now(); PRTime creates, ustoms; LL_I2L(ustoms, 1000); LL_SUB(creates, theEnd, gStartTime); LL_DIV(creates, creates, ustoms); printf("End parse elapsed: %lldms\n",creates); #endif //let's only grab this state once! if(!gShowCRC) { gShowCRC=1; //this only indicates we'll not initialize again. char* theEnvString = PR_GetEnv("RICKG_CRC"); if(theEnvString){ if(('1'==theEnvString[0]) || ('Y'==theEnvString[0]) || ('y'==theEnvString[0])){ gShowCRC=2; //this indicates that the CRC flag was found in the environment. } } } if(2==gShowCRC) { if(mComputedCRC32!=mExpectedCRC32) { if(mExpectedCRC32!=0) { printf("CRC Computed: %u Expected CRC: %u\n,",mComputedCRC32,mExpectedCRC32); result = mSink->DidBuildModel(2); } else { printf("Computed CRC: %u.\n",mComputedCRC32); result = mSink->DidBuildModel(3); } } else result = mSink->DidBuildModel(0); } else result=mSink->DidBuildModel(0); if(mDTDDebug) { mDTDDebug->DumpVectorRecord(); } } } return result; } /** * This big dispatch method is used to route token handler calls to the right place. * What's wrong with it? This table, and the dispatch methods themselves need to be * moved over to the delegate. Ah, so much to do... 
* * @update gess 5/21/98 * @param aToken * @param aParser * @return */ nsresult CNavDTD::HandleToken(CToken* aToken,nsIParser* aParser){ nsresult result=NS_OK; if(aToken) { CHTMLToken* theToken= (CHTMLToken*)(aToken); eHTMLTokenTypes theType=eHTMLTokenTypes(theToken->GetTokenType()); eHTMLTags theTag=(eHTMLTags)theToken->GetTypeID(); PRBool execSkipContent=PR_FALSE; /* --------------------------------------------------------------------------------- To understand this little piece of code, you need to look below too. In essence, this code caches "skipped content" until we find a given skiptarget. Once we find the skiptarget, we take all skipped content up to that point and coallate it. Then we push those tokens back onto the tokenizer deque. --------------------------------------------------------------------------------- */ if(mSkipTarget){ //handle a preexisting target... if((theTag==mSkipTarget) && (eToken_end==theType)){ mSkipTarget=eHTMLTag_unknown; //stop skipping. //mTokenizer->PushTokenFront(aToken); //push the end token... execSkipContent=PR_TRUE; gRecycler->RecycleToken(aToken); theToken=(CHTMLToken*)mSkippedContent.PopFront(); // result=HandleStartToken(theToken); } else { mSkippedContent.Push(theToken); return result; } } /* --------------------------------------------------------------------------------- This section of code is used to "move" misplaced content from one location in our document model to another. (Consider what would happen if we found a

tag and text in the head.) To move content, we throw it onto the misplacedcontent deque until we can deal with it. --------------------------------------------------------------------------------- */ if(!execSkipContent) { static eHTMLTags passThru[]= {eHTMLTag_html,eHTMLTag_comment,eHTMLTag_newline,eHTMLTag_whitespace,eHTMLTag_script}; if(!FindTagInSet(theTag,passThru,sizeof(passThru)/sizeof(eHTMLTag_unknown))){ if(!gHTMLElements[eHTMLTag_html].SectionContains(theTag,PR_FALSE)) { if(!mHadBodyOrFrameset){ if(mHasOpenHead) { //just fall through and handle current token if(!gHTMLElements[eHTMLTag_head].IsChildOfHead(theTag)){ mMisplacedContent.Push(aToken); return result; } } else { if(gHTMLElements[eHTMLTag_body].SectionContains(theTag,PR_TRUE)){ mTokenizer->PushTokenFront(aToken); //put this token back... mTokenizer->PrependTokens(mMisplacedContent); //push misplaced content theToken=(CHTMLToken*)gRecycler->CreateTokenOfType(eToken_start,eHTMLTag_body); //now open a body... } } } } } } if(theToken){ //Before dealing with the token normally, we need to deal with skip targets if((!execSkipContent) && (theType!=eToken_end) && (eHTMLTag_unknown==mSkipTarget) && (gHTMLElements[theTag].mSkipTarget)){ //create a new target mSkipTarget=gHTMLElements[theTag].mSkipTarget; mSkippedContent.Push(theToken); } else { CITokenHandler* theHandler=GetTokenHandler(theType); if(theHandler) { mParser=(nsParser*)aParser; result=(*theHandler)(theToken,this); if(NS_SUCCEEDED(result) || (NS_ERROR_HTMLPARSER_BLOCK==result)) { gRecycler->RecycleToken(theToken); } else if(NS_ERROR_HTMLPARSER_MISPLACED!=result) mTokenizer->PushTokenFront(theToken); else result=NS_OK; if (mDTDDebug) { //mDTDDebug->Verify(this, mParser, mBodyContext->GetCount(), mBodyContext->mStack, mFilename); } } //if } } }//if return result; } /** * This method causes all tokens to be dispatched to the given tag handler. * * @update gess 3/25/98 * @param aHandler -- object to receive subsequent tokens... 
* NOTE(review): in this version the method ignores aHandler and simply returns NS_OK.
* @return error code (usually 0) */ nsresult CNavDTD::CaptureTokenPump(nsITagHandler* aHandler) { nsresult result=NS_OK; return result; } /** * This method releases the token-pump capture obtained in CaptureTokenPump() * NOTE(review): in this version the method ignores aHandler and simply returns NS_OK. * * @update gess 3/25/98 * @param aHandler -- object that received tokens... * @return error code (usually 0) */ nsresult CNavDTD::ReleaseTokenPump(nsITagHandler* aHandler){ nsresult result=NS_OK; return result; } /** * This gets called after we've handled a given start tag. * It's a generic hook to let us do post processing: BODY/FRAMESET set mHadBodyOrFrameset, and PRE/LISTING drop a newline token that immediately follows. * NOTE(review): the source text of this function is missing after "if(0" in the PLAINTEXT/XMP case; the statements that follow ("=0;i--){ ...") appear to be the tail of a different, index-search function. Left exactly as found. * @param aNode is the parser node for the tag in question * @param aChildTag is the tag itself. * @return status */ nsresult CNavDTD::DidHandleStartTag(nsCParserNode& aNode,eHTMLTags aChildTag){ nsresult result=NS_OK; switch(aChildTag){ case eHTMLTag_body: case eHTMLTag_frameset: mHadBodyOrFrameset=PR_TRUE; break; case eHTMLTag_pre: case eHTMLTag_listing: { CToken* theNextToken=mTokenizer->PeekToken(); if(theNextToken) { eHTMLTokenTypes theType=eHTMLTokenTypes(theNextToken->GetTokenType()); if(eToken_newline==theType){ mTokenizer->PopToken(); //skip 1st newline inside PRE and LISTING }//if }//if } break; case eHTMLTag_plaintext: case eHTMLTag_xmp: //grab the skipped content and dump it out as text... { const nsString& theText=aNode.GetSkippedContent(); if(0=0;i--){ if(aTagStack[i]==aTag) return i; } return kNotFound; } /** * This method is called to determine whether or not a START tag * can be autoclosed. This means that based on the current * context, the stack should be closed to the nearest matching * tag. 
* Scans aTagStack from the top down and returns index+1 for the first entry that is one of aChild's root tags, or for which aDTD.CanOmit() or aDTD.CanContain() accepts aChild. Returns the stack count when aChild is not a container or when no entry qualifies.
* * @param aTag -- tag enum of child to be tested * @return index of tag to be closed */ static PRInt32 FindAutoCloseTargetForStartTag2(eHTMLTags aChild,nsEntryStack& aTagStack,CNavDTD& aDTD) { PRInt32 max=aTagStack.GetCount(); if(nsHTMLElement::IsContainer(aChild)){ PRInt32 index=max; CTagList* theRootTags=gHTMLElements[aChild].GetRootTags(); while(--index>=0){ eHTMLTags thePrevTag=aTagStack.TagAt(index); if((theRootTags) && (theRootTags->Contains(thePrevTag))) { return index+1; } if(aDTD.CanOmit(thePrevTag,aChild)){ return index+1; } if(aDTD.CanContain(thePrevTag,aChild)){ return index+1; } } } return max; //which means don't close anything... } /** * This method is called to determine whether or not a START tag * can be autoclosed. This means that based on the current * context, the stack should be closed to the nearest matching * tag. * * @param aTag -- tag enum of child to be tested * @return PR_TRUE if autoclosure should occur */ static eHTMLTags FindAutoCloseTargetForStartTag(eHTMLTags aCurrentTag,nsEntryStack& aTagStack) { int theTopIndex = aTagStack.GetCount(); eHTMLTags thePrevTag=aTagStack.Last(); if(nsHTMLElement::IsContainer(aCurrentTag)){ if(thePrevTag==aCurrentTag) { return (gHTMLElements[aCurrentTag].CanContainSelf()) ? eHTMLTag_unknown: aCurrentTag; } CTagList* theRootTags=gHTMLElements[aCurrentTag].GetRootTags(); PRInt32 theRootIndex=(theRootTags) ? theRootTags->GetTopmostIndexOf(aTagStack) : kNotFound; if(nsHTMLElement::IsBlockCloser(aCurrentTag)) { if(theRootTags) { CTagList* theStartTags=gHTMLElements[aCurrentTag].GetAutoCloseStartTags(); PRInt32 thePeerIndex=kNotFound; if(theStartTags){ thePeerIndex=theStartTags->GetBottommostIndexOf(aTagStack,theRootIndex+1); } else { //this extra check is need to handle case like this:
// NOTE(review): the example markup this comment referred to is missing from this copy of the file.
//the new div can close the P,but doesn't close the top DIV. thePeerIndex=GetTopmostIndexOf(aCurrentTag,aTagStack); if(gHTMLElements[aCurrentTag].CanContainSelf()) { thePeerIndex++; } } if(theRootIndexGetTopmostIndexOf(aTagStack); if(kNotFound!=thePeerIndex){ if(thePeerIndex==theTopIndex-1) { //the guy you can autoclose is on the top of the stack... return thePrevTag; } //if } //if }//if } //if isblockcloser if(kNotFound
// NOTE(review): source text is missing between "if(kNotFound" above and "<- while(" below (and after "if(theRootIndex" earlier); left exactly as found.
<- while((theRootIndex<--theTopIndex) && (!gHTMLElements[aTagStack[theTopIndex]].CanContain(aCurrentTag))) { } return aTagStack[theTopIndex+1]; //return aTagStack.mTags[theRootIndex+1]; } //else } //if return eHTMLTag_unknown; } /** * Call this to find the index of a given child, or (if not found) * the index of its nearest synonym. When the child has no synonym list, the stack is scanned from the top for an entry that is a member of the child's mParentBits group. * * @update gess 3/25/98 * @param aTagStack -- list of open tags * @param aChildTag -- tag to look for * @return index of the match. NOTE(review): the group-scan path yields -1 when nothing matches -- presumably equal to kNotFound; confirm. */ static PRInt32 GetIndexOfChildOrSynonym(nsEntryStack& aTagStack,eHTMLTags aChildTag) { PRInt32 theChildIndex=aTagStack.GetTopmostIndexOf(aChildTag); if(kNotFound==theChildIndex) { CTagList* theSynTags=gHTMLElements[aChildTag].GetSynonymousTags(); //get the list of tags that THIS tag can close if(theSynTags) { theChildIndex=theSynTags->GetTopmostIndexOf(aTagStack); } else{ theChildIndex=aTagStack.GetCount(); PRInt32 theGroup=gHTMLElements[aChildTag].mParentBits; while(-1<--theChildIndex) { eHTMLTags theTag=aTagStack[theChildIndex]; if(gHTMLElements[theTag].IsMemberOf(theGroup)) { break; } } } } return theChildIndex; } /** * This method is called to determine whether or not the child * tag is happy being OPENED in the context of the current * tag stack. This is only called if the current parent thinks * it wants to contain the given childtag. * * @param aParentTag -- prospective parent; not used by the live code below * @param aChildTag -- tag enum of child to be opened * @param aTagStack -- ref to current tag stack in DTD. * @return PR_TRUE if child agrees to be opened here. */ static PRBool CanBeContained(eHTMLTags aParentTag,eHTMLTags aChildTag,nsEntryStack& aTagStack) { PRBool result=PR_TRUE; /* # Interesting test cases: Result: * 1.
... allow nested
* 4. ... */ //Note: This method is going away. First we need to get the elementtable to do closures right, and // therefore we must get residual style handling to work. /* // I've removed this on general principle. // If style tags want to contain each other, the state should be in the element table. if(nsHTMLElement::IsStyleTag(aParentTag)) if(nsHTMLElement::IsStyleTag(aChildTag)) return PR_TRUE; */ if(aTagStack.GetCount()){ CTagList* theRootTags=gHTMLElements[aChildTag].GetRootTags(); if(theRootTags) { PRInt32 theRootIndex=theRootTags->GetTopmostIndexOf(aTagStack); PRInt32 theChildIndex=GetIndexOfChildOrSynonym(aTagStack,aChildTag); if((theRootIndex==theChildIndex) && (gHTMLElements[aChildTag].CanContainSelf())) result=PR_TRUE; else result=PRBool(theRootIndex>theChildIndex); } } return result; } /** * Returns eIgnore when the parent's mParentBits equals kSpecial, kPhrase, kFontStyle or kFormControl and the child's mParentBits equals one of the groups listed in the inner switch; returns eTest in every other case. */ enum eProcessRule {eIgnore,eTest}; eProcessRule GetProcessRule(eHTMLTags aParentTag,eHTMLTags aChildTag){ int mParentGroup=gHTMLElements[aParentTag].mParentBits; int mChildGroup=gHTMLElements[aChildTag].mParentBits; eProcessRule result=eTest; switch(mParentGroup){ case kSpecial: case kPhrase: case kFontStyle: case kFormControl: switch(mChildGroup){ case kBlock: case kHTMLContent: case kExtensions: //case kFlowEntity: case kList: case kBlockEntity: case kHeading: case kHeadMisc: case kPreformatted: case kNone: result=eIgnore; } break; default: break; } return result; } /** * This method gets called when a start token has been * encountered in the parse process. If the current container * can contain this tag, then add it. Otherwise, you have * two choices: 1) create an implicit container for this tag * to be stored in * 2) close the top container, and add this to * whatever container ends up on top. 
* * @update gess 3/25/98 * @param aToken -- next (start) token to be handled * @param aNode -- CParserNode representing this start token * @return PR_TRUE if all went well; PR_FALSE if error occured */ nsresult CNavDTD::HandleDefaultStartToken(CToken* aToken,eHTMLTags aChildTag,nsIParserNode& aNode) { NS_PRECONDITION(0!=aToken,kNullToken); nsresult result=NS_OK; PRBool theCanContainResult=PR_FALSE; PRBool theChildAgrees=PR_TRUE; PRInt32 theIndex=mBodyContext->GetCount(); do { eHTMLTags theParentTag=mBodyContext->TagAt(--theIndex); if(CanOmit(theParentTag,aChildTag)){ //call handleOmittedTag()... return result; } eProcessRule theRule=eTest; //GetProcessRule(theParentTag,aChildTag); switch(theRule){ case eTest: theCanContainResult=CanContain(theParentTag,aChildTag); theChildAgrees=PR_TRUE; if(theCanContainResult) { eHTMLTags theAncestor=gHTMLElements[aChildTag].mExcludingAncestor; if(eHTMLTag_unknown!=theAncestor){ theChildAgrees=!HasOpenContainer(theAncestor); } if(theChildAgrees){ theAncestor=gHTMLElements[aChildTag].mRequiredAncestor; if(eHTMLTag_unknown!=theAncestor){ theChildAgrees=HasOpenContainer(theAncestor); } } } if(!(theCanContainResult && theChildAgrees)) { if (!CanPropagate(theParentTag,aChildTag)) { if(nsHTMLElement::IsContainer(aChildTag)){ CloseContainersTo(theIndex,theParentTag,PR_TRUE); }//if else break; }//if else { CreateContextStackFor(aChildTag); theIndex=mBodyContext->GetCount(); } }//if break; case eIgnore: default: break; }//switch } while(!(theCanContainResult && theChildAgrees)); if(nsHTMLElement::IsContainer(aChildTag)){ result=OpenContainer(aNode,PR_TRUE); } else { //we're writing a leaf... result=AddLeaf(aNode); } return result; } #if 0 /** * This method gets called when a start token has been * encountered in the parse process. If the current container * can contain this tag, then add it. 
Otherwise, you have * two choices: 1) create an implicit container for this tag * to be stored in * 2) close the top container, and add this to * whatever container ends up on top. * * @update gess 3/25/98 * @param aToken -- next (start) token to be handled * @param aNode -- CParserNode representing this start token * @return PR_TRUE if all went well; PR_FALSE if error occured */ nsresult CNavDTD::HandleDefaultStartToken(CToken* aToken,eHTMLTags aChildTag,nsIParserNode& aNode) { NS_PRECONDITION(0!=aToken,kNullToken); nsresult result=NS_OK; //Sick as it sounds, I have to make sure the body has been //opened before other tags can be added to the content sink... PRBool rickgSkip=PR_FALSE; if(!rickgSkip) { /*********************************************************************** Subtlety alert: The REAL story on when tags are opened is somewhat confusing, but it's important to understand. Since this is where we deal with it, it's time for a quick disseration. Here goes: Given a stack of open tags, a new (child) tag comes along and we need to see if it can be opened in place. There are at least 2 reasons why it cannot be opened: 1) the parent says so; 2) the child says so. Parents refuse to take children they KNOW they can't contain. Consider that the tag is only *supposed* to contain certain tags -- no one would expect it to accept a rogue
  • tag (for example). Here's an interested case we should not break:
  • The DT is not a child of the LI, so the LI closes. Then the DT also closes the DD, and it's parent DT. At last, it reopens itself below DL. ***********************************************************************/ eHTMLTags theParentTag=mBodyContext->Last(); PRBool theCanContainResult=CanContain(theParentTag,aChildTag); PRBool theChildAgrees=(theCanContainResult) ? CanBeContained(theParentTag,aChildTag,mBodyContext->mStack) : PR_FALSE; if(!(theCanContainResult && theChildAgrees)) { PRInt32 theIndex=FindAutoCloseTargetForStartTag2(aChildTag,mBodyContext->mStack,*this); eHTMLTags theTarg2=mBodyContext->TagAt(theIndex); eHTMLTags theTarget=FindAutoCloseTargetForStartTag(aChildTag,mBodyContext->mStack); NS_ASSERTION(theTarg2==theTarget,"Error: target mismatch"); if(eHTMLTag_unknown!=theTarget){ result=CloseContainersTo(theTarget,PR_TRUE); theParentTag=mBodyContext->Last(); theCanContainResult=CanContain(theParentTag,aChildTag); } } if(PR_FALSE==theCanContainResult){ if(CanPropagate(theParentTag,aChildTag)) result=CreateContextStackFor(aChildTag); else result=kCantPropagate; if(NS_OK!=result) { //if you're here, then the new topmost container can't contain aToken. //You must determine what container hierarchy you need to hold aToken, //and create that on the parsestack. result=ReduceContextStackFor(aChildTag); PRBool theCanContainResult=CanContain(mBodyContext->Last(),aChildTag); if(PR_FALSE==theCanContainResult) { //we unwound too far; now we have to recreate a valid context stack. result=CreateContextStackFor(aChildTag); } } } }//if(!rickGSkip)... if(nsHTMLElement::IsContainer(aChildTag)){ //first, let's see if it's a style element... if(!nsHTMLElement::IsStyleTag(aChildTag)) { //it wasn't a style container, so open the element container... CloseTransientStyles(aChildTag); } result=OpenContainer(aNode,PR_TRUE); } else { //we're writing a leaf... 
result=AddLeaf(aNode); } return result; } #endif #ifdef RICKG_DEBUG void WriteTokenToLog(CToken* aToken) { static fstream outputStream("c:/temp/tokenlog.html",ios::out); aToken->DebugDumpSource(outputStream); //write token without close bracket... } #endif /************************************************************** Define the a functor used to notify observers... **************************************************************/ class nsObserverNotifier: public nsDequeFunctor{ public: nsObserverNotifier(eHTMLTags aTag,PRUint32 aCount,const PRUnichar** aKeys, const PRUnichar** aValues,PRUint32 aUniqueKey){ mCount=aCount; mKeys=aKeys; mValues=aValues; mUniqueKey=aUniqueKey; mTag=aTag; } virtual void* operator()(void* anObject) { nsIElementObserver* theObserver= (nsIElementObserver*)anObject; if(theObserver) { mResult = theObserver->Notify(mUniqueKey,mTag,mCount,mKeys,mValues); } if(NS_OK==mResult) return 0; return anObject; } const PRUnichar** mKeys; const PRUnichar** mValues; PRUint32 mCount; PRUint32 mUniqueKey; nsresult mResult; eHTMLTags mTag; }; /** * This gets called before we've handled a given start tag. * It's a generic hook to let us do pre processing. * @param aToken contains the tag in question * @param aChildTag is the tag itself. * @param aNode is the node (tag) with associated attributes. * @return TRUE if tag processing should continue; FALSE if the tag has been handled. */ nsresult CNavDTD::WillHandleStartTag(CToken* aToken,eHTMLTags aTag,nsCParserNode& aNode){ nsresult result=NS_OK; PRInt32 theAttrCount = aNode.GetAttributeCount(); //first let's see if there's some skipped content to deal with... if(gHTMLElements[aTag].mSkipTarget) { result=CollectSkippedContent(aNode,theAttrCount); } /********************************************************** THIS WILL ULTIMATELY BECOME THE REAL OBSERVER API... 
**********************************************************/ static CObserverDictionary gObserverDictionary; if(aTag < NS_HTML_TAG_MAX){ nsDeque* theDeque=gObserverDictionary.GetObserversForTag(aTag); if(theDeque){ PRUint32 theDequeSize=theDeque->GetSize(); if(0GetDocumentCharset(charsetValue, charsetSource); // Add pseudo attribute in the end if(index < 50) { theKeys[index]=theCharsetKey.GetUnicode(); theValues[index] = charsetValue.GetUnicode(); index++; } if(index < 50) { theKeys[index]=theSourceKey.GetUnicode(); PRInt32 sourceInt = charsetSource; intValue.Append(sourceInt,10); theValues[index] = intValue.GetUnicode(); index++; } CParserContext* pc=mParser->PeekContext(); void* theDocID=(pc) ? pc-> mKey : 0; nsObserverNotifier theNotifier(aTag,index,theKeys,theValues,(PRUint32)theDocID); theDeque->FirstThat(theNotifier); result=theNotifier.mResult; }//if } } if(eHTMLTag_meta==aTag) { PRInt32 theCount=aNode.GetAttributeCount(); if(1 if(theKey.EqualsIgnoreCase("HTTP-EQUIV")) { const nsString& theKey2=aNode.GetKeyAt(1); if(theKey2.EqualsIgnoreCase("CONTENT")) { nsScanner* theScanner=mParser->GetScanner(); if(theScanner) { const nsString& theValue=aNode.GetValueAt(1); PRInt32 charsetValueStart = theValue.RFind("charset=", PR_TRUE ) ; if(kNotFound != charsetValueStart) { charsetValueStart += 8; // 8 = "charset=".length PRInt32 charsetValueEnd = theValue.FindCharInSet("\'\";", charsetValueStart ); if(kNotFound == charsetValueEnd ) charsetValueEnd = theValue.Length(); nsAutoString theCharset; theValue.Mid(theCharset, charsetValueStart, charsetValueEnd - charsetValueStart); theScanner->SetDocumentCharset(theCharset, kCharsetFromMetaTag); // XXX this should be delete after META charset really work nsParser::gHackMetaCharset = theCharset; } //if } //if } } //if else #endif if(theKey.EqualsIgnoreCase("NAME")) { const nsString& theValue1=aNode.GetValueAt(0); if(theValue1.EqualsIgnoreCase("\"CRC\"")) { const nsString& theKey2=aNode.GetKeyAt(1); 
if(theKey2.EqualsIgnoreCase("CONTENT")) { const nsString& theValue2=aNode.GetValueAt(1); PRInt32 err=0; mExpectedCRC32=theValue2.ToInteger(&err); } //if } //if } //else } //if }//if if(NS_OK==result) { result=gHTMLElements[aTag].HasSpecialProperty(kDiscardTag) ? 1 : NS_OK; } PRBool isHeadChild=gHTMLElements[eHTMLTag_head].IsChildOfHead(aTag); //this code is here to make sure the head is closed before we deal //with any tags that don't belong in the head. if(NS_OK==result) { if(mHasOpenHead){ static eHTMLTags skip2[]={eHTMLTag_newline,eHTMLTag_whitespace}; if(!FindTagInSet(aTag,skip2,sizeof(skip2)/sizeof(eHTMLTag_unknown))){ if(!isHeadChild){ CEndToken theToken(eHTMLTag_head); nsCParserNode theNode(&theToken,mLineNumber); result=CloseHead(theNode); } } } } return result; } /** * This method gets called when a start token has been encountered that the parent * wants to omit. * * @update gess 3/25/98 * @param aToken -- next (start) token to be handled * @param aChildTag -- id of the child in question * @param aParent -- id of the parent in question * @param aNode -- CParserNode representing this start token * @return PR_TRUE if all went well; PR_FALSE if error occured */ nsresult CNavDTD::HandleOmittedTag(CToken* aToken,eHTMLTags aChildTag,eHTMLTags aParent,nsIParserNode& aNode) { nsresult result=NS_OK; // Some tags need no to be opened regardless of what the parent says. if(gHTMLElements[aChildTag].HasSpecialProperty(kLegalOpen)) { return !NS_OK; } //The trick here is to see if the parent can contain the child, but prefers not to. //Only if the parent CANNOT contain the child should we look to see if it's potentially a child //of another section. If it is, the cache it for later. // 1. Get the root node for the child. 
See if the ultimate node is the BODY, FRAMESET, HEAD or HTML PRInt32 theTagCount = mBodyContext->GetCount(); if(gHTMLElements[aParent].HasSpecialProperty(kBadContentWatch)) { eHTMLTags theTag; PRInt32 theBCIndex; PRBool isNotWhiteSpace = PR_FALSE; PRInt32 attrCount = aToken->GetAttributeCount(); while(theTagCount > 0) { theTag = mBodyContext->TagAt(--theTagCount); if(!gHTMLElements[theTag].HasSpecialProperty(kBadContentWatch)) { if(!gHTMLElements[theTag].CanContain(aChildTag)) return result; theBCIndex = theTagCount; break; } } if(!FindTagInSet(aChildTag,gWhitespaceTags,sizeof(gWhitespaceTags)/sizeof(aChildTag))) { isNotWhiteSpace = mSaveBadTokens = PR_TRUE; } if(mSaveBadTokens) { mBodyContext->SaveToken(aToken,theBCIndex); // If the token is attributed then save those attributes too. if(attrCount > 0) { nsCParserNode* theAttrNode = (nsCParserNode*)&aNode; while(attrCount > 0){ mBodyContext->SaveToken(theAttrNode->PopAttributeToken(),theBCIndex); attrCount--; } } if(!IsContainer(aChildTag) && isNotWhiteSpace) { mSaveBadTokens = PR_FALSE; } result=NS_ERROR_HTMLPARSER_MISPLACED; } } return result; } /** * This method gets called when a start token has been * encountered in the parse process. If the current container * can contain this tag, then add it. Otherwise, you have * two choices: 1) create an implicit container for this tag * to be stored in * 2) close the top container, and add this to * whatever container ends up on top. * * @update gess 1/04/99 * @param aToken -- next (start) token to be handled * @param aNode -- CParserNode representing this start token * @return PR_TRUE if all went well; PR_FALSE if error occured */ nsresult CNavDTD::HandleStartToken(CToken* aToken) { NS_PRECONDITION(0!=aToken,kNullToken); #ifdef RICKG_DEBUG WriteTokenToLog(aToken); #endif //Begin by gathering up attributes... 
eHTMLTags theChildTag=(eHTMLTags)aToken->GetTypeID(); nsCParserNode attrNode((CHTMLToken*)aToken,mLineNumber,GetTokenRecycler()); PRInt16 attrCount=aToken->GetAttributeCount(); nsresult result=(0==attrCount) ? NS_OK : CollectAttributes(attrNode,theChildTag,attrCount); eHTMLTags theParent=mBodyContext->Last(); if(NS_OK==result) { if(NS_OK==WillHandleStartTag(aToken,theChildTag,attrNode)) { if(nsHTMLElement::IsSectionTag(theChildTag)){ switch(theChildTag){ case eHTMLTag_body: case eHTMLTag_head: if(mHadBodyOrFrameset) { result=HandleOmittedTag(aToken,theChildTag,theParent,attrNode); if(result == NS_OK) return result; } break; default: break; } } PRBool theHeadIsParent=nsHTMLElement::IsChildOfHead(theChildTag); switch(theChildTag) { case eHTMLTag_area: if (mHasOpenMap && mSink) result=mSink->AddLeaf(attrNode); break; case eHTMLTag_comment: case eHTMLTag_userdefined: break; //drop them on the floor for now... case eHTMLTag_script: theHeadIsParent=(!mHasOpenBody); //intentionally fall through... mHasOpenScript=PR_TRUE; default: { if(theHeadIsParent) result=AddHeadLeaf(attrNode); else if(CanOmit(theParent,theChildTag)) result=HandleOmittedTag(aToken,theChildTag,theParent,attrNode); else result=HandleDefaultStartToken(aToken,theChildTag,attrNode); } break; } //switch //now do any post processing necessary on the tag... if(NS_OK==result) DidHandleStartTag(attrNode,theChildTag); } } //if if(eHTMLTag_newline==theChildTag) mLineNumber++; return result; } /** * Call this to see if you have a closeable peer on the stack that * is ABOVE one of its root tags. * * @update gess 4/11/99 * @param aRootTagList -- list of root tags for aTag * @param aTag -- tag to test for containership * @return PR_TRUE if given tag can contain other tags */ static PRBool HasCloseablePeerAboveRoot(CTagList& aRootTagList,nsEntryStack& aTagStack,eHTMLTags aTag,PRBool anEndTag) { PRInt32 theRootIndex=aRootTagList.GetTopmostIndexOf(aTagStack); CTagList* theCloseTags=(anEndTag) ? 
gHTMLElements[aTag].GetAutoCloseEndTags() : gHTMLElements[aTag].GetAutoCloseStartTags(); PRInt32 theChildIndex=-1; PRBool result=PR_FALSE; if(theCloseTags) { theChildIndex=theCloseTags->GetTopmostIndexOf(aTagStack); } else { if((anEndTag) || (!gHTMLElements[aTag].CanContainSelf())) theChildIndex=aTagStack.GetTopmostIndexOf(aTag); } // I changed this to theRootIndex<=theChildIndex so to handle this case: //