/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * The contents of this file are subject to the Netscape Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is Netscape
 * Communications Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All
 * Rights Reserved.
 *
 * Contributor(s):
 */

#include "nsDebug.h"
#include "nsIDTDDebug.h"
#include "CNavDTD.h"
#include "nsHTMLTokens.h"
#include "nsCRT.h"
#include "nsParser.h"
#include "nsIParser.h"
#include "nsIHTMLContentSink.h"
#include "nsScanner.h"
#include "nsIDTDDebug.h"
#include "prenv.h" //this is here for debug reasons...
#include "prtypes.h" //this is here for debug reasons...
#include "prio.h"
#include "plstr.h"
#include "nsDTDUtils.h"
#include "nsTagHandler.h"
#include "nsHTMLTokenizer.h"
#include "nsTime.h"
#include "nsIElementObserver.h"
#include "nsViewSourceHTML.h"
#include "nsParserNode.h"
#include "nsHTMLEntities.h"

// NOTE(review): the header name is missing from the #include below in this
// copy of the file (presumably lost together with its angle brackets).
// Restore it from version control before building with XP_PC defined.
#ifdef XP_PC
#include //this is here for debug reasons...
#endif
#include "prmem.h"

#undef ENABLE_RESIDUALSTYLE

//#define RICKG_DEBUG
// NOTE(review): same damage as above -- the header name of this #include is
// missing. Restore it before enabling RICKG_DEBUG.
#ifdef RICKG_DEBUG
#include
#endif

// Interface IDs compared against in CNavDTD::QueryInterface() and used to
// query the content sink in WillBuildModel().
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kIDTDIID, NS_IDTD_IID);
static NS_DEFINE_IID(kClassIID, NS_INAVHTML_DTD_IID);

static const char* kNullToken = "Error: Null token given";
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";

// Directory handed to mDTDDebug->SetVerificationDirectory() in Verify().
static char* kVerificationDir = "c:/temp";

// File-wide token recycler; BuildModel() refreshes it from the tokenizer
// before dispatching tokens, so it is null until BuildModel() has run with
// a sink attached.
static CTokenRecycler* gRecycler=0;

#ifdef RICKG_DEBUG
// 0 = environment not inspected yet, 1 = inspected (CRC output off),
// 2 = RICKG_CRC was found enabled in the environment (see DidBuildModel()).
static char gShowCRC=0;
#endif

static eHTMLTags gFormElementTags[]= {
  eHTMLTag_button, eHTMLTag_fieldset,
  eHTMLTag_input, eHTMLTag_isindex,
  eHTMLTag_label, eHTMLTag_legend,
  eHTMLTag_option, eHTMLTag_optgroup,
  eHTMLTag_select, eHTMLTag_textarea};

static eHTMLTags gWhitespaceTags[]={
  eHTMLTag_newline, eHTMLTag_whitespace};

#include "nsElementTable.h"

// START_TIMER()/STOP_TIMER() start or stop both the parse timer and the DTD
// timer on mParser (when one is attached). Each macro expands to TWO
// statements, so it must not be used as the sole body of an unbraced
// if/else. Without MOZ_PERF_METRICS both expand to nothing.
#ifdef MOZ_PERF_METRICS
# define START_TIMER() \
    if(mParser) MOZ_TIMER_START(mParser->mParseTime); \
    if(mParser) MOZ_TIMER_START(mParser->mDTDTime);

# define STOP_TIMER() \
    if(mParser) MOZ_TIMER_STOP(mParser->mParseTime); \
    if(mParser) MOZ_TIMER_STOP(mParser->mDTDTime);
#else
# define STOP_TIMER()
# define START_TIMER()
#endif

/***************************************************************
  This is the ITagHandler deque deallocator, needed by the
  CTagHandlerRegister. It deletes the nsITagHandler it is handed
  and always returns 0.
 ***************************************************************/
class CTagHandlerDeallocator: public nsDequeFunctor{
public:
  virtual void* operator()(void* aObject) {
    nsITagHandler* tagHandler = (nsITagHandler*)aObject;
    delete tagHandler;
    return 0;
  }
};

/***************************************************************
  This functor will be called for each item in the TagHandler
  queue to check for a tag name. It returns the handler whose
  GetString() equals the name given to Initialize(), or 0 when
  the item does not match.
 ***************************************************************/
class CTagFinder: public nsDequeFunctor{
public:
  CTagFinder(){}
  // Remembers the tag name that subsequent operator() calls compare against.
  void Initialize(const nsString &aTagName) {mTagName = aTagName;}
  virtual ~CTagFinder() {
  }
  // NOTE(review): both aObject and the GetString() result are dereferenced
  // without a null check -- confirm handlers always provide a string.
  virtual void* operator()(void* aObject) {
    nsString* theString = ((nsITagHandler*)aObject)->GetString();
    if( theString->Equals(mTagName)){
      return aObject;
    }
    return(0);
  }

  nsAutoString mTagName;
};

/***************************************************************
  This is an object that will keep track of TagHandlers in
  the DTD. Uses a factory pattern
 ***************************************************************/
class CTagHandlerRegister {
public:

  CTagHandlerRegister();

  ~CTagHandlerRegister();

  // Appends aTagHandler to the deque. The deque was constructed with a
  // CTagHandlerDeallocator, which deletes handlers it is applied to.
  void RegisterTagHandler(nsITagHandler *aTagHandler){
    mTagHandlerDeque.Push(aTagHandler);
  }

  // Returns the first registered handler whose string equals aTagName, or
  // whatever FirstThat() yields when none matches (presumably null -- TODO
  // confirm against nsDeque).
  nsITagHandler* FindTagHandler(const nsString &aTagName){
    nsITagHandler* foundHandler = nsnull;

    mTagFinder.Initialize(aTagName);
    mTagHandlerDeque.Begin();
    foundHandler = (nsITagHandler*) mTagHandlerDeque.FirstThat(mTagFinder);
    return foundHandler;
  }

  nsDeque     mTagHandlerDeque;
  CTagFinder  mTagFinder;
};

MOZ_DECL_CTOR_COUNTER(CTagHandlerRegister);

// NOTE(review): the deallocator is heap-allocated and handed to nsDeque;
// confirm that nsDeque takes ownership of it, otherwise it leaks.
CTagHandlerRegister::CTagHandlerRegister() : mTagHandlerDeque(new CTagHandlerDeallocator())
{
  MOZ_COUNT_CTOR(CTagHandlerRegister);
}

CTagHandlerRegister::~CTagHandlerRegister()
{
  MOZ_COUNT_DTOR(CTagHandlerRegister);
}

/************************************************************************
  The CTagHandlerRegister for a CNavDTD.
  This is where special tag handlers for our tags can be managed and
  called from.
  Note: This can also be attached to some object so it can be refcounted
  and destroyed if you want this to go away when not embedded.
 ************************************************************************/

//CTagHandlerRegister gTagHandlerRegister;

/************************************************************************
  And now for the main class -- CNavDTD...
 ************************************************************************/

/**
 * This method gets called as part of our COM-like interfaces.
* Its purpose is to create an interface to parser object * of some type. * * @update gess 4/8/98 * @param nsIID id of object to discover * @param aInstancePtr ptr to newly discovered interface * @return NS_xxx result code */ nsresult CNavDTD::QueryInterface(const nsIID& aIID, void** aInstancePtr) { if (NULL == aInstancePtr) { return NS_ERROR_NULL_POINTER; } if(aIID.Equals(kISupportsIID)) { //do IUnknown... *aInstancePtr = (nsIDTD*)(this); } else if(aIID.Equals(kIDTDIID)) { //do IParser base class... *aInstancePtr = (nsIDTD*)(this); } else if(aIID.Equals(kClassIID)) { //do this class... *aInstancePtr = (CNavDTD*)(this); } else { *aInstancePtr=0; return NS_NOINTERFACE; } NS_ADDREF_THIS(); return NS_OK; } NS_IMPL_ADDREF(CNavDTD) NS_IMPL_RELEASE(CNavDTD) /** * Default constructor * * @update gess 4/9/98 * @param * @return */ CNavDTD::CNavDTD() : nsIDTD(), mMisplacedContent(0), mSkippedContent(0), mSharedNodes(0) { NS_INIT_REFCNT(); mSink = 0; mParser=0; mDTDDebug=0; mLineNumber=1; mHasOpenBody=PR_FALSE; mHasOpenHead=0; mHasOpenForm=PR_FALSE; mHasOpenMap=PR_FALSE; mHeadContext=new nsDTDContext(); mBodyContext=new nsDTDContext(); mFormContext=0; mMapContext=0; mTokenizer=0; mComputedCRC32=0; mExpectedCRC32=0; mDTDState=NS_OK; if(!gHTMLElements) { InitializeElementTable(); } // DebugDumpContainmentRules2(*this,"c:/temp/DTDRules.new","New CNavDTD Containment Rules"); #ifdef RICKG_DEBUG nsHTMLElement::DebugDumpContainment("c:/temp/rules.new","ElementTable Rules"); nsHTMLElement::DebugDumpMembership("c:/temp/table.out"); nsHTMLElement::DebugDumpContainType("c:/temp/ctnrules.out"); #endif } nsCParserNode* CNavDTD::CreateNode(void) { nsCParserNode* result=0; if(0PopAttributeToken())){ gRecycler->RecycleToken(theToken); } mSharedNodes.Push(aNode); } } /** * * @update gess1/8/99 * @param * @return */ const nsIID& CNavDTD::GetMostDerivedIID(void)const { return kClassIID; } /** * Default destructor * * @update gess 4/9/98 * @param * @return */ CNavDTD::~CNavDTD(){ delete 
mHeadContext; delete mBodyContext; if(mTokenizer) delete (nsHTMLTokenizer*)mTokenizer; nsCParserNode* theNode=0; while((theNode=(nsCParserNode*)mSharedNodes.Pop())){ delete theNode; } NS_IF_RELEASE(mSink); NS_IF_RELEASE(mDTDDebug); } /** * Call this method if you want the DTD to construct a fresh * instance of itself. * @update gess7/23/98 * @param * @return */ nsresult CNavDTD::CreateNewInstance(nsIDTD** aInstancePtrResult){ return NS_NewNavHTMLDTD(aInstancePtrResult); } /** * Called by the parser to initiate dtd verification of the * internal context stack. * @update gess 7/23/98 * @param * @return */ PRBool CNavDTD::Verify(nsString& aURLRef,nsIParser* aParser){ PRBool result=PR_TRUE; /* * Disable some DTD debugging code in the parser that * breaks on some compilers because of some broken * streams code in prstrm.cpp. */ #if !defined(MOZ_DISABLE_DTD_DEBUG) if(!mDTDDebug){ nsresult rval = NS_NewDTDDebug(&mDTDDebug); if (NS_OK != rval) { fputs("Cannot create parser debugger.\n", stdout); result=-PR_FALSE; } else mDTDDebug->SetVerificationDirectory(kVerificationDir); } #endif if(mDTDDebug) { // mDTDDebug->Verify(this,aParser,mBodyContext->GetCount(),mBodyContext->mStack,aURLRef); } return result; } /** * This method is called to determine if the given DTD can parse * a document in a given source-type. * NOTE: Parsing always assumes that the end result will involve * storing the result in the main content model. * @update gess6/24/98 * @param * @return TRUE if this DTD can satisfy the request; FALSE otherwise. */ eAutoDetectResult CNavDTD::CanParse(nsString& aContentType, nsString& aCommand, nsString& aBuffer, PRInt32 aVersion) { eAutoDetectResult result=eUnknownDetect; if(!aCommand.Equals(kViewSourceCommand)) { if(PR_TRUE==aContentType.Equals(kHTMLTextContentType)) { result=ePrimaryDetect; } else { //otherwise, look into the buffer to see if you recognize anything... 
if(BufferContainsHTML(aBuffer)){ result=ePrimaryDetect; if(0==aContentType.Length()) aContentType=kHTMLTextContentType; } } } return result; } /** * * @update gess5/18/98 * @param * @return */ nsresult CNavDTD::WillBuildModel(nsString& aFilename,PRBool aNotifySink,nsString& aSourceType,eParseMode aParseMode,nsIContentSink* aSink){ nsresult result=NS_OK; mFilename=aFilename; mHasOpenBody=PR_FALSE; mHadBody=PR_FALSE; mHadFrameset=PR_FALSE; mLineNumber=1; mHasOpenScript=PR_FALSE; mParseMode=aParseMode; if((aNotifySink) && (aSink)) { MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::WillBuildModel(), this=%p\n", this)); STOP_TIMER(); if(aSink && (!mSink)) { result=aSink->QueryInterface(kIHTMLContentSinkIID, (void **)&mSink); } result = aSink->WillBuildModel(); MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::WillBuildModel(), this=%p\n", this)); START_TIMER(); nsAutoString theTagName("html"); CStartToken theToken(theTagName,eHTMLTag_html); HandleStartToken(&theToken); mSkipTarget=eHTMLTag_unknown; mComputedCRC32=0; mExpectedCRC32=0; } return result; } /** * This is called when it's time to read as many tokens from the tokenizer * as you can. Not all tokens may make sense, so you may not be able to * read them all (until more come in later). 
* * @update gess5/18/98 * @param aParser is the parser object that's driving this process * @return error code (almost always NS_OK) */ nsresult CNavDTD::BuildModel(nsIParser* aParser,nsITokenizer* aTokenizer,nsITokenObserver* anObserver,nsIContentSink* aSink) { nsresult result=NS_OK; if(aTokenizer) { nsITokenizer* oldTokenizer=mTokenizer; mTokenizer=aTokenizer; mParser=(nsParser*)aParser; if(mSink) { gRecycler=(CTokenRecycler*)mTokenizer->GetTokenRecycler(); while(NS_SUCCEEDED(result)){ if(mDTDState!=NS_ERROR_HTMLPARSER_STOPPARSING) { CToken* theToken=mTokenizer->PopToken(); if(theToken) { result=HandleToken(theToken,aParser); } else break; } else { result=mDTDState; break; } }//while mTokenizer=oldTokenizer; } } else result=NS_ERROR_HTMLPARSER_BADTOKENIZER; return result; } /** * * @update gess5/18/98 * @param * @return */ nsresult CNavDTD::DidBuildModel(nsresult anErrorCode,PRBool aNotifySink,nsIParser* aParser,nsIContentSink* aSink){ nsresult result=NS_OK; if(aSink) { if((NS_OK==anErrorCode) && (!mHadBody) && (!mHadFrameset)) { CStartToken theToken(eHTMLTag_body); //open the body container... result=HandleStartToken(&theToken); mTokenizer->PrependTokens(mMisplacedContent); //push misplaced content result=BuildModel(aParser,mTokenizer,0,aSink); } if(aParser){ if(aNotifySink){ if((NS_OK==anErrorCode) && (mBodyContext->GetCount()>0)) { if(mSkipTarget) { CHTMLToken* theEndToken=nsnull; theEndToken=(CHTMLToken*)gRecycler->CreateTokenOfType(eToken_end,mSkipTarget); if(theEndToken) result=HandleToken(theEndToken,mParser); } if(result==NS_OK) { eHTMLTags theTarget; while(mBodyContext->GetCount() > 0) { theTarget = mBodyContext->Last(); if(gHTMLElements[theTarget].HasSpecialProperty(kBadContentWatch)) result = HandleSavedTokensAbove(theTarget); CloseContainersTo(theTarget,PR_FALSE); } } } MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::DidBuildModel(), this=%p\n", this)); STOP_TIMER(); #ifdef RICKG_DEBUG //let's only grab this state once! 
if(!gShowCRC) { gShowCRC=1; //this only indicates we'll not initialize again. char* theEnvString = PR_GetEnv("RICKG_CRC"); if(theEnvString){ if(('1'==theEnvString[0]) || ('Y'==theEnvString[0]) || ('y'==theEnvString[0])){ gShowCRC=2; //this indicates that the CRC flag was found in the environment. } } } if(2==gShowCRC) { if(mComputedCRC32!=mExpectedCRC32) { if(mExpectedCRC32!=0) { printf("CRC Computed: %u Expected CRC: %u\n,",mComputedCRC32,mExpectedCRC32); result = aSink->DidBuildModel(2); } else { printf("Computed CRC: %u.\n",mComputedCRC32); result = aSink->DidBuildModel(3); } } else result = aSink->DidBuildModel(0); } else result=aSink->DidBuildModel(0); #else result=aSink->DidBuildModel(0); #endif MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::DidBuildModel(), this=%p\n", this)); START_TIMER(); if(mDTDDebug) { mDTDDebug->DumpVectorRecord(); } } } } return result; } /** * This big dispatch method is used to route token handler calls to the right place. * What's wrong with it? This table, and the dispatch methods themselves need to be * moved over to the delegate. Ah, so much to do... * * @update gess 5/21/98 * @param aToken * @param aParser * @return */ nsresult CNavDTD::HandleToken(CToken* aToken,nsIParser* aParser){ nsresult result=NS_OK; if(aToken) { CHTMLToken* theToken= (CHTMLToken*)(aToken); eHTMLTokenTypes theType=eHTMLTokenTypes(theToken->GetTokenType()); eHTMLTags theTag=(eHTMLTags)theToken->GetTypeID(); PRBool execSkipContent=PR_FALSE; theToken->mRecycle=PR_TRUE; //assume every token coming into this system needs recycling. /* --------------------------------------------------------------------------------- To understand this little piece of code, you need to look below too. In essence, this code caches "skipped content" until we find a given skiptarget. Once we find the skiptarget, we take all skipped content up to that point and coallate it. Then we push those tokens back onto the tokenizer deque. 
--------------------------------------------------------------------------------- */ if(mSkipTarget){ //handle a preexisting target... if((theTag==mSkipTarget) && (eToken_end==theType)){ mSkipTarget=eHTMLTag_unknown; //stop skipping. //mTokenizer->PushTokenFront(aToken); //push the end token... execSkipContent=PR_TRUE; gRecycler->RecycleToken(aToken); theToken=(CHTMLToken*)mSkippedContent.PopFront(); theType=eToken_start; // result=HandleStartToken(theToken); } else { mSkippedContent.Push(theToken); return result; } } /* --------------------------------------------------------------------------------- This section of code is used to "move" misplaced content from one location in our document model to another. (Consider what would happen if we found a

tag and text in the head.) To move content, we throw it onto the misplacedcontent deque until we can deal with it. --------------------------------------------------------------------------------- */ if(!execSkipContent) { static eHTMLTags passThru[]= { eHTMLTag_html,eHTMLTag_comment,eHTMLTag_newline, eHTMLTag_whitespace,eHTMLTag_script,eHTMLTag_noscript, eHTMLTag_nolayer,eHTMLTag_markupDecl,eHTMLTag_userdefined}; if(!FindTagInSet(theTag,passThru,sizeof(passThru)/sizeof(eHTMLTag_unknown))){ if(!gHTMLElements[eHTMLTag_html].SectionContains(theTag,PR_FALSE)) { if((!mHadBody) && (!mHadFrameset)){ if(mHasOpenHead) { //just fall through and handle current token if(!gHTMLElements[eHTMLTag_head].IsChildOfHead(theTag)){ mMisplacedContent.Push(aToken); aToken->mRecycle=PR_FALSE; return result; } } else { if(gHTMLElements[eHTMLTag_body].SectionContains(theTag,PR_TRUE)){ mTokenizer->PushTokenFront(aToken); //put this token back... mTokenizer->PrependTokens(mMisplacedContent); //push misplaced content theToken=(CHTMLToken*)gRecycler->CreateTokenOfType(eToken_start,theTag=eHTMLTag_body); theType=eToken_start; //now open a body... 
} } } } } } if(theToken){ //Before dealing with the token normally, we need to deal with skip targets if((!execSkipContent) && (theType!=eToken_end) && (eHTMLTag_unknown==mSkipTarget) && (gHTMLElements[theTag].mSkipTarget)){ //create a new target mSkipTarget=gHTMLElements[theTag].mSkipTarget; mSkippedContent.Push(theToken); } else { mParser=(nsParser*)aParser; switch(theType) { case eToken_start: case eToken_whitespace: case eToken_newline: case eToken_text: result=HandleStartToken(theToken); break; case eToken_end: result=HandleEndToken(theToken); break; case eToken_comment: result=HandleCommentToken(theToken); break; case eToken_entity: result=HandleEntityToken(theToken); break; case eToken_attribute: result=HandleAttributeToken(theToken); break; case eToken_style: result=HandleStyleToken(theToken); break; case eToken_instruction: result=HandleProcessingInstructionToken(theToken); break; case eToken_doctypeDecl: result=HandleDocTypeDeclToken(theToken); break; default: break; }//switch if(NS_SUCCEEDED(result) || (NS_ERROR_HTMLPARSER_BLOCK==result)) { if(theToken->mRecycle) gRecycler->RecycleToken(theToken); } else if(result==NS_ERROR_HTMLPARSER_STOPPARSING) return result; else return NS_OK; /*************************************************************/ // CAUTION: Here we are forgetting to push the ATTRIBUTE Tokens. // So, before you uncomment this part please make sure // that the attribute tokens are also accounted for. //else if(NS_ERROR_HTMLPARSER_MISPLACED!=result) // mTokenizer->PushTokenFront(theToken); //else result=NS_OK; /***************************************************************/ #if 0 if (mDTDDebug) { mDTDDebug->Verify(this, mParser, mBodyContext->GetCount(), mBodyContext->mStack, mFilename); } #endif } } }//if return result; } /** * This method causes all tokens to be dispatched to the given tag handler. * * @update gess 3/25/98 * @param aHandler -- object to receive subsequent tokens... 
 * @return  error code (usually 0)
 */
nsresult CNavDTD::CaptureTokenPump(nsITagHandler* aHandler) {
  // Currently a stub: aHandler is ignored and NS_OK is returned.
  nsresult result=NS_OK;
  return result;
}

/**
 * This method releases the token-pump capture obtained in CaptureTokenPump()
 *
 * @update  gess 3/25/98
 * @param   aHandler -- object that received tokens...
 * @return  error code (usually 0)
 */
nsresult CNavDTD::ReleaseTokenPump(nsITagHandler* aHandler){
  // Currently a stub: aHandler is ignored and NS_OK is returned.
  nsresult result=NS_OK;
  return result;
}

/**
 * This gets called after we've handled a given start tag.
 * It's a generic hook to let us do post processing.
 *
 * @param   aNode contains the tag in question
 * @param   aChildTag is the tag itself.
 * @return  status
 */
nsresult CNavDTD::DidHandleStartTag(nsCParserNode& aNode,eHTMLTags aChildTag){
  nsresult result=NS_OK;

  switch(aChildTag){

    case eHTMLTag_pre:
    case eHTMLTag_listing:
      {
        // Peek first so that only a newline token is consumed.
        // NOTE(review): mTokenizer is dereferenced without a null check, and
        // the popped token is discarded here without being handed to the
        // recycler -- confirm that is intended.
        CToken* theNextToken=mTokenizer->PeekToken();
        if(theNextToken) {
          eHTMLTokenTypes theType=eHTMLTokenTypes(theNextToken->GetTokenType());
          if(eToken_newline==theType){
            mTokenizer->PopToken(); //skip 1st newline inside PRE and LISTING
          }//if
        }//if
      }
      break;

    case eHTMLTag_plaintext:
    case eHTMLTag_xmp:
      //grab the skipped content and dump it out as text...
      {
        MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::DidHandleStartTag(), this=%p\n", this));
        STOP_TIMER()
        const nsString& theText=aNode.GetSkippedContent();
        // NOTE(review): this copy of the file is damaged from the "if(" below
        // onward. The rest of DidHandleStartTag() and the head of a
        // conditionally compiled helper that searches a tag stack are
        // missing; the trailing #endif closes a directive that is no longer
        // visible. The surviving text is kept verbatim -- restore the
        // missing code from version control.
        if(0=0;i--){
    if(aTagStack[i]==aTag)
      return i;
  }
  return kNotFound;
}
#endif

/**
 * Call this to find the index of a given child, or (if not found)
 * the index of its nearest synonym.
 *
 * The lookup is tried in three stages: the tag itself, then the tag's
 * explicit list of synonymous tags, and -- only when the tag has no such
 * list -- any open tag that belongs to the tag's synonymous group.
 *
 * @update  gess 3/25/98
 * @param   aTagStack -- list of open tags
 * @param   aChildTag -- tag to look for
 * @return  index of the match in aTagStack; when nothing matches, the
 *          result of the last lookup tried (-1 for the group scan)
 */
static PRInt32 GetIndexOfChildOrSynonym(nsEntryStack& aTagStack,eHTMLTags aChildTag) {
  PRInt32 theChildIndex=aTagStack.GetTopmostIndexOf(aChildTag);
  if(kNotFound==theChildIndex) {
    TagList* theSynTags=gHTMLElements[aChildTag].GetSynonymousTags(); //get the list of tags that THIS tag can close
    if(theSynTags) {
      theChildIndex=GetTopmostIndexOf(aTagStack,*theSynTags);
    }
    else{
      // No explicit synonym list: walk the stack from the top down looking
      // for any tag in the same synonymous group. The pre-decrement leaves
      // theChildIndex at -1 when the walk finds nothing.
      theChildIndex=aTagStack.GetCount();
      PRInt32 theGroup=nsHTMLElement::GetSynonymousGroups(gHTMLElements[aChildTag].mParentBits);
      while(-1<--theChildIndex) {
        eHTMLTags theTag=aTagStack[theChildIndex];
        if(gHTMLElements[theTag].IsMemberOf(theGroup)) {
          break;
        }
      }
    }
  }
  return theChildIndex;
}

/**
 * This method is called to determine whether or not the child
 * tag is happy being OPENED in the context of the current
 * tag stack. This is only called if the current parent thinks
 * it wants to contain the given childtag.
 *
 * @param   aChildTag -- tag enum of child to be opened
 * @param   aTagStack -- ref to current tag stack in DTD.
 * @return  PR_TRUE if child agrees to be opened here.
 */
static PRBool CanBeContained(eHTMLTags aChildTag,nsEntryStack& aTagStack) {

  /* # Interesting test cases: Result:
   * 1.