/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is * Netscape Communications Corporation. * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): * * Alternatively, the contents of this file may be used under the terms of * either of the GNU General Public License Version 2 or later (the "GPL"), * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ //#define ENABLE_CRC //#define ALLOW_TR_AS_CHILD_OF_TABLE //by setting this to true, TR is allowable directly in TABLE. #define ENABLE_RESIDUALSTYLE #include "nsDebug.h" #include "nsIAtom.h" #include "CNavDTD.h" #include "nsHTMLTokens.h" #include "nsCRT.h" #include "nsParser.h" #include "nsIParser.h" #include "nsIHTMLContentSink.h" #include "nsScanner.h" #include "prenv.h" //this is here for debug reasons... #include "prtypes.h" //this is here for debug reasons... #include "prio.h" #include "plstr.h" #include "nsDTDUtils.h" #include "nsHTMLTokenizer.h" #include "nsTime.h" #include "nsParserNode.h" #include "nsHTMLEntities.h" #include "nsLinebreakConverter.h" #include "nsIFormProcessor.h" #include "nsVoidArray.h" #include "nsReadableUtils.h" #include "nsUnicharUtils.h" #include "prmem.h" #include "nsIServiceManager.h" #ifdef NS_DEBUG #include "nsLoggingSink.h" #endif static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); static NS_DEFINE_IID(kIDTDIID, NS_IDTD_IID); static NS_DEFINE_IID(kClassIID, NS_INAVHTML_DTD_IID); static NS_DEFINE_CID(kFormProcessorCID, NS_FORMPROCESSOR_CID); #ifdef DEBUG static const char kNullToken[] = "Error: Null token given"; static const char kInvalidTagStackPos[] = "Error: invalid tag stack position"; #endif #ifdef ENABLE_CRC static char gShowCRC; #endif #include "nsElementTable.h" #ifdef MOZ_PERF_METRICS # define START_TIMER() \ if(mParser) MOZ_TIMER_START(mParser->mParseTime); \ if(mParser) MOZ_TIMER_START(mParser->mDTDTime); # define STOP_TIMER() \ if(mParser) MOZ_TIMER_STOP(mParser->mParseTime); \ if(mParser) MOZ_TIMER_STOP(mParser->mDTDTime); #else # define STOP_TIMER() # define START_TIMER() #endif /************************************************************************ And now for the main class -- CNavDTD... ************************************************************************/ #define NS_DTD_FLAG_NONE 0x00000000 #define NS_DTD_FLAG_HAS_OPEN_HEAD 0x00000001 #define NS_DTD_FLAG_HAS_OPEN_BODY 0x00000002 #define NS_DTD_FLAG_HAS_OPEN_FORM 0x00000004 #define NS_DTD_FLAG_HAS_OPEN_SCRIPT 0x00000008 #define NS_DTD_FLAG_HAD_BODY 0x00000010 #define NS_DTD_FLAG_HAD_FRAMESET 0x00000020 #define NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE 0x00000040 #define NS_DTD_FLAG_SCRIPT_ENABLED 0x00000100 #define NS_DTD_FLAG_FRAMES_ENABLED 0x00000200 #define NS_DTD_FLAG_ALTERNATE_CONTENT 0x00000400 // NOFRAMES, NOSCRIPT #define NS_DTD_FLAG_MISPLACED_CONTENT 0x00000800 #define NS_DTD_FLAG_STOP_PARSING 0x00001000 /** * This method gets called as part of our COM-like interfaces. * Its purpose is to create an interface to parser object * of some type. * * @update gess 4/8/98 * @param nsIID id of object to discover * @param aInstancePtr ptr to newly discovered interface * @return NS_xxx result code */ nsresult CNavDTD::QueryInterface(const nsIID& aIID, void** aInstancePtr) { if (NULL == aInstancePtr) { return NS_ERROR_NULL_POINTER; } if(aIID.Equals(kISupportsIID)) { //do IUnknown... *aInstancePtr = (nsIDTD*)(this); } else if(aIID.Equals(kIDTDIID)) { //do IParser base class... *aInstancePtr = (nsIDTD*)(this); } else if(aIID.Equals(kClassIID)) { //do this class... *aInstancePtr = (CNavDTD*)(this); } else { *aInstancePtr=0; return NS_NOINTERFACE; } NS_ADDREF_THIS(); return NS_OK; } NS_IMPL_ADDREF(CNavDTD) NS_IMPL_RELEASE(CNavDTD) /** * Default constructor * * @update gess 4/9/98 * @param * @return */ CNavDTD::CNavDTD() : nsIDTD(), mMisplacedContent(0), mSkippedContent(0), mSink(0), mTokenAllocator(0), mTempContext(0), mParser(0), mTokenizer(0), mDTDMode(eDTDMode_quirks), mDocType(eHTML3_Quirks), // why not eHTML_Quirks? mParserCommand(eViewNormal), mSkipTarget(eHTMLTag_unknown), mLineNumber(1), mOpenMapCount(0), mFlags(NS_DTD_FLAG_NONE) #ifdef ENABLE_CRC ,mComputedCRC32(0), mExpectedCRC32(0) #endif { mBodyContext=new nsDTDContext(); } /** * * @update gess1/8/99 * @param * @return */ const nsIID& CNavDTD::GetMostDerivedIID(void)const { return kClassIID; } #ifdef NS_DEBUG nsLoggingSink* GetLoggingSink() { //these are used when you want to generate a log file for contentsink construction... static PRBool checkForPath=PR_TRUE; static nsLoggingSink *theSink=0; static const char* gLogPath=0; if(checkForPath) { // we're only going to check the environment once per session. gLogPath = /* "c:/temp/parse.log"; */ PR_GetEnv("PARSE_LOGFILE"); checkForPath=PR_FALSE; } if(gLogPath && (!theSink)) { static nsLoggingSink gLoggingSink; PRIntn theFlags = 0; // create the file exists, only open for read/write // otherwise, create it if(PR_Access(gLogPath,PR_ACCESS_EXISTS) != PR_SUCCESS) theFlags = PR_CREATE_FILE; theFlags |= PR_RDWR; // open the record file PRFileDesc *theLogFile = PR_Open(gLogPath,theFlags,0); gLoggingSink.SetOutputStream(theLogFile,PR_TRUE); theSink=&gLoggingSink; } return theSink; } #endif /** * Default destructor * * @update gess 4/9/98 * @param * @return */ CNavDTD::~CNavDTD(){ if(mBodyContext) { delete mBodyContext; mBodyContext=0; } if(mTempContext) { delete mTempContext; mTempContext=0; } #ifdef NS_DEBUG if(mSink) { nsLoggingSink *theLogSink=GetLoggingSink(); if(mSink==theLogSink) { theLogSink->ReleaseProxySink(); } } #endif NS_IF_RELEASE(mSink); } /** * Call this method if you want the DTD to construct a fresh * instance of itself. * @update gess 25May2000 * @param * @return */ nsresult CNavDTD::CreateNewInstance(nsIDTD** aInstancePtrResult) { nsresult result = NS_NewNavHTMLDTD(aInstancePtrResult); NS_ENSURE_SUCCESS(result, result); CNavDTD* dtd = NS_STATIC_CAST(CNavDTD*, *aInstancePtrResult); dtd->mDTDMode = mDTDMode; dtd->mParserCommand = mParserCommand; dtd->mDocType = mDocType; return result; } /** * This method is called to determine if the given DTD can parse * a document in a given source-type. * NOTE: Parsing always assumes that the end result will involve * storing the result in the main content model. * @param aParserContext -- the context for this document (knows * the content type, document type, parser command, etc). * @return eUnknownDetect if you don't know how to parse it, * eValidDetect if you do, but someone may have a better idea, * ePrimaryDetect if you think you know best */ NS_IMETHODIMP_(eAutoDetectResult) CNavDTD::CanParse(CParserContext& aParserContext) { NS_ASSERTION(!aParserContext.mMimeType.IsEmpty(), "How'd we get here with an unknown type?"); if (aParserContext.mParserCommand != eViewSource && aParserContext.mDocType != eXML) { // This means that we're // 1) Looking at a type the parser claimed to know how to handle (so XML // or HTML or a plaintext type) // 2) Not looking at XML // // Therefore, we want to handle this data with this DTD return ePrimaryDetect; } return eUnknownDetect; } /** * The parser uses a code sandwich to wrap the parsing process. Before * the process begins, WillBuildModel() is called. Afterwards the parser * calls DidBuildModel(). * @update rickg 03.20.2000 * @param aParserContext * @param aSink * @return error code (almost always 0) */ nsresult CNavDTD::WillBuildModel(const CParserContext& aParserContext, nsITokenizer* aTokenizer, nsIContentSink* aSink) { nsresult result=NS_OK; mFilename=aParserContext.mScanner->GetFilename(); mFlags = NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE; // residual style is always on. This will also reset the flags mLineNumber = 1; mDTDMode = aParserContext.mDTDMode; mParserCommand = aParserContext.mParserCommand; mMimeType = aParserContext.mMimeType; mDocType = aParserContext.mDocType; mSkipTarget = eHTMLTag_unknown; mTokenizer = aTokenizer; mBodyContext->SetNodeAllocator(&mNodeAllocator); if(!aParserContext.mPrevContext && aSink) { #ifdef DEBUG mBodyContext->ResetCounters(); #endif STOP_TIMER(); MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::WillBuildModel(), this=%p\n", this)); result = aSink->WillBuildModel(); MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::WillBuildModel(), this=%p\n", this)); START_TIMER(); if (NS_SUCCEEDED(result) && !mSink) { result = CallQueryInterface(aSink, &mSink); if (NS_FAILED(result)) { mFlags |= NS_DTD_FLAG_STOP_PARSING; return result; } } //let's see if the environment is set up for us to write output to //a logging sink. If so, then we'll create one, and make it the //proxy for the real sink we're given from the parser. #ifdef NS_DEBUG nsLoggingSink *theLogSink=GetLoggingSink(); if(theLogSink) { theLogSink->SetProxySink(mSink); mSink=theLogSink; } #endif if(mSink) { PRBool enabled; mSink->IsEnabled(eHTMLTag_frameset, &enabled); if(enabled) { mFlags |= NS_DTD_FLAG_FRAMES_ENABLED; } mSink->IsEnabled(eHTMLTag_script, &enabled); if(enabled) { mFlags |= NS_DTD_FLAG_SCRIPT_ENABLED; } } #ifdef ENABLE_CRC mComputedCRC32=0; mExpectedCRC32=0; #endif } return result; } /** * This is called when it's time to read as many tokens from the tokenizer * as you can. Not all tokens may make sense, so you may not be able to * read them all (until more come in later). * * @update gess5/18/98 * @param aParser is the parser object that's driving this process * @return error code (almost always NS_OK) */ nsresult CNavDTD::BuildModel(nsIParser* aParser,nsITokenizer* aTokenizer,nsITokenObserver* anObserver,nsIContentSink* aSink) { NS_PRECONDITION(mBodyContext!=nsnull,"Create a context before calling build model"); nsresult result = NS_OK; if (aTokenizer && aParser) { nsITokenizer* oldTokenizer = mTokenizer; mTokenizer = aTokenizer; mParser = (nsParser*)aParser; mTokenAllocator = mTokenizer->GetTokenAllocator(); if (mSink) { if (mBodyContext->GetCount() == 0) { CStartToken* theToken=nsnull; if(ePlainText==mDocType) { //we do this little trick for text files, in both normal and viewsource mode... theToken=NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_pre)); if(theToken) { mTokenizer->PushTokenFront(theToken); } } // always open a body if frames are disabled.... if(!(mFlags & NS_DTD_FLAG_FRAMES_ENABLED)) { theToken=NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_body,NS_LITERAL_STRING("body"))); mTokenizer->PushTokenFront(theToken); } //if the content model is empty, then begin by opening ... theToken = (CStartToken*)mTokenizer->GetTokenAt(0); if (theToken) { eHTMLTags theTag = (eHTMLTags)theToken->GetTypeID(); eHTMLTokenTypes theType = eHTMLTokenTypes(theToken->GetTokenType()); if (theTag != eHTMLTag_html || theType != eToken_start) { theToken = NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_html,NS_LITERAL_STRING("html"))); if (theToken) { mTokenizer->PushTokenFront(theToken); //this token should get pushed on the context stack. } } } else { theToken = NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_html,NS_LITERAL_STRING("html"))); if (theToken) { mTokenizer->PushTokenFront(theToken); //this token should get pushed on the context stack. } } } mSink->WillProcessTokens(); while (NS_SUCCEEDED(result)) { if (!(mFlags & NS_DTD_FLAG_STOP_PARSING)) { CToken* theToken = mTokenizer->PopToken(); if (theToken) { result = HandleToken(theToken,aParser); } else break; } else { result = NS_ERROR_HTMLPARSER_STOPPARSING; break; } if ((NS_ERROR_HTMLPARSER_INTERRUPTED == mSink->DidProcessAToken())) { // The content sink has requested that DTD interrupt processing tokens // So we need to make sure the parser is in a state where it can be // interrupted. // The mParser->CanInterrupt will return TRUE if BuildModel was called // from a place in the parser where it prepared to handle a return value of // NS_ERROR_HTMLPARSER_INTERRUPTED. // If the parser has mPrevContext then it may be processing // Script so we should not allow it to be interrupted. if ((mParser->CanInterrupt()) && (nsnull == mParser->PeekContext()->mPrevContext) && (eHTMLTag_unknown==mSkipTarget)) { result = NS_ERROR_HTMLPARSER_INTERRUPTED; break; } } }//while mTokenizer = oldTokenizer; } else { result = mFlags & NS_DTD_FLAG_STOP_PARSING ? NS_ERROR_HTMLPARSER_STOPPARSING : result; } } return result; } /** * @param aTarget - Tag that was neglected in the document. * @param aType - Specifies the type of the target. Ex. start, end, text, etc. * @param aParser - Parser to drive this process * @param aSink - HTML Content sink */ nsresult CNavDTD::BuildNeglectedTarget(eHTMLTags aTarget, eHTMLTokenTypes aType, nsIParser* aParser, nsIContentSink* aSink) { NS_ASSERTION(mTokenizer, "tokenizer is null! unable to build target."); NS_ASSERTION(mTokenAllocator, "unable to create tokens without an allocator."); if (!mTokenizer || !mTokenAllocator) return NS_OK; if (eHTMLTag_unknown != mSkipTarget && eHTMLTag_title == aTarget) { PRInt32 size = mSkippedContent.GetSize(); // Note: The first location of the skipped content // deque contains the opened-skip-target. Do not include // that when guessing title contents. The term "guessing" // is used because the document did not contain an end title // and hence it's almost impossible to know what markup // should belong in the title. The assumption used here is that // if the markup is anything other than "text", or "entity" or, // "whitespace" then it's least likely to belong in the title. PRInt32 index; for (index = 1; index < size; index++) { CHTMLToken* token = NS_REINTERPRET_CAST(CHTMLToken*, mSkippedContent.ObjectAt(index)); NS_ASSERTION(token, "there is a null token in the skipped content list!"); eHTMLTokenTypes type = eHTMLTokenTypes(token->GetTokenType()); if (eToken_whitespace != type && eToken_newline != type && eToken_text != type && eToken_entity != type && eToken_attribute != type) { // Now pop the tokens that do not belong ( just a guess work ) // in the title and push them into the tokens queue. while (size != index++) { token = NS_REINTERPRET_CAST(CHTMLToken*, mSkippedContent.Pop()); mTokenizer->PushTokenFront(token); } break; } } } CHTMLToken* target = NS_STATIC_CAST(CHTMLToken*, mTokenAllocator->CreateTokenOfType(aType, aTarget)); mTokenizer->PushTokenFront(target); return BuildModel(aParser, mTokenizer, 0, aSink); } /** * * @update gess5/18/98 * @param * @return */ nsresult CNavDTD::DidBuildModel(nsresult anErrorCode, PRBool aNotifySink, nsIParser* aParser, nsIContentSink* aSink) { if (!aSink) return NS_OK; nsresult result = NS_OK; if (aParser && aNotifySink) { if (NS_OK == anErrorCode) { if (eHTMLTag_unknown != mSkipTarget) { // Looks like there is an open target ( ex.