/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set sw=2 ts=2 et tw=80: */ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is * Netscape Communications Corporation. * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): * * Alternatively, the contents of this file may be used under the terms of * either of the GNU General Public License Version 2 or later (the "GPL"), * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ //#define ALLOW_TR_AS_CHILD_OF_TABLE //by setting this to true, TR is allowable directly in TABLE. 
#define ENABLE_RESIDUALSTYLE #include "nsDebug.h" #include "nsIAtom.h" #include "CNavDTD.h" #include "nsHTMLTokens.h" #include "nsCRT.h" #include "nsParser.h" #include "nsIParser.h" #include "nsIHTMLContentSink.h" #include "nsScanner.h" #include "prenv.h" //this is here for debug reasons... #include "prtypes.h" //this is here for debug reasons... #include "prio.h" #include "plstr.h" #include "nsDTDUtils.h" #include "nsHTMLTokenizer.h" #include "nsTime.h" #include "nsParserNode.h" #include "nsHTMLEntities.h" #include "nsLinebreakConverter.h" #include "nsIFormProcessor.h" #include "nsVoidArray.h" #include "nsReadableUtils.h" #include "nsUnicharUtils.h" #include "prmem.h" #include "nsIServiceManager.h" #ifdef NS_DEBUG #include "nsLoggingSink.h" #endif /* * Ignore kFontStyle and kPhrase tags when the stack is deep, bug 58917. */ #define FONTSTYLE_IGNORE_DEPTH (MAX_REFLOW_DEPTH*80/100) #define PHRASE_IGNORE_DEPTH (MAX_REFLOW_DEPTH*90/100) static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); static NS_DEFINE_IID(kIDTDIID, NS_IDTD_IID); static NS_DEFINE_IID(kClassIID, NS_INAVHTML_DTD_IID); static NS_DEFINE_CID(kFormProcessorCID, NS_FORMPROCESSOR_CID); #ifdef DEBUG static const char kNullToken[] = "Error: Null token given"; static const char kInvalidTagStackPos[] = "Error: invalid tag stack position"; #endif #include "nsElementTable.h" #ifdef MOZ_PERF_METRICS # define START_TIMER() \ if(mParser) MOZ_TIMER_START(mParser->mParseTime); \ if(mParser) MOZ_TIMER_START(mParser->mDTDTime); # define STOP_TIMER() \ if(mParser) MOZ_TIMER_STOP(mParser->mParseTime); \ if(mParser) MOZ_TIMER_STOP(mParser->mDTDTime); #else # define STOP_TIMER() # define START_TIMER() #endif /************************************************************************ And now for the main class -- CNavDTD... 
************************************************************************/ #define NS_DTD_FLAG_NONE 0x00000000 #define NS_DTD_FLAG_HAS_OPEN_HEAD 0x00000001 #define NS_DTD_FLAG_HAS_OPEN_BODY 0x00000002 #define NS_DTD_FLAG_HAS_OPEN_FORM 0x00000004 #define NS_DTD_FLAG_HAS_OPEN_SCRIPT 0x00000008 #define NS_DTD_FLAG_HAD_BODY 0x00000010 #define NS_DTD_FLAG_HAD_FRAMESET 0x00000020 #define NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE 0x00000040 #define NS_DTD_FLAG_ALTERNATE_CONTENT 0x00000080 // NOFRAMES, NOSCRIPT #define NS_DTD_FLAG_MISPLACED_CONTENT 0x00000100 #define NS_DTD_FLAG_IN_MISPLACED_CONTENT 0x00000200 #define NS_DTD_FLAG_STOP_PARSING 0x00000400 /** * This method gets called as part of our COM-like interfaces. * Its purpose is to create an interface to parser object * of some type. * * @update gess 4/8/98 * @param nsIID id of object to discover * @param aInstancePtr ptr to newly discovered interface * @return NS_xxx result code */ nsresult CNavDTD::QueryInterface(const nsIID& aIID, void** aInstancePtr) { if (NULL == aInstancePtr) { return NS_ERROR_NULL_POINTER; } if(aIID.Equals(kISupportsIID)) { //do IUnknown... *aInstancePtr = (nsIDTD*)(this); } else if(aIID.Equals(kIDTDIID)) { //do IParser base class... *aInstancePtr = (nsIDTD*)(this); } else if(aIID.Equals(kClassIID)) { //do this class... *aInstancePtr = (CNavDTD*)(this); } else { *aInstancePtr=0; return NS_NOINTERFACE; } NS_ADDREF_THIS(); return NS_OK; } NS_IMPL_ADDREF(CNavDTD) NS_IMPL_RELEASE(CNavDTD) /** * Default constructor * * @update gess 4/9/98 * @param * @return */ CNavDTD::CNavDTD() : nsIDTD(), mMisplacedContent(0), mSkippedContent(0), mSink(0), mTokenAllocator(0), mTempContext(0), mParser(0), mTokenizer(0), mDTDMode(eDTDMode_quirks), mDocType(eHTML3_Quirks), // why not eHTML_Quirks? 
mParserCommand(eViewNormal), mSkipTarget(eHTMLTag_unknown), mLineNumber(1), mOpenMapCount(0), mFlags(NS_DTD_FLAG_NONE) { mBodyContext=new nsDTDContext(); } /** * * @update gess1/8/99 * @param * @return */ const nsIID& CNavDTD::GetMostDerivedIID(void)const { return kClassIID; } #ifdef NS_DEBUG nsLoggingSink* GetLoggingSink() { //these are used when you want to generate a log file for contentsink construction... static PRBool checkForPath=PR_TRUE; static nsLoggingSink *theSink=0; static const char* gLogPath=0; if(checkForPath) { // we're only going to check the environment once per session. gLogPath = /* "c:/temp/parse.log"; */ PR_GetEnv("PARSE_LOGFILE"); checkForPath=PR_FALSE; } if(gLogPath && (!theSink)) { static nsLoggingSink gLoggingSink; PRIntn theFlags = 0; // create the file exists, only open for read/write // otherwise, create it if(PR_Access(gLogPath,PR_ACCESS_EXISTS) != PR_SUCCESS) theFlags = PR_CREATE_FILE; theFlags |= PR_RDWR; // open the record file PRFileDesc *theLogFile = PR_Open(gLogPath,theFlags,0); gLoggingSink.SetOutputStream(theLogFile,PR_TRUE); theSink=&gLoggingSink; } return theSink; } #endif /** * Default destructor * * @update gess 4/9/98 * @param * @return */ CNavDTD::~CNavDTD(){ if(mBodyContext) { delete mBodyContext; mBodyContext=0; } if(mTempContext) { delete mTempContext; mTempContext=0; } #ifdef NS_DEBUG if(mSink) { nsLoggingSink *theLogSink=GetLoggingSink(); if(mSink==theLogSink) { theLogSink->ReleaseProxySink(); } } #endif NS_IF_RELEASE(mSink); } /** * Call this method if you want the DTD to construct a fresh * instance of itself. 
* @update gess 25May2000 * @param * @return */ nsresult CNavDTD::CreateNewInstance(nsIDTD** aInstancePtrResult) { nsresult result = NS_NewNavHTMLDTD(aInstancePtrResult); NS_ENSURE_SUCCESS(result, result); CNavDTD* dtd = NS_STATIC_CAST(CNavDTD*, *aInstancePtrResult); dtd->mDTDMode = mDTDMode; dtd->mParserCommand = mParserCommand; dtd->mDocType = mDocType; return result; } /** * This method is called to determine if the given DTD can parse * a document in a given source-type. * NOTE: Parsing always assumes that the end result will involve * storing the result in the main content model. * @param aParserContext -- the context for this document (knows * the content type, document type, parser command, etc). * @return eUnknownDetect if you don't know how to parse it, * eValidDetect if you do, but someone may have a better idea, * ePrimaryDetect if you think you know best */ NS_IMETHODIMP_(eAutoDetectResult) CNavDTD::CanParse(CParserContext& aParserContext) { NS_ASSERTION(!aParserContext.mMimeType.IsEmpty(), "How'd we get here with an unknown type?"); if (aParserContext.mParserCommand != eViewSource && aParserContext.mDocType != eXML) { // This means that we're // 1) Looking at a type the parser claimed to know how to handle (so XML // or HTML or a plaintext type) // 2) Not looking at XML // // Therefore, we want to handle this data with this DTD return ePrimaryDetect; } return eUnknownDetect; } /** * The parser uses a code sandwich to wrap the parsing process. Before * the process begins, WillBuildModel() is called. Afterwards the parser * calls DidBuildModel(). * @update rickg 03.20.2000 * @param aParserContext * @param aSink * @return error code (almost always 0) */ nsresult CNavDTD::WillBuildModel(const CParserContext& aParserContext, nsITokenizer* aTokenizer, nsIContentSink* aSink) { nsresult result=NS_OK; mFilename=aParserContext.mScanner->GetFilename(); mFlags = NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE; // residual style is always on. 
This will also reset the flags mLineNumber = 1; mDTDMode = aParserContext.mDTDMode; mParserCommand = aParserContext.mParserCommand; mMimeType = aParserContext.mMimeType; mDocType = aParserContext.mDocType; mSkipTarget = eHTMLTag_unknown; mTokenizer = aTokenizer; mBodyContext->SetNodeAllocator(&mNodeAllocator); if(!aParserContext.mPrevContext && aSink) { STOP_TIMER(); MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::WillBuildModel(), this=%p\n", this)); result = aSink->WillBuildModel(); MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::WillBuildModel(), this=%p\n", this)); START_TIMER(); if (NS_SUCCEEDED(result) && !mSink) { result = CallQueryInterface(aSink, &mSink); if (NS_FAILED(result)) { mFlags |= NS_DTD_FLAG_STOP_PARSING; return result; } } //let's see if the environment is set up for us to write output to //a logging sink. If so, then we'll create one, and make it the //proxy for the real sink we're given from the parser. #ifdef NS_DEBUG nsLoggingSink *theLogSink=GetLoggingSink(); if(theLogSink) { theLogSink->SetProxySink(mSink); mSink=theLogSink; } #endif if(mSink) { PRBool enabled = PR_TRUE; mSink->IsEnabled(eHTMLTag_frameset, &enabled); if(enabled) { mFlags |= NS_IPARSER_FLAG_FRAMES_ENABLED; } mSink->IsEnabled(eHTMLTag_script, &enabled); if(enabled) { mFlags |= NS_IPARSER_FLAG_SCRIPT_ENABLED; } } } return result; } /** * This is called when it's time to read as many tokens from the tokenizer * as you can. Not all tokens may make sense, so you may not be able to * read them all (until more come in later). 
* * @update gess5/18/98 * @param aParser is the parser object that's driving this process * @return error code (almost always NS_OK) */ nsresult CNavDTD::BuildModel(nsIParser* aParser,nsITokenizer* aTokenizer,nsITokenObserver* anObserver,nsIContentSink* aSink) { NS_PRECONDITION(mBodyContext!=nsnull,"Create a context before calling build model"); nsresult result = NS_OK; if (aTokenizer && aParser) { nsITokenizer* oldTokenizer = mTokenizer; mTokenizer = aTokenizer; mParser = (nsParser*)aParser; mTokenAllocator = mTokenizer->GetTokenAllocator(); if (mSink) { if (mBodyContext->GetCount() == 0) { CStartToken* theToken=nsnull; if(ePlainText==mDocType) { //we do this little trick for text files, in both normal and viewsource mode... theToken=NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_pre)); if(theToken) { mTokenizer->PushTokenFront(theToken); } } // always open a body if frames are disabled.... if(!(mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) { theToken=NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_body,NS_LITERAL_STRING("body"))); mTokenizer->PushTokenFront(theToken); } //if the content model is empty, then begin by opening ... theToken = (CStartToken*)mTokenizer->GetTokenAt(0); if (theToken) { eHTMLTags theTag = (eHTMLTags)theToken->GetTypeID(); eHTMLTokenTypes theType = eHTMLTokenTypes(theToken->GetTokenType()); if (theTag != eHTMLTag_html || theType != eToken_start) { theToken = NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_html,NS_LITERAL_STRING("html"))); if (theToken) { mTokenizer->PushTokenFront(theToken); //this token should get pushed on the context stack. } } } else { theToken = NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_html,NS_LITERAL_STRING("html"))); if (theToken) { mTokenizer->PushTokenFront(theToken); //this token should get pushed on the context stack. 
} } } mSink->WillProcessTokens(); while (NS_SUCCEEDED(result)) { if (!(mFlags & NS_DTD_FLAG_STOP_PARSING)) { CToken* theToken = mTokenizer->PopToken(); if (theToken) { result = HandleToken(theToken,aParser); } else break; } else { result = NS_ERROR_HTMLPARSER_STOPPARSING; break; } if ((NS_ERROR_HTMLPARSER_INTERRUPTED == mSink->DidProcessAToken())) { // The content sink has requested that DTD interrupt processing tokens // So we need to make sure the parser is in a state where it can be // interrupted. // The mParser->CanInterrupt will return TRUE if BuildModel was called // from a place in the parser where it prepared to handle a return value of // NS_ERROR_HTMLPARSER_INTERRUPTED. // If the parser has mPrevContext then it may be processing // Script so we should not allow it to be interrupted. // We also need to make sure that an interruption does not override // a request to block the parser. if ((mParser->CanInterrupt()) && (nsnull == mParser->PeekContext()->mPrevContext) && (eHTMLTag_unknown==mSkipTarget) && NS_SUCCEEDED(result)) { result = NS_ERROR_HTMLPARSER_INTERRUPTED; break; } } }//while mTokenizer = oldTokenizer; } else { result = mFlags & NS_DTD_FLAG_STOP_PARSING ? NS_ERROR_HTMLPARSER_STOPPARSING : result; } } return result; } /** * @param aTarget - Tag that was neglected in the document. * @param aType - Specifies the type of the target. Ex. start, end, text, etc. * @param aParser - Parser to drive this process * @param aSink - HTML Content sink */ nsresult CNavDTD::BuildNeglectedTarget(eHTMLTags aTarget, eHTMLTokenTypes aType, nsIParser* aParser, nsIContentSink* aSink) { NS_ASSERTION(mTokenizer, "tokenizer is null! 
unable to build target."); NS_ASSERTION(mTokenAllocator, "unable to create tokens without an allocator."); if (!mTokenizer || !mTokenAllocator) return NS_OK; CHTMLToken* target = NS_STATIC_CAST(CHTMLToken*, mTokenAllocator->CreateTokenOfType(aType, aTarget)); mTokenizer->PushTokenFront(target); return BuildModel(aParser, mTokenizer, 0, aSink); } /** * * @update gess5/18/98 * @param * @return */ nsresult CNavDTD::DidBuildModel(nsresult anErrorCode, PRBool aNotifySink, nsIParser* aParser, nsIContentSink* aSink) { if (!aSink) return NS_OK; nsresult result = NS_OK; if (aParser && aNotifySink) { if (NS_OK == anErrorCode) { if (eHTMLTag_unknown != mSkipTarget) { // Looks like there is an open target ( ex.
tag
and text in the head.) To move content, we throw it onto the misplacedcontent
deque until we can deal with it.
---------------------------------------------------------------------------------
*/
if(!execSkipContent) {
switch(theTag) {
case eHTMLTag_html:
case eHTMLTag_noframes:
case eHTMLTag_noscript:
case eHTMLTag_script:
case eHTMLTag_doctypeDecl:
case eHTMLTag_instruction:
break;
default:
if(!gHTMLElements[eHTMLTag_html].SectionContains(theTag,PR_FALSE)) {
if(!(mFlags & (NS_DTD_FLAG_HAD_BODY |
NS_DTD_FLAG_HAD_FRAMESET |
NS_DTD_FLAG_ALTERNATE_CONTENT))) {
//For bug examples from this code, see bugs: 18928, 20989.
//At this point we know the body/frameset aren't open.
//If the child belongs in the head, then handle it (which may open the head);
//otherwise, push it onto the misplaced stack.
PRBool isExclusive=PR_FALSE;
PRBool theChildBelongsInHead=gHTMLElements[eHTMLTag_head].IsChildOfHead(theTag,isExclusive);
if(theChildBelongsInHead && !isExclusive) {
if (mMisplacedContent.GetSize() == 0) {
// This tag can either be in the body or the head. Since
// there is no indication that the body should be open,
// put this token in the head.
break;
}
// Otherwise, we have received some indication that the body is
// "open", so push this token onto the misplaced content stack.
theChildBelongsInHead = PR_FALSE;
}
if(!theChildBelongsInHead) {
//If you're here then we found a child of the body that was out of place.
//We're going to move it to the body by storing it temporarily on the misplaced stack.
//However, in quirks mode, a few tags ambiguously request a BODY. - Bugs 18928, 24204.-
PushIntoMisplacedStack(aToken);
if (IsAlternateTag(theTag)) {
// These tags' contents are consumed as CDATA. If we simply
// pushed them on the misplaced content stack, the CDATA
// contents would force us to open a body, which could be
// wrong. So we collect the whole tag as misplaced in one
// gulp. Note that the tokenizer guarantees that there will
// be an end tag.
CToken *current = aToken;
while (current->GetTokenType() != eToken_end ||
current->GetTypeID() != theTag) {
current = NS_STATIC_CAST(CToken *, mTokenizer->PopToken());
NS_ASSERTION(current, "The tokenizer is not creating good "
"alternate tags");
PushIntoMisplacedStack(current);
}
// XXX Add code to also collect incorrect attributes on the
// end tag.
}
if(DoesRequireBody(aToken,mTokenizer)) {
CToken* theBodyToken=NS_STATIC_CAST(CToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_body,NS_LITERAL_STRING("body")));
result=HandleToken(theBodyToken,aParser);
}
return result;
}
} //if
} //if
}//switch
} //if
if(theToken){
//Before dealing with the token normally, we need to deal with skip targets
CStartToken* theStartToken=NS_STATIC_CAST(CStartToken*,aToken);
if((!execSkipContent) &&
(theType!=eToken_end) &&
(eHTMLTag_unknown==mSkipTarget) &&
(gHTMLElements[theTag].mSkipTarget) &&
(!theStartToken->IsEmpty())) { // added empty token check for bug 44186
//create a new target
NS_ASSERTION(mSkippedContent.GetSize() == 0, "all the skipped content tokens did not get handled");
mSkippedContent.Empty();
mSkipTarget=gHTMLElements[theTag].mSkipTarget;
mSkippedContent.Push(theToken);
}
else {
mParser=(nsParser*)aParser;
switch(theType) {
case eToken_text:
case eToken_start:
case eToken_whitespace:
case eToken_newline:
result=HandleStartToken(theToken); break;
case eToken_end:
result=HandleEndToken(theToken); break;
case eToken_cdatasection:
case eToken_comment:
case eToken_markupDecl:
result=HandleCommentToken(theToken); break;
case eToken_entity:
result=HandleEntityToken(theToken); break;
case eToken_attribute:
result=HandleAttributeToken(theToken); break;
case eToken_instruction:
result=HandleProcessingInstructionToken(theToken); break;
case eToken_doctypeDecl:
result=HandleDocTypeDeclToken(theToken); break;
default:
break;
}//switch
if(NS_SUCCEEDED(result) || (NS_ERROR_HTMLPARSER_BLOCK==result)) {
IF_FREE(theToken, mTokenAllocator);
}
else if(result==NS_ERROR_HTMLPARSER_STOPPARSING) {
mFlags |= NS_DTD_FLAG_STOP_PARSING;
}
else {
return NS_OK;
}
}
}
}//if
return result;
}
/**
* This gets called after we've handled a given start tag.
* It's a generic hook to let us do post processing.
* @param aNode contains the tag in question
* @param aChildTag is the tag itself.
* @return status
*/
nsresult CNavDTD::DidHandleStartTag(nsIParserNode& aNode,eHTMLTags aChildTag){
nsresult result=NS_OK;
switch(aChildTag){
case eHTMLTag_pre:
case eHTMLTag_listing:
{
CToken* theNextToken=mTokenizer->PeekToken();
if(theNextToken) {
eHTMLTokenTypes theType=eHTMLTokenTypes(theNextToken->GetTokenType());
if(eToken_newline==theType){
mLineNumber += theNextToken->GetNewlineCount();
theNextToken=mTokenizer->PopToken(); //skip 1st newline inside PRE and LISTING
IF_FREE(theNextToken, mTokenAllocator); // fix for Bug 29379
}//if
}//if
}
break;
default:
break;
}//switch
//handle