328 lines
11 KiB
C++
328 lines
11 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/*
|
|
* The contents of this file are subject to the Netscape Public
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code is mozilla.org code.
|
|
*
|
|
* The Initial Developer of the Original Code is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All
|
|
* Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*/
|
|
|
|
|
|
#include "CParserContext.h"
|
|
#include "nsToken.h"
|
|
#include "prenv.h"
|
|
|
|
MOZ_DECL_CTOR_COUNTER(CParserContext);
|
|
|
|
/**
|
|
* Your friendly little constructor. Ok, it's not the friendly, but the only guy
|
|
* using it is the parser.
|
|
* @update gess7/23/98
|
|
* @param aScanner
|
|
* @param aKey
|
|
* @param aListener
|
|
*/
|
|
CParserContext::CParserContext(nsScanner* aScanner,
|
|
void *aKey,
|
|
eParserCommands aCommand,
|
|
nsIStreamObserver* aListener,
|
|
nsIDTD *aDTD,
|
|
eAutoDetectResult aStatus,
|
|
PRBool aCopyUnused)
|
|
{
|
|
MOZ_COUNT_CTOR(CParserContext);
|
|
|
|
mScanner=aScanner;
|
|
mKey=aKey;
|
|
mPrevContext=0;
|
|
mListener=aListener;
|
|
NS_IF_ADDREF(mListener);
|
|
mParseMode=eParseMode_unknown;
|
|
mAutoDetectStatus=aStatus;
|
|
mTransferBuffer=0;
|
|
mDTD=aDTD;
|
|
NS_IF_ADDREF(mDTD);
|
|
mTransferBufferSize=eTransferBufferSize;
|
|
mParserEnabled=PR_TRUE;
|
|
mStreamListenerState=eNone;
|
|
mMultipart=PR_TRUE;
|
|
mContextType=eCTNone;
|
|
mCopyUnused=aCopyUnused;
|
|
mParserCommand=aCommand;
|
|
mChannel=0;
|
|
}
|
|
|
|
/**
|
|
* Your friendly little constructor. Ok, it's not the friendly, but the only guy
|
|
* using it is the parser.
|
|
* @update gess7/23/98
|
|
* @param aScanner
|
|
* @param aKey
|
|
* @param aListener
|
|
*/
|
|
CParserContext::CParserContext(const CParserContext &aContext) : mMimeType() {
|
|
MOZ_COUNT_CTOR(CParserContext);
|
|
|
|
mScanner=aContext.mScanner;
|
|
mKey=aContext.mKey;
|
|
mPrevContext=0;
|
|
mListener=aContext.mListener;
|
|
NS_IF_ADDREF(mListener);
|
|
|
|
mParseMode=aContext.mParseMode;
|
|
mAutoDetectStatus=aContext.mAutoDetectStatus;
|
|
mTransferBuffer=aContext.mTransferBuffer;
|
|
mDTD=aContext.mDTD;
|
|
NS_IF_ADDREF(mDTD);
|
|
|
|
mTransferBufferSize=eTransferBufferSize;
|
|
mParserEnabled=aContext.mParserEnabled;
|
|
mStreamListenerState=aContext.mStreamListenerState;
|
|
mMultipart=aContext.mMultipart;
|
|
mContextType=aContext.mContextType;
|
|
mChannel=aContext.mChannel;
|
|
mParserCommand=aContext.mParserCommand;
|
|
SetMimeType(aContext.mMimeType);
|
|
}
|
|
|
|
|
|
/**
|
|
* Destructor for parser context
|
|
* NOTE: DO NOT destroy the dtd here.
|
|
* @update gess7/11/98
|
|
*/
|
|
CParserContext::~CParserContext(){
|
|
|
|
MOZ_COUNT_DTOR(CParserContext);
|
|
|
|
if(mScanner)
|
|
delete mScanner;
|
|
|
|
if(mTransferBuffer)
|
|
delete [] mTransferBuffer;
|
|
|
|
NS_IF_RELEASE(mDTD);
|
|
|
|
//Remember that it's ok to simply ingore the PrevContext.
|
|
|
|
}
|
|
|
|
|
|
/**
|
|
* Set's the mimetype for this context
|
|
* @update rickg 03.18.2000
|
|
*/
|
|
void CParserContext::SetMimeType(const nsString& aMimeType){
|
|
mMimeType.Assign(aMimeType);
|
|
|
|
mDocType=ePlainText;
|
|
|
|
if(mMimeType.EqualsWithConversion(kHTMLTextContentType))
|
|
mDocType=eHTML4Text;
|
|
else if(mMimeType.EqualsWithConversion(kXMLTextContentType))
|
|
mDocType=eXMLText;
|
|
else if(mMimeType.EqualsWithConversion(kXULTextContentType))
|
|
mDocType=eXMLText;
|
|
else if(mMimeType.EqualsWithConversion(kRDFTextContentType))
|
|
mDocType=eXMLText;
|
|
else if(mMimeType.EqualsWithConversion(kXIFTextContentType))
|
|
mDocType=eXMLText;
|
|
}
|
|
|
|
/*************************************************************************************************
|
|
First, let's define our modalities:
|
|
|
|
1. compatibility-mode: behave as much like nav4 as possible (unless it's too broken to bother)
|
|
2. standard-mode: do html as well as you can per spec, and throw out navigator quirks
|
|
3. strict-mode: adhere to the strict DTD specification to the highest degree possible
|
|
|
|
Assume the doctype is in the following form:
|
|
<!DOCTYPE [Top Level Element] [Availability] "[Registration]// [Owner-ID] // [Type] [desc-text] // [Language]" "URI|text-identifier">
|
|
[HTML] [PUBLIC|...] [+|-] [W3C|IETF|...] [DTD] "..." [EN]|...] "..."
|
|
|
|
|
|
Here are the new rules for DTD handling; comments welcome:
|
|
|
|
XHTML and XML documents are always strict-mode:
|
|
example: <!DOCTYPE \"-//W3C//DTD XHTML 1.0 Strict//EN\">
|
|
|
|
HTML strict dtd's enable strict-mode:
|
|
example: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN">
|
|
example: <!DOCTYPE \"ISO/IEC 15445:1999//DTD HTML//EN\">
|
|
|
|
HTML 4.0 (or greater) transitional, frameset, (etc), without URI enables compatibility-mode:
|
|
example: <!DOCTYPE \"-//W3C//DTD HTML 4.01 Transitional//EN\">
|
|
|
|
HTML 4.0 (or greater) transitional, frameset, (etc), with a URI that points to the strict.dtd will become strict:
|
|
example: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
|
|
"http://www.w3.org/TR/REC-html40/strict.dtd">
|
|
|
|
doctypes with systemID's or internal subset are handled in strict-mode:
|
|
example: <!DOCTYPE HTML PUBLIC PublicID SystemID>
|
|
example: <!DOCTYPE HTML SYSTEM SystemID>
|
|
example: <!DOCTYPE HTML (PUBLIC PublicID SystemID? | SYSTEM SystemID) [ Internal-SS ]>
|
|
|
|
All other doctypes (<4.0), and documents without a doctype are handled in compatibility-mode.
|
|
|
|
*****************************************************************************************************/
|
|
|
|
/**
|
|
* This is called when it's time to find out
|
|
* what mode the parser/DTD should run for this document.
|
|
* (Each parsercontext can have it's own mode).
|
|
*
|
|
* @update gess 02/17/00
|
|
* @return parsermode (define in nsIParser.h)
|
|
*/
|
|
eParseMode CParserContext::DetermineParseMode(const nsString& theBuffer) {
|
|
const char* theModeStr= PR_GetEnv("PARSE_MODE");
|
|
|
|
mParseMode = eParseMode_unknown;
|
|
|
|
PRInt32 theIndex=theBuffer.Find("DOCTYPE",PR_TRUE,0,10);
|
|
if(kNotFound<theIndex) {
|
|
|
|
//good, we found "DOCTYPE" -- now go find it's end delimiter '>'
|
|
PRInt32 theGTPos=theBuffer.FindChar(kGreaterThan,theIndex+1);
|
|
PRInt32 theEnd=(kNotFound==theGTPos) ? 512 : MinInt(512,theGTPos);
|
|
PRInt32 theSubIndex=theBuffer.Find("//DTD",PR_TRUE,theIndex+8,theEnd-(theIndex+8)); //skip to the type and desc-text...
|
|
PRInt32 theErr=0;
|
|
PRInt32 theMajorVersion=3;
|
|
|
|
//note that if we don't find '>', then we just scan the first 512 bytes.
|
|
|
|
if(0<=theSubIndex) {
|
|
PRInt32 theStartPos=theSubIndex+5;
|
|
PRInt32 theCount=theEnd-theStartPos;
|
|
|
|
if(kNotFound<theSubIndex) {
|
|
|
|
theSubIndex=theBuffer.Find("XHTML",PR_TRUE,theStartPos,theCount);
|
|
if(0<=theSubIndex) {
|
|
mDocType=eXHTMLText;
|
|
mParseMode=eParseMode_strict;
|
|
return mParseMode;
|
|
}
|
|
else {
|
|
theSubIndex=theBuffer.Find("ISO/IEC 15445:",PR_TRUE,theIndex+8,theEnd-(theIndex+8));
|
|
if(0<=theSubIndex) {
|
|
mDocType=eHTML4Text;
|
|
mParseMode=eParseMode_strict;
|
|
theMajorVersion=4;
|
|
theSubIndex+=15;
|
|
}
|
|
else {
|
|
theSubIndex=theBuffer.Find("HTML",PR_TRUE,theStartPos,theCount);
|
|
if(0<=theSubIndex) {
|
|
mDocType=eHTML4Text;
|
|
mParseMode=eParseMode_strict;
|
|
theMajorVersion=3;
|
|
}
|
|
else {
|
|
theSubIndex=theBuffer.Find("HYPERTEXT MARKUP",PR_TRUE,theStartPos,theCount);
|
|
if(0<=theSubIndex) {
|
|
mDocType=eHTML3Text;
|
|
mParseMode=eParseMode_quirks;
|
|
theSubIndex+=20;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
theStartPos=theSubIndex+5;
|
|
theCount=theEnd-theStartPos;
|
|
nsAutoString theNum;
|
|
|
|
//get the next substring from the buffer, which should be a number.
|
|
//now see what the version number is...
|
|
|
|
theStartPos=theBuffer.FindCharInSet("123456789",theStartPos);
|
|
if(0<=theStartPos) {
|
|
PRInt32 theTerminal=theBuffer.FindCharInSet(" />",theStartPos+1);
|
|
if(theTerminal) {
|
|
theBuffer.Mid(theNum,theStartPos,theTerminal-theStartPos);
|
|
}
|
|
else theBuffer.Mid(theNum,theStartPos,3);
|
|
theMajorVersion=theNum.ToInteger(&theErr);
|
|
}
|
|
|
|
//now see what the
|
|
theStartPos+=theNum.Length();
|
|
theCount=theEnd-theStartPos;
|
|
if((theBuffer.Find("TRANSITIONAL",PR_TRUE,theStartPos,theCount)>kNotFound)||
|
|
(theBuffer.Find("LOOSE",PR_TRUE,theStartPos,theCount)>kNotFound) ||
|
|
(theBuffer.Find("FRAMESET",PR_TRUE,theStartPos,theCount)>kNotFound) ||
|
|
(theBuffer.Find("LATIN1", PR_TRUE,theStartPos,theCount) >kNotFound) ||
|
|
(theBuffer.Find("SYMBOLS",PR_TRUE,theStartPos,theCount) >kNotFound) ||
|
|
(theBuffer.Find("SPECIAL",PR_TRUE,theStartPos,theCount) >kNotFound)) {
|
|
mParseMode=eParseMode_quirks;
|
|
}
|
|
|
|
//one last thing: look for a URI that specifies the strict.dtd
|
|
theStartPos+=6;
|
|
theCount=theEnd-theStartPos;
|
|
theSubIndex=theBuffer.Find("STRICT.DTD",PR_TRUE,theStartPos,theCount);
|
|
if(0<theSubIndex) {
|
|
//Since we found it, regardless of what's in the descr-text, kick into strict mode.
|
|
mParseMode=eParseMode_strict;
|
|
mDocType=eHTML4Text;
|
|
}
|
|
|
|
if (0==theErr){
|
|
switch(theMajorVersion) {
|
|
case 0: case 1: case 2: case 3:
|
|
if(mDocType!=eXHTMLText){
|
|
mParseMode=eParseMode_quirks; //be as backward compatible as possible
|
|
mDocType=eHTML3Text;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
//XXX hack -- someday, the next line of code will be criticized
|
|
//for it's lack of vision...
|
|
if(theMajorVersion>20) {
|
|
mParseMode=eParseMode_noquirks;
|
|
}
|
|
break;
|
|
} //switch
|
|
}
|
|
|
|
} //if
|
|
else {
|
|
PRInt32 thePos=theBuffer.Find("HTML",PR_TRUE,1,50);
|
|
if(kNotFound!=thePos) {
|
|
mDocType=eHTML4Text;
|
|
PRInt32 theIDPos=theBuffer.Find("PublicID",thePos);
|
|
if(kNotFound==theIDPos)
|
|
theIDPos=theBuffer.Find("SystemID",thePos);
|
|
mParseMode=(kNotFound==theIDPos) ? eParseMode_quirks : eParseMode_strict;
|
|
}
|
|
}
|
|
}
|
|
else if(kNotFound<(theIndex=theBuffer.Find("?XML",PR_TRUE,0,128))) {
|
|
mParseMode=eParseMode_strict;
|
|
}
|
|
|
|
if(theModeStr) {
|
|
if(0==nsCRT::strcasecmp(theModeStr,"strict"))
|
|
mParseMode=eParseMode_strict;
|
|
}
|
|
else mParseMode = (eParseMode_unknown==mParseMode)? eParseMode_quirks : mParseMode;
|
|
|
|
return mParseMode;
|
|
}
|