A wad of changes to the parser. See my update
git-svn-id: svn://10.0.0.236/trunk@17433 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
@@ -29,11 +29,12 @@
|
||||
#include "nsCRT.h"
|
||||
#include "nsParser.h"
|
||||
#include "nsScanner.h"
|
||||
#include "nsParserTypes.h"
|
||||
#include "nsIParser.h"
|
||||
#include "nsTokenHandler.h"
|
||||
#include "nsDTDUtils.h"
|
||||
#include "nsIContentSink.h"
|
||||
#include "nsIHTMLContentSink.h"
|
||||
#include "nsHTMLTokenizer.h"
|
||||
|
||||
#include "prenv.h" //this is here for debug reasons...
|
||||
#include "prtypes.h" //this is here for debug reasons...
|
||||
@@ -183,6 +184,7 @@ CViewSourceHTML::CViewSourceHTML() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
mSink=0;
|
||||
mFilename;
|
||||
mLineNumber=0;
|
||||
mTokenizer=0;
|
||||
mIsHTML=PR_FALSE;
|
||||
}
|
||||
|
||||
@@ -197,6 +199,16 @@ CViewSourceHTML::~CViewSourceHTML(){
|
||||
mParser=0; //just to prove we destructed...
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess1/8/99
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
const nsIID& CViewSourceHTML::GetMostDerivedIID(void) const{
|
||||
return kClassIID;
|
||||
}
|
||||
|
||||
/**
|
||||
* Call this method if you want the DTD to construct a fresh
|
||||
* instance of itself.
|
||||
@@ -286,13 +298,45 @@ NS_IMETHODIMP CViewSourceHTML::WillBuildModel(nsString& aFilename,PRBool aNotify
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* The parser uses a code sandwich to wrap the parsing process. Before
|
||||
* the process begins, WillBuildModel() is called. Afterwards the parser
|
||||
* calls DidBuildModel().
|
||||
* @update gess5/18/98
|
||||
* @param aFilename is the name of the file being parsed.
|
||||
* @return error code (almost always 0)
|
||||
*/
|
||||
NS_IMETHODIMP CViewSourceHTML::BuildModel(nsIParser* aParser) {
|
||||
nsresult result=NS_OK;
|
||||
|
||||
nsHTMLTokenizer* theTokenizer=(nsHTMLTokenizer*)GetTokenizer();
|
||||
nsITokenRecycler* theRecycler=GetTokenRecycler();
|
||||
if(theTokenizer) {
|
||||
while(NS_OK==result){
|
||||
CToken* theToken=theTokenizer->PopToken();
|
||||
if(theToken) {
|
||||
result=HandleToken(theToken,aParser);
|
||||
if(NS_SUCCEEDED(result)) {
|
||||
theRecycler->RecycleToken(theToken);
|
||||
}
|
||||
else if(NS_ERROR_HTMLPARSER_BLOCK!=result){
|
||||
theTokenizer->PushTokenFront(theToken);
|
||||
}
|
||||
// theRootDTD->Verify(kEmptyString,aParser);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
NS_IMETHODIMP CViewSourceHTML::DidBuildModel(PRInt32 anErrorCode,PRBool aNotifySink,nsIParser* aParser){
|
||||
NS_IMETHODIMP CViewSourceHTML::DidBuildModel(nsresult anErrorCode,PRBool aNotifySink,nsIParser* aParser){
|
||||
nsresult result= NS_OK;
|
||||
|
||||
//ADD CODE HERE TO CLOSE OPEN CONTAINERS...
|
||||
@@ -319,344 +363,27 @@ NS_IMETHODIMP CViewSourceHTML::DidBuildModel(PRInt32 anErrorCode,PRBool aNotifyS
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
// NOTE(review): judging by the name, these are the tags whose content is skipped
// rather than tokenized normally -- no reader of this table is visible here; confirm.
static eHTMLTags gSkippedContentTags[]={ eHTMLTag_script, eHTMLTag_style, eHTMLTag_title, eHTMLTag_textarea};
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess11/9/98
|
||||
* @update gess8/4/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
NS_IMETHODIMP CViewSourceHTML::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRInt32 theDequeSize=mTokenDeque.GetSize();
|
||||
nsresult result=NS_OK;
|
||||
|
||||
aToken=gTokenRecycler.CreateTokenOfType(eToken_start,eHTMLTag_unknown,gEmpty);
|
||||
|
||||
if(aToken) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(NS_OK==result) {
|
||||
if(((CStartToken*)aToken)->IsAttributed()) {
|
||||
result=ConsumeAttributes(aChar,aScanner,(CStartToken*)aToken);
|
||||
}
|
||||
|
||||
//EEEEECCCCKKKK!!!
|
||||
//This code is confusing, so pay attention.
|
||||
//If you're here, it's because we were in the midst of consuming a start
|
||||
//tag but ran out of data (not in the stream, but in this *part* of the stream.
|
||||
//For simplicity, we have to unwind our input. Therefore, we pop and discard
|
||||
//any new tokens we've cued this round. Later we can get smarter about this.
|
||||
if(NS_OK!=result) {
|
||||
while(mTokenDeque.GetSize()>theDequeSize) {
|
||||
delete (CToken*)mTokenDeque.PopBack();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} //if
|
||||
} //if
|
||||
return result;
|
||||
nsITokenRecycler* CViewSourceHTML::GetTokenRecycler(void){
  //BuildModel() treats the result of GetTokenizer() as possibly null,
  //so do the same here instead of dereferencing it blindly.
  nsITokenizer* theTokenizer=GetTokenizer();
  if(!theTokenizer) {
    return 0; //no tokenizer, hence no recycler to hand out
  }
  return theTokenizer->GetTokenRecycler();
}
|
||||
|
||||
/**
|
||||
* This method is called just after a known text char has
|
||||
* been consumed and we should read a text run.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
* Retrieve the preferred tokenizer for use by this DTD.
|
||||
* @update gess12/28/98
|
||||
* @param none
|
||||
* @return ptr to tokenizer
|
||||
*/
|
||||
NS_IMETHODIMP CViewSourceHTML::ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken){
  //Create a text token seeded with aString, then let it read from the scanner.
  aToken=gTokenRecycler.CreateTokenOfType(eToken_text,eHTMLTag_text,aString);
  if(!aToken) {
    return NS_OK; //no token could be created; nothing was consumed
  }

  PRUnichar theSeedChar=0;
  nsresult theStatus=aToken->Consume(theSeedChar,aScanner);
  if(!theStatus) {
    return theStatus; //clean read
  }

  //The read reported a problem. If some text was collected anyway we keep
  //the token and report success; otherwise the token is discarded and the
  //status is passed back to the caller.
  nsString& theText=aToken->GetStringValueXXX();
  if(0!=theText.Length()) {
    return kNoError;
  }

  delete aToken;
  aToken = nsnull;
  return theStatus;
}
|
||||
|
||||
/**
|
||||
* This method is called just after we've consumed a start
|
||||
* tag, and we now have to consume its attributes.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @return
|
||||
*/
|
||||
NS_IMETHODIMP CViewSourceHTML::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CStartToken* aToken) {
|
||||
PRBool done=PR_FALSE;
|
||||
nsresult result=NS_OK;
|
||||
PRInt16 theAttrCount=0;
|
||||
|
||||
while((!done) && (result==NS_OK)) {
|
||||
CAttributeToken* theToken= (CAttributeToken*)gTokenRecycler.CreateTokenOfType(eToken_attribute,eHTMLTag_unknown,gEmpty);
|
||||
if(theToken){
|
||||
result=theToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
|
||||
//Much as I hate to do this, here's some special case code.
|
||||
//This handles the case of empty-tags in XML. Our last
|
||||
//attribute token will come through with a text value of ""
|
||||
//and a textkey of "/". We should destroy it, and tell the
|
||||
//start token it was empty.
|
||||
nsString& key=theToken->GetKey();
|
||||
nsString& text=theToken->GetStringValueXXX();
|
||||
if((key[0]==kForwardSlash) && (0==text.Length())){
|
||||
//tada! our special case! Treat it like an empty start tag...
|
||||
aToken->SetEmpty(PR_TRUE);
|
||||
delete theToken;
|
||||
}
|
||||
else if(NS_OK==result){
|
||||
theAttrCount++;
|
||||
mTokenDeque.Push(theToken);
|
||||
}//if
|
||||
else delete theToken; //we can't keep it...
|
||||
}//if
|
||||
|
||||
if(NS_OK==result){
|
||||
result=aScanner.Peek(aChar);
|
||||
if(aChar==kGreaterThan) { //you just ate the '>'
|
||||
aScanner.GetChar(aChar); //skip the '>'
|
||||
done=PR_TRUE;
|
||||
}//if
|
||||
}//if
|
||||
}//while
|
||||
|
||||
aToken->SetAttributeCount(theAttrCount);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "&" has been consumed
|
||||
* and we know we're at the start of an entity.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
NS_IMETHODIMP CViewSourceHTML::ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
PRUnichar theChar;
|
||||
nsresult result=aScanner.GetChar(theChar);
|
||||
|
||||
if(NS_OK==result) {
|
||||
if(nsString::IsAlpha(theChar)) { //handle common enity references &xxx; or �.
|
||||
aToken = gTokenRecycler.CreateTokenOfType(eToken_entity,eHTMLTag_entity,gEmpty);
|
||||
result = aToken->Consume(theChar,aScanner); //tell new token to finish consuming text...
|
||||
}
|
||||
else if(kHashsign==theChar) {
|
||||
aToken = gTokenRecycler.CreateTokenOfType(eToken_entity,eHTMLTag_entity,gEmpty);
|
||||
result=aToken->Consume(0,aScanner);
|
||||
}
|
||||
else {
|
||||
//oops, we're actually looking at plain text...
|
||||
nsAutoString temp("&");
|
||||
aScanner.PutBack(theChar);
|
||||
result=ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
}//if
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after whitespace has been
|
||||
* consumed and we know we're at the start a whitespace run.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
NS_IMETHODIMP CViewSourceHTML::ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
aToken = gTokenRecycler.CreateTokenOfType(eToken_whitespace,eHTMLTag_whitespace,gEmpty);
|
||||
nsresult result=kNoError;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return kNoError;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<!" has been consumed
|
||||
* and we know we're at the start of a comment.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
NS_IMETHODIMP CViewSourceHTML::ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
aToken = gTokenRecycler.CreateTokenOfType(eToken_comment,eHTMLTag_comment,gEmpty);
|
||||
nsresult result=NS_OK;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a newline has been consumed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param aToken is the newly created newline token that is parsing
|
||||
* @return error code
|
||||
*/
|
||||
NS_IMETHODIMP CViewSourceHTML::ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
|
||||
aToken=gTokenRecycler.CreateTokenOfType(eToken_newline,eHTMLTag_newline,gEmpty);
|
||||
nsresult result=NS_OK;
|
||||
if(aToken) {
|
||||
result=aToken->Consume(aChar,aScanner);
|
||||
}
|
||||
return kNoError;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is called just after a "<" has been consumed
|
||||
* and we know we're at the start of some kind of tagged
|
||||
* element. We don't know yet if it's a tag or a comment.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @param aChar is the last char read
|
||||
* @param aScanner is represents our input source
|
||||
* @param aToken is the out arg holding our new token
|
||||
* @return error code (may return kInterrupted).
|
||||
*/
|
||||
NS_IMETHODIMP CViewSourceHTML::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
|
||||
|
||||
nsresult result=aScanner.GetChar(aChar);
|
||||
|
||||
if(NS_OK==result) {
|
||||
|
||||
switch(aChar) {
|
||||
case kForwardSlash:
|
||||
PRUnichar ch;
|
||||
result=aScanner.Peek(ch);
|
||||
if(NS_OK==result) {
|
||||
if(nsString::IsAlpha(ch))
|
||||
aToken=gTokenRecycler.CreateTokenOfType(eToken_end,eHTMLTag_unknown,gEmpty);
|
||||
else aToken=gTokenRecycler.CreateTokenOfType(eToken_comment,eHTMLTag_unknown,gEmpty);
|
||||
}//if
|
||||
break;
|
||||
|
||||
case kExclamation:
|
||||
aToken=gTokenRecycler.CreateTokenOfType(eToken_comment,eHTMLTag_comment,gEmpty);
|
||||
break;
|
||||
|
||||
case kQuestionMark: //it must be an XML processing instruction...
|
||||
aToken=gTokenRecycler.CreateTokenOfType(eToken_instruction,eHTMLTag_unknown,gEmpty);
|
||||
break;
|
||||
|
||||
default:
|
||||
if(nsString::IsAlpha(aChar))
|
||||
return ConsumeStartTag(aChar,aScanner,aToken);
|
||||
else if(kEOF!=aChar) {
|
||||
nsAutoString temp("<");
|
||||
return ConsumeText(temp,aScanner,aToken);
|
||||
}
|
||||
} //switch
|
||||
|
||||
if((0!=aToken) && (NS_OK==result)) {
|
||||
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
|
||||
if(result) {
|
||||
delete aToken;
|
||||
aToken=0;
|
||||
}
|
||||
} //if
|
||||
} //if
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This method is repeatedly called by the tokenizer.
|
||||
* Each time, we determine the kind of token we're about to
|
||||
* read, and then we call the appropriate method to handle
|
||||
* that token type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aChar: last char read
|
||||
* @param aScanner: see nsScanner.h
|
||||
* @param anErrorCode: arg that will hold error condition
|
||||
* @return new token or null
|
||||
*/
|
||||
NS_IMETHODIMP CViewSourceHTML::ConsumeToken(CToken*& aToken,nsIParser* aParser) {
|
||||
aToken=0;
|
||||
if(mTokenDeque.GetSize()>0) {
|
||||
aToken=(CToken*)mTokenDeque.Pop();
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
mParser=(nsParser*)aParser;
|
||||
|
||||
nsresult result=NS_OK;
|
||||
CScanner* theScanner=mParser->GetScanner();
|
||||
if(NS_OK==result){
|
||||
PRUnichar theChar;
|
||||
result=theScanner->GetChar(theChar);
|
||||
switch(result) {
|
||||
case kEOF:
|
||||
//We convert from eof to complete here, because we never really tried to get data.
|
||||
//All we did was try to see if data was available, which it wasn't.
|
||||
//It's important to return process complete, so that controlling logic can know that
|
||||
//everything went well, but we're done with token processing.
|
||||
result=kProcessComplete;
|
||||
break;
|
||||
|
||||
case kInterrupted:
|
||||
theScanner->RewindToMark();
|
||||
break;
|
||||
|
||||
case NS_OK:
|
||||
default:
|
||||
switch(theChar) {
|
||||
case kLessThan:
|
||||
result=ConsumeTag(theChar,*theScanner,aToken);
|
||||
break;
|
||||
|
||||
case kAmpersand:
|
||||
result=ConsumeEntity(theChar,*theScanner,aToken);
|
||||
break;
|
||||
|
||||
case kCR: case kLF:
|
||||
result=ConsumeNewline(theChar,*theScanner,aToken);
|
||||
break;
|
||||
|
||||
case kNotFound:
|
||||
break;
|
||||
|
||||
default:
|
||||
if(!nsString::IsSpace(theChar)) {
|
||||
nsAutoString temp(theChar);
|
||||
result=ConsumeText(temp,*theScanner,aToken);
|
||||
break;
|
||||
}
|
||||
result=ConsumeWhitespace(theChar,*theScanner,aToken);
|
||||
break;
|
||||
} //switch
|
||||
break;
|
||||
} //switch
|
||||
// if(NS_OK==result)
|
||||
// result=theScanner->Eof();
|
||||
} //if
|
||||
return result;
|
||||
nsITokenizer* CViewSourceHTML::GetTokenizer(void) {
  //Create the nsHTMLTokenizer on first request and cache it in
  //mTokenizer; later calls return the cached pointer.
  if(0==mTokenizer) {
    mTokenizer=new nsHTMLTokenizer();
  }
  return mTokenizer;
}
|
||||
|
||||
/**
|
||||
@@ -872,20 +599,16 @@ PRBool WriteTag(nsCParserNode& aNode,nsIContentSink& aSink,PRBool anEndToken,PRB
|
||||
SetStyle(eHTMLTag_b,PR_FALSE,aSink);
|
||||
}
|
||||
|
||||
{ //next write the equal sign...
|
||||
theString="=";
|
||||
CTextToken theToken(theString);
|
||||
nsCParserNode theNode(&theToken,aNode.GetSourceLineNumber());
|
||||
aSink.AddLeaf(theNode);
|
||||
}
|
||||
|
||||
//begin by writing the value...
|
||||
{
|
||||
SetColor("blue",PR_TRUE,aSink);
|
||||
theString=aNode.GetValueAt(theIndex);
|
||||
CTextToken theToken(theString);
|
||||
nsCParserNode theNode(&theToken,aNode.GetSourceLineNumber());
|
||||
aSink.AddLeaf(theNode);
|
||||
if(0<theString.Length()){
|
||||
theString.Insert('=',0);
|
||||
CTextToken theToken(theString);
|
||||
nsCParserNode theNode(&theToken,aNode.GetSourceLineNumber());
|
||||
aSink.AddLeaf(theNode);
|
||||
}
|
||||
SetStyle(eHTMLTag_font,PR_FALSE,aSink);
|
||||
}
|
||||
}
|
||||
@@ -975,15 +698,15 @@ NS_IMETHODIMP CViewSourceHTML::HandleToken(CToken* aToken,nsIParser* aParser) {
|
||||
if(0<attrCount){ //go collect the attributes...
|
||||
int attr=0;
|
||||
for(attr=0;attr<attrCount;attr++){
|
||||
CToken* theToken=mParser->PeekToken();
|
||||
CToken* theToken=mTokenizer->PeekToken();
|
||||
if(theToken) {
|
||||
eHTMLTokenTypes theType=eHTMLTokenTypes(theToken->GetTokenType());
|
||||
if(eToken_attribute==theType){
|
||||
mParser->PopToken(); //pop it for real...
|
||||
mTokenizer->PopToken(); //pop it for real...
|
||||
theNode.AddAttribute(theToken);
|
||||
}
|
||||
}
|
||||
else return kInterrupted;
|
||||
else return kEOF;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1024,13 +747,3 @@ nsresult CViewSourceHTML::ReleaseTokenPump(nsITagHandler* aHandler){
|
||||
nsresult result=NS_OK;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess8/4/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsITokenRecycler* CViewSourceHTML::GetTokenRecycler(void){
|
||||
return &gTokenRecycler;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user