A wad of changes to the parser. See my update

git-svn-id: svn://10.0.0.236/trunk@17433 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
rickg%netscape.com
1999-01-09 01:09:02 +00:00
parent d67d163fe3
commit c8fc6237d4
92 changed files with 7354 additions and 8666 deletions

View File

@@ -29,11 +29,12 @@
#include "nsCRT.h"
#include "nsParser.h"
#include "nsScanner.h"
#include "nsParserTypes.h"
#include "nsIParser.h"
#include "nsTokenHandler.h"
#include "nsDTDUtils.h"
#include "nsIContentSink.h"
#include "nsIHTMLContentSink.h"
#include "nsHTMLTokenizer.h"
#include "prenv.h" //this is here for debug reasons...
#include "prtypes.h" //this is here for debug reasons...
@@ -183,6 +184,7 @@ CViewSourceHTML::CViewSourceHTML() : nsIDTD(), mTokenDeque(gTokenKiller) {
mSink=0;
mFilename;
mLineNumber=0;
mTokenizer=0;
mIsHTML=PR_FALSE;
}
@@ -197,6 +199,16 @@ CViewSourceHTML::~CViewSourceHTML(){
mParser=0; //just to prove we destructed...
}
/**
 * Returns the class-level IID constant (kClassIID) for this DTD.
 *
 * @update  gess1/8/99
 * @param   none
 * @return  reference to kClassIID
 */
const nsIID& CViewSourceHTML::GetMostDerivedIID(void) const{
  const nsIID& theClassIID=kClassIID;
  return theClassIID;
}
/**
* Call this method if you want the DTD to construct a fresh
* instance of itself.
@@ -286,13 +298,45 @@ NS_IMETHODIMP CViewSourceHTML::WillBuildModel(nsString& aFilename,PRBool aNotify
return result;
}
/**
 * The parser uses a code sandwich to wrap the parsing process: before
 * the process begins WillBuildModel() is called, and afterwards the
 * parser calls DidBuildModel(). This method sits in the middle and
 * drains the tokenizer, handing each token to HandleToken() in order.
 *
 * For every token popped from the tokenizer:
 *   - if HandleToken() succeeds, the token is given to the token recycler;
 *   - if it fails with anything other than NS_ERROR_HTMLPARSER_BLOCK, the
 *     token is pushed back onto the front of the tokenizer;
 *   - if it fails with NS_ERROR_HTMLPARSER_BLOCK, the token is neither
 *     recycled nor pushed back here.
 * The loop stops when the tokenizer runs dry or as soon as a result
 * other than NS_OK is seen.
 *
 * @update  gess5/18/98
 * @param   aParser is forwarded unchanged to HandleToken().
 * @return  NS_OK, or the first non-NS_OK result produced by HandleToken()
 */
NS_IMETHODIMP CViewSourceHTML::BuildModel(nsIParser* aParser) {
  nsresult          theStatus=NS_OK;
  nsHTMLTokenizer*  theSource=(nsHTMLTokenizer*)GetTokenizer();
  nsITokenRecycler* theBin=GetTokenRecycler();

  if(!theSource) {
    return theStatus; //no tokenizer, nothing to do
  }

  while(NS_OK==theStatus) {
    CToken* theNext=theSource->PopToken();
    if(!theNext) {
      break; //the tokenizer has no more tokens for us
    }

    theStatus=HandleToken(theNext,aParser);
    if(NS_SUCCEEDED(theStatus)) {
      theBin->RecycleToken(theNext);
      continue;
    }

    if(NS_ERROR_HTMLPARSER_BLOCK!=theStatus) {
      //put the token back so it is the next one popped
      theSource->PushTokenFront(theNext);
    }
  }

  return theStatus;
}
/**
*
* @update gess5/18/98
* @param
* @return
*/
NS_IMETHODIMP CViewSourceHTML::DidBuildModel(PRInt32 anErrorCode,PRBool aNotifySink,nsIParser* aParser){
NS_IMETHODIMP CViewSourceHTML::DidBuildModel(nsresult anErrorCode,PRBool aNotifySink,nsIParser* aParser){
nsresult result= NS_OK;
//ADD CODE HERE TO CLOSE OPEN CONTAINERS...
@@ -319,344 +363,27 @@ NS_IMETHODIMP CViewSourceHTML::DidBuildModel(PRInt32 anErrorCode,PRBool aNotifyS
return result;
}
//Tags grouped as "skipped content".
//NOTE(review): no use of this table is visible in this part of the file -- confirm it is still needed.
static eHTMLTags gSkippedContentTags[]={
  eHTMLTag_script,
  eHTMLTag_style,
  eHTMLTag_title,
  eHTMLTag_textarea
};
/**
*
* @update gess11/9/98
* @update gess8/4/98
* @param
* @return
*/
NS_IMETHODIMP CViewSourceHTML::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
PRInt32 theDequeSize=mTokenDeque.GetSize();
nsresult result=NS_OK;
aToken=gTokenRecycler.CreateTokenOfType(eToken_start,eHTMLTag_unknown,gEmpty);
if(aToken) {
result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
if(NS_OK==result) {
if(((CStartToken*)aToken)->IsAttributed()) {
result=ConsumeAttributes(aChar,aScanner,(CStartToken*)aToken);
}
//EEEEECCCCKKKK!!!
//This code is confusing, so pay attention.
//If you're here, it's because we were in the midst of consuming a start
//tag but ran out of data (not in the stream, but in this *part* of the stream.
//For simplicity, we have to unwind our input. Therefore, we pop and discard
//any new tokens we've cued this round. Later we can get smarter about this.
if(NS_OK!=result) {
while(mTokenDeque.GetSize()>theDequeSize) {
delete (CToken*)mTokenDeque.PopBack();
}
}
} //if
} //if
return result;
/**
 * Fetch the token recycler that belongs to this DTD's tokenizer.
 *
 * The tokenizer is obtained through GetTokenizer(). If no tokenizer is
 * available, a null recycler is returned instead of dereferencing a
 * null pointer.
 *
 * @param   none
 * @return  ptr to the tokenizer's recycler, or 0 if there is no tokenizer
 */
nsITokenRecycler* CViewSourceHTML::GetTokenRecycler(void){
  nsITokenizer* theTokenizer=GetTokenizer();
  if(!theTokenizer) {
    return 0; //GetTokenizer() gave us nothing to ask
  }
  return theTokenizer->GetTokenRecycler();
}
/**
* This method is called just after a known text char has
* been consumed and we should read a text run.
*
* @update gess 3/25/98
* @param aChar: last char read
* @param aScanner: see nsScanner.h
* @param anErrorCode: arg that will hold error condition
* @return new token or null
* Retrieve the preferred tokenizer for use by this DTD.
* @update gess12/28/98
* @param none
* @return ptr to tokenizer
*/
NS_IMETHODIMP CViewSourceHTML::ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken){
  //Start a text token seeded with aString, then let it read the rest of the run.
  aToken=gTokenRecycler.CreateTokenOfType(eToken_text,eHTMLTag_text,aString);
  if(!aToken) {
    return NS_OK; //no token could be created; nothing was consumed
  }

  PRUnichar theSeed=0;
  nsresult  theStatus=aToken->Consume(theSeed,aScanner);
  if(theStatus) {
    //Consume() reported a non-zero result. Keep the token only if it
    //managed to collect some text; in that case the result is cleared.
    if(0==aToken->GetStringValueXXX().Length()) {
      delete aToken;
      aToken = nsnull;
    }
    else {
      theStatus=kNoError;
    }
  }
  return theStatus;
}
/**
 *  This method is called just after we've consumed a start
 *  tag, and we now have to consume its attributes.
 *
 *  Attribute tokens are created one at a time and asked to consume
 *  their text from the scanner. Each successfully consumed attribute is
 *  pushed onto mTokenDeque and counted. After each attribute the scanner
 *  is peeked; a '>' is swallowed and ends the loop.
 *
 *  If an attribute token cannot be created, the loop ends after the
 *  usual peek step. Without that stop nothing would be consumed and the
 *  loop could repeat forever.
 *
 *  @update  gess 3/25/98
 *  @param   aChar: passed to each attribute token's Consume() and reused
 *           as the buffer for peeking/reading the next char
 *  @param   aScanner: see nsScanner.h
 *  @param   aToken: the start token; receives the attribute count and,
 *           for the XML empty-tag case, the empty flag
 *  @return  NS_OK, or the first non-NS_OK result from a token's
 *           Consume() or from the scanner's Peek()
 */
NS_IMETHODIMP CViewSourceHTML::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CStartToken* aToken) {
  PRBool done=PR_FALSE;
  nsresult result=NS_OK;
  PRInt16 theAttrCount=0;

  while((!done) && (result==NS_OK)) {
    CAttributeToken* theToken= (CAttributeToken*)gTokenRecycler.CreateTokenOfType(eToken_attribute,eHTMLTag_unknown,gEmpty);
    if(theToken){
      result=theToken->Consume(aChar,aScanner);  //tell new token to finish consuming text...

      //Much as I hate to do this, here's some special case code.
      //This handles the case of empty-tags in XML. Our last
      //attribute token will come through with a text value of ""
      //and a textkey of "/". We should destroy it, and tell the
      //start token it was empty.
      nsString& key=theToken->GetKey();
      nsString& text=theToken->GetStringValueXXX();
      if((key[0]==kForwardSlash) && (0==text.Length())){
        //tada! our special case! Treat it like an empty start tag...
        aToken->SetEmpty(PR_TRUE);
        delete theToken;
      }
      else if(NS_OK==result){
        theAttrCount++;
        mTokenDeque.Push(theToken);
      }//if
      else delete theToken; //we can't keep it...
    }//if

    if(NS_OK==result){
      result=aScanner.Peek(aChar);
      if(aChar==kGreaterThan) { //you just ate the '>'
        aScanner.GetChar(aChar); //skip the '>'
        done=PR_TRUE;
      }//if
    }//if

    if(!theToken){
      //No token was created, so no input was consumed this round.
      //Going around again could never make progress: stop here.
      //NOTE(review): the caller still sees NS_OK in this case, matching how
      //the sibling Consume* methods treat a failed token creation.
      done=PR_TRUE;
    }//if
  }//while

  aToken->SetAttributeCount(theAttrCount);
  return result;
}
/**
 *  This method is called just after a "&" has been consumed
 *  and we know we're at the start of an entity.
 *
 *  The next char decides what is built:
 *    - a letter: an entity token that continues from that letter;
 *    - '#': an entity token for a numeric reference;
 *    - anything else: the char is put back and the "&" is treated as
 *      the start of plain text (see ConsumeText()).
 *
 *  If an entity token cannot be created, aToken is left null and the
 *  scanner result is returned unchanged; the token is never dereferenced
 *  in that case.
 *
 *  @update  gess 3/25/98
 *  @param   aChar: last char read (not used by this method)
 *  @param   aScanner: see nsScanner.h
 *  @param   aToken: out arg that receives the new token (may be null)
 *  @return  result of the scanner read, or of the token/text consumption
 */
NS_IMETHODIMP CViewSourceHTML::ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
  PRUnichar theChar;
  nsresult result=aScanner.GetChar(theChar);

  if(NS_OK==result) {
    if(nsString::IsAlpha(theChar)) { //handle common entity references &xxx; or &#000.
      aToken = gTokenRecycler.CreateTokenOfType(eToken_entity,eHTMLTag_entity,gEmpty);
      if(aToken) {
        result = aToken->Consume(theChar,aScanner);  //tell new token to finish consuming text...
      }
    }
    else if(kHashsign==theChar) {
      aToken = gTokenRecycler.CreateTokenOfType(eToken_entity,eHTMLTag_entity,gEmpty);
      if(aToken) {
        result=aToken->Consume(0,aScanner);
      }
    }
    else {
      //oops, we're actually looking at plain text...
      nsAutoString temp("&");
      aScanner.PutBack(theChar);
      result=ConsumeText(temp,aScanner,aToken);
    }
  }//if
  return result;
}
/**
 *  Called once a whitespace char has been read, to gather the rest of
 *  the whitespace run into a whitespace token.
 *
 *  The result of the token's Consume() call is not propagated:
 *  this method always returns kNoError.
 *  NOTE(review): confirm that dropping the Consume() result is intended.
 *
 *  @update  gess 3/25/98
 *  @param   aChar: last char read; handed to the token's Consume()
 *  @param   aScanner: see nsScanner.h
 *  @param   aToken: out arg that receives the new token (null if none was created)
 *  @return  kNoError, always
 */
NS_IMETHODIMP CViewSourceHTML::ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
  aToken = gTokenRecycler.CreateTokenOfType(eToken_whitespace,eHTMLTag_whitespace,gEmpty);
  if(0!=aToken) {
    aToken->Consume(aChar,aScanner); //outcome deliberately left unexamined, as before
  }
  return kNoError;
}
/**
 *  Called once a "<!" has been read, to build a comment token and let
 *  it consume the rest of the comment from the scanner.
 *
 *  @update  gess 3/25/98
 *  @param   aChar: last char read; handed to the token's Consume()
 *  @param   aScanner: see nsScanner.h
 *  @param   aToken: out arg that receives the new token (null if none was created)
 *  @return  NS_OK if no token was created, otherwise the result of the
 *           token's Consume() call
 */
NS_IMETHODIMP CViewSourceHTML::ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
  nsresult theStatus=NS_OK;
  aToken = gTokenRecycler.CreateTokenOfType(eToken_comment,eHTMLTag_comment,gEmpty);
  if(0==aToken) {
    return theStatus;
  }
  theStatus=aToken->Consume(aChar,aScanner);
  return theStatus;
}
/**
 *  Called once a newline char has been read, to build a newline token
 *  and let it consume from the scanner.
 *
 *  The result of the token's Consume() call is not propagated:
 *  this method always returns kNoError.
 *  NOTE(review): confirm that dropping the Consume() result is intended.
 *
 *  @update  gess 3/25/98
 *  @param   aChar: last char read; handed to the token's Consume()
 *  @param   aScanner: see nsScanner.h
 *  @param   aToken: out arg that receives the newly created newline token
 *           (null if none was created)
 *  @return  kNoError, always
 */
NS_IMETHODIMP CViewSourceHTML::ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
  aToken=gTokenRecycler.CreateTokenOfType(eToken_newline,eHTMLTag_newline,gEmpty);
  if(0!=aToken) {
    aToken->Consume(aChar,aScanner); //outcome deliberately left unexamined, as before
  }
  return kNoError;
}
/**
 *  Called once a "<" has been read. Reads one more char to decide what
 *  kind of tagged element follows, creates the matching token and lets
 *  it consume its text:
 *    "</" + letter   -> end token
 *    "</" + other    -> comment token
 *    "<!"            -> comment token
 *    "<?"            -> instruction token (XML processing instruction)
 *    "<" + letter    -> delegated to ConsumeStartTag()
 *    anything else   -> unless the char equals kEOF, the "<" is treated
 *                       as the start of plain text via ConsumeText()
 *
 *  A token created here is deleted again (and aToken reset to 0) when
 *  its Consume() call reports a non-zero result.
 *
 *  @update  gess 5/12/98
 *  @param   aChar is overwritten with the char that follows the "<"
 *  @param   aScanner represents our input source
 *  @param   aToken is the out arg holding our new token
 *  @return  result of the scanner, token or helper call that ran last
 */
NS_IMETHODIMP CViewSourceHTML::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) {
  nsresult theStatus=aScanner.GetChar(aChar);
  if(NS_OK!=theStatus) {
    return theStatus; //couldn't even read the char after '<'
  }

  if(kForwardSlash==aChar) {
    PRUnichar theNext;
    theStatus=aScanner.Peek(theNext);
    if(NS_OK==theStatus) {
      if(nsString::IsAlpha(theNext)) {
        aToken=gTokenRecycler.CreateTokenOfType(eToken_end,eHTMLTag_unknown,gEmpty);
      }
      else {
        aToken=gTokenRecycler.CreateTokenOfType(eToken_comment,eHTMLTag_unknown,gEmpty);
      }
    }
  }
  else if(kExclamation==aChar) {
    aToken=gTokenRecycler.CreateTokenOfType(eToken_comment,eHTMLTag_comment,gEmpty);
  }
  else if(kQuestionMark==aChar) {
    //it must be an XML processing instruction...
    aToken=gTokenRecycler.CreateTokenOfType(eToken_instruction,eHTMLTag_unknown,gEmpty);
  }
  else if(nsString::IsAlpha(aChar)) {
    return ConsumeStartTag(aChar,aScanner,aToken);
  }
  else if(kEOF!=aChar) {
    nsAutoString theLead("<");
    return ConsumeText(theLead,aScanner,aToken);
  }

  if((0!=aToken) && (NS_OK==theStatus)) {
    theStatus=aToken->Consume(aChar,aScanner); //tell new token to finish consuming text...
    if(theStatus) {
      delete aToken;
      aToken=0;
    }
  }
  return theStatus;
}
/**
* This method repeatedly called by the tokenizer.
* Each time, we determine the kind of token were about to
* read, and then we call the appropriate method to handle
* that token type.
*
* @update gess 3/25/98
* @param aChar: last char read
* @param aScanner: see nsScanner.h
* @param anErrorCode: arg that will hold error condition
* @return new token or null
*/
NS_IMETHODIMP CViewSourceHTML::ConsumeToken(CToken*& aToken,nsIParser* aParser) {
aToken=0;
if(mTokenDeque.GetSize()>0) {
aToken=(CToken*)mTokenDeque.Pop();
return NS_OK;
}
mParser=(nsParser*)aParser;
nsresult result=NS_OK;
CScanner* theScanner=mParser->GetScanner();
if(NS_OK==result){
PRUnichar theChar;
result=theScanner->GetChar(theChar);
switch(result) {
case kEOF:
//We convert from eof to complete here, because we never really tried to get data.
//All we did was try to see if data was available, which it wasn't.
//It's important to return process complete, so that controlling logic can know that
//everything went well, but we're done with token processing.
result=kProcessComplete;
break;
case kInterrupted:
theScanner->RewindToMark();
break;
case NS_OK:
default:
switch(theChar) {
case kLessThan:
result=ConsumeTag(theChar,*theScanner,aToken);
break;
case kAmpersand:
result=ConsumeEntity(theChar,*theScanner,aToken);
break;
case kCR: case kLF:
result=ConsumeNewline(theChar,*theScanner,aToken);
break;
case kNotFound:
break;
default:
if(!nsString::IsSpace(theChar)) {
nsAutoString temp(theChar);
result=ConsumeText(temp,*theScanner,aToken);
break;
}
result=ConsumeWhitespace(theChar,*theScanner,aToken);
break;
} //switch
break;
} //switch
// if(NS_OK==result)
// result=theScanner->Eof();
} //if
return result;
/**
 * Retrieve the tokenizer used by this DTD. The first call creates an
 * nsHTMLTokenizer and caches it in mTokenizer; later calls return the
 * cached instance.
 *
 * @param   none
 * @return  ptr to tokenizer
 */
nsITokenizer* CViewSourceHTML::GetTokenizer(void) {
  if(0==mTokenizer) {
    mTokenizer=new nsHTMLTokenizer();
  }
  return mTokenizer;
}
/**
@@ -872,20 +599,16 @@ PRBool WriteTag(nsCParserNode& aNode,nsIContentSink& aSink,PRBool anEndToken,PRB
SetStyle(eHTMLTag_b,PR_FALSE,aSink);
}
{ //next write the equal sign...
theString="=";
CTextToken theToken(theString);
nsCParserNode theNode(&theToken,aNode.GetSourceLineNumber());
aSink.AddLeaf(theNode);
}
//begin by writing the value...
{
SetColor("blue",PR_TRUE,aSink);
theString=aNode.GetValueAt(theIndex);
CTextToken theToken(theString);
nsCParserNode theNode(&theToken,aNode.GetSourceLineNumber());
aSink.AddLeaf(theNode);
if(0<theString.Length()){
theString.Insert('=',0);
CTextToken theToken(theString);
nsCParserNode theNode(&theToken,aNode.GetSourceLineNumber());
aSink.AddLeaf(theNode);
}
SetStyle(eHTMLTag_font,PR_FALSE,aSink);
}
}
@@ -975,15 +698,15 @@ NS_IMETHODIMP CViewSourceHTML::HandleToken(CToken* aToken,nsIParser* aParser) {
if(0<attrCount){ //go collect the attributes...
int attr=0;
for(attr=0;attr<attrCount;attr++){
CToken* theToken=mParser->PeekToken();
CToken* theToken=mTokenizer->PeekToken();
if(theToken) {
eHTMLTokenTypes theType=eHTMLTokenTypes(theToken->GetTokenType());
if(eToken_attribute==theType){
mParser->PopToken(); //pop it for real...
mTokenizer->PopToken(); //pop it for real...
theNode.AddAttribute(theToken);
}
}
else return kInterrupted;
else return kEOF;
}
}
}
@@ -1024,13 +747,3 @@ nsresult CViewSourceHTML::ReleaseTokenPump(nsITagHandler* aHandler){
nsresult result=NS_OK;
return result;
}
/**
 * Hands out the file-level token recycler (gTokenRecycler).
 *
 * @update  gess8/4/98
 * @param   none
 * @return  ptr to gTokenRecycler
 */
nsITokenRecycler* CViewSourceHTML::GetTokenRecycler(void){
  nsITokenRecycler* theRecycler=&gTokenRecycler;
  return theRecycler;
}