designed to allow for easy conversion from the NGLayoutContent model to other formats. It's primary focus is to allow easy conversion to HTML3.2 and HTML4.0 formats. Other formats may be supported in the future. git-svn-id: svn://10.0.0.236/trunk@5523 18797224-902f-48f8-a5cc-f745e15eee43
869 lines
20 KiB
C++
869 lines
20 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/*
|
|
* The contents of this file are subject to the Netscape Public License
|
|
* Version 1.0 (the "NPL"); you may not use this file except in
|
|
* compliance with the NPL. You may obtain a copy of the NPL at
|
|
* http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
|
* for the specific language governing rights and limitations under the
|
|
* NPL.
|
|
*
|
|
* The Initial Developer of this code under the NPL is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
|
* Reserved.
|
|
*/
|
|
|
|
//#define __INCREMENTAL 1
|
|
|
|
#include "nsHTMLParser.h"
|
|
#include "nsIContentSink.h"
|
|
#include "nsString.h"
|
|
#include "nsCRT.h"
|
|
#include "COtherDTD.h"
|
|
#include "CNavDTD.h"
|
|
#include "nsScanner.h"
|
|
#include "prenv.h" //this is here for debug reasons...
|
|
#include "plstr.h"
|
|
#include <fstream.h>
|
|
#include "nsIInputStream.h"
|
|
#include "nsIParserFilter.h"
|
|
|
|
/* UNCOMMENT THIS IF STUFF STOPS WORKING...
|
|
#ifdef XP_PC
|
|
#include <direct.h> //this is here for debug reasons...
|
|
#endif
|
|
#include <time.h>
|
|
#include "prmem.h"
|
|
*/
|
|
|
|
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
|
static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID);
|
|
static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
|
|
|
|
static const char* kNullURL = "Error: Null URL given";
|
|
static const char* kNullFilename= "Error: Null filename given";
|
|
static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
|
|
|
static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream
|
|
|
|
//#define DEBUG_SAVE_SOURCE_DOC 1
|
|
#ifdef DEBUG_SAVE_SOURCE_DOC
|
|
fstream* gTempStream=0;
|
|
#endif
|
|
|
|
|
|
/**
|
|
* This method is defined in nsIParser. It is used to
|
|
* cause the COM-like construction of an nsHTMLParser.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param nsIParser** ptr to newly instantiated parser
|
|
* @return NS_xxx error result
|
|
*/
|
|
NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult)
|
|
{
|
|
nsHTMLParser *it = new nsHTMLParser();
|
|
|
|
if (it == 0) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
return it->QueryInterface(kIParserIID, (void **) aInstancePtrResult);
|
|
}
|
|
|
|
class CTokenDeallocator: public nsDequeFunctor{
|
|
public:
|
|
virtual void operator()(void* anObject) {
|
|
CToken* aToken = (CToken*)anObject;
|
|
delete aToken;
|
|
}
|
|
};
|
|
|
|
CTokenDeallocator gTokenKiller;
|
|
|
|
/**
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
|
|
NS_INIT_REFCNT();
|
|
mParserFilter = nsnull;
|
|
mListener = nsnull;
|
|
mTransferBuffer=0;
|
|
mSink=0;
|
|
mCurrentPos=0;
|
|
mMarkPos=0;
|
|
mParseMode=eParseMode_unknown;
|
|
mDTD=0;
|
|
mScanner = 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* Default destructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsHTMLParser::~nsHTMLParser() {
|
|
NS_IF_RELEASE(mListener);
|
|
if(mTransferBuffer)
|
|
delete [] mTransferBuffer;
|
|
mTransferBuffer=0;
|
|
NS_RELEASE(mSink);
|
|
if(mCurrentPos)
|
|
delete mCurrentPos;
|
|
mCurrentPos=0;
|
|
if(mDTD)
|
|
NS_RELEASE(mDTD);
|
|
mDTD=0;
|
|
if(mScanner)
|
|
delete mScanner;
|
|
mScanner=0;
|
|
}
|
|
|
|
|
|
NS_IMPL_ADDREF(nsHTMLParser)
|
|
NS_IMPL_RELEASE(nsHTMLParser)
|
|
//NS_IMPL_ISUPPORTS(nsHTMLParser,NS_IHTML_PARSER_IID)
|
|
|
|
|
|
/**
|
|
* This method gets called as part of our COM-like interfaces.
|
|
* Its purpose is to create an interface to parser object
|
|
* of some type.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param nsIID id of object to discover
|
|
* @param aInstancePtr ptr to newly discovered interface
|
|
* @return NS_xxx result code
|
|
*/
|
|
nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr)
|
|
{
|
|
if (NULL == aInstancePtr) {
|
|
return NS_ERROR_NULL_POINTER;
|
|
}
|
|
|
|
if(aIID.Equals(kISupportsIID)) { //do IUnknown...
|
|
*aInstancePtr = (nsIParser*)(this);
|
|
}
|
|
else if(aIID.Equals(kIParserIID)) { //do IParser base class...
|
|
*aInstancePtr = (nsIParser*)(this);
|
|
}
|
|
else if(aIID.Equals(kClassIID)) { //do this class...
|
|
*aInstancePtr = (nsHTMLParser*)(this);
|
|
}
|
|
else {
|
|
*aInstancePtr=0;
|
|
return NS_NOINTERFACE;
|
|
}
|
|
((nsISupports*) *aInstancePtr)->AddRef();
|
|
return NS_OK;
|
|
}
|
|
|
|
nsIParserFilter * nsHTMLParser::SetParserFilter(nsIParserFilter * aFilter)
|
|
{
|
|
nsIParserFilter* old=mParserFilter;
|
|
if(old)
|
|
NS_RELEASE(old);
|
|
if(aFilter) {
|
|
mParserFilter=aFilter;
|
|
NS_ADDREF(aFilter);
|
|
}
|
|
return old;
|
|
}
|
|
|
|
/**
|
|
* This method gets called in order to set the content
|
|
* sink for this parser to dump nodes to.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param nsIContentSink interface for node receiver
|
|
* @return
|
|
*/
|
|
nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) {
|
|
NS_PRECONDITION(0!=aSink,"sink cannot be null!");
|
|
nsIContentSink* old=mSink;
|
|
if(old)
|
|
NS_RELEASE(old);
|
|
if(aSink) {
|
|
mSink=aSink;
|
|
NS_ADDREF(aSink);
|
|
}
|
|
return old;
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 6/9/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void nsHTMLParser::SetDTD(nsIDTD* aDTD) {
|
|
mDTD=aDTD;
|
|
}
|
|
|
|
nsIDTD * nsHTMLParser::GetDTD(void) {
|
|
return mDTD;
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 6/9/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CScanner* nsHTMLParser::GetScanner(void){
|
|
return mScanner;
|
|
}
|
|
|
|
/**
|
|
* This is where we loop over the tokens created in the
|
|
* tokenization phase, and try to make sense out of them.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
|
*/
|
|
PRInt32 nsHTMLParser::IterateTokens() {
|
|
nsDequeIterator e=mTokenDeque.End();
|
|
nsDequeIterator theMarkPos(e);
|
|
|
|
if(!mCurrentPos)
|
|
mCurrentPos=new nsDequeIterator(mTokenDeque.Begin());
|
|
|
|
PRInt32 result=kNoError;
|
|
|
|
while((kNoError==result) && ((*mCurrentPos<e))){
|
|
mMinorIteration++;
|
|
CToken* theToken=(CToken*)mCurrentPos->GetCurrent();
|
|
|
|
theMarkPos=*mCurrentPos;
|
|
result=mDTD->HandleToken(theToken);
|
|
++(*mCurrentPos);
|
|
}
|
|
|
|
if(kInterrupted==result)
|
|
*mCurrentPos=theMarkPos;
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 5/13/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
eParseMode DetermineParseMode() {
|
|
const char* theModeStr= PR_GetEnv("PARSE_MODE");
|
|
const char* other="other";
|
|
eParseMode result=eParseMode_navigator;
|
|
|
|
if(theModeStr)
|
|
if(0==nsCRT::strcasecmp(other,theModeStr))
|
|
result=eParseMode_other;
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 5/13/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsIDTD* NewDTD(eParseMode aMode) {
|
|
nsIDTD* aDTD=0;
|
|
switch(aMode) {
|
|
case eParseMode_navigator:
|
|
aDTD=new CNavDTD(); break;
|
|
case eParseMode_other:
|
|
aDTD=new COtherDTD(); break;
|
|
default:
|
|
break;
|
|
}
|
|
if (aDTD)
|
|
aDTD->AddRef();
|
|
return aDTD;
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
* @update gess5/18/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 nsHTMLParser::WillBuildModel(void) {
|
|
mMajorIteration=-1;
|
|
mMinorIteration=-1;
|
|
|
|
if(mDTD)
|
|
mDTD->WillBuildModel();
|
|
|
|
#ifdef DEBUG_SAVE_SOURCE_DOC
|
|
#if defined(XP_UNIX) && defined(IRIX)
|
|
/* XXX: IRIX does not support ios::binary */
|
|
gTempStream =new fstream("c:/temp/out.html",ios::out);
|
|
#else
|
|
gTempStream = new fstream("c:/temp/out.html",ios::out|ios::binary);
|
|
#endif
|
|
#endif
|
|
|
|
return kNoError;
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @update gess5/18/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 nsHTMLParser::DidBuildModel(PRInt32 anErrorCode) {
|
|
//One last thing...close any open containers.
|
|
PRInt32 result=anErrorCode;
|
|
if(mDTD) {
|
|
result=mDTD->DidBuildModel(anErrorCode);
|
|
}
|
|
|
|
#ifdef DEBUG_SAVE_SOURCE_DOC
|
|
if(gTempStream) {
|
|
gTempStream->close();
|
|
delete gTempStream;
|
|
gTempStream=0;
|
|
}
|
|
#endif
|
|
|
|
return anErrorCode;
|
|
}
|
|
|
|
/**
|
|
* This DEBUG ONLY method is used to simulate a network-based
|
|
* i/o model where data comes in incrementally.
|
|
*
|
|
* @update gess 5/13/98
|
|
* @param aFilename is the name of the disk file to use for testing.
|
|
* @return error code (kNoError means ok)
|
|
*/
|
|
PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
|
|
PRInt32 result=kBadFilename;
|
|
fstream* theFileStream;
|
|
nsString theBuffer;
|
|
const int kLocalBufSize=10;
|
|
|
|
mMajorIteration=-1;
|
|
mMinorIteration=-1;
|
|
|
|
#if defined(XP_UNIX) && defined(IRIX)
|
|
/* XXX: IRIX does not support ios::binary */
|
|
theFileStream=new fstream(aFilename,ios::in);
|
|
#else
|
|
theFileStream=new fstream(aFilename,ios::in|ios::binary);
|
|
#endif
|
|
if(theFileStream) {
|
|
result=kNoError;
|
|
while((kNoError==result) || (kInterrupted==result)) {
|
|
//read some data from the file...
|
|
|
|
char buf[kLocalBufSize];
|
|
buf[kLocalBufSize]=0;
|
|
|
|
if(theFileStream) {
|
|
theFileStream->read(buf,kLocalBufSize);
|
|
PRInt32 numread=theFileStream->gcount();
|
|
if(numread>0) {
|
|
buf[numread]=0;
|
|
theBuffer.Truncate();
|
|
theBuffer.Append(buf);
|
|
mScanner->Append(theBuffer);
|
|
result=ResumeParse();
|
|
}
|
|
else break;
|
|
}
|
|
|
|
}
|
|
theFileStream->close();
|
|
delete theFileStream;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This is the main controlling routine in the parsing process.
|
|
* Note that it may get called multiple times for the same scanner,
|
|
* since this is a pushed based system, and all the tokens may
|
|
* not have been consumed by the scanner during a given invocation
|
|
* of this method.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aFilename -- const char* containing file to be parsed.
|
|
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
|
*/
|
|
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug){
|
|
NS_PRECONDITION(0!=aFilename,kNullFilename);
|
|
PRInt32 status=kBadFilename;
|
|
mIncremental=aIncremental;
|
|
|
|
if(aFilename) {
|
|
|
|
mParseMode=DetermineParseMode();
|
|
mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
|
|
if(mDTD) {
|
|
mDTD->SetParser(this);
|
|
mDTD->SetContentSink(mSink);
|
|
mDTD->SetURLRef((char *)aFilename);
|
|
mDTD->SetParserDebug(aDebug);
|
|
}
|
|
|
|
WillBuildModel();
|
|
|
|
//ok, time to create our tokenizer and begin the process
|
|
if(aIncremental) {
|
|
mScanner=new CScanner(mParseMode);
|
|
status=ParseFileIncrementally(aFilename);
|
|
}
|
|
else {
|
|
//ok, time to create our tokenizer and begin the process
|
|
mScanner=new CScanner(aFilename,mParseMode);
|
|
status=ResumeParse();
|
|
}
|
|
DidBuildModel(status);
|
|
|
|
}
|
|
return status;
|
|
}
|
|
|
|
/**
|
|
* This is the main controlling routine in the parsing process.
|
|
* Note that it may get called multiple times for the same scanner,
|
|
* since this is a pushed based system, and all the tokens may
|
|
* not have been consumed by the scanner during a given invocation
|
|
* of this method.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aFilename -- const char* containing file to be parsed.
|
|
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
|
*/
|
|
PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
|
|
nsIStreamListener* aListener,
|
|
PRBool aIncremental,
|
|
nsIParserDebug * aDebug) {
|
|
NS_PRECONDITION(0!=aURL,kNullURL);
|
|
|
|
PRInt32 status=kBadURL;
|
|
|
|
//set the rickGDebug flag to 1 if you want to try incrementally
|
|
//loading your document from a text file (given below).
|
|
static int rickGDebug=0;
|
|
if(rickGDebug)
|
|
return Parse("c:/temp/temp.html",PR_TRUE);
|
|
|
|
NS_IF_RELEASE(mListener);
|
|
mListener = aListener;
|
|
NS_IF_ADDREF(aListener);
|
|
|
|
mIncremental=aIncremental;
|
|
|
|
if(aURL) {
|
|
|
|
mParseMode=DetermineParseMode();
|
|
mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
|
|
if(mDTD) {
|
|
mDTD->SetParser(this);
|
|
mDTD->SetContentSink(mSink);
|
|
mDTD->SetURLRef((char *)aURL->GetSpec());
|
|
mDTD->SetParserDebug(aDebug);
|
|
}
|
|
|
|
WillBuildModel();
|
|
|
|
//ok, time to create our tokenizer and begin the process
|
|
if(mIncremental) {
|
|
mScanner=new CScanner(mParseMode);
|
|
status=aURL->Open(this);
|
|
}
|
|
else {
|
|
mScanner=new CScanner(aURL,mParseMode);
|
|
WillBuildModel();
|
|
status=ResumeParse();
|
|
DidBuildModel(status);
|
|
}
|
|
|
|
}
|
|
return status;
|
|
}
|
|
|
|
/**
|
|
* Call this method if all you want to do is parse 1 string full of HTML text.
|
|
*
|
|
* @update gess5/11/98
|
|
* @param anHTMLString contains a string-full of real HTML
|
|
* @param appendTokens tells us whether we should insert tokens inline, or append them.
|
|
* @return TRUE if all went well -- FALSE otherwise
|
|
*/
|
|
PRInt32 nsHTMLParser::Parse(nsString& aSourceBuffer,PRBool appendTokens){
|
|
PRInt32 result=kNoError;
|
|
|
|
mScanner=new CScanner();
|
|
WillBuildModel();
|
|
mScanner->Append(aSourceBuffer);
|
|
result=ResumeParse();
|
|
DidBuildModel(result);
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This routine is called to cause the parser to continue
|
|
* parsing it's underling stream. This call allows the
|
|
* parse process to happen in chunks, such as when the
|
|
* content is push based, and we need to parse in pieces.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return PR_TRUE if parsing concluded successfully.
|
|
*/
|
|
PRInt32 nsHTMLParser::ResumeParse() {
|
|
PRInt32 result=kNoError;
|
|
|
|
mDTD->WillResumeParse();
|
|
if(kNoError==result) {
|
|
result=Tokenize();
|
|
if(kInterrupted==result)
|
|
mDTD->WillInterruptParse();
|
|
IterateTokens();
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
* Retrieve the attributes for this node, and add then into
|
|
* the node.
|
|
*
|
|
* @update gess4/22/98
|
|
* @param aNode is the node you want to collect attributes for
|
|
* @param aCount is the # of attributes you're expecting
|
|
* @return error code (should be 0)
|
|
*/
|
|
PRInt32 nsHTMLParser::CollectAttributes(nsCParserNode& aNode,PRInt32 aCount){
|
|
nsDequeIterator end=mTokenDeque.End();
|
|
|
|
int attr=0;
|
|
for(attr=0;attr<aCount;attr++) {
|
|
if(*mCurrentPos<end) {
|
|
CToken* tkn=(CToken*)(++(*mCurrentPos));
|
|
if(tkn){
|
|
if(eToken_attribute==eHTMLTokenTypes(tkn->GetTokenType())){
|
|
aNode.AddAttribute(tkn);
|
|
}
|
|
else (*mCurrentPos)--;
|
|
}
|
|
else return kInterrupted;
|
|
}
|
|
else return kInterrupted;
|
|
}
|
|
return kNoError;
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
* @update gess4/22/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 nsHTMLParser::CollectSkippedContent(nsCParserNode& aNode){
|
|
eHTMLTokenTypes subtype=eToken_attribute;
|
|
nsDequeIterator end=mTokenDeque.End();
|
|
PRInt32 count=0;
|
|
|
|
while((*mCurrentPos!=end) && (eToken_attribute==subtype)) {
|
|
CToken* tkn=(CToken*)(++(*mCurrentPos));
|
|
subtype=eHTMLTokenTypes(tkn->GetTokenType());
|
|
if(eToken_skippedcontent==subtype) {
|
|
aNode.SetSkippedContent(tkn);
|
|
count++;
|
|
}
|
|
else (*mCurrentPos)--;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 5/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult nsHTMLParser::GetBindInfo(void){
|
|
nsresult result=0;
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 5/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult
|
|
nsHTMLParser::OnProgress(PRInt32 aProgress, PRInt32 aProgressMax,
|
|
const nsString& aMsg)
|
|
{
|
|
nsresult result=0;
|
|
if (nsnull != mListener) {
|
|
mListener->OnProgress(aProgress, aProgressMax, aMsg);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 5/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult nsHTMLParser::OnStartBinding(const char *aContentType){
|
|
if (nsnull != mListener) {
|
|
mListener->OnStartBinding(aContentType);
|
|
}
|
|
nsresult result=WillBuildModel();
|
|
if(!mTransferBuffer) {
|
|
mTransferBuffer=new char[gTransferBufferSize+1];
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 5/12/98
|
|
* @param pIStream contains the input chars
|
|
* @param length is the number of bytes waiting input
|
|
* @return error code (usually 0)
|
|
*/
|
|
nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length){
|
|
if (nsnull != mListener) {
|
|
mListener->OnDataAvailable(pIStream, length);
|
|
}
|
|
|
|
int len=0;
|
|
int offset=0;
|
|
|
|
do {
|
|
PRInt32 err;
|
|
len = pIStream->Read(&err, mTransferBuffer, 0, gTransferBufferSize);
|
|
if(len>0) {
|
|
|
|
#ifdef DEBUG_SAVE_SOURCE_DOC
|
|
if(gTempStream) {
|
|
gTempStream->write(mTransferBuffer,len);
|
|
}
|
|
#endif
|
|
|
|
if (mParserFilter)
|
|
mParserFilter->RawBuffer(mTransferBuffer, &len);
|
|
|
|
mScanner->Append(&mTransferBuffer[offset],len);
|
|
|
|
} //if
|
|
} while (len > 0);
|
|
|
|
nsresult result=ResumeParse();
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 5/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult nsHTMLParser::OnStopBinding(PRInt32 status, const nsString& aMsg){
|
|
nsresult result=DidBuildModel(status);
|
|
if (nsnull != mListener) {
|
|
mListener->OnStopBinding(status, aMsg);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/*******************************************************************
|
|
Here comes the tokenization methods...
|
|
*******************************************************************/
|
|
|
|
/**
|
|
* Cause the tokenizer to consume the next token, and
|
|
* return an error result.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param anError -- ref to error code
|
|
* @return new token or null
|
|
*/
|
|
PRInt32 nsHTMLParser::ConsumeToken(CToken*& aToken) {
|
|
PRInt32 result=mDTD->ConsumeToken(aToken);
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
* Part of the code sandwich, this gets called right before
|
|
* the tokenization process begins. The main reason for
|
|
* this call is to allow the delegate to do initialization.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return TRUE if it's ok to proceed
|
|
*/
|
|
PRBool nsHTMLParser::WillTokenize(PRBool aIncremental){
|
|
PRBool result=PR_TRUE;
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @update gess 3/25/98
|
|
* @return TRUE if it's ok to proceed
|
|
*/
|
|
PRInt32 nsHTMLParser::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){
|
|
CToken* theToken=0;
|
|
PRInt32 result=kNoError;
|
|
|
|
WillTokenize(PR_TRUE);
|
|
|
|
while(kNoError==result) {
|
|
result=ConsumeToken(theToken);
|
|
if(theToken && (kNoError==result)) {
|
|
|
|
#ifdef VERBOSE_DEBUG
|
|
theToken->DebugDumpToken(cout);
|
|
#endif
|
|
mTokenDeque.Push(theToken);
|
|
}
|
|
}
|
|
if(kEOF==result)
|
|
result=kNoError;
|
|
DidTokenize(PR_TRUE);
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This is the primary control routine. It iteratively
|
|
* consumes tokens until an error occurs or you run out
|
|
* of data.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @return error code
|
|
*/
|
|
PRInt32 nsHTMLParser::Tokenize(void) {
|
|
CToken* theToken=0;
|
|
PRInt32 result=kNoError;
|
|
PRBool done=(0==mMajorIteration) ? (!WillTokenize(PR_TRUE)) : PR_FALSE;
|
|
|
|
|
|
while((PR_FALSE==done) && (kNoError==result)) {
|
|
mScanner->Mark();
|
|
result=ConsumeToken(theToken);
|
|
if(kNoError==result) {
|
|
if(theToken) {
|
|
|
|
#ifdef VERBOSE_DEBUG
|
|
theToken->DebugDumpToken(cout);
|
|
#endif
|
|
mTokenDeque.Push(theToken);
|
|
}
|
|
|
|
}
|
|
else {
|
|
if(theToken)
|
|
delete theToken;
|
|
mScanner->RewindToMark();
|
|
}
|
|
}
|
|
if((PR_TRUE==done) && (kInterrupted!=result))
|
|
DidTokenize(PR_TRUE);
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This is the tail-end of the code sandwich for the
|
|
* tokenization process. It gets called once tokenziation
|
|
* has completed.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return TRUE if all went well
|
|
*/
|
|
PRBool nsHTMLParser::DidTokenize(PRBool aIncremental) {
|
|
PRBool result=PR_TRUE;
|
|
|
|
#ifdef VERBOSE_DEBUG
|
|
DebugDumpTokens(cout);
|
|
#endif
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This debug routine is used to cause the tokenizer to
|
|
* iterate its token list, asking each token to dump its
|
|
* contents to the given output stream.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void nsHTMLParser::DebugDumpTokens(ostream& out) {
|
|
nsDequeIterator b=mTokenDeque.Begin();
|
|
nsDequeIterator e=mTokenDeque.End();
|
|
|
|
CToken* theToken;
|
|
while(b!=e) {
|
|
theToken=(CToken*)(b++);
|
|
theToken->DebugDumpToken(out);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* This debug routine is used to cause the tokenizer to
|
|
* iterate its token list, asking each token to dump its
|
|
* contents to the given output stream.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void nsHTMLParser::DebugDumpSource(ostream& out) {
|
|
nsDequeIterator b=mTokenDeque.Begin();
|
|
nsDequeIterator e=mTokenDeque.End();
|
|
|
|
CToken* theToken;
|
|
while(b!=e) {
|
|
theToken=(CToken*)(b++);
|
|
theToken->DebugDumpSource(out);
|
|
}
|
|
|
|
}
|
|
|
|
|