diff --git a/mozilla/htmlparser/src/CNavDTD.cpp b/mozilla/htmlparser/src/CNavDTD.cpp
index ea0844b5856..4547755d1fb 100644
--- a/mozilla/htmlparser/src/CNavDTD.cpp
+++ b/mozilla/htmlparser/src/CNavDTD.cpp
@@ -31,6 +31,7 @@
*
*/
+#include "nsIParserDebug.h"
#include "CNavDTD.h"
#include "nsHTMLTokens.h"
#include "nsCRT.h"
@@ -43,13 +44,10 @@
#include "prtypes.h" //this is here for debug reasons...
#include "prio.h"
#include "plstr.h"
-#include "prstrm.h"
-#include
#ifdef XP_PC
#include //this is here for debug reasons...
#endif
-#include
#include "prmem.h"
@@ -63,8 +61,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
static const char* kNullToken = "Error: Null token given";
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
-static char* gVerificationOutputDir=0;
-static char* gURLRef=0;
static nsAutoString gEmpty;
static char formElementTags[]= {
@@ -234,17 +230,18 @@ static CNavTokenDeallocator gTokenKiller;
* @return
*/
CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
+ NS_INIT_REFCNT();
mParser=0;
+ mURLRef=0;
+ mParserDebug=0;
nsCRT::zero(mLeafBits,sizeof(mLeafBits));
nsCRT::zero(mContextStack,sizeof(mContextStack));
nsCRT::zero(mStyleStack,sizeof(mStyleStack));
nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
mContextStackPos=0;
mStyleStackPos=0;
- gURLRef = 0;
mHasOpenForm=PR_FALSE;
mHasOpenMap=PR_FALSE;
- gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
InitializeDefaultTokenHandlers();
}
@@ -257,11 +254,10 @@ CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
*/
CNavDTD::~CNavDTD(){
DeleteTokenHandlers();
- if (gURLRef)
- {
- PL_strfree(gURLRef);
- gURLRef = 0;
- }
+ if (mURLRef)
+ PL_strfree(mURLRef);
+ if (mParserDebug)
+ NS_RELEASE(mParserDebug);
// NS_RELEASE(mSink);
}
@@ -321,7 +317,8 @@ PRInt32 CNavDTD::HandleToken(CToken* aToken){
if(aHandler) {
result=(*aHandler)(theToken,this);
- Verify("xxx",PR_TRUE);
+ if (mParserDebug)
+ mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
}
}//if
@@ -807,7 +804,7 @@ PRBool CNavDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) const
* @param aChild -- tag enum of child container
* @return PR_TRUE if parent can contain child
*/
-PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
+PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {
PRBool result=PR_FALSE;
@@ -884,11 +881,11 @@ PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
//handle form elements (this is very much a WIP!!!)
if(0!=strchr(formElementTags,aChild)){
- return CanContainFormElement(aParent,aChild);
+ return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
}
- switch(aParent) {
+ switch((eHTMLTags)aParent) {
case eHTMLTag_a:
case eHTMLTag_acronym:
result=PRBool(0!=strchr(gTagSet1,aChild)); break;
@@ -1475,7 +1472,7 @@ eHTMLTags CNavDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
* @param aChild -- tag type of child
* @return TRUE if propagation closes; false otherwise
*/
-PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
+PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
PRBool result=PR_FALSE;
switch(aParentTag) {
@@ -1490,7 +1487,7 @@ PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTag
//otherwise, intentionally fall through...
case eHTMLTag_tr:
- if(PR_TRUE==CanContain(eHTMLTag_td,aChildTag)) {
+ if(PR_TRUE==CanContain((PRInt32)eHTMLTag_td,(PRInt32)aChildTag)) {
aVector.Append((PRUnichar)eHTMLTag_td);
result=BackwardPropagate(aVector,aParentTag,eHTMLTag_td);
// result=PR_TRUE;
@@ -2723,433 +2720,19 @@ void CNavDTD::WillInterruptParse(void){
return;
}
-
-/************************************************************************
- Here's a bunch of stuff JEvering put into the parser to do debugging.
- ************************************************************************/
-
-/**
- * This debug method records an invalid context vector and it's
- * associated context vector and URL in a simple flat file mapping which
- * resides in the verification directory and is named context.map
- *
- * @update jevering 6/06/98
- * @param path is the directory structure indicating the bad context vector
- * @param pURLRef is the associated URL
- * @param filename to record mapping to if not already recorded
- * @return TRUE if it is already record (dont rerecord)
- */
-
-#define CONTEXT_VECTOR_MAP "/vector.map"
-#define CONTEXT_VECTOR_STAT "/vector.stat"
-#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
-static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
-{
- char recordPath[2048];
- PRIntn oflags = 0;
-
- // create the record file name from the verification director
- // and the default name.
- strcpy(recordPath,gVerificationOutputDir);
- strcat(recordPath,CONTEXT_VECTOR_MAP);
-
- // create the file exists, only open for read/write
- // otherwise, create it
- if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
- oflags = PR_CREATE_FILE;
- oflags |= PR_RDWR;
-
- // open the record file
- PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
-
- if (recordFile) {
-
- char * string = (char *)PR_Malloc(2048);
- PRBool found = PR_FALSE;
-
- // vectors are stored on the format iof "URL vector filename"
- // where the vector contains the verification path and
- // the filename contains the debug source dump
- sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
-
- // get the file size, read in the file and parse it line at
- // a time to check to see if we have already recorded this
- // occurance
-
- PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
- if (iSize) {
-
- char * buffer = (char*)PR_Malloc(iSize);
- char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
- if (buffer!=NULL && string!=NULL) {
- PRInt32 ibufferpos, istringpos;
-
- // beginning of file for read
- PR_Seek(recordFile,0,PR_SEEK_SET);
- PR_Read(recordFile,buffer,iSize);
-
- // run through the file looking for a matching vector
- for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
- {
- // compare string once we have hit the end of the line
- if (buffer[ibufferpos] == '\r') {
- stringbuf[istringpos] = '\0';
- istringpos = 0;
- // skip newline and space
- ibufferpos++;
-
- if (PL_strlen(stringbuf)) {
- char * space;
- // chop of the filename for compare
- if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
- *space = '\0';
-
- // we have already recorded this one, free up, and return
- if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
- PR_Free(buffer);
- PR_Free(stringbuf);
- PR_Free(string);
- return PR_TRUE;
- }
- }
- }
-
- // build up the compare string
- else
- stringbuf[istringpos++] = buffer[ibufferpos];
- }
-
- // throw away the record file data
- PR_Free(buffer);
- PR_Free(stringbuf);
- }
- }
-
- // if this bad vector was not recorded, add it to record file
-
- if (!found) {
- PR_Seek(recordFile,0,PR_SEEK_END);
- PR_Write(recordFile,string,PL_strlen(string));
- }
-
- PR_Close(recordFile);
- PR_Free(string);
+/**
+ * Replaces the URL reference held by this DTD with a private copy of
+ * the given string. The stored copy is what HandleToken() hands to the
+ * parser debugger's Verify() call, and it is freed by the destructor.
+ *
+ * @param  aURLRef -- URL string to copy; null clears the stored reference
+ * @return
+ */
+void CNavDTD::SetURLRef(char * aURLRef){
+  // Copy BEFORE freeing the old value: if the caller hands us a pointer
+  // that aliases mURLRef, freeing first would make PL_strdup read
+  // memory that has already been released.
+  char* theCopy=0;
+  if (aURLRef) {
+    theCopy = PL_strdup(aURLRef);
}
-
- // vector was not recorded
- return PR_FALSE;
+  if (mURLRef)
+    PL_strfree(mURLRef);
+  // theCopy is null when aURLRef was null (or the copy failed), which
+  // leaves the DTD with no URL reference -- same as before this change.
+  mURLRef = theCopy;
}
-// structure to store the vector statistic information
-
-typedef struct vector_info {
- PRInt32 references; // number of occurances counted
- PRInt32 count; // number of tags in the vector
- PRBool good_vector; // is this a valid vector?
- eHTMLTags* vector; // and the vector
-} VectorInfo;
-
-// global table for storing vector statistics and the size
-static VectorInfo ** gVectorInfoArray = 0;
-static PRInt32 gVectorCount = 0;
-
-// the statistic vector table grows each time it exceeds this
-// stepping value
-#define TABLE_SIZE 128
-
-// compare function for quick sort. Compares references and
-// sorts in decending order
-
-static int compare( const void *arg1, const void *arg2 )
+/**
+ * Installs the parser debug object that HandleToken() calls Verify()
+ * on after each handled token. The DTD holds one reference to the
+ * object, which the destructor releases.
+ *
+ * @param  aParserDebug -- debug object to install; a null argument is
+ *                         ignored and any current debugger stays in place
+ * @return
+ */
+void CNavDTD::SetParserDebug(nsIParserDebug * aParserDebug)
{
- VectorInfo ** p1 = (VectorInfo**)arg1;
- VectorInfo ** p2 = (VectorInfo**)arg2;
- return (*p2)->references - (*p1)->references;
-}
-
-
-/**
- * This debug routines stores statistical information about a
- * context vector. The context vector statistics are stored in
- * a global array. The table is resorted each time it grows to
- * aid in lookup speed. If a vector has already been noted, its
- * reference count is bumped, otherwise it is added to the table
- *
- * @update jevering 6/11/98
- * @param aTags is the tag list (vector)
- * @param count is the size of the vector
- * @return
- */
-
-static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
-{
- // if the table doesn't exist, create it
- if (!gVectorInfoArray) {
- gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
- }
- else {
- // attempt to look up the vector
- for (PRInt32 i = 0; i < gVectorCount; i++)
-
- // check the vector only if they are the same size, if they
- // match then just return without doing further work
- if (gVectorInfoArray[i]->count == count)
- if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
-
- // bzzzt. and we have a winner.. bump the ref count
- gVectorInfoArray[i]->references++;
- return;
- }
- }
-
- // the context vector hasn't been noted, so allocate it and
- // initialize it one.. add it to the table
- VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
- pVectorInfo->references = 1;
- pVectorInfo->count = count;
- pVectorInfo->good_vector = good_vector;
- pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
- memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
- gVectorInfoArray[gVectorCount++] = pVectorInfo;
-
- // have we maxed out the table? grow it.. sort it.. love it.
- if ((gVectorCount % TABLE_SIZE) == 0) {
- gVectorInfoArray = (VectorInfo**)realloc(
- gVectorInfoArray,
- (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
- if (gVectorCount) {
- qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
- }
- }
-}
-
-static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
-{
- sprintf (vector_string, "%6d ", pInfo->references);
- for (PRInt32 j = 0; j < pInfo->count; j++) {
- PL_strcat(vector_string, "<");
- PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
- PL_strcat(vector_string, ">");
- }
- PL_strcat(vector_string,"\r\n");
-}
-
-/**
- * This debug routine dumps out the vector statistics to a text
- * file in the verification directory and defaults to the name
- * "vector.stat". It contains all parsed context vectors and there
- * occurance count sorted in decending order.
- *
- * @update jevering 6/11/98
- * @param
- * @return
- */
-
-extern "C" NS_EXPORT void DumpVectorRecord(void)
-{
- // do we have a table?
- if (gVectorCount) {
-
- // hopefully, they wont exceed 1K.
- char vector_string[1024];
- char path[1024];
-
- path[0] = '\0';
-
- // put in the verification directory.. else the root
- if (gVerificationOutputDir)
- strcpy(path,gVerificationOutputDir);
-
- strcat(path,CONTEXT_VECTOR_STAT);
-
- // open the stat file creaming any existing stat file
- PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
- if (statisticFile) {
-
- PRInt32 i;
- PRofstream ps;
- ps.attach(statisticFile);
-
- // oh what the heck, sort it again
- if (gVectorCount) {
- qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
- }
-
- // cute little header
- sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
- ps << vector_string;
-
- ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
- ps << VECTOR_TABLE_HEADER;
-
- // dump out the bad vectors encountered
- for (i = 0; i < gVectorCount; i++) {
- if (!gVectorInfoArray[i]->good_vector) {
- MakeVectorString(vector_string, gVectorInfoArray[i]);
- ps << vector_string;
- }
- }
-
- ps << "\r\n\r\nValid context vector summary\r\n";
- ps << VECTOR_TABLE_HEADER;
-
- // take a big vector table dump (good vectors)
- for (i = 0; i < gVectorCount; i++) {
- if (gVectorInfoArray[i]->good_vector) {
- MakeVectorString(vector_string, gVectorInfoArray[i]);
- ps << vector_string;
- }
- // free em up. they mean nothing to me now (I'm such a user)
-
- if (gVectorInfoArray[i]->vector)
- PR_Free(gVectorInfoArray[i]->vector);
- PR_Free(gVectorInfoArray[i]);
- }
- }
-
- // ok, we are done with the table, free it up as well
- PR_Free(gVectorInfoArray);
- gVectorInfoArray = 0;
- gVectorCount = 0;
- PR_Close(statisticFile);
+  if (aParserDebug) {
+    // Take the new reference first so that re-installing the object we
+    // already hold can never drop its count to zero in between.
+    NS_ADDREF(aParserDebug);
+    // Drop the reference to any previously installed debugger; simply
+    // overwriting the pointer would leak it.
+    if (mParserDebug) {
+      NS_RELEASE(mParserDebug);
+    }
+    mParserDebug = aParserDebug;
}
}
-
-
-/**
- * This debug method allows us to determine whether or not
- * we've seen (and can handle) the given context vector.
- *
- * @update gess4/22/98
- * @param tags is an array of eHTMLTags
- * @param count represents the number of items in the tags array
- * @param aDTD is the DTD we plan to ask for verification
- * @return TRUE if we know how to handle it, else false
- */
-PRBool CNavDTD::VerifyContextVector(void) const {
-
- PRBool result=PR_TRUE;
-
- if(0!=gVerificationOutputDir) {
-
-#ifdef XP_PC
- char path[_MAX_PATH+1];
- strcpy(path,gVerificationOutputDir);
-#endif
-
- int i=0;
- for(i=0;iDebugDumpSource(ps);
- PR_Close(debugFile);
- }
- }
- }
- }
-
- return result;
-}
diff --git a/mozilla/htmlparser/src/CNavDTD.h b/mozilla/htmlparser/src/CNavDTD.h
index 9883a4483e6..cdaa93d9dd9 100644
--- a/mozilla/htmlparser/src/CNavDTD.h
+++ b/mozilla/htmlparser/src/CNavDTD.h
@@ -42,6 +42,7 @@
class nsHTMLParser;
class nsIHTMLContentSink;
+class nsIParserDebug;
class CNavDTD : public nsIDTD {
@@ -141,11 +142,11 @@ class CNavDTD : public nsIDTD {
* of one type can contain a tag of another type.
*
* @update gess 3/25/98
- * @param aParent -- tag enum of parent container
- * @param aChild -- tag enum of child container
+ * @param aParent -- int tag of parent container
+ * @param aChild -- int tag of child container
* @return PR_TRUE if parent can contain child
*/
- virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
+ virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
/**
* This method is called to determine whether or not a tag
@@ -199,26 +200,21 @@ class CNavDTD : public nsIDTD {
*/
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
-
/**
- * This method gets called at various times by the parser
- * whenever we want to verify a valid context stack. This
- * method also gives us a hook to add debugging metrics.
- *
- * @update gess4/6/98
- * @param aStack[] array of ints (tokens)
- * @param aCount number of elements in given array
- * @return TRUE if stack is valid, else FALSE
+ *
+ * @update jevering 6/18/98
+ * @param aURLRef is the current URL reference (for debugger)
+ * @return
*/
- virtual PRBool VerifyContextVector(void) const;
+ virtual void SetURLRef(char * aURLRef);
/**
*
- * @update gess5/18/98
- * @param
+ * @update jevering 6/18/98
+ * @param aParserDebug created debug parser object
* @return
*/
- virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
+ virtual void SetParserDebug(nsIParserDebug * aParserDebug);
/**
* This method tries to design a context map (without actually
@@ -230,7 +226,7 @@ class CNavDTD : public nsIDTD {
* @param aChild -- tag type of child
* @return True if closure was achieved -- other false
*/
- virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
+ virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);
/**
* This method tries to design a context map (without actually
@@ -699,7 +695,8 @@ protected:
PRBool mHasOpenForm;
PRBool mHasOpenMap;
nsDeque mTokenDeque;
-
+ char* mURLRef;
+ nsIParserDebug* mParserDebug;
};
diff --git a/mozilla/htmlparser/src/COtherDTD.cpp b/mozilla/htmlparser/src/COtherDTD.cpp
index 47ecc728678..ccbf97c725a 100644
--- a/mozilla/htmlparser/src/COtherDTD.cpp
+++ b/mozilla/htmlparser/src/COtherDTD.cpp
@@ -31,6 +31,7 @@
*
*/
+#include "nsIParserDebug.h"
#include "COtherDTD.h"
#include "nsHTMLTokens.h"
#include "nsCRT.h"
@@ -63,8 +64,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
static const char* kNullToken = "Error: Null token given";
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
-static char* gVerificationOutputDir=0;
-static char* gURLRef=0;
static nsAutoString gEmpty;
static char formElementTags[]= {
@@ -235,17 +234,18 @@ static COtherTokenDeallocator gTokenKiller;
* @return
*/
COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
+ NS_INIT_REFCNT();
mParser=0;
+ mURLRef=0;
+ mParserDebug=0;
nsCRT::zero(mLeafBits,sizeof(mLeafBits));
nsCRT::zero(mContextStack,sizeof(mContextStack));
nsCRT::zero(mStyleStack,sizeof(mStyleStack));
nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
mContextStackPos=0;
mStyleStackPos=0;
- gURLRef = 0;
mHasOpenForm=PR_FALSE;
mHasOpenMap=PR_FALSE;
- gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
InitializeDefaultTokenHandlers();
}
@@ -258,11 +258,10 @@ COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
*/
COtherDTD::~COtherDTD(){
DeleteTokenHandlers();
- if (gURLRef)
- {
- PL_strfree(gURLRef);
- gURLRef = 0;
- }
+ if (mURLRef)
+ PL_strfree(mURLRef);
+ if (mParserDebug)
+ NS_RELEASE(mParserDebug);
// NS_RELEASE(mSink);
}
@@ -321,7 +320,8 @@ PRInt32 COtherDTD::HandleToken(CToken* aToken){
if(aHandler) {
result=(*aHandler)(theToken,this);
- Verify("xxx",PR_TRUE);
+ if (mParserDebug)
+ mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
}
}//if
@@ -807,7 +807,7 @@ PRBool COtherDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) cons
* @param aChild -- tag enum of child container
* @return PR_TRUE if parent can contain child
*/
-PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
+PRBool COtherDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {
PRBool result=PR_FALSE;
@@ -884,11 +884,11 @@ PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
//handle form elements (this is very much a WIP!!!)
if(0!=strchr(formElementTags,aChild)){
- return CanContainFormElement(aParent,aChild);
+ return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
}
- switch(aParent) {
+ switch((eHTMLTags)aParent) {
case eHTMLTag_a:
case eHTMLTag_acronym:
result=PRBool(0!=strchr(gTagSet1,aChild)); break;
@@ -1475,7 +1475,7 @@ eHTMLTags COtherDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
* @param aChild -- tag type of child
* @return TRUE if propagation closes; false otherwise
*/
-PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
+PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
PRBool result=PR_FALSE;
switch(aParentTag) {
@@ -2723,442 +2723,19 @@ void COtherDTD::WillInterruptParse(void){
return;
}
-
-/************************************************************************
- Here's a bunch of stuff JEvering put into the parser to do debugging.
- ************************************************************************/
-
-/**
- * This debug method records an invalid context vector and it's
- * associated context vector and URL in a simple flat file mapping which
- * resides in the verification directory and is named context.map
- *
- * @update jevering 6/06/98
- * @param path is the directory structure indicating the bad context vector
- * @param pURLRef is the associated URL
- * @param filename to record mapping to if not already recorded
- * @return TRUE if it is already record (dont rerecord)
- */
-
-#define CONTEXT_VECTOR_MAP "/vector.map"
-#define CONTEXT_VECTOR_STAT "/vector.stat"
-#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
-static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
-{
- char recordPath[2048];
- PRIntn oflags = 0;
-
- // create the record file name from the verification director
- // and the default name.
- strcpy(recordPath,gVerificationOutputDir);
- strcat(recordPath,CONTEXT_VECTOR_MAP);
-
- // create the file exists, only open for read/write
- // otherwise, create it
- if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
- oflags = PR_CREATE_FILE;
- oflags |= PR_RDWR;
-
- // open the record file
- PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
-
- if (recordFile) {
-
- char * string = (char *)PR_Malloc(2048);
- PRBool found = PR_FALSE;
-
- // vectors are stored on the format iof "URL vector filename"
- // where the vector contains the verification path and
- // the filename contains the debug source dump
- sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
-
- // get the file size, read in the file and parse it line at
- // a time to check to see if we have already recorded this
- // occurance
-
- PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
- if (iSize) {
-
- char * buffer = (char*)PR_Malloc(iSize);
- char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
- if (buffer!=NULL && string!=NULL) {
- PRInt32 ibufferpos, istringpos;
-
- // beginning of file for read
- PR_Seek(recordFile,0,PR_SEEK_SET);
- PR_Read(recordFile,buffer,iSize);
-
- // run through the file looking for a matching vector
- for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
- {
- // compare string once we have hit the end of the line
- if (buffer[ibufferpos] == '\r') {
- stringbuf[istringpos] = '\0';
- istringpos = 0;
- // skip newline and space
- ibufferpos++;
-
- if (PL_strlen(stringbuf)) {
- char * space;
- // chop of the filename for compare
- if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
- *space = '\0';
-
- // we have already recorded this one, free up, and return
- if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
- PR_Free(buffer);
- PR_Free(stringbuf);
- PR_Free(string);
- return PR_TRUE;
- }
- }
- }
-
- // build up the compare string
- else
- stringbuf[istringpos++] = buffer[ibufferpos];
- }
-
- // throw away the record file data
- PR_Free(buffer);
- PR_Free(stringbuf);
- }
- }
-
- // if this bad vector was not recorded, add it to record file
-
- if (!found) {
- PR_Seek(recordFile,0,PR_SEEK_END);
- PR_Write(recordFile,string,PL_strlen(string));
- }
-
- PR_Close(recordFile);
- PR_Free(string);
+/**
+ * Replaces the URL reference held by this DTD with a private copy of
+ * the given string. The stored copy is what HandleToken() hands to the
+ * parser debugger's Verify() call, and it is freed by the destructor.
+ *
+ * @param  aURLRef -- URL string to copy; null clears the stored reference
+ * @return
+ */
+void COtherDTD::SetURLRef(char * aURLRef){
+  // Copy BEFORE freeing the old value: if the caller hands us a pointer
+  // that aliases mURLRef, freeing first would make PL_strdup read
+  // memory that has already been released.
+  char* theCopy=0;
+  if (aURLRef) {
+    theCopy = PL_strdup(aURLRef);
}
-
- // vector was not recorded
- return PR_FALSE;
+  if (mURLRef)
+    PL_strfree(mURLRef);
+  // theCopy is null when aURLRef was null (or the copy failed), which
+  // leaves the DTD with no URL reference -- same as before this change.
+  mURLRef = theCopy;
}
-// structure to store the vector statistic information
-
-typedef struct vector_info {
- PRInt32 references; // number of occurances counted
- PRInt32 count; // number of tags in the vector
- PRBool good_vector; // is this a valid vector?
- eHTMLTags* vector; // and the vector
-} VectorInfo;
-
-// global table for storing vector statistics and the size
-static VectorInfo ** gVectorInfoArray = 0;
-static PRInt32 gVectorCount = 0;
-
-// the statistic vector table grows each time it exceeds this
-// stepping value
-#define TABLE_SIZE 128
-
-// compare function for quick sort. Compares references and
-// sorts in decending order
-
-static int compare( const void *arg1, const void *arg2 )
+/**
+ * Installs the parser debug object that HandleToken() calls Verify()
+ * on after each handled token. The DTD holds one reference to the
+ * object, which the destructor releases.
+ *
+ * @param  aParserDebug -- debug object to install; a null argument is
+ *                         ignored and any current debugger stays in place
+ * @return
+ */
+void COtherDTD::SetParserDebug(nsIParserDebug * aParserDebug)
{
- VectorInfo ** p1 = (VectorInfo**)arg1;
- VectorInfo ** p2 = (VectorInfo**)arg2;
- return (*p2)->references - (*p1)->references;
-}
-
-/**
- * quick sort the statistic array causing the most frequently
- * used vectors to be at the top (this makes it a little speedier
- * when looking them up)
- */
-static void SortVectorRecord(void) {
- // of course, sort it only if there is something to sort
- if (gVectorCount) {
- qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
- }
-}
-
-
-/**
- * This debug routines stores statistical information about a
- * context vector. The context vector statistics are stored in
- * a global array. The table is resorted each time it grows to
- * aid in lookup speed. If a vector has already been noted, its
- * reference count is bumped, otherwise it is added to the table
- *
- * @update jevering 6/11/98
- * @param aTags is the tag list (vector)
- * @param count is the size of the vector
- * @return
- */
-
-static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
-{
- // if the table doesn't exist, create it
- if (!gVectorInfoArray) {
- gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
- }
- else {
- // attempt to look up the vector
- for (PRInt32 i = 0; i < gVectorCount; i++)
-
- // check the vector only if they are the same size, if they
- // match then just return without doing further work
- if (gVectorInfoArray[i]->count == count)
- if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
-
- // bzzzt. and we have a winner.. bump the ref count
- gVectorInfoArray[i]->references++;
- return;
- }
- }
-
- // the context vector hasn't been noted, so allocate it and
- // initialize it one.. add it to the table
- VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
- pVectorInfo->references = 1;
- pVectorInfo->count = count;
- pVectorInfo->good_vector = good_vector;
- pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
- memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
- gVectorInfoArray[gVectorCount++] = pVectorInfo;
-
- // have we maxed out the table? grow it.. sort it.. love it.
- if ((gVectorCount % TABLE_SIZE) == 0) {
- gVectorInfoArray = (VectorInfo**)realloc(
- gVectorInfoArray,
- (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
- SortVectorRecord();
- }
-}
-
-static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
-{
- sprintf (vector_string, "%6d ", pInfo->references);
- for (PRInt32 j = 0; j < pInfo->count; j++) {
- PL_strcat(vector_string, "<");
- PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
- PL_strcat(vector_string, ">");
- }
- PL_strcat(vector_string,"\r\n");
-}
-
-/**
- * This debug routine dumps out the vector statistics to a text
- * file in the verification directory and defaults to the name
- * "vector.stat". It contains all parsed context vectors and there
- * occurance count sorted in decending order.
- *
- * @update jevering 6/11/98
- * @param
- * @return
- */
-
-extern "C" NS_EXPORT void DumpVectorRecord_other(void)
-{
- // do we have a table?
- if (gVectorCount) {
-
- // hopefully, they wont exceed 1K.
- char vector_string[1024];
- char path[1024];
-
- path[0] = '\0';
-
- // put in the verification directory.. else the root
- if (gVerificationOutputDir)
- strcpy(path,gVerificationOutputDir);
-
- strcat(path,CONTEXT_VECTOR_STAT);
-
- // open the stat file creaming any existing stat file
- PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
- if (statisticFile) {
-
- PRInt32 i;
- PRofstream ps;
- ps.attach(statisticFile);
-
- // oh what the heck, sort it again
- SortVectorRecord();
-
- // cute little header
- sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
- ps << vector_string;
-
- ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
- ps << VECTOR_TABLE_HEADER;
-
- // dump out the bad vectors encountered
- for (i = 0; i < gVectorCount; i++) {
- if (!gVectorInfoArray[i]->good_vector) {
- MakeVectorString(vector_string, gVectorInfoArray[i]);
- ps << vector_string;
- }
- }
-
- ps << "\r\n\r\nValid context vector summary\r\n";
- ps << VECTOR_TABLE_HEADER;
-
- // take a big vector table dump (good vectors)
- for (i = 0; i < gVectorCount; i++) {
- if (gVectorInfoArray[i]->good_vector) {
- MakeVectorString(vector_string, gVectorInfoArray[i]);
- ps << vector_string;
- }
- // free em up. they mean nothing to me now (I'm such a user)
-
- if (gVectorInfoArray[i]->vector)
- PR_Free(gVectorInfoArray[i]->vector);
- PR_Free(gVectorInfoArray[i]);
- }
- }
-
- // ok, we are done with the table, free it up as well
- PR_Free(gVectorInfoArray);
- gVectorInfoArray = 0;
- gVectorCount = 0;
- PR_Close(statisticFile);
+  if (aParserDebug) {
+    // Take the new reference first so that re-installing the object we
+    // already hold can never drop its count to zero in between.
+    NS_ADDREF(aParserDebug);
+    // Drop the reference to any previously installed debugger; simply
+    // overwriting the pointer would leak it.
+    if (mParserDebug) {
+      NS_RELEASE(mParserDebug);
+    }
+    mParserDebug = aParserDebug;
}
}
-
-
-/**
- * This debug method allows us to determine whether or not
- * we've seen (and can handle) the given context vector.
- *
- * @update gess4/22/98
- * @param tags is an array of eHTMLTags
- * @param count represents the number of items in the tags array
- * @param aDTD is the DTD we plan to ask for verification
- * @return TRUE if we know how to handle it, else false
- */
-PRBool COtherDTD::VerifyContextVector(void) const {
-
- PRBool result=PR_TRUE;
-
- if(0!=gVerificationOutputDir) {
-
-#ifdef XP_PC
- char path[_MAX_PATH+1];
- strcpy(path,gVerificationOutputDir);
-#endif
-
- int i=0;
- for(i=0;iDebugDumpSource(ps);
- PR_Close(debugFile);
- }
- }
- }
- }
-
- return result;
-}
-
diff --git a/mozilla/htmlparser/src/COtherDTD.h b/mozilla/htmlparser/src/COtherDTD.h
index 21d2346eafd..7a74866cf13 100644
--- a/mozilla/htmlparser/src/COtherDTD.h
+++ b/mozilla/htmlparser/src/COtherDTD.h
@@ -34,7 +34,6 @@
#include "nsDeque.h"
-
#define NS_IOtherHTML_DTD_IID \
{0x8a5e89c0, 0xd16d, 0x11d1, \
{0x80, 0x22, 0x00, 0x60, 0x8, 0x14, 0x98, 0x89}}
@@ -42,6 +41,7 @@
class nsIParser;
class nsIHTMLContentSink;
+class nsIParserDebug;
class COtherDTD : public nsIDTD {
@@ -143,11 +143,11 @@ class COtherDTD : public nsIDTD {
* of one type can contain a tag of another type.
*
* @update gess 3/25/98
- * @param aParent -- tag enum of parent container
- * @param aChild -- tag enum of child container
+ * @param aParent -- int tag of parent container
+ * @param aChild -- int tag of child container
* @return PR_TRUE if parent can contain child
*/
- virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
+ virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
/**
* This method is called to determine whether or not a tag
@@ -201,26 +201,21 @@ class COtherDTD : public nsIDTD {
*/
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
-
/**
- * This method gets called at various times by the parser
- * whenever we want to verify a valid context stack. This
- * method also gives us a hook to add debugging metrics.
- *
- * @update gess4/6/98
- * @param aStack[] array of ints (tokens)
- * @param aCount number of elements in given array
- * @return TRUE if stack is valid, else FALSE
+ *
+ * @update jevering 6/18/98
+ * @param aURLRef is the current URL reference (for debugger)
+ * @return
*/
- virtual PRBool VerifyContextVector(void) const;
+ virtual void SetURLRef(char * aURLRef);
/**
*
- * @update gess5/18/98
- * @param
+ * @update jevering 6/18/98
+ * @param aParserDebug created debug parser object
* @return
*/
- virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
+ virtual void SetParserDebug(nsIParserDebug * aParserDebug);
/**
* This method tries to design a context map (without actually
@@ -232,7 +227,7 @@ class COtherDTD : public nsIDTD {
* @param aChild -- tag type of child
* @return True if closure was achieved -- other false
*/
- virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
+ virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);
/**
* This method tries to design a context map (without actually
@@ -701,7 +696,8 @@ protected:
PRBool mHasOpenForm;
PRBool mHasOpenMap;
nsDeque mTokenDeque;
-
+ char* mURLRef;
+ nsIParserDebug* mParserDebug;
};
diff --git a/mozilla/htmlparser/src/Makefile b/mozilla/htmlparser/src/Makefile
index 37a8ca552ce..243a5c4aa98 100644
--- a/mozilla/htmlparser/src/Makefile
+++ b/mozilla/htmlparser/src/Makefile
@@ -24,6 +24,7 @@ DEFINES = -D_IMPL_NS_HTMLPARS
CPPSRCS = \
nsHTMLContentSink.cpp \
nsParserNode.cpp \
+ nsParserDebug.cpp \
nsScanner.cpp \
nsToken.cpp \
nsTokenHandler.cpp \
@@ -41,6 +42,8 @@ EXPORTS = \
nsHTMLTokens.h \
nsIParserNode.h \
nsIParser.h \
+ nsIParserDebug.h \
+ nsIParserFilter.h \
nsToken.h \
$(NULL)
diff --git a/mozilla/htmlparser/src/makefile.win b/mozilla/htmlparser/src/makefile.win
index 6f479747931..940db9e04f2 100644
--- a/mozilla/htmlparser/src/makefile.win
+++ b/mozilla/htmlparser/src/makefile.win
@@ -31,7 +31,8 @@ CPPSRCS=nsHTMLContentSink.cpp \
nsHTMLParser.cpp prstrm.cpp
EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \
- nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h
+ nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h \
+ nsIParserDebug.h nsIParserFilter.h
CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
.\$(OBJDIR)\CNavDTD.obj \
@@ -39,7 +40,7 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
.\$(OBJDIR)\nsHTMLParser.obj \
.\$(OBJDIR)\nsHTMLTokens.obj .\$(OBJDIR)\nsParserNode.obj \
.\$(OBJDIR)\nsScanner.obj .\$(OBJDIR)\nsToken.obj \
- .\$(OBJDIR)\nsTokenHandler.obj \
+ .\$(OBJDIR)\nsTokenHandler.obj .\$(OBJDIR)\nsParserDebug.obj \
.\$(OBJDIR)\prstrm.obj
LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib
diff --git a/mozilla/htmlparser/src/nsHTMLParser.cpp b/mozilla/htmlparser/src/nsHTMLParser.cpp
index 99b1d81a340..d361ceed444 100644
--- a/mozilla/htmlparser/src/nsHTMLParser.cpp
+++ b/mozilla/htmlparser/src/nsHTMLParser.cpp
@@ -30,6 +30,7 @@
#include "prstrm.h"
#include
#include "nsIInputStream.h"
+#include "nsIParserFilter.h"
/* UNCOMMENT THIS IF STUFF STOPS WORKING...
#ifdef XP_PC
@@ -47,10 +48,7 @@ static const char* kNullURL = "Error: Null URL given";
static const char* kNullFilename= "Error: Null filename given";
static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
-static char* gVerificationOutputDir=0;
-static PRBool gRecordingStatistics=PR_TRUE;
static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream
-static char* gURLRef=0;
//#define DEBUG_SAVE_SOURCE_DOC 1
#ifdef DEBUG_SAVE_SOURCE_DOC
@@ -58,17 +56,6 @@ fstream* gTempStream=0;
#endif
-extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir)
-{
- gVerificationOutputDir = verify_dir;
-}
-
-
-extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval)
-{
- gRecordingStatistics = bval;
-}
-
/**
* This method is defined in nsIParser. It is used to
* cause the COM-like construction of an nsHTMLParser.
@@ -107,6 +94,7 @@ CTokenDeallocator gTokenKiller;
*/
nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
NS_INIT_REFCNT();
+ mParserFilter = nsnull;
mListener = nsnull;
mTransferBuffer=0;
mSink=0;
@@ -125,11 +113,6 @@ nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
* @return
*/
nsHTMLParser::~nsHTMLParser() {
- if (gURLRef)
- {
- PL_strfree(gURLRef);
- gURLRef = 0;
- }
NS_IF_RELEASE(mListener);
if(mTransferBuffer)
delete [] mTransferBuffer;
@@ -139,7 +122,7 @@ nsHTMLParser::~nsHTMLParser() {
delete mCurrentPos;
mCurrentPos=0;
if(mDTD)
- delete mDTD;
+ NS_RELEASE(mDTD);
mDTD=0;
if(mScanner)
delete mScanner;
@@ -185,6 +168,18 @@ nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr)
return NS_OK;
}
+nsIParserFilter * nsHTMLParser::SetParserFilter(nsIParserFilter * aFilter)
+{
+  // Install aFilter (null clears the filter); the parser holds one reference.
+  nsIParserFilter* old=mParserFilter;
+  if(aFilter)
+    NS_ADDREF(aFilter);  // taken before the release so aFilter==old is safe
+  mParserFilter=aFilter; // always overwrite: never keep a released pointer
+  if(old)
+    NS_RELEASE(old);     // NOTE(review): the result is not an owned reference
+  return old;
+}
+
/**
* This method gets called in order to set the content
* sink for this parser to dump nodes to.
@@ -217,6 +212,10 @@ void nsHTMLParser::SetDTD(nsIDTD* aDTD) {
mDTD=aDTD;
}
+nsIDTD * nsHTMLParser::GetDTD(void) {
+ return mDTD; // returns the member pointer as-is (may be 0); no AddRef is done here
+}
+
/**
*
*
@@ -287,7 +286,7 @@ eParseMode DetermineParseMode() {
* @param
* @return
*/
-nsIDTD* GetDTD(eParseMode aMode) {
+nsIDTD* NewDTD(eParseMode aMode) {
nsIDTD* aDTD=0;
switch(aMode) {
case eParseMode_navigator:
@@ -297,6 +296,8 @@ nsIDTD* GetDTD(eParseMode aMode) {
default:
break;
}
+ if (aDTD)
+ aDTD->AddRef();
return aDTD;
}
@@ -364,11 +365,6 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
nsString theBuffer;
const int kLocalBufSize=10;
- if (gURLRef)
- PL_strfree(gURLRef);
- if (aFilename)
- gURLRef = PL_strdup(aFilename);
-
mMajorIteration=-1;
mMinorIteration=-1;
@@ -417,22 +413,20 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
* @param aFilename -- const char* containing file to be parsed.
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
*/
-PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
+PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug){
NS_PRECONDITION(0!=aFilename,kNullFilename);
PRInt32 status=kBadFilename;
mIncremental=aIncremental;
if(aFilename) {
- if (gURLRef)
- PL_strfree(gURLRef);
- gURLRef = PL_strdup(aFilename);
-
mParseMode=DetermineParseMode();
- mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
+ mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
if(mDTD) {
mDTD->SetParser(this);
mDTD->SetContentSink(mSink);
+ mDTD->SetURLRef((char *)aFilename);
+ mDTD->SetParserDebug(aDebug);
}
WillBuildModel();
@@ -466,7 +460,8 @@ PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
*/
PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
nsIStreamListener* aListener,
- PRBool aIncremental) {
+ PRBool aIncremental,
+ nsIParserDebug * aDebug) {
NS_PRECONDITION(0!=aURL,kNullURL);
PRInt32 status=kBadURL;
@@ -485,19 +480,13 @@ PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
if(aURL) {
- if (gURLRef)
- {
- PL_strfree(gURLRef);
- gURLRef = 0;
- }
- if (aURL->GetSpec())
- gURLRef = PL_strdup(aURL->GetSpec());
-
mParseMode=DetermineParseMode();
- mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
+ mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
if(mDTD) {
mDTD->SetParser(this);
mDTD->SetContentSink(mSink);
+ mDTD->SetURLRef((char *)aURL->GetSpec());
+ mDTD->SetParserDebug(aDebug);
}
WillBuildModel();
@@ -689,6 +678,9 @@ nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length)
}
#endif
+ if (mParserFilter)
+ mParserFilter->RawBuffer(mTransferBuffer, &len);
+
mScanner->Append(&mTransferBuffer[offset],len);
} //if
diff --git a/mozilla/htmlparser/src/nsHTMLParser.h b/mozilla/htmlparser/src/nsHTMLParser.h
index 1b1c8470aa3..d8638dd3b32 100644
--- a/mozilla/htmlparser/src/nsHTMLParser.h
+++ b/mozilla/htmlparser/src/nsHTMLParser.h
@@ -73,6 +73,8 @@ class nsIHTMLContentSink;
class nsIURL;
class nsIDTD;
class CScanner;
+class nsIParserFilter;
+class nsIParserDebug;
class nsHTMLParser : public nsIParser, public nsIStreamListener {
@@ -103,8 +105,12 @@ friend class CTokenHandler;
* @return old sink, or NULL
*/
virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);
+
+ virtual nsIParserFilter* SetParserFilter(nsIParserFilter* aFilter);
virtual void SetDTD(nsIDTD* aDTD);
+
+ virtual nsIDTD * GetDTD(void);
/**
*
@@ -124,7 +130,8 @@ friend class CTokenHandler;
*/
virtual PRInt32 Parse(nsIURL* aURL,
nsIStreamListener* aListener,
- PRBool aIncremental=PR_TRUE);
+ PRBool aIncremental=PR_TRUE,
+ nsIParserDebug * aDebug = 0);
/**
* Cause parser to parse input from given file in given mode
@@ -133,7 +140,7 @@ friend class CTokenHandler;
* @param aMode is the desired parser mode (Nav, other, etc.)
* @return TRUE if all went well -- FALSE otherwise
*/
- virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental);
+ virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0);
/**
* @update gess5/11/98
@@ -290,7 +297,8 @@ protected:
//*********************************************
nsIStreamListener* mListener;
- nsIContentSink* mSink;
+ nsIContentSink* mSink;
+ nsIParserFilter* mParserFilter;
nsDequeIterator* mCurrentPos;
nsDequeIterator* mMarkPos;
diff --git a/mozilla/htmlparser/src/nsIDTD.h b/mozilla/htmlparser/src/nsIDTD.h
index 926ee926943..77ae7f9868c 100644
--- a/mozilla/htmlparser/src/nsIDTD.h
+++ b/mozilla/htmlparser/src/nsIDTD.h
@@ -37,6 +37,7 @@
class nsIParser;
class CToken;
class nsIContentSink;
+class nsIParserDebug;
class nsIDTD : public nsISupports {
@@ -115,12 +116,28 @@ class nsIDTD : public nsISupports {
/**
*
- * @update gess5/18/98
- * @param
+ * @update jevering 6/18/98
+ * @param aURLRef is the current URL reference (for debugger)
* @return
*/
- virtual PRInt32 Verify(const char* anOutputDir,PRBool aRecordStats)=0;
+ virtual void SetURLRef(char * aURLRef) = 0;
+ /**
+ *
+ * @update jevering 6/18/98
+ * @param aParent parent tag
+ * @param aChild child tag
+ * @return PR_TRUE if valid container
+ */
+ virtual PRBool CanContain(PRInt32 aParent, PRInt32 aChild) = 0;
+
+ /**
+ *
+ * @update jevering 6/18/98
+ * @param aParserDebug created debug parser object
+ * @return
+ */
+ virtual void SetParserDebug(nsIParserDebug * aParserDebug) = 0;
};
diff --git a/mozilla/htmlparser/src/nsIParser.h b/mozilla/htmlparser/src/nsIParser.h
index f509e1c909e..a4ffd45bc27 100644
--- a/mozilla/htmlparser/src/nsIParser.h
+++ b/mozilla/htmlparser/src/nsIParser.h
@@ -34,6 +34,7 @@ class nsString;
class CToken;
class nsIURL;
class nsIDTD;
+class nsIParserDebug;
/**
* This class defines the iparser interface. This XPCOM
@@ -60,9 +61,10 @@ class nsIParser : public nsISupports {
virtual PRInt32 Parse(nsIURL* aURL,
nsIStreamListener* aListener,
- PRBool aIncremental=PR_TRUE) = 0;
+ PRBool aIncremental=PR_TRUE,
+ nsIParserDebug * aDebug = 0) = 0;
- virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0;
+ virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0)=0;
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;
diff --git a/mozilla/htmlparser/src/nsIParserDebug.h b/mozilla/htmlparser/src/nsIParserDebug.h
new file mode 100644
index 00000000000..a1e45204291
--- /dev/null
+++ b/mozilla/htmlparser/src/nsIParserDebug.h
@@ -0,0 +1,57 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL. You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation. Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation. All Rights
+ * Reserved.
+ */
+
+/**
+ * MODULE NOTES:
+ * @update gess 4/8/98
+ *
+ *
+ */
+
+#ifndef NS_IPARSERDEBUG__
+#define NS_IPARSERDEBUG__
+
+#include "nsISupports.h"
+#include "nsHTMLTokens.h"
+#include "prtypes.h"
+
+#define NS_IPARSERDEBUG_IID \
+ {0x7b68c220, 0x0685, 0x11d2, \
+ {0xa4, 0xb5, 0x00, 0x80, 0x5f, 0x2a, 0x0e, 0xd2}}
+
+
+class nsIDTD;
+class nsHTMLParser;
+
+class nsIParserDebug : public nsISupports { // abstract parser-debugging hooks; implemented by CParserDebug in nsParserDebug.cpp
+
+public:
+
+ virtual void SetVerificationDirectory(char * verify_dir) = 0; // directory under which verification output is written
+
+ virtual void SetRecordStatistics(PRBool bval) = 0; // turn context-vector statistics recording on or off
+
+ virtual PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef) = 0; // check a context stack against aDTD; PR_FALSE when a parent/child pair is rejected
+
+ virtual void DumpVectorRecord(void) = 0; // write out the recorded vector statistics
+
+};
+
+extern NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult);
+
+#endif /* NS_IPARSERDEBUG__ */
\ No newline at end of file
diff --git a/mozilla/htmlparser/src/nsIParserFilter.h b/mozilla/htmlparser/src/nsIParserFilter.h
new file mode 100644
index 00000000000..8b257515efc
--- /dev/null
+++ b/mozilla/htmlparser/src/nsIParserFilter.h
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL. You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation. Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation. All Rights
+ * Reserved.
+ */
+
+/**
+ * MODULE NOTES:
+ * @update jevering 6/17/98
+ *
+ */
+
+#ifndef IPARSERFILTER
+#define IPARSERFILTER
+
+#include "nsISupports.h"
+
+class CToken;
+
+#define NS_IPARSERFILTER_IID \
+ {0x14d6ff0, 0x0610, 0x11d2, \
+ {0x8c, 0x3f, 0x00, 0x80, 0x5f, 0x8a, 0x1d, 0xb7}}
+
+
+class nsIParserFilter : public nsISupports { // abstract hook interface the parser/tokenizer call while consuming input
+ public:
+
+ NS_IMETHOD RawBuffer(char * buffer, int * buffer_length) = 0; // called from nsHTMLParser::OnDataAvailable with the transfer buffer and a pointer to its length, before the data reaches the scanner
+
+ NS_IMETHOD WillAddToken(CToken & token) = 0; // CTokenizer::Tokenize casts the result to PRBool; a zero result keeps the token off the deque. NOTE(review): confirm that is the intended meaning of a success code
+
+ NS_IMETHOD ProcessTokens( /* dont know what goes here yet */ void ) = 0; // no caller is visible in this patch
+};
+
+extern nsresult NS_NewParserFilter(nsIParserFilter** aInstancePtrResult);
+
+
+#endif
+
diff --git a/mozilla/htmlparser/src/nsParserDebug.cpp b/mozilla/htmlparser/src/nsParserDebug.cpp
new file mode 100644
index 00000000000..e998807e309
--- /dev/null
+++ b/mozilla/htmlparser/src/nsParserDebug.cpp
@@ -0,0 +1,534 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL. You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation. Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation. All Rights
+ * Reserved.
+ */
+
+/**
+ * MODULE NOTES:
+ * @update jevering 06/18/98
+ *
+ * This file contains the parser debugger object which aids in
+ * walking links and reporting statistic information, reporting
+ * bad vectors.
+ */
+
+#include "CNavDTD.h"
+#include "nsHTMLTokens.h"
+#include "nsHTMLParser.h"
+#include "nsIParserDebug.h"
+#include "nsCRT.h"
+#include "prenv.h" //this is here for debug reasons...
+#include "prtypes.h" //this is here for debug reasons...
+#include "prio.h"
+#include "plstr.h"
+#include "prstrm.h"
+#include
+#include
+#include "prmem.h"
+
+#define CONTEXT_VECTOR_MAP "/vector.map"
+#define CONTEXT_VECTOR_STAT "/vector.stat"
+#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
+
+// structure to store the vector statistic information
+
+typedef struct vector_info {
+ PRInt32 references; // number of occurrences counted
+ PRInt32 count; // number of tags in the vector
+ PRBool good_vector; // is this a valid vector? (recorded once, when the entry is first created)
+ eHTMLTags* vector; // and the vector: a PR_Malloc'd copy of `count` tags made by NoteVector
+} VectorInfo;
+
+// the statistic vector table grows each time it exceeds this
+// stepping value
+#define TABLE_SIZE 128
+
+class CParserDebug : public nsIParserDebug { // concrete nsIParserDebug: verifies context vectors and keeps per-vector statistics
+public:
+
+ CParserDebug(char * aVerifyDir = 0); // with no directory given, the VERIFY_PARSER environment variable is consulted
+ ~CParserDebug();
+
+ NS_DECL_ISUPPORTS
+
+ void SetVerificationDirectory(char * verify_dir); // replaces the stored copy of the directory name
+ void SetRecordStatistics(PRBool bval);
+ PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef);
+ void DumpVectorRecord(void); // writes the statistics file and releases the table
+
+ // per-object table for storing vector statistics, and its size (the members below)
+
+private:
+ VectorInfo ** mVectorInfoArray; // table of recorded vectors; created lazily by NoteVector
+ PRInt32 mVectorCount; // number of entries in use in mVectorInfoArray
+ char * mVerificationDir; // owned copy (PL_strdup) of the output directory, or 0
+ PRBool mRecordingStatistics; // set to PR_TRUE by the constructor
+
+ PRBool DebugRecord(char * path, char * pURLRef, char * filename); // notes a bad vector in the map file; PR_TRUE if it was already there
+ void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector); // bumps or adds the statistics entry for a vector
+ void MakeVectorString(char * vector_string, VectorInfo * pInfo); // formats one entry as a text line
+};
+
+static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
+static NS_DEFINE_IID(kIDebugParserIID, NS_IPARSERDEBUG_IID);
+
+/**
+ * Factory for the parser debug object: creates a CParserDebug and
+ * hands it back through its nsIParserDebug interface.
+ *
+ * @update jevering 3/25/98
+ * @param aInstancePtrResult receives the new object; QueryInterface
+ *        has already added a reference on the caller's behalf
+ * @return NS_ERROR_NULL_POINTER for a null out-parameter,
+ *         NS_ERROR_OUT_OF_MEMORY if allocation fails, otherwise the
+ *         QueryInterface result
+ */
+
+NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult)
+{
+  // reject a null out-parameter before allocating anything
+  if (aInstancePtrResult == 0) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  CParserDebug *it = new CParserDebug();
+
+  if (it == 0) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  // when the interface lookup fails nobody holds a reference to the
+  // object, so destroy it here instead of leaking it
+  nsresult rv = it->QueryInterface(kIDebugParserIID, (void **)aInstancePtrResult);
+  if (rv != NS_OK) {
+    delete it;
+  }
+  return rv;
+}
+
+CParserDebug::CParserDebug(char * aVerifyDir)
+{
+  NS_INIT_REFCNT();
+  mVectorInfoArray = 0;
+  mVectorCount = 0;
+  mRecordingStatistics = PR_TRUE;
+
+  // An explicit directory wins; the VERIFY_PARSER environment variable
+  // is only consulted when no directory was passed in.
+  char * dirSource = aVerifyDir ? aVerifyDir : PR_GetEnv("VERIFY_PARSER");
+
+  // keep a private copy, or 0 when neither source supplied a directory
+  mVerificationDir = dirSource ? PL_strdup(dirSource) : 0;
+}
+
+CParserDebug::~CParserDebug()
+{
+  // The statistics table is otherwise only released by DumpVectorRecord;
+  // free whatever is still recorded so it is not leaked when the object
+  // is destroyed without a dump.
+  if (mVectorInfoArray) {
+    for (PRInt32 i = 0; i < mVectorCount; i++) {
+      if (mVectorInfoArray[i]) {
+        if (mVectorInfoArray[i]->vector)
+          PR_Free(mVectorInfoArray[i]->vector);
+        PR_Free(mVectorInfoArray[i]);
+      }
+    }
+    PR_Free(mVectorInfoArray);
+    mVectorInfoArray = 0;
+    mVectorCount = 0;
+  }
+
+  // release our copy of the verification directory name
+  if (mVerificationDir)
+    PL_strfree(mVerificationDir);
+}
+
+/**
+ * COM-style interface lookup for the parser debug object.
+ * Both the nsISupports IID and the nsIParserDebug IID resolve
+ * to this object's nsIParserDebug face.
+ *
+ * @update gess 4/8/98
+ * @param aIID id of the interface being asked for
+ * @param aInstancePtr receives the interface pointer (zeroed on failure)
+ * @return NS_OK, NS_NOINTERFACE or NS_ERROR_NULL_POINTER
+ */
+nsresult CParserDebug::QueryInterface(const nsIID& aIID, void** aInstancePtr)
+{
+  if (NULL == aInstancePtr) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  // anything other than the two supported IIDs is refused
+  if (!aIID.Equals(kISupportsIID) && !aIID.Equals(kIDebugParserIID)) {
+    *aInstancePtr=0;
+    return NS_NOINTERFACE;
+  }
+
+  // the caller receives its own reference
+  *aInstancePtr = (nsIParserDebug*)(this);
+  ((nsISupports*) *aInstancePtr)->AddRef();
+  return NS_OK;
+}
+
+NS_IMPL_ADDREF(CParserDebug) // macro-supplied AddRef for CParserDebug (the macro itself is defined outside this file)
+NS_IMPL_RELEASE(CParserDebug) // macro-supplied Release for CParserDebug (the macro itself is defined outside this file)
+
+void CParserDebug::SetVerificationDirectory(char * verify_dir)
+{
+  // discard the directory name we were holding, if any ...
+  if (0 != mVerificationDir)
+    PL_strfree(mVerificationDir);
+
+  // ... and keep a private copy of the new one
+  mVerificationDir = PL_strdup(verify_dir);
+}
+
+void CParserDebug::SetRecordStatistics(PRBool bval)
+{
+ mRecordingStatistics = bval; // read by Verify to decide whether NoteVector is called
+}
+
+/**
+ * This debug method records an invalid context vector and its
+ * associated URL in a simple flat-file mapping which resides in the
+ * verification directory and is named by CONTEXT_VECTOR_MAP
+ * ("/vector.map"). Each line has the form "URL vector filename".
+ *
+ * @update jevering 6/06/98
+ * @param path is the directory structure indicating the bad context vector
+ * @param pURLRef is the associated URL
+ * @param filename to record mapping to if not already recorded
+ * @return TRUE if it was already recorded (nothing is written),
+ *         FALSE otherwise (including when the map file cannot be used)
+ */
+
+PRBool CParserDebug::DebugRecord(char * path, char * pURLRef, char * filename)
+{
+  // without a verification directory there is nowhere to record
+  if (!mVerificationDir)
+    return PR_FALSE;
+
+  // treat missing pieces as empty text rather than formatting a null
+  const char * urlText  = pURLRef  ? pURLRef  : "";
+  const char * pathText = path     ? path     : "";
+  const char * fileText = filename ? filename : "";
+
+  // create the record file name from the verification directory and
+  // the default name; sized exactly so it cannot overflow
+  // (sizeof of the literal includes the terminating NUL)
+  char * recordPath = (char *)PR_Malloc(PL_strlen(mVerificationDir)+sizeof(CONTEXT_VECTOR_MAP));
+  if (!recordPath)
+    return PR_FALSE;
+  strcpy(recordPath,mVerificationDir);
+  strcat(recordPath,CONTEXT_VECTOR_MAP);
+
+  // if the file exists, only open it for read/write,
+  // otherwise create it as well
+  PRIntn oflags = PR_RDWR;
+  if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
+    oflags |= PR_CREATE_FILE;
+
+  // a newly created file needs owner read/write permission, otherwise
+  // it cannot be reopened for the next record
+  PRFileDesc * recordFile = PR_Open(recordPath,oflags,0644);
+  PR_Free(recordPath);
+  if (!recordFile)
+    return PR_FALSE;
+
+  PRBool found = PR_FALSE;
+
+  // the record line: two separating spaces + "\r\n" + NUL = 5 extra bytes
+  char * string = (char *)PR_Malloc(PL_strlen(urlText)+PL_strlen(pathText)+PL_strlen(fileText)+5);
+  if (string) {
+    sprintf(string,"%s %s %s\r\n", urlText, pathText, fileText);
+
+    // get the file size, read in the file and parse it a line at a
+    // time to see whether we have already recorded this occurrence
+    PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
+    if (iSize > 0) {
+      char * buffer = (char*)PR_Malloc(iSize);
+      // a single line can never be longer than the whole file
+      char * stringbuf = (char*)PR_Malloc(iSize+1);
+      if (buffer!=NULL && stringbuf!=NULL) {
+        // beginning of file for read
+        PR_Seek(recordFile,0,PR_SEEK_SET);
+        PRInt32 iRead = PR_Read(recordFile,buffer,iSize);
+        PRInt32 istringpos = 0;
+
+        // run through what was read looking for a matching vector
+        for (PRInt32 ibufferpos = 0; !found && ibufferpos < iRead; ibufferpos++) {
+          if (buffer[ibufferpos] == '\r') {
+            // end of line: terminate the collected text and compare
+            stringbuf[istringpos] = '\0';
+            istringpos = 0;
+            // skip the newline that follows the carriage return
+            ibufferpos++;
+
+            if (PL_strlen(stringbuf)) {
+              // chop off the filename for the compare
+              char * space = PL_strrchr(stringbuf, ' ');
+              if (space!=NULL)
+                *space = '\0';
+
+              // same URL and vector: this one is already on record
+              if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf)))
+                found = PR_TRUE;
+            }
+          }
+          else {
+            // build up the compare string
+            stringbuf[istringpos++] = buffer[ibufferpos];
+          }
+        }
+      }
+
+      // throw away the record file data (either allocation may have failed)
+      if (buffer)
+        PR_Free(buffer);
+      if (stringbuf)
+        PR_Free(stringbuf);
+    }
+
+    // if this bad vector was not recorded, add it to the record file
+    if (!found) {
+      PR_Seek(recordFile,0,PR_SEEK_END);
+      PR_Write(recordFile,string,PL_strlen(string));
+    }
+
+    PR_Free(string);
+  }
+
+  // single exit: the file is closed on every path, found or not
+  PR_Close(recordFile);
+  return found;
+}
+
+/**
+ * qsort callback for the vector table. Each argument points at a
+ * table slot (a VectorInfo*); entries are ordered by reference
+ * count, largest first.
+ */
+
+static int compare( const void *arg1, const void *arg2 )
+{
+  const VectorInfo * first  = *(VectorInfo * const *)arg1;
+  const VectorInfo * second = *(VectorInfo * const *)arg2;
+  return second->references - first->references;
+}
+
+/**
+ * This debug routine stores statistical information about a
+ * context vector. The statistics are kept in this object's vector
+ * table (mVectorInfoArray). The table is re-sorted each time it
+ * grows. If a vector has already been noted its reference count is
+ * bumped, otherwise it is added to the table. When memory cannot be
+ * obtained the vector is simply not recorded.
+ *
+ * @update jevering 6/11/98
+ * @param aTags is the tag list (vector)
+ * @param count is the size of the vector
+ * @param good_vector says whether the vector passed verification
+ *        (stored only when the entry is first created)
+ * @return
+ */
+
+void CParserDebug::NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
+{
+  // if the table doesn't exist, create it
+  if (!mVectorInfoArray) {
+    mVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
+    if (!mVectorInfoArray)
+      return;
+  }
+  else {
+    // attempt to look up the vector; only vectors of the same size
+    // are compared element-wise
+    for (PRInt32 i = 0; i < mVectorCount; i++) {
+      if (mVectorInfoArray[i]->count == count &&
+          !memcmp(mVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
+        // already known: bump the ref count and we are done
+        mVectorInfoArray[i]->references++;
+        return;
+      }
+    }
+  }
+
+  // the context vector hasn't been noted, so allocate an entry,
+  // initialize it and add it to the table
+  VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
+  if (!pVectorInfo)
+    return;
+  pVectorInfo->references = 1;
+  pVectorInfo->count = count;
+  pVectorInfo->good_vector = good_vector;
+  pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
+  if (count > 0) {
+    if (!pVectorInfo->vector) {
+      PR_Free(pVectorInfo);
+      return;
+    }
+    memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
+  }
+  mVectorInfoArray[mVectorCount++] = pVectorInfo;
+
+  // have we filled the table? grow it by another TABLE_SIZE slots and
+  // sort it. The table came from PR_Calloc and is released with
+  // PR_Free, so it is resized with PR_Realloc, not the C realloc.
+  if ((mVectorCount % TABLE_SIZE) == 0) {
+    VectorInfo ** grown = (VectorInfo**)PR_Realloc(
+      mVectorInfoArray,
+      (sizeof(VectorInfo*)*((mVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
+    if (!grown) {
+      // could not grow: take the new entry back out so the table
+      // keeps a free slot and the next insert cannot overrun it
+      mVectorCount--;
+      if (pVectorInfo->vector)
+        PR_Free(pVectorInfo->vector);
+      PR_Free(pVectorInfo);
+      return;
+    }
+    mVectorInfoArray = grown;
+    qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
+  }
+}
+
+// Formats one statistics entry as text: the reference count followed by
+// each tag of the vector written as <name>, terminated by "\r\n".
+// The caller supplies the buffer; no length is passed in, so nothing
+// here limits how much is written.
+void CParserDebug::MakeVectorString(char * vector_string, VectorInfo * pInfo)
+{
+  sprintf (vector_string, "%6d ", pInfo->references);
+
+  eHTMLTags * tag = pInfo->vector;
+  PRInt32 remaining = pInfo->count;
+  while (remaining-- > 0) {
+    PL_strcat(vector_string, "<");
+    PL_strcat(vector_string, (const char *)GetTagName(*tag));
+    PL_strcat(vector_string, ">");
+    tag++;
+  }
+
+  PL_strcat(vector_string,"\r\n");
+}
+
+/**
+ * This debug routine dumps out the vector statistics to a text
+ * file in the verification directory named by CONTEXT_VECTOR_STAT
+ * ("/vector.stat"). It lists all recorded context vectors and their
+ * occurrence counts sorted in descending order, bad vectors first.
+ * The statistics table is released afterwards, whether or not the
+ * file could be written.
+ *
+ * @update jevering 6/11/98
+ * @param
+ * @return
+ */
+
+void CParserDebug::DumpVectorRecord(void)
+{
+  // do we have a table?
+  if (!mVectorCount)
+    return;
+
+  // hopefully, a single line won't exceed 1K.
+  // NOTE(review): MakeVectorString does not bound what it writes here.
+  char vector_string[1024];
+
+  // put it in the verification directory.. else the root. The path is
+  // sized exactly (sizeof of the literal includes the terminating NUL).
+  const char * dir = mVerificationDir ? mVerificationDir : "";
+  PRFileDesc * statisticFile = 0;
+  char * path = (char *)PR_Malloc(PL_strlen(dir)+sizeof(CONTEXT_VECTOR_STAT));
+  if (path) {
+    strcpy(path,dir);
+    strcat(path,CONTEXT_VECTOR_STAT);
+
+    // open the stat file, truncating any existing one so that a
+    // shorter report does not leave old text behind; a newly created
+    // file gets owner read/write permission
+    statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR|PR_TRUNCATE,0644);
+    PR_Free(path);
+  }
+
+  if (statisticFile) {
+    {
+      // inner scope: the stream is destroyed before the descriptor
+      // is closed below, as in the original ordering
+      PRInt32 i;
+      PRofstream ps;
+      ps.attach(statisticFile);
+
+      // oh what the heck, sort it again
+      qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
+
+      // cute little header
+      sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", mVectorCount);
+      ps << vector_string;
+
+      // the URL/vector mapping lives in the map file written by DebugRecord
+      ps << "Invalid context vector summary (see " CONTEXT_VECTOR_MAP ") for mapping.\r\n";
+      ps << VECTOR_TABLE_HEADER;
+
+      // dump out the bad vectors encountered
+      for (i = 0; i < mVectorCount; i++) {
+        if (!mVectorInfoArray[i]->good_vector) {
+          MakeVectorString(vector_string, mVectorInfoArray[i]);
+          ps << vector_string;
+        }
+      }
+
+      ps << "\r\n\r\nValid context vector summary\r\n";
+      ps << VECTOR_TABLE_HEADER;
+
+      // then the good vectors
+      for (i = 0; i < mVectorCount; i++) {
+        if (mVectorInfoArray[i]->good_vector) {
+          MakeVectorString(vector_string, mVectorInfoArray[i]);
+          ps << vector_string;
+        }
+      }
+    }
+
+    // only close a descriptor that was actually opened
+    PR_Close(statisticFile);
+  }
+
+  // ok, we are done with the table: free every entry and then the
+  // table itself, even when the file could not be opened
+  for (PRInt32 j = 0; j < mVectorCount; j++) {
+    if (mVectorInfoArray[j]->vector)
+      PR_Free(mVectorInfoArray[j]->vector);
+    PR_Free(mVectorInfoArray[j]);
+  }
+  PR_Free(mVectorInfoArray);
+  mVectorInfoArray = 0;
+  mVectorCount = 0;
+}
+
+
+/**
+ * This debug method checks the given context vector against the
+ * DTD and, depending on configuration, records statistics and
+ * verification output for it.
+ *
+ * @update gess4/22/98
+ * @param aDTD is the DTD we ask (via CanContain) for verification
+ * @param aParser is the parser doing the parse (NOTE(review): its use is in a part of the body that is not legible in this copy)
+ * @param aContextStackPos is the number of items in aContextStack
+ * @param aContextStack is an array of eHTMLTags (the context vector)
+ * @param aURLRef is the URL being parsed
+ * @return TRUE unless the DTD rejects a parent/child pair of the vector
+ */
+
+PRBool CParserDebug::Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int aContextStackPos, eHTMLTags aContextStack[], char * aURLRef)
+{
+ PRBool result=PR_TRUE; // stays TRUE when there is nothing to check
+
+ //ok, now see if we understand this vector
+
+ if(0!=mVerificationDir || mRecordingStatistics) { // the check is only needed if someone consumes the result
+
+ if(aDTD && aContextStackPos>1) { // need a DTD and at least one parent/child pair
+ for (int i = 0; i < aContextStackPos-1; i++)
+ if (!aDTD->CanContain(aContextStack[i],aContextStack[i+1])) { // each entry must be able to contain the next
+ result = PR_FALSE;
+ break; // the first rejected pair decides
+ }
+ }
+ }
+
+ if (mRecordingStatistics) {
+ NoteVector(aContextStack,aContextStackPos,result); // count this vector, good or bad
+ }
+
+ if(0!=mVerificationDir) {
+ char path[2048];
+ strcpy(path,mVerificationDir); // NOTE(review): unbounded copy into a fixed 2048-byte buffer
+
+ int i=0; // NOTE(review): the statement below is truncated in this copy of the patch; text between '<' and '>' appears to have been lost
+ for(i=0;iDebugDumpSource(ps);
+ PR_Close(debugFile);
+ }
+ }
+ }
+ }
+
+ return result;
+}
diff --git a/mozilla/htmlparser/src/nsTokenizer.cpp b/mozilla/htmlparser/src/nsTokenizer.cpp
new file mode 100644
index 00000000000..dacfe58838c
--- /dev/null
+++ b/mozilla/htmlparser/src/nsTokenizer.cpp
@@ -0,0 +1,327 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL. You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation. Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation. All Rights
+ * Reserved.
+ */
+
+
+#include
+#include "nsTokenizer.h"
+#include "nsToken.h"
+#include "nsScanner.h"
+#include "nsIParserFilter.h"
+#include "nsIURL.h"
+
+// Deallocator handed to the token deque: destroys one stored CToken.
+static void TokenFreeProc(void * pToken)
+{
+  // deleting a null pointer does nothing, so no separate check is needed
+  delete (CToken*)pToken;
+}
+
+/**
+ * Construct a tokenizer whose scanner reads from a URL.
+ *
+ * @update gess 3/25/98
+ * @param aURL -- URL handed to the new CScanner
+ * @param aDelegate -- ref to delegate to be used to tokenize
+ * @param aMode -- parse mode, stored and passed to the scanner
+ * @param aIFilter -- optional parser filter (stored as given; no AddRef here)
+ * @return
+ */
+CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
+ mTokenDeque(PR_TRUE,TokenFreeProc) {
+ mParserFilter = aIFilter;
+ mDelegate=aDelegate;
+ mScanner=new CScanner(aURL,aMode); // owned; deleted in the destructor
+ mParseMode=aMode;
+}
+
+/**
+ * Construct a tokenizer whose scanner reads from a named file.
+ *
+ * @update gess 3/25/98
+ * @param aFilename -- name of file to be tokenized
+ * @param aDelegate -- ref to delegate to be used to tokenize
+ * @param aMode -- parse mode, stored and passed to the scanner
+ * @param aIFilter -- optional parser filter (stored as given; no AddRef here)
+ * @return
+ */
+CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
+ mTokenDeque(PR_TRUE,TokenFreeProc) {
+ mParserFilter = aIFilter;
+ mDelegate=aDelegate;
+ mScanner=new CScanner(aFilename,aMode); // owned; deleted in the destructor
+ mParseMode=aMode;
+}
+
+/**
+ * Construct a tokenizer whose scanner is created from the parse mode
+ * alone (no file or URL); input can be supplied later through Append.
+ *
+ * @update gess 3/25/98
+ * @param aDelegate -- ref to delegate to be used to tokenize
+ * @param aMode -- parse mode, stored and passed to the scanner
+ * @param aIFilter -- optional parser filter (stored as given; no AddRef here)
+ * @return
+ */
+CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
+ mTokenDeque(PR_TRUE,TokenFreeProc) {
+ mParserFilter = aIFilter;
+ mDelegate=aDelegate;
+ mScanner=new CScanner(aMode); // owned; deleted in the destructor
+ mParseMode=aMode;
+}
+
+/**
+ * Destructor: deletes the scanner created by the constructor and
+ * asks the delegate to destroy itself.
+ *
+ * @update gess 3/25/98
+ * @param
+ * @return
+ */
+CTokenizer::~CTokenizer() {
+ delete mScanner;
+ mDelegate->Destroy(); // NOTE(review): mDelegate is not checked for null here
+ mScanner=0;
+}
+
+
+/**
+ * Forward more input to the scanner.
+ *
+ * @update gess 5/13/98
+ * @param aBuffer -- text to hand to the scanner
+ * @return the scanner's Append result, or PR_FALSE when there is no scanner
+ */
+PRBool CTokenizer::Append(nsString& aBuffer) {
+  if(!mScanner)
+    return PR_FALSE;
+  return mScanner->Append(aBuffer);
+}
+
+
+/**
+ * Forward a raw character buffer to the scanner.
+ *
+ * @update gess 5/21/98
+ * @param aBuffer -- characters to hand to the scanner
+ * @param aLen -- length value passed along with the buffer
+ * @return the scanner's Append result, or PR_FALSE when there is no scanner
+ */
+PRBool CTokenizer::Append(const char* aBuffer, PRInt32 aLen){
+  if(!mScanner)
+    return PR_FALSE;
+  return mScanner->Append(aBuffer,aLen);
+}
+
+/**
+ * Retrieve a reference to the internal token deque.
+ *
+ * @update gess 4/20/98
+ * @return deque reference (the member itself, not a copy)
+ */
+nsDeque& CTokenizer::GetDeque(void) {
+ return mTokenDeque;
+}
+
+/**
+ * Ask the delegate to consume the next token from the scanner.
+ *
+ * @update gess 3/25/98
+ * @param aToken -- passed through to the delegate, which reports the token in it
+ * @return the delegate's result code
+ */
+PRInt32 CTokenizer::GetToken(CToken*& aToken) {
+  return mDelegate->GetToken(*mScanner,aToken);
+}
+
+/**
+ * Retrieve the number of elements in the token deque
+ *
+ * @update gess 3/25/98
+ * @param
+ * @return int containing element count (as reported by mTokenDeque.GetSize())
+ */
+PRInt32 CTokenizer::GetSize(void) {
+ return mTokenDeque.GetSize();
+}
+
+
+/**
+ * Front half of the code sandwich: called right before the
+ * tokenization process begins so that the delegate can do
+ * its initialization.
+ *
+ * @update gess 3/25/98
+ * @param aIncremental -- passed straight through to the delegate
+ * @return the delegate's answer (TRUE if it's ok to proceed)
+ */
+PRBool CTokenizer::WillTokenize(PRBool aIncremental){
+  return mDelegate->WillTokenize(aIncremental);
+}
+
+/**
+ * Pulls tokens from the delegate until GetToken reports something
+ * other than kNoError, pushing each accepted token onto the token
+ * deque. A token is accepted when the parser filter (if any) and the
+ * delegate both agree to add it.
+ *
+ * @update gess 3/25/98
+ * @param aSourceBuffer -- not read by this implementation
+ * @param appendTokens -- not read by this implementation
+ * @return kNoError on success or end of input (kEOF is mapped to
+ *         kNoError), otherwise the error reported by GetToken
+ */
+PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){
+  PRInt32 result=kNoError;
+
+  WillTokenize(PR_TRUE);
+
+  while(kNoError==result) {
+    // Start every pass with a null token. Otherwise a failing GetToken
+    // that leaves its argument untouched would make the cleanup below
+    // delete the token pushed on the previous pass, which the deque
+    // still owns.
+    CToken* theToken=0;
+    result=GetToken(theToken);
+    if(theToken && (kNoError==result)) {
+
+#ifdef VERBOSE_DEBUG
+      theToken->DebugDumpToken(cout);
+#endif
+
+      PRBool bWillAdd = PR_TRUE;
+      if (mParserFilter)
+        bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken);
+      if(bWillAdd && mDelegate->WillAddToken(*theToken)) {
+        mTokenDeque.Push(theToken);
+      }
+    }
+    else if (theToken)
+      delete theToken; // token produced together with an error: discard it
+  }
+  if(kEOF==result)
+    result=kNoError;
+  DidTokenize(PR_TRUE);
+  return result;
+}
+
+/**
+ * This is the primary control routine. It iteratively
+ * consumes tokens until an error occurs or you run out
+ * of data. The scanner position is marked before each token
+ * and rewound to that mark when GetToken reports an error.
+ *
+ * @update gess 3/25/98
+ * @param anIteration -- WillTokenize is only called when this is 0
+ * @return error code from the last GetToken call (kNoError if the
+ *         loop never ran)
+ */
+PRInt32 CTokenizer::Tokenize(int anIteration) {
+  PRInt32 result=kNoError;
+  // decided once, before the loop; nothing inside the loop changes it
+  PRBool done=(0==anIteration) ? (!WillTokenize(PR_TRUE)) : PR_FALSE;
+
+
+  while((PR_FALSE==done) && (kNoError==result)) {
+    // Start every pass with a null token. Otherwise a failing GetToken
+    // that leaves its argument untouched would make the cleanup below
+    // delete the token pushed on the previous pass, which the deque
+    // still owns.
+    CToken* theToken=0;
+    mScanner->Mark();
+    result=GetToken(theToken);
+    if(kNoError==result) {
+      if(theToken) {
+
+  #ifdef VERBOSE_DEBUG
+        theToken->DebugDumpToken(cout);
+  #endif
+
+        PRBool bWillAdd = PR_TRUE;
+        if (mParserFilter)
+          bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken);
+        if(bWillAdd && mDelegate->WillAddToken(*theToken)) {
+          mTokenDeque.Push(theToken);
+        }
+      }
+
+    }
+    else {
+      // discard a token produced together with an error and go back to
+      // the mark so the input can be scanned again later
+      if(theToken)
+        delete theToken;
+      mScanner->RewindToMark();
+    }
+  }
+  if((PR_TRUE==done) && (kInterrupted!=result))
+    DidTokenize(PR_TRUE);
+  return result;
+}
+
+/**
+ * This is the tail-end of the code sandwich for the
+ * tokenization process. It gets called once tokenization
+ * has completed.
+ *
+ * @update gess 3/25/98
+ * @param aIncremental -- passed straight through to the delegate
+ * @return the delegate's answer (TRUE if all went well)
+ */
+PRBool CTokenizer::DidTokenize(PRBool aIncremental) {
+ PRBool result=mDelegate->DidTokenize(aIncremental);
+
+#ifdef VERBOSE_DEBUG
+ DebugDumpTokens(cout); // only compiled in when VERBOSE_DEBUG is defined
+#endif
+
+ return result;
+}
+
+/**
+ * Debug aid: walks the token deque from beginning to end and
+ * asks every token to dump itself (DebugDumpToken) to the
+ * given output stream.
+ *
+ * @update gess 3/25/98
+ * @param out -- stream the tokens are dumped to
+ * @return
+ */
+void CTokenizer::DebugDumpTokens(ostream& out) {
+  nsDequeIterator cursor=mTokenDeque.Begin();
+  nsDequeIterator stop=mTokenDeque.End();
+
+  for(;cursor!=stop;) {
+    // the post-increment yields the current element and advances
+    CToken* current=(CToken*)(cursor++);
+    current->DebugDumpToken(out);
+  }
+}
+
+
+/**
+ * Debug aid: walks the token deque from beginning to end and
+ * asks every token to write its source form (DebugDumpSource)
+ * to the given output stream.
+ *
+ * @update gess 3/25/98
+ * @param out -- stream the token source is written to
+ * @return
+ */
+void CTokenizer::DebugDumpSource(ostream& out) {
+  nsDequeIterator cursor=mTokenDeque.Begin();
+  nsDequeIterator stop=mTokenDeque.End();
+
+  for(;cursor!=stop;) {
+    // the post-increment yields the current element and advances
+    CToken* current=(CToken*)(cursor++);
+    current->DebugDumpSource(out);
+  }
+
+}
+
+
+/**
+ * Self-test hook. The body is currently empty in every build:
+ * the _DEBUG section contains no statements.
+ *
+ * @update gess 3/25/98
+ * @param
+ * @return
+ */
+void CTokenizer::SelfTest(void) {
+#ifdef _DEBUG
+#endif
+}
+
+
diff --git a/mozilla/htmlparser/src/nsTokenizer.h b/mozilla/htmlparser/src/nsTokenizer.h
new file mode 100644
index 00000000000..7d54555ef6f
--- /dev/null
+++ b/mozilla/htmlparser/src/nsTokenizer.h
@@ -0,0 +1,185 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL. You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation. Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation. All Rights
+ * Reserved.
+ */
+
+/**
+ * MODULE NOTES:
+ * LAST MODS: gess 28Feb98
+ *
+ * This file declares the basic tokenizer class. The
+ * central theme of this class is to control and
+ * coordinate a tokenization process. Note that this
+ * class is grammar-neutral: this class doesn't care
+ * at all what the underlying stream consists of.
+ *
+ * The main purpose of this class is to iterate over an
+ * input stream with the help of a given scanner and a
+ * given type-specific tokenizer-Delegate.
+ *
+ * The primary method here is the Tokenize() method, which
+ * simply loops calling GetToken() until an EOF condition
+ * (or some other error) occurs.
+ *
+ */
+
+
+#ifndef TOKENIZER
+#define TOKENIZER
+
+#include "nsToken.h"
+#include "nsITokenizerDelegate.h"
+#include "nsDeque.h"
+#include
+
+class CScanner;
+class nsIURL;
+class nsIParserFilter;
+
+class CTokenizer { //coordinates a scanner and a tokenizer-delegate, collecting tokens in a deque
+ public:
+
+ CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
+ CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
+ CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
+
+ ~CTokenizer();
+
+ /**
+ * This method incrementally tokenizes as much content as
+ * it can get its hands on.
+ *
+ * @update gess 3/25/98
+ * @return PRInt32 result code -- NOTE(review): historically described as "TRUE if it's ok to proceed"; confirm actual values
+ */
+ PRInt32 Tokenize(int anIteration); //your friendly incremental version
+
+ /**
+ * Tokenize the given source buffer (appendTokens presumably selects adding to the existing tokens -- TODO confirm).
+ * @update gess 3/25/98
+ * @return PRInt32 result code -- NOTE(review): historically described as "TRUE if it's ok to proceed"; confirm actual values
+ */
+ PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens=PR_TRUE);
+
+ /**
+ * Cause the tokenizer to consume the next token, and
+ * return an error result.
+ *
+ * @update gess 3/25/98
+ * @param aToken -- ref to token pointer (presumably receives the new token or null; TODO confirm)
+ * @return PRInt32 error result
+ */
+ PRInt32 GetToken(CToken*& aToken);
+
+ /**
+ * Retrieve the number of elements in the deque
+ *
+ * @update gess 3/25/98
+ * @return int containing element count
+ */
+ PRInt32 GetSize(void);
+
+ /**
+ * Retrieve a reference to the internal token deque.
+ *
+ * @update gess 4/20/98
+ * @return deque reference
+ */
+ nsDeque& GetDeque(void);
+
+ /**
+ * Append the given string buffer (presumably to the pending input -- TODO confirm).
+ * @update gess 4/20/98
+ * @return PRBool -- presumably TRUE on success; TODO confirm
+ */
+ PRBool Append(nsString& aBuffer);
+
+ /**
+ * Append aLen chars from aBuffer (presumably to the pending input -- TODO confirm).
+ * @update gess 4/20/98
+ * @return PRBool -- presumably TRUE on success; TODO confirm
+ */
+ PRBool Append(const char* aBuffer, PRInt32 aLen);
+
+
+ /**
+ * Hand the given string buffer to the tokenizer
+ * (presumably replacing the current input -- TODO confirm).
+ * @update gess 5/13/98
+ * @param aBuffer -- the string buffer to use
+ * @return PRBool -- presumably TRUE on success; TODO confirm
+ */
+ PRBool SetBuffer(nsString& aBuffer);
+
+ /**
+ * This debug routine is used to cause the tokenizer to
+ * iterate its token list, asking each token to dump its
+ * source (via DebugDumpSource) to the given output stream.
+ *
+ * @update gess 3/25/98
+ * @param out -- stream handed to each token's DebugDumpSource()
+ * @return nothing
+ */
+ void DebugDumpSource(ostream& out);
+
+ /**
+ * This debug routine is used to cause the tokenizer to
+ * iterate its token list, asking each token to dump its
+ * contents to the given output stream.
+ *
+ * @update gess 3/25/98
+ * @param out -- stream handed to each token's DebugDumpToken()
+ * @return nothing
+ */
+ void DebugDumpTokens(ostream& out);
+
+ static void SelfTest(); //empty placeholder at present
+
+ protected:
+
+ /**
+ * This is the front-end of the code sandwich for the
+ * tokenization process. It gets called once just before
+ * tokenization begins.
+ *
+ * @update gess 3/25/98
+ * @param aIncremental tells us if tokenization is incremental
+ * @return TRUE if all went well
+ */
+ PRBool WillTokenize(PRBool aIncremental);
+
+
+ /**
+ * This is the tail-end of the code sandwich for the
+ * tokenization process. It gets called once tokenization
+ * has completed.
+ *
+ * @update gess 3/25/98
+ * @param aIncremental tells us if tokenization was incremental
+ * @return TRUE if all went well
+ */
+ PRBool DidTokenize(PRBool aIncremental);
+
+ ITokenizerDelegate* mDelegate; //receives the WillAddToken()/DidTokenize() callbacks
+ CScanner* mScanner; //input scanner (rewound with RewindToMark() when a token is discarded)
+ nsDeque mTokenDeque; //tokens accepted so far
+ eParseMode mParseMode; //presumably the aMode given at construction -- TODO confirm
+ nsIParserFilter* mParserFilter; //optional; when set, its WillAddToken() is consulted before a token is queued
+};
+
+#endif
+
+
diff --git a/mozilla/parser/htmlparser/src/CNavDTD.cpp b/mozilla/parser/htmlparser/src/CNavDTD.cpp
index ea0844b5856..4547755d1fb 100644
--- a/mozilla/parser/htmlparser/src/CNavDTD.cpp
+++ b/mozilla/parser/htmlparser/src/CNavDTD.cpp
@@ -31,6 +31,7 @@
*
*/
+#include "nsIParserDebug.h"
#include "CNavDTD.h"
#include "nsHTMLTokens.h"
#include "nsCRT.h"
@@ -43,13 +44,10 @@
#include "prtypes.h" //this is here for debug reasons...
#include "prio.h"
#include "plstr.h"
-#include "prstrm.h"
-#include
#ifdef XP_PC
#include //this is here for debug reasons...
#endif
-#include
#include "prmem.h"
@@ -63,8 +61,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
static const char* kNullToken = "Error: Null token given";
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
-static char* gVerificationOutputDir=0;
-static char* gURLRef=0;
static nsAutoString gEmpty;
static char formElementTags[]= {
@@ -234,17 +230,18 @@ static CNavTokenDeallocator gTokenKiller;
* @return
*/
CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
+ NS_INIT_REFCNT();
mParser=0;
+ mURLRef=0;
+ mParserDebug=0;
nsCRT::zero(mLeafBits,sizeof(mLeafBits));
nsCRT::zero(mContextStack,sizeof(mContextStack));
nsCRT::zero(mStyleStack,sizeof(mStyleStack));
nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
mContextStackPos=0;
mStyleStackPos=0;
- gURLRef = 0;
mHasOpenForm=PR_FALSE;
mHasOpenMap=PR_FALSE;
- gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
InitializeDefaultTokenHandlers();
}
@@ -257,11 +254,10 @@ CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
*/
CNavDTD::~CNavDTD(){
DeleteTokenHandlers();
- if (gURLRef)
- {
- PL_strfree(gURLRef);
- gURLRef = 0;
- }
+ if (mURLRef)
+ PL_strfree(mURLRef);
+ if (mParserDebug)
+ NS_RELEASE(mParserDebug);
// NS_RELEASE(mSink);
}
@@ -321,7 +317,8 @@ PRInt32 CNavDTD::HandleToken(CToken* aToken){
if(aHandler) {
result=(*aHandler)(theToken,this);
- Verify("xxx",PR_TRUE);
+ if (mParserDebug)
+ mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
}
}//if
@@ -807,7 +804,7 @@ PRBool CNavDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) const
* @param aChild -- tag enum of child container
* @return PR_TRUE if parent can contain child
*/
-PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
+PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {
PRBool result=PR_FALSE;
@@ -884,11 +881,11 @@ PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
//handle form elements (this is very much a WIP!!!)
if(0!=strchr(formElementTags,aChild)){
- return CanContainFormElement(aParent,aChild);
+ return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
}
- switch(aParent) {
+ switch((eHTMLTags)aParent) {
case eHTMLTag_a:
case eHTMLTag_acronym:
result=PRBool(0!=strchr(gTagSet1,aChild)); break;
@@ -1475,7 +1472,7 @@ eHTMLTags CNavDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
* @param aChild -- tag type of child
* @return TRUE if propagation closes; false otherwise
*/
-PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
+PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
PRBool result=PR_FALSE;
switch(aParentTag) {
@@ -1490,7 +1487,7 @@ PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTag
//otherwise, intentionally fall through...
case eHTMLTag_tr:
- if(PR_TRUE==CanContain(eHTMLTag_td,aChildTag)) {
+ if(PR_TRUE==CanContain((PRInt32)eHTMLTag_td,(PRInt32)aChildTag)) {
aVector.Append((PRUnichar)eHTMLTag_td);
result=BackwardPropagate(aVector,aParentTag,eHTMLTag_td);
// result=PR_TRUE;
@@ -2723,433 +2720,19 @@ void CNavDTD::WillInterruptParse(void){
return;
}
-
-/************************************************************************
- Here's a bunch of stuff JEvering put into the parser to do debugging.
- ************************************************************************/
-
-/**
- * This debug method records an invalid context vector and it's
- * associated context vector and URL in a simple flat file mapping which
- * resides in the verification directory and is named context.map
- *
- * @update jevering 6/06/98
- * @param path is the directory structure indicating the bad context vector
- * @param pURLRef is the associated URL
- * @param filename to record mapping to if not already recorded
- * @return TRUE if it is already record (dont rerecord)
- */
-
-#define CONTEXT_VECTOR_MAP "/vector.map"
-#define CONTEXT_VECTOR_STAT "/vector.stat"
-#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
-static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
-{
- char recordPath[2048];
- PRIntn oflags = 0;
-
- // create the record file name from the verification director
- // and the default name.
- strcpy(recordPath,gVerificationOutputDir);
- strcat(recordPath,CONTEXT_VECTOR_MAP);
-
- // create the file exists, only open for read/write
- // otherwise, create it
- if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
- oflags = PR_CREATE_FILE;
- oflags |= PR_RDWR;
-
- // open the record file
- PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
-
- if (recordFile) {
-
- char * string = (char *)PR_Malloc(2048);
- PRBool found = PR_FALSE;
-
- // vectors are stored on the format iof "URL vector filename"
- // where the vector contains the verification path and
- // the filename contains the debug source dump
- sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
-
- // get the file size, read in the file and parse it line at
- // a time to check to see if we have already recorded this
- // occurance
-
- PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
- if (iSize) {
-
- char * buffer = (char*)PR_Malloc(iSize);
- char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
- if (buffer!=NULL && string!=NULL) {
- PRInt32 ibufferpos, istringpos;
-
- // beginning of file for read
- PR_Seek(recordFile,0,PR_SEEK_SET);
- PR_Read(recordFile,buffer,iSize);
-
- // run through the file looking for a matching vector
- for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
- {
- // compare string once we have hit the end of the line
- if (buffer[ibufferpos] == '\r') {
- stringbuf[istringpos] = '\0';
- istringpos = 0;
- // skip newline and space
- ibufferpos++;
-
- if (PL_strlen(stringbuf)) {
- char * space;
- // chop of the filename for compare
- if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
- *space = '\0';
-
- // we have already recorded this one, free up, and return
- if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
- PR_Free(buffer);
- PR_Free(stringbuf);
- PR_Free(string);
- return PR_TRUE;
- }
- }
- }
-
- // build up the compare string
- else
- stringbuf[istringpos++] = buffer[ibufferpos];
- }
-
- // throw away the record file data
- PR_Free(buffer);
- PR_Free(stringbuf);
- }
- }
-
- // if this bad vector was not recorded, add it to record file
-
- if (!found) {
- PR_Seek(recordFile,0,PR_SEEK_END);
- PR_Write(recordFile,string,PL_strlen(string));
- }
-
- PR_Close(recordFile);
- PR_Free(string);
+void CNavDTD::SetURLRef(char * aURLRef){
+ if (mURLRef) {
+ PL_strfree(mURLRef);
+ mURLRef=0;
}
-
- // vector was not recorded
- return PR_FALSE;
+ if (aURLRef)
+ mURLRef = PL_strdup(aURLRef);
}
-// structure to store the vector statistic information
-
-typedef struct vector_info {
- PRInt32 references; // number of occurances counted
- PRInt32 count; // number of tags in the vector
- PRBool good_vector; // is this a valid vector?
- eHTMLTags* vector; // and the vector
-} VectorInfo;
-
-// global table for storing vector statistics and the size
-static VectorInfo ** gVectorInfoArray = 0;
-static PRInt32 gVectorCount = 0;
-
-// the statistic vector table grows each time it exceeds this
-// stepping value
-#define TABLE_SIZE 128
-
-// compare function for quick sort. Compares references and
-// sorts in decending order
-
-static int compare( const void *arg1, const void *arg2 )
+void CNavDTD::SetParserDebug(nsIParserDebug * aParserDebug)
{
- VectorInfo ** p1 = (VectorInfo**)arg1;
- VectorInfo ** p2 = (VectorInfo**)arg2;
- return (*p2)->references - (*p1)->references;
-}
-
-
-/**
- * This debug routines stores statistical information about a
- * context vector. The context vector statistics are stored in
- * a global array. The table is resorted each time it grows to
- * aid in lookup speed. If a vector has already been noted, its
- * reference count is bumped, otherwise it is added to the table
- *
- * @update jevering 6/11/98
- * @param aTags is the tag list (vector)
- * @param count is the size of the vector
- * @return
- */
-
-static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
-{
- // if the table doesn't exist, create it
- if (!gVectorInfoArray) {
- gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
- }
- else {
- // attempt to look up the vector
- for (PRInt32 i = 0; i < gVectorCount; i++)
-
- // check the vector only if they are the same size, if they
- // match then just return without doing further work
- if (gVectorInfoArray[i]->count == count)
- if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
-
- // bzzzt. and we have a winner.. bump the ref count
- gVectorInfoArray[i]->references++;
- return;
- }
- }
-
- // the context vector hasn't been noted, so allocate it and
- // initialize it one.. add it to the table
- VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
- pVectorInfo->references = 1;
- pVectorInfo->count = count;
- pVectorInfo->good_vector = good_vector;
- pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
- memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
- gVectorInfoArray[gVectorCount++] = pVectorInfo;
-
- // have we maxed out the table? grow it.. sort it.. love it.
- if ((gVectorCount % TABLE_SIZE) == 0) {
- gVectorInfoArray = (VectorInfo**)realloc(
- gVectorInfoArray,
- (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
- if (gVectorCount) {
- qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
- }
- }
-}
-
-static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
-{
- sprintf (vector_string, "%6d ", pInfo->references);
- for (PRInt32 j = 0; j < pInfo->count; j++) {
- PL_strcat(vector_string, "<");
- PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
- PL_strcat(vector_string, ">");
- }
- PL_strcat(vector_string,"\r\n");
-}
-
-/**
- * This debug routine dumps out the vector statistics to a text
- * file in the verification directory and defaults to the name
- * "vector.stat". It contains all parsed context vectors and there
- * occurance count sorted in decending order.
- *
- * @update jevering 6/11/98
- * @param
- * @return
- */
-
-extern "C" NS_EXPORT void DumpVectorRecord(void)
-{
- // do we have a table?
- if (gVectorCount) {
-
- // hopefully, they wont exceed 1K.
- char vector_string[1024];
- char path[1024];
-
- path[0] = '\0';
-
- // put in the verification directory.. else the root
- if (gVerificationOutputDir)
- strcpy(path,gVerificationOutputDir);
-
- strcat(path,CONTEXT_VECTOR_STAT);
-
- // open the stat file creaming any existing stat file
- PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
- if (statisticFile) {
-
- PRInt32 i;
- PRofstream ps;
- ps.attach(statisticFile);
-
- // oh what the heck, sort it again
- if (gVectorCount) {
- qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
- }
-
- // cute little header
- sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
- ps << vector_string;
-
- ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
- ps << VECTOR_TABLE_HEADER;
-
- // dump out the bad vectors encountered
- for (i = 0; i < gVectorCount; i++) {
- if (!gVectorInfoArray[i]->good_vector) {
- MakeVectorString(vector_string, gVectorInfoArray[i]);
- ps << vector_string;
- }
- }
-
- ps << "\r\n\r\nValid context vector summary\r\n";
- ps << VECTOR_TABLE_HEADER;
-
- // take a big vector table dump (good vectors)
- for (i = 0; i < gVectorCount; i++) {
- if (gVectorInfoArray[i]->good_vector) {
- MakeVectorString(vector_string, gVectorInfoArray[i]);
- ps << vector_string;
- }
- // free em up. they mean nothing to me now (I'm such a user)
-
- if (gVectorInfoArray[i]->vector)
- PR_Free(gVectorInfoArray[i]->vector);
- PR_Free(gVectorInfoArray[i]);
- }
- }
-
- // ok, we are done with the table, free it up as well
- PR_Free(gVectorInfoArray);
- gVectorInfoArray = 0;
- gVectorCount = 0;
- PR_Close(statisticFile);
+ if (aParserDebug) {
+ mParserDebug = aParserDebug;
+ NS_ADDREF(mParserDebug);
}
}
-
-
-/**
- * This debug method allows us to determine whether or not
- * we've seen (and can handle) the given context vector.
- *
- * @update gess4/22/98
- * @param tags is an array of eHTMLTags
- * @param count represents the number of items in the tags array
- * @param aDTD is the DTD we plan to ask for verification
- * @return TRUE if we know how to handle it, else false
- */
-PRBool CNavDTD::VerifyContextVector(void) const {
-
- PRBool result=PR_TRUE;
-
- if(0!=gVerificationOutputDir) {
-
-#ifdef XP_PC
- char path[_MAX_PATH+1];
- strcpy(path,gVerificationOutputDir);
-#endif
-
- int i=0;
- for(i=0;iDebugDumpSource(ps);
- PR_Close(debugFile);
- }
- }
- }
- }
-
- return result;
-}
diff --git a/mozilla/parser/htmlparser/src/CNavDTD.h b/mozilla/parser/htmlparser/src/CNavDTD.h
index 9883a4483e6..cdaa93d9dd9 100644
--- a/mozilla/parser/htmlparser/src/CNavDTD.h
+++ b/mozilla/parser/htmlparser/src/CNavDTD.h
@@ -42,6 +42,7 @@
class nsHTMLParser;
class nsIHTMLContentSink;
+class nsIParserDebug;
class CNavDTD : public nsIDTD {
@@ -141,11 +142,11 @@ class CNavDTD : public nsIDTD {
* of one type can contain a tag of another type.
*
* @update gess 3/25/98
- * @param aParent -- tag enum of parent container
- * @param aChild -- tag enum of child container
+ * @param aParent -- int tag of parent container
+ * @param aChild -- int tag of child container
* @return PR_TRUE if parent can contain child
*/
- virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
+ virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
/**
* This method is called to determine whether or not a tag
@@ -199,26 +200,21 @@ class CNavDTD : public nsIDTD {
*/
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
-
/**
- * This method gets called at various times by the parser
- * whenever we want to verify a valid context stack. This
- * method also gives us a hook to add debugging metrics.
- *
- * @update gess4/6/98
- * @param aStack[] array of ints (tokens)
- * @param aCount number of elements in given array
- * @return TRUE if stack is valid, else FALSE
+ *
+ * @update jevering 6/18/98
+ * @param aURLRef is the current URL reference (for debugger)
+ * @return
*/
- virtual PRBool VerifyContextVector(void) const;
+ virtual void SetURLRef(char * aURLRef);
/**
*
- * @update gess5/18/98
- * @param
+ * @update jevering 6/18/98
+ * @param aParserDebug created debug parser object
* @return
*/
- virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
+ virtual void SetParserDebug(nsIParserDebug * aParserDebug);
/**
* This method tries to design a context map (without actually
@@ -230,7 +226,7 @@ class CNavDTD : public nsIDTD {
* @param aChild -- tag type of child
* @return True if closure was achieved -- other false
*/
- virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
+ virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);
/**
* This method tries to design a context map (without actually
@@ -699,7 +695,8 @@ protected:
PRBool mHasOpenForm;
PRBool mHasOpenMap;
nsDeque mTokenDeque;
-
+ char* mURLRef;
+ nsIParserDebug* mParserDebug;
};
diff --git a/mozilla/parser/htmlparser/src/COtherDTD.cpp b/mozilla/parser/htmlparser/src/COtherDTD.cpp
index 47ecc728678..ccbf97c725a 100644
--- a/mozilla/parser/htmlparser/src/COtherDTD.cpp
+++ b/mozilla/parser/htmlparser/src/COtherDTD.cpp
@@ -31,6 +31,7 @@
*
*/
+#include "nsIParserDebug.h"
#include "COtherDTD.h"
#include "nsHTMLTokens.h"
#include "nsCRT.h"
@@ -63,8 +64,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
static const char* kNullToken = "Error: Null token given";
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
-static char* gVerificationOutputDir=0;
-static char* gURLRef=0;
static nsAutoString gEmpty;
static char formElementTags[]= {
@@ -235,17 +234,18 @@ static COtherTokenDeallocator gTokenKiller;
* @return
*/
COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
+ NS_INIT_REFCNT();
mParser=0;
+ mURLRef=0;
+ mParserDebug=0;
nsCRT::zero(mLeafBits,sizeof(mLeafBits));
nsCRT::zero(mContextStack,sizeof(mContextStack));
nsCRT::zero(mStyleStack,sizeof(mStyleStack));
nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
mContextStackPos=0;
mStyleStackPos=0;
- gURLRef = 0;
mHasOpenForm=PR_FALSE;
mHasOpenMap=PR_FALSE;
- gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
InitializeDefaultTokenHandlers();
}
@@ -258,11 +258,10 @@ COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
*/
COtherDTD::~COtherDTD(){
DeleteTokenHandlers();
- if (gURLRef)
- {
- PL_strfree(gURLRef);
- gURLRef = 0;
- }
+ if (mURLRef)
+ PL_strfree(mURLRef);
+ if (mParserDebug)
+ NS_RELEASE(mParserDebug);
// NS_RELEASE(mSink);
}
@@ -321,7 +320,8 @@ PRInt32 COtherDTD::HandleToken(CToken* aToken){
if(aHandler) {
result=(*aHandler)(theToken,this);
- Verify("xxx",PR_TRUE);
+ if (mParserDebug)
+ mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
}
}//if
@@ -807,7 +807,7 @@ PRBool COtherDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) cons
* @param aChild -- tag enum of child container
* @return PR_TRUE if parent can contain child
*/
-PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
+PRBool COtherDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {
PRBool result=PR_FALSE;
@@ -884,11 +884,11 @@ PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
//handle form elements (this is very much a WIP!!!)
if(0!=strchr(formElementTags,aChild)){
- return CanContainFormElement(aParent,aChild);
+ return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
}
- switch(aParent) {
+ switch((eHTMLTags)aParent) {
case eHTMLTag_a:
case eHTMLTag_acronym:
result=PRBool(0!=strchr(gTagSet1,aChild)); break;
@@ -1475,7 +1475,7 @@ eHTMLTags COtherDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
* @param aChild -- tag type of child
* @return TRUE if propagation closes; false otherwise
*/
-PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
+PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
PRBool result=PR_FALSE;
switch(aParentTag) {
@@ -2723,442 +2723,19 @@ void COtherDTD::WillInterruptParse(void){
return;
}
-
-/************************************************************************
- Here's a bunch of stuff JEvering put into the parser to do debugging.
- ************************************************************************/
-
-/**
- * This debug method records an invalid context vector and it's
- * associated context vector and URL in a simple flat file mapping which
- * resides in the verification directory and is named context.map
- *
- * @update jevering 6/06/98
- * @param path is the directory structure indicating the bad context vector
- * @param pURLRef is the associated URL
- * @param filename to record mapping to if not already recorded
- * @return TRUE if it is already record (dont rerecord)
- */
-
-#define CONTEXT_VECTOR_MAP "/vector.map"
-#define CONTEXT_VECTOR_STAT "/vector.stat"
-#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
-static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
-{
- char recordPath[2048];
- PRIntn oflags = 0;
-
- // create the record file name from the verification director
- // and the default name.
- strcpy(recordPath,gVerificationOutputDir);
- strcat(recordPath,CONTEXT_VECTOR_MAP);
-
- // create the file exists, only open for read/write
- // otherwise, create it
- if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
- oflags = PR_CREATE_FILE;
- oflags |= PR_RDWR;
-
- // open the record file
- PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
-
- if (recordFile) {
-
- char * string = (char *)PR_Malloc(2048);
- PRBool found = PR_FALSE;
-
- // vectors are stored on the format iof "URL vector filename"
- // where the vector contains the verification path and
- // the filename contains the debug source dump
- sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
-
- // get the file size, read in the file and parse it line at
- // a time to check to see if we have already recorded this
- // occurance
-
- PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
- if (iSize) {
-
- char * buffer = (char*)PR_Malloc(iSize);
- char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
- if (buffer!=NULL && string!=NULL) {
- PRInt32 ibufferpos, istringpos;
-
- // beginning of file for read
- PR_Seek(recordFile,0,PR_SEEK_SET);
- PR_Read(recordFile,buffer,iSize);
-
- // run through the file looking for a matching vector
- for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
- {
- // compare string once we have hit the end of the line
- if (buffer[ibufferpos] == '\r') {
- stringbuf[istringpos] = '\0';
- istringpos = 0;
- // skip newline and space
- ibufferpos++;
-
- if (PL_strlen(stringbuf)) {
- char * space;
- // chop of the filename for compare
- if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
- *space = '\0';
-
- // we have already recorded this one, free up, and return
- if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
- PR_Free(buffer);
- PR_Free(stringbuf);
- PR_Free(string);
- return PR_TRUE;
- }
- }
- }
-
- // build up the compare string
- else
- stringbuf[istringpos++] = buffer[ibufferpos];
- }
-
- // throw away the record file data
- PR_Free(buffer);
- PR_Free(stringbuf);
- }
- }
-
- // if this bad vector was not recorded, add it to record file
-
- if (!found) {
- PR_Seek(recordFile,0,PR_SEEK_END);
- PR_Write(recordFile,string,PL_strlen(string));
- }
-
- PR_Close(recordFile);
- PR_Free(string);
+void COtherDTD::SetURLRef(char * aURLRef){
+ if (mURLRef) {
+ PL_strfree(mURLRef);
+ mURLRef=0;
}
-
- // vector was not recorded
- return PR_FALSE;
+ if (aURLRef)
+ mURLRef = PL_strdup(aURLRef);
}
-// structure to store the vector statistic information
-
-typedef struct vector_info {
- PRInt32 references; // number of occurances counted
- PRInt32 count; // number of tags in the vector
- PRBool good_vector; // is this a valid vector?
- eHTMLTags* vector; // and the vector
-} VectorInfo;
-
-// global table for storing vector statistics and the size
-static VectorInfo ** gVectorInfoArray = 0;
-static PRInt32 gVectorCount = 0;
-
-// the statistic vector table grows each time it exceeds this
-// stepping value
-#define TABLE_SIZE 128
-
-// compare function for quick sort. Compares references and
-// sorts in decending order
-
-static int compare( const void *arg1, const void *arg2 )
+void COtherDTD::SetParserDebug(nsIParserDebug * aParserDebug)
{
- VectorInfo ** p1 = (VectorInfo**)arg1;
- VectorInfo ** p2 = (VectorInfo**)arg2;
- return (*p2)->references - (*p1)->references;
-}
-
-/**
- * quick sort the statistic array causing the most frequently
- * used vectors to be at the top (this makes it a little speedier
- * when looking them up)
- */
-static void SortVectorRecord(void) {
- // of course, sort it only if there is something to sort
- if (gVectorCount) {
- qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
- }
-}
-
-
-/**
- * This debug routines stores statistical information about a
- * context vector. The context vector statistics are stored in
- * a global array. The table is resorted each time it grows to
- * aid in lookup speed. If a vector has already been noted, its
- * reference count is bumped, otherwise it is added to the table
- *
- * @update jevering 6/11/98
- * @param aTags is the tag list (vector)
- * @param count is the size of the vector
- * @return
- */
-
-static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
-{
- // if the table doesn't exist, create it
- if (!gVectorInfoArray) {
- gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
- }
- else {
- // attempt to look up the vector
- for (PRInt32 i = 0; i < gVectorCount; i++)
-
- // check the vector only if they are the same size, if they
- // match then just return without doing further work
- if (gVectorInfoArray[i]->count == count)
- if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
-
- // bzzzt. and we have a winner.. bump the ref count
- gVectorInfoArray[i]->references++;
- return;
- }
- }
-
- // the context vector hasn't been noted, so allocate it and
- // initialize it one.. add it to the table
- VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
- pVectorInfo->references = 1;
- pVectorInfo->count = count;
- pVectorInfo->good_vector = good_vector;
- pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
- memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
- gVectorInfoArray[gVectorCount++] = pVectorInfo;
-
- // have we maxed out the table? grow it.. sort it.. love it.
- if ((gVectorCount % TABLE_SIZE) == 0) {
- gVectorInfoArray = (VectorInfo**)realloc(
- gVectorInfoArray,
- (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
- SortVectorRecord();
- }
-}
-
-static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
-{
- sprintf (vector_string, "%6d ", pInfo->references);
- for (PRInt32 j = 0; j < pInfo->count; j++) {
- PL_strcat(vector_string, "<");
- PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
- PL_strcat(vector_string, ">");
- }
- PL_strcat(vector_string,"\r\n");
-}
-
-/**
- * This debug routine dumps out the vector statistics to a text
- * file in the verification directory and defaults to the name
- * "vector.stat". It contains all parsed context vectors and there
- * occurance count sorted in decending order.
- *
- * @update jevering 6/11/98
- * @param
- * @return
- */
-
-extern "C" NS_EXPORT void DumpVectorRecord_other(void)
-{
- // do we have a table?
- if (gVectorCount) {
-
- // hopefully, they wont exceed 1K.
- char vector_string[1024];
- char path[1024];
-
- path[0] = '\0';
-
- // put in the verification directory.. else the root
- if (gVerificationOutputDir)
- strcpy(path,gVerificationOutputDir);
-
- strcat(path,CONTEXT_VECTOR_STAT);
-
- // open the stat file creaming any existing stat file
- PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
- if (statisticFile) {
-
- PRInt32 i;
- PRofstream ps;
- ps.attach(statisticFile);
-
- // oh what the heck, sort it again
- SortVectorRecord();
-
- // cute little header
- sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
- ps << vector_string;
-
- ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
- ps << VECTOR_TABLE_HEADER;
-
- // dump out the bad vectors encountered
- for (i = 0; i < gVectorCount; i++) {
- if (!gVectorInfoArray[i]->good_vector) {
- MakeVectorString(vector_string, gVectorInfoArray[i]);
- ps << vector_string;
- }
- }
-
- ps << "\r\n\r\nValid context vector summary\r\n";
- ps << VECTOR_TABLE_HEADER;
-
- // take a big vector table dump (good vectors)
- for (i = 0; i < gVectorCount; i++) {
- if (gVectorInfoArray[i]->good_vector) {
- MakeVectorString(vector_string, gVectorInfoArray[i]);
- ps << vector_string;
- }
- // free em up. they mean nothing to me now (I'm such a user)
-
- if (gVectorInfoArray[i]->vector)
- PR_Free(gVectorInfoArray[i]->vector);
- PR_Free(gVectorInfoArray[i]);
- }
- }
-
- // ok, we are done with the table, free it up as well
- PR_Free(gVectorInfoArray);
- gVectorInfoArray = 0;
- gVectorCount = 0;
- PR_Close(statisticFile);
+ if (aParserDebug) {
+ mParserDebug = aParserDebug;
+ NS_ADDREF(mParserDebug);
}
}
-
-
-/**
- * This debug method allows us to determine whether or not
- * we've seen (and can handle) the given context vector.
- *
- * @update gess4/22/98
- * @param tags is an array of eHTMLTags
- * @param count represents the number of items in the tags array
- * @param aDTD is the DTD we plan to ask for verification
- * @return TRUE if we know how to handle it, else false
- */
-PRBool COtherDTD::VerifyContextVector(void) const {
-
- PRBool result=PR_TRUE;
-
- if(0!=gVerificationOutputDir) {
-
-#ifdef XP_PC
- char path[_MAX_PATH+1];
- strcpy(path,gVerificationOutputDir);
-#endif
-
- int i=0;
- for(i=0;iDebugDumpSource(ps);
- PR_Close(debugFile);
- }
- }
- }
- }
-
- return result;
-}
-
diff --git a/mozilla/parser/htmlparser/src/COtherDTD.h b/mozilla/parser/htmlparser/src/COtherDTD.h
index 21d2346eafd..7a74866cf13 100644
--- a/mozilla/parser/htmlparser/src/COtherDTD.h
+++ b/mozilla/parser/htmlparser/src/COtherDTD.h
@@ -34,7 +34,6 @@
#include "nsDeque.h"
-
#define NS_IOtherHTML_DTD_IID \
{0x8a5e89c0, 0xd16d, 0x11d1, \
{0x80, 0x22, 0x00, 0x60, 0x8, 0x14, 0x98, 0x89}}
@@ -42,6 +41,7 @@
class nsIParser;
class nsIHTMLContentSink;
+class nsIParserDebug;
class COtherDTD : public nsIDTD {
@@ -143,11 +143,11 @@ class COtherDTD : public nsIDTD {
* of one type can contain a tag of another type.
*
* @update gess 3/25/98
- * @param aParent -- tag enum of parent container
- * @param aChild -- tag enum of child container
+ * @param aParent -- int tag of parent container
+ * @param aChild -- int tag of child container
* @return PR_TRUE if parent can contain child
*/
- virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
+ virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
/**
* This method is called to determine whether or not a tag
@@ -201,26 +201,21 @@ class COtherDTD : public nsIDTD {
*/
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
-
/**
- * This method gets called at various times by the parser
- * whenever we want to verify a valid context stack. This
- * method also gives us a hook to add debugging metrics.
- *
- * @update gess4/6/98
- * @param aStack[] array of ints (tokens)
- * @param aCount number of elements in given array
- * @return TRUE if stack is valid, else FALSE
+ *
+ * @update jevering 6/18/98
+ * @param aURLRef is the current URL reference (for debugger)
+ * @return
*/
- virtual PRBool VerifyContextVector(void) const;
+ virtual void SetURLRef(char * aURLRef);
/**
*
- * @update gess5/18/98
- * @param
+ * @update jevering 6/18/98
+ * @param aParserDebug created debug parser object
* @return
*/
- virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
+ virtual void SetParserDebug(nsIParserDebug * aParserDebug);
/**
* This method tries to design a context map (without actually
@@ -232,7 +227,7 @@ class COtherDTD : public nsIDTD {
* @param aChild -- tag type of child
* @return True if closure was achieved -- other false
*/
- virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
+ virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);
/**
* This method tries to design a context map (without actually
@@ -701,7 +696,8 @@ protected:
PRBool mHasOpenForm;
PRBool mHasOpenMap;
nsDeque mTokenDeque;
-
+ char* mURLRef;
+ nsIParserDebug* mParserDebug;
};
diff --git a/mozilla/parser/htmlparser/src/Makefile b/mozilla/parser/htmlparser/src/Makefile
index 37a8ca552ce..243a5c4aa98 100644
--- a/mozilla/parser/htmlparser/src/Makefile
+++ b/mozilla/parser/htmlparser/src/Makefile
@@ -24,6 +24,7 @@ DEFINES = -D_IMPL_NS_HTMLPARS
CPPSRCS = \
nsHTMLContentSink.cpp \
nsParserNode.cpp \
+ nsParserDebug.cpp \
nsScanner.cpp \
nsToken.cpp \
nsTokenHandler.cpp \
@@ -41,6 +42,8 @@ EXPORTS = \
nsHTMLTokens.h \
nsIParserNode.h \
nsIParser.h \
+ nsIParserDebug.h \
+ nsIParserFilter.h \
nsToken.h \
$(NULL)
diff --git a/mozilla/parser/htmlparser/src/makefile.win b/mozilla/parser/htmlparser/src/makefile.win
index 6f479747931..940db9e04f2 100644
--- a/mozilla/parser/htmlparser/src/makefile.win
+++ b/mozilla/parser/htmlparser/src/makefile.win
@@ -31,7 +31,8 @@ CPPSRCS=nsHTMLContentSink.cpp \
nsHTMLParser.cpp prstrm.cpp
EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \
- nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h
+ nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h \
+ nsIParserDebug.h nsIParserFilter.h
CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
.\$(OBJDIR)\CNavDTD.obj \
@@ -39,7 +40,7 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
.\$(OBJDIR)\nsHTMLParser.obj \
.\$(OBJDIR)\nsHTMLTokens.obj .\$(OBJDIR)\nsParserNode.obj \
.\$(OBJDIR)\nsScanner.obj .\$(OBJDIR)\nsToken.obj \
- .\$(OBJDIR)\nsTokenHandler.obj \
+ .\$(OBJDIR)\nsTokenHandler.obj .\$(OBJDIR)\nsParserDebug.obj \
.\$(OBJDIR)\prstrm.obj
LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib
diff --git a/mozilla/parser/htmlparser/src/nsHTMLParser.cpp b/mozilla/parser/htmlparser/src/nsHTMLParser.cpp
index 99b1d81a340..d361ceed444 100644
--- a/mozilla/parser/htmlparser/src/nsHTMLParser.cpp
+++ b/mozilla/parser/htmlparser/src/nsHTMLParser.cpp
@@ -30,6 +30,7 @@
#include "prstrm.h"
#include
#include "nsIInputStream.h"
+#include "nsIParserFilter.h"
/* UNCOMMENT THIS IF STUFF STOPS WORKING...
#ifdef XP_PC
@@ -47,10 +48,7 @@ static const char* kNullURL = "Error: Null URL given";
static const char* kNullFilename= "Error: Null filename given";
static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
-static char* gVerificationOutputDir=0;
-static PRBool gRecordingStatistics=PR_TRUE;
static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream
-static char* gURLRef=0;
//#define DEBUG_SAVE_SOURCE_DOC 1
#ifdef DEBUG_SAVE_SOURCE_DOC
@@ -58,17 +56,6 @@ fstream* gTempStream=0;
#endif
-extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir)
-{
- gVerificationOutputDir = verify_dir;
-}
-
-
-extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval)
-{
- gRecordingStatistics = bval;
-}
-
/**
* This method is defined in nsIParser. It is used to
* cause the COM-like construction of an nsHTMLParser.
@@ -107,6 +94,7 @@ CTokenDeallocator gTokenKiller;
*/
nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
NS_INIT_REFCNT();
+ mParserFilter = nsnull;
mListener = nsnull;
mTransferBuffer=0;
mSink=0;
@@ -125,11 +113,6 @@ nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
* @return
*/
nsHTMLParser::~nsHTMLParser() {
- if (gURLRef)
- {
- PL_strfree(gURLRef);
- gURLRef = 0;
- }
NS_IF_RELEASE(mListener);
if(mTransferBuffer)
delete [] mTransferBuffer;
@@ -139,7 +122,7 @@ nsHTMLParser::~nsHTMLParser() {
delete mCurrentPos;
mCurrentPos=0;
if(mDTD)
- delete mDTD;
+ NS_RELEASE(mDTD);
mDTD=0;
if(mScanner)
delete mScanner;
@@ -185,6 +168,18 @@ nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr)
return NS_OK;
}
+/**
+ * Installs aFilter as the filter for this parser, replacing any filter
+ * that was installed before. Passing null removes the current filter.
+ *
+ * The parser adds its own reference to aFilter and drops the reference
+ * it held on the previous filter.
+ *
+ * @param  aFilter -- new filter to install, or null for none
+ * @return the previous filter pointer as left by NS_RELEASE; the parser
+ *         no longer holds a reference to it, so callers must not assume
+ *         it is still alive
+ */
+nsIParserFilter * nsHTMLParser::SetParserFilter(nsIParserFilter * aFilter)
+{
+  nsIParserFilter* old=mParserFilter;
+
+  // Reference the new filter before releasing the old one, so that
+  // re-installing the filter we already hold cannot destroy it in between.
+  if(aFilter) {
+    NS_ADDREF(aFilter);
+  }
+
+  // Always store aFilter, even when it is null; otherwise mParserFilter
+  // would keep pointing at the filter released just below.
+  mParserFilter=aFilter;
+
+  if(old) {
+    NS_RELEASE(old);
+  }
+  return old;
+}
+
/**
* This method gets called in order to set the content
* sink for this parser to dump nodes to.
@@ -217,6 +212,10 @@ void nsHTMLParser::SetDTD(nsIDTD* aDTD) {
mDTD=aDTD;
}
+// Returns the DTD pointer currently stored in mDTD. No reference is added
+// on behalf of the caller.
+nsIDTD * nsHTMLParser::GetDTD(void) {
+ return mDTD;
+}
+
/**
*
*
@@ -287,7 +286,7 @@ eParseMode DetermineParseMode() {
* @param
* @return
*/
-nsIDTD* GetDTD(eParseMode aMode) {
+nsIDTD* NewDTD(eParseMode aMode) {
nsIDTD* aDTD=0;
switch(aMode) {
case eParseMode_navigator:
@@ -297,6 +296,8 @@ nsIDTD* GetDTD(eParseMode aMode) {
default:
break;
}
+ if (aDTD)
+ aDTD->AddRef();
return aDTD;
}
@@ -364,11 +365,6 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
nsString theBuffer;
const int kLocalBufSize=10;
- if (gURLRef)
- PL_strfree(gURLRef);
- if (aFilename)
- gURLRef = PL_strdup(aFilename);
-
mMajorIteration=-1;
mMinorIteration=-1;
@@ -417,22 +413,20 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
* @param aFilename -- const char* containing file to be parsed.
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
*/
-PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
+PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug){
NS_PRECONDITION(0!=aFilename,kNullFilename);
PRInt32 status=kBadFilename;
mIncremental=aIncremental;
if(aFilename) {
- if (gURLRef)
- PL_strfree(gURLRef);
- gURLRef = PL_strdup(aFilename);
-
mParseMode=DetermineParseMode();
- mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
+ mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
if(mDTD) {
mDTD->SetParser(this);
mDTD->SetContentSink(mSink);
+ mDTD->SetURLRef((char *)aFilename);
+ mDTD->SetParserDebug(aDebug);
}
WillBuildModel();
@@ -466,7 +460,8 @@ PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
*/
PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
nsIStreamListener* aListener,
- PRBool aIncremental) {
+ PRBool aIncremental,
+ nsIParserDebug * aDebug) {
NS_PRECONDITION(0!=aURL,kNullURL);
PRInt32 status=kBadURL;
@@ -485,19 +480,13 @@ PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
if(aURL) {
- if (gURLRef)
- {
- PL_strfree(gURLRef);
- gURLRef = 0;
- }
- if (aURL->GetSpec())
- gURLRef = PL_strdup(aURL->GetSpec());
-
mParseMode=DetermineParseMode();
- mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
+ mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
if(mDTD) {
mDTD->SetParser(this);
mDTD->SetContentSink(mSink);
+ mDTD->SetURLRef((char *)aURL->GetSpec());
+ mDTD->SetParserDebug(aDebug);
}
WillBuildModel();
@@ -689,6 +678,9 @@ nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length)
}
#endif
+ if (mParserFilter)
+ mParserFilter->RawBuffer(mTransferBuffer, &len);
+
mScanner->Append(&mTransferBuffer[offset],len);
} //if
diff --git a/mozilla/parser/htmlparser/src/nsHTMLParser.h b/mozilla/parser/htmlparser/src/nsHTMLParser.h
index 1b1c8470aa3..d8638dd3b32 100644
--- a/mozilla/parser/htmlparser/src/nsHTMLParser.h
+++ b/mozilla/parser/htmlparser/src/nsHTMLParser.h
@@ -73,6 +73,8 @@ class nsIHTMLContentSink;
class nsIURL;
class nsIDTD;
class CScanner;
+class nsIParserFilter;
+class nsIParserDebug;
class nsHTMLParser : public nsIParser, public nsIStreamListener {
@@ -103,8 +105,12 @@ friend class CTokenHandler;
* @return old sink, or NULL
*/
virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);
+
+ virtual nsIParserFilter* SetParserFilter(nsIParserFilter* aFilter);
virtual void SetDTD(nsIDTD* aDTD);
+
+ virtual nsIDTD * GetDTD(void);
/**
*
@@ -124,7 +130,8 @@ friend class CTokenHandler;
*/
virtual PRInt32 Parse(nsIURL* aURL,
nsIStreamListener* aListener,
- PRBool aIncremental=PR_TRUE);
+ PRBool aIncremental=PR_TRUE,
+ nsIParserDebug * aDebug = 0);
/**
* Cause parser to parse input from given file in given mode
@@ -133,7 +140,7 @@ friend class CTokenHandler;
* @param aMode is the desired parser mode (Nav, other, etc.)
* @return TRUE if all went well -- FALSE otherwise
*/
- virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental);
+ virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0);
/**
* @update gess5/11/98
@@ -290,7 +297,8 @@ protected:
//*********************************************
nsIStreamListener* mListener;
- nsIContentSink* mSink;
+ nsIContentSink* mSink;
+ nsIParserFilter* mParserFilter;
nsDequeIterator* mCurrentPos;
nsDequeIterator* mMarkPos;
diff --git a/mozilla/parser/htmlparser/src/nsIDTD.h b/mozilla/parser/htmlparser/src/nsIDTD.h
index 926ee926943..77ae7f9868c 100644
--- a/mozilla/parser/htmlparser/src/nsIDTD.h
+++ b/mozilla/parser/htmlparser/src/nsIDTD.h
@@ -37,6 +37,7 @@
class nsIParser;
class CToken;
class nsIContentSink;
+class nsIParserDebug;
class nsIDTD : public nsISupports {
@@ -115,12 +116,28 @@ class nsIDTD : public nsISupports {
/**
*
- * @update gess5/18/98
- * @param
+ * @update jevering 6/18/98
+ * @param aURLRef is the current URL reference (for debugger)
* @return
*/
- virtual PRInt32 Verify(const char* anOutputDir,PRBool aRecordStats)=0;
+ virtual void SetURLRef(char * aURLRef) = 0;
+ /**
+ *
+ * @update jevering 6/18/98
+ * @param aParent parent tag
+ * @param aChild child tag
+ * @return PR_TRUE if valid container
+ */
+ virtual PRBool CanContain(PRInt32 aParent, PRInt32 aChild) = 0;
+
+ /**
+ *
+ * @update jevering 6/18/98
+ * @param aParserDebug created debug parser object
+ * @return
+ */
+ virtual void SetParserDebug(nsIParserDebug * aParserDebug) = 0;
};
diff --git a/mozilla/parser/htmlparser/src/nsIParser.h b/mozilla/parser/htmlparser/src/nsIParser.h
index f509e1c909e..a4ffd45bc27 100644
--- a/mozilla/parser/htmlparser/src/nsIParser.h
+++ b/mozilla/parser/htmlparser/src/nsIParser.h
@@ -34,6 +34,7 @@ class nsString;
class CToken;
class nsIURL;
class nsIDTD;
+class nsIParserDebug;
/**
* This class defines the iparser interface. This XPCOM
@@ -60,9 +61,10 @@ class nsIParser : public nsISupports {
virtual PRInt32 Parse(nsIURL* aURL,
nsIStreamListener* aListener,
- PRBool aIncremental=PR_TRUE) = 0;
+ PRBool aIncremental=PR_TRUE,
+ nsIParserDebug * aDebug = 0) = 0;
- virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0;
+ virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0)=0;
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;
diff --git a/mozilla/parser/htmlparser/src/nsIParserDebug.h b/mozilla/parser/htmlparser/src/nsIParserDebug.h
new file mode 100644
index 00000000000..a1e45204291
--- /dev/null
+++ b/mozilla/parser/htmlparser/src/nsIParserDebug.h
@@ -0,0 +1,57 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL. You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation. Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation. All Rights
+ * Reserved.
+ */
+
+/**
+ * MODULE NOTES:
+ * @update gess 4/8/98
+ *
+ *
+ */
+
+#ifndef NS_IPARSERDEBUG__
+#define NS_IPARSERDEBUG__
+
+#include "nsISupports.h"
+#include "nsHTMLTokens.h"
+#include "prtypes.h"
+
+#define NS_IPARSERDEBUG_IID \
+ {0x7b68c220, 0x0685, 0x11d2, \
+ {0xa4, 0xb5, 0x00, 0x80, 0x5f, 0x2a, 0x0e, 0xd2}}
+
+
+class nsIDTD;
+class nsHTMLParser;
+
+// Interface to the parser debugging helper. A DTD that has been handed an
+// nsIParserDebug calls Verify() on it while handling tokens.
+class nsIParserDebug : public nsISupports {
+
+public:
+
+ // Sets the directory used for verification output.
+ virtual void SetVerificationDirectory(char * verify_dir) = 0;
+
+ // Turns the recording of context vector statistics on or off.
+ virtual void SetRecordStatistics(PRBool bval) = 0;
+
+ // Checks the context stack (aContextStack, ContextStackPos entries) against
+ // aDTD; returns PR_FALSE when the vector is found to be invalid.
+ virtual PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef) = 0;
+
+ // Writes out the context vector statistics gathered so far.
+ virtual void DumpVectorRecord(void) = 0;
+
+};
+
+extern NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult);
+
+#endif /* NS_IPARSERDEBUG__ */
\ No newline at end of file
diff --git a/mozilla/parser/htmlparser/src/nsIParserFilter.h b/mozilla/parser/htmlparser/src/nsIParserFilter.h
new file mode 100644
index 00000000000..8b257515efc
--- /dev/null
+++ b/mozilla/parser/htmlparser/src/nsIParserFilter.h
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL. You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation. Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation. All Rights
+ * Reserved.
+ */
+
+/**
+ * MODULE NOTES:
+ * @update jevering 6/17/98
+ *
+ */
+
+#ifndef IPARSERFILTER
+#define IPARSERFILTER
+
+#include "nsISupports.h"
+
+class CToken;
+
+#define NS_IPARSERFILTER_IID \
+ {0x14d6ff0, 0x0610, 0x11d2, \
+ {0x8c, 0x3f, 0x00, 0x80, 0x5f, 0x8a, 0x1d, 0xb7}}
+
+
+// Interface for an object that can be installed on the parser with
+// SetParserFilter() to observe the data the parser is about to process.
+class nsIParserFilter : public nsISupports {
+ public:
+
+ // Called by the parser with its transfer buffer and a pointer to the
+ // buffer length, before that data is appended to the scanner.
+ NS_IMETHOD RawBuffer(char * buffer, int * buffer_length) = 0;
+
+ // NOTE(review): presumably called before a token is queued -- no caller
+ // is visible here; confirm against the tokenizer.
+ NS_IMETHOD WillAddToken(CToken & token) = 0;
+
+ // Arguments are not settled yet (see the inline note).
+ NS_IMETHOD ProcessTokens( /* dont know what goes here yet */ void ) = 0;
+};
+
+extern nsresult NS_NewParserFilter(nsIParserFilter** aInstancePtrResult);
+
+
+#endif
+
diff --git a/mozilla/parser/htmlparser/src/nsParserDebug.cpp b/mozilla/parser/htmlparser/src/nsParserDebug.cpp
new file mode 100644
index 00000000000..e998807e309
--- /dev/null
+++ b/mozilla/parser/htmlparser/src/nsParserDebug.cpp
@@ -0,0 +1,534 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL. You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation. Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation. All Rights
+ * Reserved.
+ */
+
+/**
+ * MODULE NOTES:
+ * @update jevering 06/18/98
+ *
+ * This file contains the parser debugger object which aids in
+ * walking links and reporting statistic information, reporting
+ * bad vectors.
+ */
+
+#include "CNavDTD.h"
+#include "nsHTMLTokens.h"
+#include "nsHTMLParser.h"
+#include "nsIParserDebug.h"
+#include "nsCRT.h"
+#include "prenv.h" //this is here for debug reasons...
+#include "prtypes.h" //this is here for debug reasons...
+#include "prio.h"
+#include "plstr.h"
+#include "prstrm.h"
+#include
+#include
+#include "prmem.h"
+
+#define CONTEXT_VECTOR_MAP "/vector.map"
+#define CONTEXT_VECTOR_STAT "/vector.stat"
+#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
+
+// Bookkeeping kept for each distinct context vector that has been noted.
+
+struct vector_info {
+  PRInt32    references;   // how many times this vector has been seen
+  PRInt32    count;        // number of tags stored in 'vector'
+  PRBool     good_vector;  // PR_TRUE when the vector was judged valid
+  eHTMLTags* vector;       // heap-allocated copy of the tags
+};
+typedef struct vector_info VectorInfo;
+
+// the statistic vector table grows each time it exceeds this
+// stepping value
+#define TABLE_SIZE 128
+
+// Concrete nsIParserDebug: verifies context vectors against a DTD, keeps
+// per-vector occurrence statistics, and writes its reports under the
+// verification directory.
+class CParserDebug : public nsIParserDebug {
+public:
+
+ // aVerifyDir, when given, is copied; otherwise the VERIFY_PARSER
+ // environment variable is consulted.
+ CParserDebug(char * aVerifyDir = 0);
+ ~CParserDebug();
+
+ NS_DECL_ISUPPORTS
+
+ void SetVerificationDirectory(char * verify_dir);
+ void SetRecordStatistics(PRBool bval);
+ PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef);
+ void DumpVectorRecord(void);
+
+ // table for storing vector statistics, and its size
+
+private:
+ VectorInfo ** mVectorInfoArray;   // table of noted vectors (grown in TABLE_SIZE steps)
+ PRInt32 mVectorCount;             // number of entries used in mVectorInfoArray
+ char * mVerificationDir;          // owned copy of the output directory, or 0
+ PRBool mRecordingStatistics;      // PR_TRUE while Verify() should note vectors
+
+ // Appends "URL vector filename" to the map file unless already present.
+ PRBool DebugRecord(char * path, char * pURLRef, char * filename);
+ // Counts one occurrence of the given vector in the statistics table.
+ void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector);
+ // Formats one table entry as a text line into vector_string.
+ void MakeVectorString(char * vector_string, VectorInfo * pInfo);
+};
+
+static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
+static NS_DEFINE_IID(kIDebugParserIID, NS_IPARSERDEBUG_IID);
+
+/**
+ *  Factory for the parser debug object. Creates a CParserDebug and
+ *  hands it back through its nsIParserDebug interface, holding one
+ *  reference for the caller.
+ *
+ *  @update  jevering 3/25/98
+ *  @param   aInstancePtrResult -- receives the new nsIParserDebug
+ *  @return  NS_OK on success, NS_ERROR_OUT_OF_MEMORY if the object could
+ *           not be allocated, otherwise the QueryInterface error
+ */
+
+NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult)
+{
+  CParserDebug *it = new CParserDebug();
+
+  if (it == 0) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  nsresult result = it->QueryInterface(kIDebugParserIID, (void **)aInstancePtrResult);
+
+  // QueryInterface only adds a reference on success; on failure nobody
+  // owns the object, so destroy it here instead of leaking it.
+  if (NS_OK != result) {
+    delete it;
+  }
+  return result;
+}
+
+// Builds a debugger with an empty statistics table and statistics
+// recording switched on. The verification directory is taken from
+// aVerifyDir when supplied, otherwise from the VERIFY_PARSER environment
+// variable; it stays 0 when neither is available.
+CParserDebug::CParserDebug(char * aVerifyDir)
+{
+  NS_INIT_REFCNT();
+  mRecordingStatistics = PR_TRUE;
+  mVectorCount = 0;
+  mVectorInfoArray = 0;
+
+  // an explicit directory wins over the environment
+  char * dirSource = aVerifyDir ? aVerifyDir : PR_GetEnv("VERIFY_PARSER");
+  mVerificationDir = dirSource ? PL_strdup(dirSource) : 0;
+}
+
+// Releases everything the debugger owns: the statistics table (in case
+// DumpVectorRecord was never called to flush and free it) and the copy of
+// the verification directory.
+CParserDebug::~CParserDebug()
+{
+  if (mVectorInfoArray) {
+    for (PRInt32 index = 0; index < mVectorCount; index++) {
+      VectorInfo * entry = mVectorInfoArray[index];
+      if (entry) {
+        if (entry->vector)
+          PR_Free(entry->vector);
+        PR_Free(entry);
+      }
+    }
+    PR_Free(mVectorInfoArray);
+    mVectorInfoArray = 0;
+    mVectorCount = 0;
+  }
+
+  if (mVerificationDir)
+    PL_strfree(mVerificationDir);
+}
+
+/**
+ *  COM-style interface discovery for the parser debug object. Both
+ *  nsISupports and nsIParserDebug are answered with the same pointer.
+ *
+ *  @update  gess 4/8/98
+ *  @param   aIID -- id of the interface being asked for
+ *  @param   aInstancePtr -- receives the interface pointer (set to 0
+ *           when the interface is not supported)
+ *  @return  NS_OK, NS_NOINTERFACE, or NS_ERROR_NULL_POINTER
+ */
+nsresult CParserDebug::QueryInterface(const nsIID& aIID, void** aInstancePtr)
+{
+  if (NULL == aInstancePtr) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  // nsISupports and nsIParserDebug are the only interfaces we expose
+  if (!(aIID.Equals(kISupportsIID) || aIID.Equals(kIDebugParserIID))) {
+    *aInstancePtr=0;
+    return NS_NOINTERFACE;
+  }
+
+  nsIParserDebug* self = (nsIParserDebug*)(this);
+  *aInstancePtr = self;
+  self->AddRef();   // the caller receives its own reference
+  return NS_OK;
+}
+
+NS_IMPL_ADDREF(CParserDebug)
+NS_IMPL_RELEASE(CParserDebug)
+
+// Replaces the verification directory with a private copy of verify_dir.
+void CParserDebug::SetVerificationDirectory(char * verify_dir)
+{
+  // Duplicate first and free afterwards: if the caller passes the string
+  // we already hold, freeing it first would make the copy read freed memory.
+  char * replacement = PL_strdup(verify_dir);
+
+  if (mVerificationDir) {
+    PL_strfree(mVerificationDir);
+  }
+  mVerificationDir = replacement;
+}
+
+// Enables (PR_TRUE) or disables (PR_FALSE) the noting of context vectors
+// in the statistics table during Verify().
+void CParserDebug::SetRecordStatistics(PRBool bval)
+{
+ mRecordingStatistics = bval;
+}
+
+/**
+ * This debug method records an invalid context vector and its
+ * associated URL in a simple flat file mapping which resides in the
+ * verification directory (file name CONTEXT_VECTOR_MAP).
+ *
+ * Each line of the map has the form "URL vector filename\r\n". An entry
+ * counts as already recorded when an existing line, with its trailing
+ * filename removed, is a prefix of the new entry.
+ *
+ * @update	jevering 6/06/98
+ * @param   path is the directory structure indicating the bad context vector
+ * @param   pURLRef is the associated URL
+ * @param   filename to record mapping to if not already recorded
+ * @return  TRUE if it is already recorded (don't re-record); FALSE if it
+ *          was not, including when the map file could not be used
+ */
+
+PRBool CParserDebug::DebugRecord(char * path, char * pURLRef, char * filename)
+{
+  char    recordPath[2048];
+  PRIntn  oflags = 0;
+  PRBool  found = PR_FALSE;
+  PRInt32 ibufferpos, istringpos;
+
+  // nothing sensible can be recorded without all of the pieces
+  if (!mVerificationDir || !path || !pURLRef || !filename)
+    return PR_FALSE;
+
+  // create the record file name from the verification directory
+  // and the default name, refusing names that do not fit the buffer
+  if (PL_strlen(mVerificationDir) + PL_strlen(CONTEXT_VECTOR_MAP) >= sizeof(recordPath))
+    return PR_FALSE;
+  strcpy(recordPath,mVerificationDir);
+  strcat(recordPath,CONTEXT_VECTOR_MAP);
+
+  // if the file exists, only open for read/write
+  // otherwise, create it
+  if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
+    oflags = PR_CREATE_FILE;
+  oflags |= PR_RDWR;
+
+  // open the record file
+  PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
+  if (!recordFile)
+    return PR_FALSE;
+
+  // vectors are stored in the format of "URL vector filename"
+  // where the vector contains the verification path and
+  // the filename contains the debug source dump. The buffer is sized
+  // from the actual strings: two separating spaces plus "\r\n" plus NUL.
+  PRUint32 entryLength = PL_strlen(pURLRef) + PL_strlen(path) + PL_strlen(filename) + 4;
+  char * string = (char *)PR_Malloc(entryLength + 1);
+  if (!string) {
+    PR_Close(recordFile);
+    return PR_FALSE;
+  }
+  sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
+
+  // get the file size, read in the file and parse it a line at
+  // a time to check whether we have already recorded this
+  // occurrence
+  PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
+  if (iSize > 0) {
+
+    char * buffer = (char*)PR_Malloc(iSize);
+    // a single line can never be longer than the whole file
+    char * stringbuf = (char*)PR_Malloc(iSize + 1);
+
+    if (buffer!=NULL && stringbuf!=NULL) {
+
+      // beginning of file for read
+      PR_Seek(recordFile,0,PR_SEEK_SET);
+      PRInt32 iRead = PR_Read(recordFile,buffer,iSize);
+
+      // run through what was actually read looking for a matching vector
+      for (ibufferpos = istringpos = 0; ibufferpos < iRead && !found; ibufferpos++)
+      {
+        // compare string once we have hit the end of the line
+        if (buffer[ibufferpos] == '\r') {
+          stringbuf[istringpos] = '\0';
+          istringpos = 0;
+          // skip the newline that follows the carriage return
+          ibufferpos++;
+
+          if (PL_strlen(stringbuf)) {
+            char * space;
+            // chop off the filename for compare
+            if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
+              *space = '\0';
+
+            // we have already recorded this one
+            if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf)))
+              found = PR_TRUE;
+          }
+        }
+
+        // build up the compare string
+        else
+          stringbuf[istringpos++] = buffer[ibufferpos];
+      }
+    }
+
+    // throw away the record file data (either allocation may have failed)
+    if (buffer)
+      PR_Free(buffer);
+    if (stringbuf)
+      PR_Free(stringbuf);
+  }
+
+  // if this bad vector was not recorded, add it to record file
+  if (!found) {
+    PR_Seek(recordFile,0,PR_SEEK_END);
+    PR_Write(recordFile,string,PL_strlen(string));
+  }
+
+  // single exit: the file and the entry are released on every path
+  PR_Close(recordFile);
+  PR_Free(string);
+
+  return found;
+}
+
+/**
+ * qsort callback for the vector table. Each argument points at a
+ * VectorInfo* slot; entries with more references sort first
+ * (descending order).
+ */
+
+static int compare( const void *arg1, const void *arg2 )
+{
+  const VectorInfo * first  = *(VectorInfo * const *)arg1;
+  const VectorInfo * second = *(VectorInfo * const *)arg2;
+  return second->references - first->references;
+}
+
+/**
+ * This debug routine stores statistical information about a
+ * context vector. The context vector statistics are stored in
+ * the mVectorInfoArray table. The table is re-sorted each time it
+ * grows to aid in lookup speed. If a vector has already been noted, its
+ * reference count is bumped, otherwise it is added to the table.
+ *
+ * Statistics are best-effort: if memory cannot be obtained the vector is
+ * simply not noted.
+ *
+ * @update	jevering 6/11/98
+ * @param   aTags is the tag list (vector)
+ * @param   count is the size of the vector
+ * @param   good_vector tells whether the vector was found to be valid;
+ *          only stored when the vector is first added
+ * @return
+ */
+
+void CParserDebug::NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
+{
+  PRInt32 i;
+
+  // if the table doesn't exist, create it
+  if (!mVectorInfoArray) {
+    mVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
+    if (!mVectorInfoArray)
+      return;
+  }
+  else {
+    // attempt to look up the vector
+    for (i = 0; i < mVectorCount; i++) {
+      VectorInfo * known = mVectorInfoArray[i];
+
+      // check the vector only if they are the same size, if they
+      // match then just bump the ref count and return
+      if (known->count == count &&
+          (count <= 0 || !memcmp(known->vector, aTags, sizeof(eHTMLTags)*count))) {
+        known->references++;
+        return;
+      }
+    }
+  }
+
+  // the context vector hasn't been noted, so allocate it and
+  // initialize it.. add it to the table
+  VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
+  if (!pVectorInfo)
+    return;
+  pVectorInfo->references = 1;
+  pVectorInfo->count = count;
+  pVectorInfo->good_vector = good_vector;
+  pVectorInfo->vector = 0;
+  if (count > 0) {
+    pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
+    if (!pVectorInfo->vector) {
+      PR_Free(pVectorInfo);
+      return;
+    }
+    memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
+  }
+  mVectorInfoArray[mVectorCount++] = pVectorInfo;
+
+  // have we maxed out the table? grow it.. sort it.. love it.
+  if ((mVectorCount % TABLE_SIZE) == 0) {
+    // The table comes from PR_Calloc and is released with PR_Free, so it
+    // must be grown with the matching NSPR call rather than libc realloc.
+    VectorInfo ** grown = (VectorInfo**)PR_Realloc(
+      mVectorInfoArray,
+      (sizeof(VectorInfo*)*((mVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
+
+    if (!grown) {
+      // no room for another entry later on: take back the one just added
+      // so the table never runs past its allocation
+      mVectorCount--;
+      if (pVectorInfo->vector)
+        PR_Free(pVectorInfo->vector);
+      PR_Free(pVectorInfo);
+      return;
+    }
+
+    mVectorInfoArray = grown;
+    qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
+  }
+}
+
+// Formats one table entry as "<references> <tag><tag>...\r\n" into
+// vector_string. No bounds checking is done: the caller must supply a
+// buffer large enough for the whole line.
+void CParserDebug::MakeVectorString(char * vector_string, VectorInfo * pInfo)
+{
+  // the occurrence count comes first...
+  sprintf (vector_string, "%6d ", pInfo->references);
+
+  // ...followed by every tag of the vector in angle brackets
+  PRInt32 tagIndex = 0;
+  while (tagIndex < pInfo->count) {
+    PL_strcat(vector_string, "<");
+    PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[tagIndex]));
+    PL_strcat(vector_string, ">");
+    tagIndex++;
+  }
+
+  PL_strcat(vector_string,"\r\n");
+}
+
+/**
+ * This debug routine dumps out the vector statistics to a text
+ * file in the verification directory and defaults to the name
+ * "vector.stat". It contains all parsed context vectors and their
+ * occurrence count sorted in descending order.
+ *
+ * The statistics table is emptied and freed afterwards, whether or not
+ * the file could be written.
+ *
+ * @update	jevering 6/11/98
+ * @param
+ * @return
+ */
+
+void CParserDebug::DumpVectorRecord(void)
+{
+  PRInt32 i;
+  PRFileDesc * statisticFile = 0;
+
+  // do we have a table?
+  if (mVectorCount) {
+
+    // hopefully, they wont exceed 1K.
+    char vector_string[1024];
+    char path[1024];
+
+    path[0] = '\0';
+
+    // put in the verification directory.. else the root. Skip the report
+    // altogether if the resulting name would not fit in the buffer.
+    PRUint32 dirLength = mVerificationDir ? PL_strlen(mVerificationDir) : 0;
+    if (dirLength + PL_strlen(CONTEXT_VECTOR_STAT) < sizeof(path)) {
+      if (mVerificationDir)
+        strcpy(path,mVerificationDir);
+      strcat(path,CONTEXT_VECTOR_STAT);
+
+      // open the stat file creaming any existing stat file; PR_TRUNCATE
+      // makes sure nothing of a longer, older report is left behind
+      statisticFile = PR_Open(path,PR_CREATE_FILE|PR_TRUNCATE|PR_RDWR,0);
+    }
+
+    if (statisticFile) {
+
+      // ps lives only inside this scope, so it is destroyed before the
+      // file descriptor is closed further down
+      PRofstream ps;
+      ps.attach(statisticFile);
+
+      // oh what the heck, sort it again
+      qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
+
+      // cute little header
+      sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", mVectorCount);
+      ps << vector_string;
+
+      // NOTE(review): the mapping file is written under CONTEXT_VECTOR_MAP,
+      // yet this line points at CONTEXT_VECTOR_STAT -- confirm which was meant.
+      ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
+      ps << VECTOR_TABLE_HEADER;
+
+      // dump out the bad vectors encountered
+      for (i = 0; i < mVectorCount; i++) {
+        if (!mVectorInfoArray[i]->good_vector) {
+          MakeVectorString(vector_string, mVectorInfoArray[i]);
+          ps << vector_string;
+        }
+      }
+
+      ps << "\r\n\r\nValid context vector summary\r\n";
+      ps << VECTOR_TABLE_HEADER;
+
+      // take a big vector table dump (good vectors)
+      for (i = 0; i < mVectorCount; i++) {
+        if (mVectorInfoArray[i]->good_vector) {
+          MakeVectorString(vector_string, mVectorInfoArray[i]);
+          ps << vector_string;
+        }
+      }
+    }
+  }
+
+  // free em up. they mean nothing to me now (I'm such a user).
+  // This happens even when the file could not be opened, so the entries
+  // are never orphaned when the table itself is freed.
+  if (mVectorInfoArray) {
+    for (i = 0; i < mVectorCount; i++) {
+      if (mVectorInfoArray[i]->vector)
+        PR_Free(mVectorInfoArray[i]->vector);
+      PR_Free(mVectorInfoArray[i]);
+    }
+
+    // ok, we are done with the table, free it up as well
+    PR_Free(mVectorInfoArray);
+  }
+  mVectorInfoArray = 0;
+  mVectorCount = 0;
+
+  // only close what was actually opened
+  if (statisticFile)
+    PR_Close(statisticFile);
+}
+
+
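+/**
+ * This debug method allows us to determine whether or not
+ * we've seen (and can handle) the given context vector, by asking the
+ * DTD whether each tag on the stack can contain the tag that follows it.
+ *
+ * @update	gess4/22/98
+ * @param   aDTD is the DTD we plan to ask for verification
+ * @param   aParser is the parser the DTD is working for
+ * @param   aContextStackPos represents the number of items in aContextStack
+ * @param   aContextStack is an array of eHTMLTags (the context vector)
+ * @param   aURLRef is the URL reference of the document being parsed
+ * @return  TRUE if we know how to handle it, else false
+ */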
+
+PRBool CParserDebug::Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int aContextStackPos, eHTMLTags aContextStack[], char * aURLRef)
+{
+ PRBool result=PR_TRUE;
+
+ //ok, now see if we understand this vector
+
+ if(0!=mVerificationDir || mRecordingStatistics) {
+
+ if(aDTD && aContextStackPos>1) {
+ for (int i = 0; i < aContextStackPos-1; i++)
+ if (!aDTD->CanContain(aContextStack[i],aContextStack[i+1])) {
+ result = PR_FALSE;
+ break;
+ }
+ }
+ }
+
+ if (mRecordingStatistics) {
+ NoteVector(aContextStack,aContextStackPos,result);
+ }
+
+ if(0!=mVerificationDir) {
+ char path[2048];
+ strcpy(path,mVerificationDir);
+
+ int i=0;
+ for(i=0;iDebugDumpSource(ps);
+ PR_Close(debugFile);
+ }
+ }
+ }
+ }
+
+ return result;
+}
diff --git a/mozilla/parser/htmlparser/src/nsTokenizer.cpp b/mozilla/parser/htmlparser/src/nsTokenizer.cpp
new file mode 100644
index 00000000000..dacfe58838c
--- /dev/null
+++ b/mozilla/parser/htmlparser/src/nsTokenizer.cpp
@@ -0,0 +1,327 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL. You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation. Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation. All Rights
+ * Reserved.
+ */
+
+
+#include
+#include "nsTokenizer.h"
+#include "nsToken.h"
+#include "nsScanner.h"
+#include "nsIParserFilter.h"
+#include "nsIURL.h"
+
+static void TokenFreeProc(void * pToken)
+{
+  // Free routine handed to mTokenDeque: destroys the CToken in one slot, if any.
+  CToken * doomed = (CToken*)pToken;
+  if (0 != doomed)
+    delete doomed;
+}
+
+/**
+ * Construct a tokenizer whose scanner is created from a URL.
+ *
+ * @update gess 3/25/98
+ * @param aURL -- handed, together with aMode, to the new CScanner
+ * @param aDelegate -- delegate that token requests are forwarded to
+ * @param aMode, aIFilter -- parse mode and token filter, stored as given
+ */
+CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
+  mTokenDeque(PR_TRUE,TokenFreeProc) {
+  mParseMode=aMode;
+  mDelegate=aDelegate;
+  mParserFilter=aIFilter;
+  mScanner=new CScanner(aURL,aMode);
+}
+
+/**
+ * Construct a tokenizer whose scanner is created from a filename.
+ *
+ * @update gess 3/25/98
+ * @param aFilename -- handed, together with aMode, to the new CScanner
+ * @param aDelegate -- delegate that token requests are forwarded to
+ * @param aMode, aIFilter -- parse mode and token filter, stored as given
+ */
+CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
+  mTokenDeque(PR_TRUE,TokenFreeProc) {
+  mParseMode=aMode;
+  mDelegate=aDelegate;
+  mParserFilter=aIFilter;
+  mScanner=new CScanner(aFilename,aMode);
+}
+
+/**
+ * Construct a tokenizer without a URL or filename; the scanner is
+ * created from aMode alone.
+ * @update gess 3/25/98
+ * @param aDelegate -- delegate that token requests are forwarded to
+ * @param aMode -- parse mode, stored and passed to the new CScanner
+ * @param aIFilter -- token filter, stored as given
+ */
+CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
+  mTokenDeque(PR_TRUE,TokenFreeProc) {
+  mParseMode=aMode;
+  mDelegate=aDelegate;
+  mParserFilter=aIFilter;
+  mScanner=new CScanner(aMode);
+}
+
+/**
+ * Destructor: deletes the scanner allocated by the constructor and asks
+ * the delegate to Destroy() itself. A null delegate is tolerated.
+ * @update gess 3/25/98
+ */
+CTokenizer::~CTokenizer() {
+  delete mScanner;
+  mScanner=0;
+  if(mDelegate)   // the constructors accept any pointer, including null
+    mDelegate->Destroy();
+  mDelegate=0;
+}
+
+
+/**
+ * Hand a string buffer to the scanner via CScanner::Append().
+ *
+ * @update gess 5/13/98
+ * @param aBuffer -- forwarded unchanged to the scanner
+ * @return the scanner's Append() result, or PR_FALSE if mScanner is null
+ */
+PRBool CTokenizer::Append(nsString& aBuffer) {
+  if(!mScanner)
+    return PR_FALSE;
+  return mScanner->Append(aBuffer);
+}
+
+
+/**
+ * Hand a raw character buffer to the scanner via CScanner::Append().
+ *
+ * @update gess 5/21/98
+ * @param aBuffer, aLen -- forwarded unchanged to the scanner
+ * @return the scanner's Append() result, or PR_FALSE if mScanner is null
+ */
+PRBool CTokenizer::Append(const char* aBuffer, PRInt32 aLen){
+  if(!mScanner)
+    return PR_FALSE;
+  return mScanner->Append(aBuffer,aLen);
+}
+
+/**
+ * Expose the deque that Tokenize() pushes accepted tokens onto.
+ *
+ * @update gess 4/20/98
+ * @return reference to mTokenDeque (not a copy)
+ */
+nsDeque& CTokenizer::GetDeque(void) {
+  return this->mTokenDeque;
+}
+
+/**
+ * Ask the delegate for the next token, reading from this
+ * tokenizer's scanner.
+ *
+ * @update gess 3/25/98
+ * @param aToken -- token pointer, passed by reference to the delegate
+ * @return the PRInt32 code returned by mDelegate->GetToken()
+ * NOTE: neither mDelegate nor mScanner is null-checked here.
+ */
+PRInt32 CTokenizer::GetToken(CToken*& aToken) {
+  return mDelegate->GetToken(*mScanner,aToken);
+}
+
+/**
+ * Report how many entries mTokenDeque currently holds.
+ *
+ * @update gess 3/25/98
+ * @return value of mTokenDeque.GetSize()
+ */
+PRInt32 CTokenizer::GetSize(void) {
+  PRInt32 theCount=mTokenDeque.GetSize();
+  return theCount;
+}
+
+
+/**
+ * Front half of the code sandwich: invoked before tokenizing
+ * starts so that the delegate gets a chance to initialize.
+ * The work is delegated entirely to mDelegate->WillTokenize().
+ *
+ * @update gess 3/25/98
+ * @param aIncremental -- forwarded untouched to the delegate
+ * @return the delegate's answer; Tokenize(int) treats a false
+ *         answer on iteration 0 as "do not run the loop", while
+ *         Tokenize(nsString&,PRBool) ignores the answer
+ */
+PRBool CTokenizer::WillTokenize(PRBool aIncremental){
+  return mDelegate->WillTokenize(aIncremental);
+}
+
+/**
+ * Pull tokens from the delegate until GetToken() returns something other
+ * than kNoError, pushing every accepted token onto mTokenDeque.
+ * aSourceBuffer and appendTokens are not referenced by this implementation.
+ * @update gess 3/25/98
+ * @return kNoError if the loop ended with kEOF, otherwise the error code
+ */
+PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){
+  PRInt32 result=kNoError;
+  WillTokenize(PR_TRUE);
+  while(kNoError==result) {
+    // Null the pointer on every pass: if GetToken() fails without writing
+    // aToken we must not delete a token the deque already owns.
+    CToken* theToken=0;
+    result=GetToken(theToken);
+    if(theToken && (kNoError==result)) {
+#ifdef VERBOSE_DEBUG
+      theToken->DebugDumpToken(cout);
+#endif
+      PRBool bWillAdd = PR_TRUE;
+      if (mParserFilter)
+        bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken);
+      if(bWillAdd && mDelegate->WillAddToken(*theToken)) {
+        mTokenDeque.Push(theToken);
+      }  // NOTE(review): a rejected token is neither pushed nor deleted here
+    }
+    else if (theToken)
+      delete theToken;
+  }
+  if(kEOF==result)
+    result=kNoError;
+  DidTokenize(PR_TRUE);
+  return result;
+}
+
+/**
+ * Incremental version: consume tokens until GetToken() returns something
+ * other than kNoError. The scanner is Mark()ed before each attempt and
+ * rewound to that mark when the attempt fails.
+ * @update gess 3/25/98
+ * @param anIteration -- WillTokenize() is called only when this is 0
+ * @return error code (kNoError only if the loop was never entered)
+ */
+PRInt32 CTokenizer::Tokenize(int anIteration) {
+  PRInt32 result=kNoError;
+  PRBool done=(0==anIteration) ? (!WillTokenize(PR_TRUE)) : PR_FALSE;
+
+  while((PR_FALSE==done) && (kNoError==result)) {
+    // Null the pointer on every pass: if GetToken() fails without writing
+    // aToken we must not delete a token the deque already owns.
+    CToken* theToken=0;
+    mScanner->Mark();
+    result=GetToken(theToken);
+    if(kNoError==result) {
+      if(theToken) {
+#ifdef VERBOSE_DEBUG
+        theToken->DebugDumpToken(cout);
+#endif
+        PRBool bWillAdd = PR_TRUE;
+        if (mParserFilter)
+          bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken);
+        if(bWillAdd && mDelegate->WillAddToken(*theToken)) {
+          mTokenDeque.Push(theToken);
+        }
+      }
+    }
+    else {
+      if(theToken)
+        delete theToken;
+      mScanner->RewindToMark();
+    }
+  }
+  // NOTE(review): done never changes inside the loop, so DidTokenize() only
+  // runs when WillTokenize() refused on iteration 0 -- confirm this is intended.
+  if((PR_TRUE==done) && (kInterrupted!=result))
+    DidTokenize(PR_TRUE);
+  return result;
+}
+
+/**
+ * Back half of the code sandwich: called when a tokenizing
+ * run is over. The delegate's DidTokenize() is invoked first;
+ * VERBOSE_DEBUG builds then dump the token deque to cout.
+ *
+ * @update gess 3/25/98
+ * @param aIncremental -- forwarded untouched to the delegate
+ * @return whatever mDelegate->DidTokenize() returned
+ */
+PRBool CTokenizer::DidTokenize(PRBool aIncremental) {
+  const PRBool theVerdict=mDelegate->DidTokenize(aIncremental);
+
+#ifdef VERBOSE_DEBUG
+  DebugDumpTokens(cout);
+#endif
+
+  return theVerdict;
+}
+
+/**
+ * Debug aid: walk mTokenDeque from Begin() to End() and
+ * call DebugDumpToken() on every entry, so that each token
+ * writes its contents to the given output stream.
+ *
+ * @update gess 3/25/98
+ * @param out -- stream handed to each token
+ * @return nothing (void)
+ */
+void CTokenizer::DebugDumpTokens(ostream& out) {
+  nsDequeIterator theCursor=mTokenDeque.Begin();
+  nsDequeIterator theStop=mTokenDeque.End();
+
+  for(;theCursor!=theStop;) {
+    // entries come back from the iterator untyped, hence the cast
+    CToken* theToken=(CToken*)(theCursor++);
+    theToken->DebugDumpToken(out);
+  }
+}
+
+
+/**
+ * Debug aid: walk mTokenDeque from Begin() to End() and
+ * call DebugDumpSource() on every entry, so that each token
+ * writes its source form to the given output stream.
+ *
+ * @update gess 3/25/98
+ * @param out -- stream handed to each token
+ * @return nothing (void)
+ */
+void CTokenizer::DebugDumpSource(ostream& out) {
+  nsDequeIterator theCursor=mTokenDeque.Begin();
+  nsDequeIterator theStop=mTokenDeque.End();
+
+  // every entry is asked to write itself to |out|
+  while(theCursor!=theStop) {
+    CToken* const theToken=(CToken*)(theCursor++);
+    theToken->DebugDumpSource(out);
+  }
+
+}
+
+
+/**
+ * Self-test hook. The body is currently empty: the _DEBUG
+ * section below contains no statements.
+ * @update gess 3/25/98
+ * @param none
+ * @return nothing (void)
+ */
+void CTokenizer::SelfTest(void) {
+#ifdef _DEBUG
+#endif
+}
+
+
diff --git a/mozilla/parser/htmlparser/src/nsTokenizer.h b/mozilla/parser/htmlparser/src/nsTokenizer.h
new file mode 100644
index 00000000000..7d54555ef6f
--- /dev/null
+++ b/mozilla/parser/htmlparser/src/nsTokenizer.h
@@ -0,0 +1,185 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.0 (the "NPL"); you may not use this file except in
+ * compliance with the NPL. You may obtain a copy of the NPL at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the NPL is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
+ * for the specific language governing rights and limitations under the
+ * NPL.
+ *
+ * The Initial Developer of this code under the NPL is Netscape
+ * Communications Corporation. Portions created by Netscape are
+ * Copyright (C) 1998 Netscape Communications Corporation. All Rights
+ * Reserved.
+ */
+
+/**
+ * MODULE NOTES:
+ * LAST MODS: gess 28Feb98
+ *
+ * This file declares the basic tokenizer class. The
+ * central theme of this class is to control and
+ * coordinate a tokenization process. Note that this
+ * class is grammar-neutral: this class doesn't care
+ * at all what the underlying stream consists of.
+ *
+ * The main purpose of this class is to iterate over an
+ * input stream with the help of a given scanner and a
+ * given type-specific tokenizer-Delegate.
+ *
+ * The primary method here is the tokenize() method, which
+ * simply loops calling getToken() until an EOF condition
+ * (or some other error) occurs.
+ *
+ */
+
+
+#ifndef TOKENIZER
+#define TOKENIZER
+
+#include "nsToken.h"
+#include "nsITokenizerDelegate.h"
+#include "nsDeque.h"
+#include
+
+class CScanner;
+class nsIURL;
+class nsIParserFilter;
+
+class CTokenizer {
+ public:
+ // Three constructors; in nsTokenizer.cpp they differ only in the arguments given to the new CScanner.
+ CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
+ CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
+ CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
+ // NOTE(review): no copy constructor/operator= is declared although the destructor deletes mScanner.
+ ~CTokenizer(); // deletes mScanner and calls mDelegate->Destroy()
+
+ /**
+ * This method incrementally tokenizes as much content as
+ * it can get its hands on.
+ * @param anIteration -- when 0, WillTokenize() is called first
+ * @update gess 3/25/98
+ * @return PRInt32 error code: kNoError or the code that ended the loop
+ */
+ PRInt32 Tokenize(int anIteration); //your friendly incremental version
+
+ /**
+ * Tokenizes until GetToken() fails; both arguments are currently unused.
+ * @update gess 3/25/98
+ * @return kNoError (kEOF is mapped to it) or the error code
+ */
+ PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens=PR_TRUE);
+
+ /**
+ * Cause the tokenizer to consume the next token, and
+ * return an error result.
+ *
+ * @update gess 3/25/98
+ * @param aToken -- ref to the token pointer passed on to the delegate
+ * @return error code reported by the delegate
+ */
+ PRInt32 GetToken(CToken*& aToken);
+
+ /**
+ * Retrieve the number of elements in the deque
+ *
+ * @update gess 3/25/98
+ * @return int containing element count
+ */
+ PRInt32 GetSize(void);
+
+ /**
+ * Retrieve a reference to the internal token deque.
+ *
+ * @update gess 4/20/98
+ * @return deque reference
+ */
+ nsDeque& GetDeque(void);
+
+ /**
+ * Forwards aBuffer to the scanner's Append().
+ * @update gess 4/20/98
+ * @return scanner's result, or PR_FALSE when there is no scanner
+ */
+ PRBool Append(nsString& aBuffer);
+
+ /**
+ * Forwards aBuffer and aLen to the scanner's Append().
+ * @update gess 4/20/98
+ * @return scanner's result, or PR_FALSE when there is no scanner
+ */
+ PRBool Append(const char* aBuffer, PRInt32 aLen);
+
+
+ /**
+ * NOTE(review): nsTokenizer.cpp defines no SetBuffer();
+ * confirm this declaration is still wanted.
+ * @update gess 5/13/98
+ * @param aBuffer -- purpose not documented
+ * @return not documented
+ */
+ PRBool SetBuffer(nsString& aBuffer);
+
+ /**
+ * This debug routine is used to cause the tokenizer to
+ * iterate its token list, asking each token to dump its
+ * source (CToken::DebugDumpSource) to the given output stream.
+ *
+ * @update gess 3/25/98
+ * @param out -- stream handed to each token
+ * @return nothing (void)
+ */
+ void DebugDumpSource(ostream& out);
+
+ /**
+ * This debug routine is used to cause the tokenizer to
+ * iterate its token list, asking each token to dump its
+ * contents (CToken::DebugDumpToken) to the given output stream.
+ *
+ * @update gess 3/25/98
+ * @param out -- stream handed to each token
+ * @return nothing (void)
+ */
+ void DebugDumpTokens(ostream& out);
+
+ static void SelfTest();
+
+ protected:
+
+ /**
+ * This is the front-end of the code sandwich for the
+ * tokenization process. It gets called once just before
+ * tokenization begins.
+ *
+ * @update gess 3/25/98
+ * @param aIncremental tells us if tokenization is incremental
+ * @return TRUE if all went well
+ */
+ PRBool WillTokenize(PRBool aIncremental);
+
+
+ /**
+ * This is the tail-end of the code sandwich for the
+ * tokenization process. It gets called once tokenization
+ * has completed.
+ *
+ * @update gess 3/25/98
+ * @param aIncremental tells us if tokenization was incremental
+ * @return TRUE if all went well
+ */
+ PRBool DidTokenize(PRBool aIncremental);
+
+ ITokenizerDelegate* mDelegate; // token requests are forwarded here; Destroy()ed by the destructor
+ CScanner* mScanner; // allocated in every constructor, deleted in the destructor
+ nsDeque mTokenDeque; // accepted tokens; constructed with TokenFreeProc as its free routine
+ eParseMode mParseMode; // copy of the aMode constructor argument
+ nsIParserFilter* mParserFilter; // optional; asked WillAddToken() before a token is pushed
+};
+
+#endif
+
+