diff --git a/mozilla/htmlparser/src/CNavDTD.cpp b/mozilla/htmlparser/src/CNavDTD.cpp index ea0844b5856..4547755d1fb 100644 --- a/mozilla/htmlparser/src/CNavDTD.cpp +++ b/mozilla/htmlparser/src/CNavDTD.cpp @@ -31,6 +31,7 @@ * */ +#include "nsIParserDebug.h" #include "CNavDTD.h" #include "nsHTMLTokens.h" #include "nsCRT.h" @@ -43,13 +44,10 @@ #include "prtypes.h" //this is here for debug reasons... #include "prio.h" #include "plstr.h" -#include "prstrm.h" -#include #ifdef XP_PC #include //this is here for debug reasons... #endif -#include #include "prmem.h" @@ -63,8 +61,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer"; static const char* kNullToken = "Error: Null token given"; static const char* kInvalidTagStackPos = "Error: invalid tag stack position"; -static char* gVerificationOutputDir=0; -static char* gURLRef=0; static nsAutoString gEmpty; static char formElementTags[]= { @@ -234,17 +230,18 @@ static CNavTokenDeallocator gTokenKiller; * @return */ CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) { + NS_INIT_REFCNT(); mParser=0; + mURLRef=0; + mParserDebug=0; nsCRT::zero(mLeafBits,sizeof(mLeafBits)); nsCRT::zero(mContextStack,sizeof(mContextStack)); nsCRT::zero(mStyleStack,sizeof(mStyleStack)); nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers)); mContextStackPos=0; mStyleStackPos=0; - gURLRef = 0; mHasOpenForm=PR_FALSE; mHasOpenMap=PR_FALSE; - gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER"); InitializeDefaultTokenHandlers(); } @@ -257,11 +254,10 @@ CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) { */ CNavDTD::~CNavDTD(){ DeleteTokenHandlers(); - if (gURLRef) - { - PL_strfree(gURLRef); - gURLRef = 0; - } + if (mURLRef) + PL_strfree(mURLRef); + if (mParserDebug) + NS_RELEASE(mParserDebug); // NS_RELEASE(mSink); } @@ -321,7 +317,8 @@ PRInt32 CNavDTD::HandleToken(CToken* aToken){ if(aHandler) { result=(*aHandler)(theToken,this); - Verify("xxx",PR_TRUE); + if (mParserDebug) + mParserDebug->Verify(this, mParser, 
mContextStackPos, mContextStack, mURLRef); } }//if @@ -807,7 +804,7 @@ PRBool CNavDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) const * @param aChild -- tag enum of child container * @return PR_TRUE if parent can contain child */ -PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const { +PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) { PRBool result=PR_FALSE; @@ -884,11 +881,11 @@ PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const { //handle form elements (this is very much a WIP!!!) if(0!=strchr(formElementTags,aChild)){ - return CanContainFormElement(aParent,aChild); + return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild); } - switch(aParent) { + switch((eHTMLTags)aParent) { case eHTMLTag_a: case eHTMLTag_acronym: result=PRBool(0!=strchr(gTagSet1,aChild)); break; @@ -1475,7 +1472,7 @@ eHTMLTags CNavDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{ * @param aChild -- tag type of child * @return TRUE if propagation closes; false otherwise */ -PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const { +PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) { PRBool result=PR_FALSE; switch(aParentTag) { @@ -1490,7 +1487,7 @@ PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTag //otherwise, intentionally fall through... case eHTMLTag_tr: - if(PR_TRUE==CanContain(eHTMLTag_td,aChildTag)) { + if(PR_TRUE==CanContain((PRInt32)eHTMLTag_td,(PRInt32)aChildTag)) { aVector.Append((PRUnichar)eHTMLTag_td); result=BackwardPropagate(aVector,aParentTag,eHTMLTag_td); // result=PR_TRUE; @@ -2723,433 +2720,19 @@ void CNavDTD::WillInterruptParse(void){ return; } - -/************************************************************************ - Here's a bunch of stuff JEvering put into the parser to do debugging. 
- ************************************************************************/ - -/** - * This debug method records an invalid context vector and it's - * associated context vector and URL in a simple flat file mapping which - * resides in the verification directory and is named context.map - * - * @update jevering 6/06/98 - * @param path is the directory structure indicating the bad context vector - * @param pURLRef is the associated URL - * @param filename to record mapping to if not already recorded - * @return TRUE if it is already record (dont rerecord) - */ - -#define CONTEXT_VECTOR_MAP "/vector.map" -#define CONTEXT_VECTOR_STAT "/vector.stat" -#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n" -static PRBool DebugRecord(char * path, char * pURLRef, char * filename) -{ - char recordPath[2048]; - PRIntn oflags = 0; - - // create the record file name from the verification director - // and the default name. - strcpy(recordPath,gVerificationOutputDir); - strcat(recordPath,CONTEXT_VECTOR_MAP); - - // create the file exists, only open for read/write - // otherwise, create it - if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS) - oflags = PR_CREATE_FILE; - oflags |= PR_RDWR; - - // open the record file - PRFileDesc * recordFile = PR_Open(recordPath,oflags,0); - - if (recordFile) { - - char * string = (char *)PR_Malloc(2048); - PRBool found = PR_FALSE; - - // vectors are stored on the format iof "URL vector filename" - // where the vector contains the verification path and - // the filename contains the debug source dump - sprintf(string,"%s %s %s\r\n", pURLRef, path, filename); - - // get the file size, read in the file and parse it line at - // a time to check to see if we have already recorded this - // occurance - - PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END); - if (iSize) { - - char * buffer = (char*)PR_Malloc(iSize); - char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048); - if (buffer!=NULL && 
string!=NULL) { - PRInt32 ibufferpos, istringpos; - - // beginning of file for read - PR_Seek(recordFile,0,PR_SEEK_SET); - PR_Read(recordFile,buffer,iSize); - - // run through the file looking for a matching vector - for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++) - { - // compare string once we have hit the end of the line - if (buffer[ibufferpos] == '\r') { - stringbuf[istringpos] = '\0'; - istringpos = 0; - // skip newline and space - ibufferpos++; - - if (PL_strlen(stringbuf)) { - char * space; - // chop of the filename for compare - if ((space = PL_strrchr(stringbuf, ' '))!=NULL) - *space = '\0'; - - // we have already recorded this one, free up, and return - if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) { - PR_Free(buffer); - PR_Free(stringbuf); - PR_Free(string); - return PR_TRUE; - } - } - } - - // build up the compare string - else - stringbuf[istringpos++] = buffer[ibufferpos]; - } - - // throw away the record file data - PR_Free(buffer); - PR_Free(stringbuf); - } - } - - // if this bad vector was not recorded, add it to record file - - if (!found) { - PR_Seek(recordFile,0,PR_SEEK_END); - PR_Write(recordFile,string,PL_strlen(string)); - } - - PR_Close(recordFile); - PR_Free(string); +void CNavDTD::SetURLRef(char * aURLRef){ + if (mURLRef) { + PL_strfree(mURLRef); + mURLRef=0; } - - // vector was not recorded - return PR_FALSE; + if (aURLRef) + mURLRef = PL_strdup(aURLRef); } -// structure to store the vector statistic information - -typedef struct vector_info { - PRInt32 references; // number of occurances counted - PRInt32 count; // number of tags in the vector - PRBool good_vector; // is this a valid vector? 
- eHTMLTags* vector; // and the vector -} VectorInfo; - -// global table for storing vector statistics and the size -static VectorInfo ** gVectorInfoArray = 0; -static PRInt32 gVectorCount = 0; - -// the statistic vector table grows each time it exceeds this -// stepping value -#define TABLE_SIZE 128 - -// compare function for quick sort. Compares references and -// sorts in decending order - -static int compare( const void *arg1, const void *arg2 ) +void CNavDTD::SetParserDebug(nsIParserDebug * aParserDebug) { - VectorInfo ** p1 = (VectorInfo**)arg1; - VectorInfo ** p2 = (VectorInfo**)arg2; - return (*p2)->references - (*p1)->references; -} - - -/** - * This debug routines stores statistical information about a - * context vector. The context vector statistics are stored in - * a global array. The table is resorted each time it grows to - * aid in lookup speed. If a vector has already been noted, its - * reference count is bumped, otherwise it is added to the table - * - * @update jevering 6/11/98 - * @param aTags is the tag list (vector) - * @param count is the size of the vector - * @return - */ - -static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector) -{ - // if the table doesn't exist, create it - if (!gVectorInfoArray) { - gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*)); - } - else { - // attempt to look up the vector - for (PRInt32 i = 0; i < gVectorCount; i++) - - // check the vector only if they are the same size, if they - // match then just return without doing further work - if (gVectorInfoArray[i]->count == count) - if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) { - - // bzzzt. and we have a winner.. bump the ref count - gVectorInfoArray[i]->references++; - return; - } - } - - // the context vector hasn't been noted, so allocate it and - // initialize it one.. 
add it to the table - VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo)); - pVectorInfo->references = 1; - pVectorInfo->count = count; - pVectorInfo->good_vector = good_vector; - pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags)); - memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count); - gVectorInfoArray[gVectorCount++] = pVectorInfo; - - // have we maxed out the table? grow it.. sort it.. love it. - if ((gVectorCount % TABLE_SIZE) == 0) { - gVectorInfoArray = (VectorInfo**)realloc( - gVectorInfoArray, - (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE)); - if (gVectorCount) { - qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare); - } - } -} - -static void MakeVectorString(char * vector_string, VectorInfo * pInfo) -{ - sprintf (vector_string, "%6d ", pInfo->references); - for (PRInt32 j = 0; j < pInfo->count; j++) { - PL_strcat(vector_string, "<"); - PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j])); - PL_strcat(vector_string, ">"); - } - PL_strcat(vector_string,"\r\n"); -} - -/** - * This debug routine dumps out the vector statistics to a text - * file in the verification directory and defaults to the name - * "vector.stat". It contains all parsed context vectors and there - * occurance count sorted in decending order. - * - * @update jevering 6/11/98 - * @param - * @return - */ - -extern "C" NS_EXPORT void DumpVectorRecord(void) -{ - // do we have a table? - if (gVectorCount) { - - // hopefully, they wont exceed 1K. - char vector_string[1024]; - char path[1024]; - - path[0] = '\0'; - - // put in the verification directory.. 
else the root - if (gVerificationOutputDir) - strcpy(path,gVerificationOutputDir); - - strcat(path,CONTEXT_VECTOR_STAT); - - // open the stat file creaming any existing stat file - PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0); - if (statisticFile) { - - PRInt32 i; - PRofstream ps; - ps.attach(statisticFile); - - // oh what the heck, sort it again - if (gVectorCount) { - qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare); - } - - // cute little header - sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount); - ps << vector_string; - - ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n"; - ps << VECTOR_TABLE_HEADER; - - // dump out the bad vectors encountered - for (i = 0; i < gVectorCount; i++) { - if (!gVectorInfoArray[i]->good_vector) { - MakeVectorString(vector_string, gVectorInfoArray[i]); - ps << vector_string; - } - } - - ps << "\r\n\r\nValid context vector summary\r\n"; - ps << VECTOR_TABLE_HEADER; - - // take a big vector table dump (good vectors) - for (i = 0; i < gVectorCount; i++) { - if (gVectorInfoArray[i]->good_vector) { - MakeVectorString(vector_string, gVectorInfoArray[i]); - ps << vector_string; - } - // free em up. they mean nothing to me now (I'm such a user) - - if (gVectorInfoArray[i]->vector) - PR_Free(gVectorInfoArray[i]->vector); - PR_Free(gVectorInfoArray[i]); - } - } - - // ok, we are done with the table, free it up as well - PR_Free(gVectorInfoArray); - gVectorInfoArray = 0; - gVectorCount = 0; - PR_Close(statisticFile); + if (aParserDebug) { + mParserDebug = aParserDebug; + NS_ADDREF(mParserDebug); } } - - -/** - * This debug method allows us to determine whether or not - * we've seen (and can handle) the given context vector. 
- * - * @update gess4/22/98 - * @param tags is an array of eHTMLTags - * @param count represents the number of items in the tags array - * @param aDTD is the DTD we plan to ask for verification - * @return TRUE if we know how to handle it, else false - */ -PRBool CNavDTD::VerifyContextVector(void) const { - - PRBool result=PR_TRUE; - - if(0!=gVerificationOutputDir) { - -#ifdef XP_PC - char path[_MAX_PATH+1]; - strcpy(path,gVerificationOutputDir); -#endif - - int i=0; - for(i=0;iDebugDumpSource(ps); - PR_Close(debugFile); - } - } - } - } - - return result; -} diff --git a/mozilla/htmlparser/src/CNavDTD.h b/mozilla/htmlparser/src/CNavDTD.h index 9883a4483e6..cdaa93d9dd9 100644 --- a/mozilla/htmlparser/src/CNavDTD.h +++ b/mozilla/htmlparser/src/CNavDTD.h @@ -42,6 +42,7 @@ class nsHTMLParser; class nsIHTMLContentSink; +class nsIParserDebug; class CNavDTD : public nsIDTD { @@ -141,11 +142,11 @@ class CNavDTD : public nsIDTD { * of one type can contain a tag of another type. * * @update gess 3/25/98 - * @param aParent -- tag enum of parent container - * @param aChild -- tag enum of child container + * @param aParent -- int tag of parent container + * @param aChild -- int tag of child container * @return PR_TRUE if parent can contain child */ - virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const; + virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild); /** * This method is called to determine whether or not a tag @@ -199,26 +200,21 @@ class CNavDTD : public nsIDTD { */ virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const; - /** - * This method gets called at various times by the parser - * whenever we want to verify a valid context stack. This - * method also gives us a hook to add debugging metrics. 
- * - * @update gess4/6/98 - * @param aStack[] array of ints (tokens) - * @param aCount number of elements in given array - * @return TRUE if stack is valid, else FALSE + * + * @update jevering 6/18/98 + * @param aURLRef if the current URL reference (for debugger) + * @return */ - virtual PRBool VerifyContextVector(void) const; + virtual void SetURLRef(char * aURLRef); /** * - * @update gess5/18/98 - * @param + * @update jevering 6/18/98 + * @param aParserDebug created debug parser object * @return */ - virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats); + virtual void SetParserDebug(nsIParserDebug * aParserDebug); /** * This method tries to design a context map (without actually @@ -230,7 +226,7 @@ class CNavDTD : public nsIDTD { * @param aChild -- tag type of child * @return True if closure was achieved -- other false */ - virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const; + virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag); /** * This method tries to design a context map (without actually @@ -699,7 +695,8 @@ protected: PRBool mHasOpenForm; PRBool mHasOpenMap; nsDeque mTokenDeque; - + char* mURLRef; + nsIParserDebug* mParserDebug; }; diff --git a/mozilla/htmlparser/src/COtherDTD.cpp b/mozilla/htmlparser/src/COtherDTD.cpp index 47ecc728678..ccbf97c725a 100644 --- a/mozilla/htmlparser/src/COtherDTD.cpp +++ b/mozilla/htmlparser/src/COtherDTD.cpp @@ -31,6 +31,7 @@ * */ +#include "nsIParserDebug.h" #include "COtherDTD.h" #include "nsHTMLTokens.h" #include "nsCRT.h" @@ -63,8 +64,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer"; static const char* kNullToken = "Error: Null token given"; static const char* kInvalidTagStackPos = "Error: invalid tag stack position"; -static char* gVerificationOutputDir=0; -static char* gURLRef=0; static nsAutoString gEmpty; static char formElementTags[]= { @@ -235,17 +234,18 @@ static 
COtherTokenDeallocator gTokenKiller; * @return */ COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) { + NS_INIT_REFCNT(); mParser=0; + mURLRef=0; + mParserDebug=0; nsCRT::zero(mLeafBits,sizeof(mLeafBits)); nsCRT::zero(mContextStack,sizeof(mContextStack)); nsCRT::zero(mStyleStack,sizeof(mStyleStack)); nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers)); mContextStackPos=0; mStyleStackPos=0; - gURLRef = 0; mHasOpenForm=PR_FALSE; mHasOpenMap=PR_FALSE; - gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER"); InitializeDefaultTokenHandlers(); } @@ -258,11 +258,10 @@ COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) { */ COtherDTD::~COtherDTD(){ DeleteTokenHandlers(); - if (gURLRef) - { - PL_strfree(gURLRef); - gURLRef = 0; - } + if (mURLRef) + PL_strfree(mURLRef); + if (mParserDebug) + NS_RELEASE(mParserDebug); // NS_RELEASE(mSink); } @@ -321,7 +320,8 @@ PRInt32 COtherDTD::HandleToken(CToken* aToken){ if(aHandler) { result=(*aHandler)(theToken,this); - Verify("xxx",PR_TRUE); + if (mParserDebug) + mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef); } }//if @@ -807,7 +807,7 @@ PRBool COtherDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) cons * @param aChild -- tag enum of child container * @return PR_TRUE if parent can contain child */ -PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const { +PRBool COtherDTD::CanContain(PRInt32 aParent,PRInt32 aChild) { PRBool result=PR_FALSE; @@ -884,11 +884,11 @@ PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const { //handle form elements (this is very much a WIP!!!) 
if(0!=strchr(formElementTags,aChild)){ - return CanContainFormElement(aParent,aChild); + return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild); } - switch(aParent) { + switch((eHTMLTags)aParent) { case eHTMLTag_a: case eHTMLTag_acronym: result=PRBool(0!=strchr(gTagSet1,aChild)); break; @@ -1475,7 +1475,7 @@ eHTMLTags COtherDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{ * @param aChild -- tag type of child * @return TRUE if propagation closes; false otherwise */ -PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const { +PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) { PRBool result=PR_FALSE; switch(aParentTag) { @@ -2723,442 +2723,19 @@ void COtherDTD::WillInterruptParse(void){ return; } - -/************************************************************************ - Here's a bunch of stuff JEvering put into the parser to do debugging. - ************************************************************************/ - -/** - * This debug method records an invalid context vector and it's - * associated context vector and URL in a simple flat file mapping which - * resides in the verification directory and is named context.map - * - * @update jevering 6/06/98 - * @param path is the directory structure indicating the bad context vector - * @param pURLRef is the associated URL - * @param filename to record mapping to if not already recorded - * @return TRUE if it is already record (dont rerecord) - */ - -#define CONTEXT_VECTOR_MAP "/vector.map" -#define CONTEXT_VECTOR_STAT "/vector.stat" -#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n" -static PRBool DebugRecord(char * path, char * pURLRef, char * filename) -{ - char recordPath[2048]; - PRIntn oflags = 0; - - // create the record file name from the verification director - // and the default name. 
- strcpy(recordPath,gVerificationOutputDir); - strcat(recordPath,CONTEXT_VECTOR_MAP); - - // create the file exists, only open for read/write - // otherwise, create it - if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS) - oflags = PR_CREATE_FILE; - oflags |= PR_RDWR; - - // open the record file - PRFileDesc * recordFile = PR_Open(recordPath,oflags,0); - - if (recordFile) { - - char * string = (char *)PR_Malloc(2048); - PRBool found = PR_FALSE; - - // vectors are stored on the format iof "URL vector filename" - // where the vector contains the verification path and - // the filename contains the debug source dump - sprintf(string,"%s %s %s\r\n", pURLRef, path, filename); - - // get the file size, read in the file and parse it line at - // a time to check to see if we have already recorded this - // occurance - - PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END); - if (iSize) { - - char * buffer = (char*)PR_Malloc(iSize); - char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048); - if (buffer!=NULL && string!=NULL) { - PRInt32 ibufferpos, istringpos; - - // beginning of file for read - PR_Seek(recordFile,0,PR_SEEK_SET); - PR_Read(recordFile,buffer,iSize); - - // run through the file looking for a matching vector - for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++) - { - // compare string once we have hit the end of the line - if (buffer[ibufferpos] == '\r') { - stringbuf[istringpos] = '\0'; - istringpos = 0; - // skip newline and space - ibufferpos++; - - if (PL_strlen(stringbuf)) { - char * space; - // chop of the filename for compare - if ((space = PL_strrchr(stringbuf, ' '))!=NULL) - *space = '\0'; - - // we have already recorded this one, free up, and return - if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) { - PR_Free(buffer); - PR_Free(stringbuf); - PR_Free(string); - return PR_TRUE; - } - } - } - - // build up the compare string - else - stringbuf[istringpos++] = buffer[ibufferpos]; - } - - // throw away the record file data - 
PR_Free(buffer); - PR_Free(stringbuf); - } - } - - // if this bad vector was not recorded, add it to record file - - if (!found) { - PR_Seek(recordFile,0,PR_SEEK_END); - PR_Write(recordFile,string,PL_strlen(string)); - } - - PR_Close(recordFile); - PR_Free(string); +void COtherDTD::SetURLRef(char * aURLRef){ + if (mURLRef) { + PL_strfree(mURLRef); + mURLRef=0; } - - // vector was not recorded - return PR_FALSE; + if (aURLRef) + mURLRef = PL_strdup(aURLRef); } -// structure to store the vector statistic information - -typedef struct vector_info { - PRInt32 references; // number of occurances counted - PRInt32 count; // number of tags in the vector - PRBool good_vector; // is this a valid vector? - eHTMLTags* vector; // and the vector -} VectorInfo; - -// global table for storing vector statistics and the size -static VectorInfo ** gVectorInfoArray = 0; -static PRInt32 gVectorCount = 0; - -// the statistic vector table grows each time it exceeds this -// stepping value -#define TABLE_SIZE 128 - -// compare function for quick sort. Compares references and -// sorts in decending order - -static int compare( const void *arg1, const void *arg2 ) +void COtherDTD::SetParserDebug(nsIParserDebug * aParserDebug) { - VectorInfo ** p1 = (VectorInfo**)arg1; - VectorInfo ** p2 = (VectorInfo**)arg2; - return (*p2)->references - (*p1)->references; -} - -/** - * quick sort the statistic array causing the most frequently - * used vectors to be at the top (this makes it a little speedier - * when looking them up) - */ -static void SortVectorRecord(void) { - // of course, sort it only if there is something to sort - if (gVectorCount) { - qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare); - } -} - - -/** - * This debug routines stores statistical information about a - * context vector. The context vector statistics are stored in - * a global array. The table is resorted each time it grows to - * aid in lookup speed. 
If a vector has already been noted, its - * reference count is bumped, otherwise it is added to the table - * - * @update jevering 6/11/98 - * @param aTags is the tag list (vector) - * @param count is the size of the vector - * @return - */ - -static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector) -{ - // if the table doesn't exist, create it - if (!gVectorInfoArray) { - gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*)); - } - else { - // attempt to look up the vector - for (PRInt32 i = 0; i < gVectorCount; i++) - - // check the vector only if they are the same size, if they - // match then just return without doing further work - if (gVectorInfoArray[i]->count == count) - if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) { - - // bzzzt. and we have a winner.. bump the ref count - gVectorInfoArray[i]->references++; - return; - } - } - - // the context vector hasn't been noted, so allocate it and - // initialize it one.. add it to the table - VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo)); - pVectorInfo->references = 1; - pVectorInfo->count = count; - pVectorInfo->good_vector = good_vector; - pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags)); - memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count); - gVectorInfoArray[gVectorCount++] = pVectorInfo; - - // have we maxed out the table? grow it.. sort it.. love it. 
- if ((gVectorCount % TABLE_SIZE) == 0) { - gVectorInfoArray = (VectorInfo**)realloc( - gVectorInfoArray, - (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE)); - SortVectorRecord(); - } -} - -static void MakeVectorString(char * vector_string, VectorInfo * pInfo) -{ - sprintf (vector_string, "%6d ", pInfo->references); - for (PRInt32 j = 0; j < pInfo->count; j++) { - PL_strcat(vector_string, "<"); - PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j])); - PL_strcat(vector_string, ">"); - } - PL_strcat(vector_string,"\r\n"); -} - -/** - * This debug routine dumps out the vector statistics to a text - * file in the verification directory and defaults to the name - * "vector.stat". It contains all parsed context vectors and there - * occurance count sorted in decending order. - * - * @update jevering 6/11/98 - * @param - * @return - */ - -extern "C" NS_EXPORT void DumpVectorRecord_other(void) -{ - // do we have a table? - if (gVectorCount) { - - // hopefully, they wont exceed 1K. - char vector_string[1024]; - char path[1024]; - - path[0] = '\0'; - - // put in the verification directory.. else the root - if (gVerificationOutputDir) - strcpy(path,gVerificationOutputDir); - - strcat(path,CONTEXT_VECTOR_STAT); - - // open the stat file creaming any existing stat file - PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0); - if (statisticFile) { - - PRInt32 i; - PRofstream ps; - ps.attach(statisticFile); - - // oh what the heck, sort it again - SortVectorRecord(); - - // cute little header - sprintf(vector_string,"Context vector occurance results. 
Processed %d unique vectors.\r\n\r\n", gVectorCount); - ps << vector_string; - - ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n"; - ps << VECTOR_TABLE_HEADER; - - // dump out the bad vectors encountered - for (i = 0; i < gVectorCount; i++) { - if (!gVectorInfoArray[i]->good_vector) { - MakeVectorString(vector_string, gVectorInfoArray[i]); - ps << vector_string; - } - } - - ps << "\r\n\r\nValid context vector summary\r\n"; - ps << VECTOR_TABLE_HEADER; - - // take a big vector table dump (good vectors) - for (i = 0; i < gVectorCount; i++) { - if (gVectorInfoArray[i]->good_vector) { - MakeVectorString(vector_string, gVectorInfoArray[i]); - ps << vector_string; - } - // free em up. they mean nothing to me now (I'm such a user) - - if (gVectorInfoArray[i]->vector) - PR_Free(gVectorInfoArray[i]->vector); - PR_Free(gVectorInfoArray[i]); - } - } - - // ok, we are done with the table, free it up as well - PR_Free(gVectorInfoArray); - gVectorInfoArray = 0; - gVectorCount = 0; - PR_Close(statisticFile); + if (aParserDebug) { + mParserDebug = aParserDebug; + NS_ADDREF(mParserDebug); } } - - -/** - * This debug method allows us to determine whether or not - * we've seen (and can handle) the given context vector. 
- * - * @update gess4/22/98 - * @param tags is an array of eHTMLTags - * @param count represents the number of items in the tags array - * @param aDTD is the DTD we plan to ask for verification - * @return TRUE if we know how to handle it, else false - */ -PRBool COtherDTD::VerifyContextVector(void) const { - - PRBool result=PR_TRUE; - - if(0!=gVerificationOutputDir) { - -#ifdef XP_PC - char path[_MAX_PATH+1]; - strcpy(path,gVerificationOutputDir); -#endif - - int i=0; - for(i=0;iDebugDumpSource(ps); - PR_Close(debugFile); - } - } - } - } - - return result; -} - diff --git a/mozilla/htmlparser/src/COtherDTD.h b/mozilla/htmlparser/src/COtherDTD.h index 21d2346eafd..7a74866cf13 100644 --- a/mozilla/htmlparser/src/COtherDTD.h +++ b/mozilla/htmlparser/src/COtherDTD.h @@ -34,7 +34,6 @@ #include "nsDeque.h" - #define NS_IOtherHTML_DTD_IID \ {0x8a5e89c0, 0xd16d, 0x11d1, \ {0x80, 0x22, 0x00, 0x60, 0x8, 0x14, 0x98, 0x89}} @@ -42,6 +41,7 @@ class nsIParser; class nsIHTMLContentSink; +class nsIParserDebug; class COtherDTD : public nsIDTD { @@ -143,11 +143,11 @@ class COtherDTD : public nsIDTD { * of one type can contain a tag of another type. * * @update gess 3/25/98 - * @param aParent -- tag enum of parent container - * @param aChild -- tag enum of child container + * @param aParent -- int tag of parent container + * @param aChild -- int tag of child container * @return PR_TRUE if parent can contain child */ - virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const; + virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild); /** * This method is called to determine whether or not a tag @@ -201,26 +201,21 @@ class COtherDTD : public nsIDTD { */ virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const; - /** - * This method gets called at various times by the parser - * whenever we want to verify a valid context stack. This - * method also gives us a hook to add debugging metrics. 
- * - * @update gess4/6/98 - * @param aStack[] array of ints (tokens) - * @param aCount number of elements in given array - * @return TRUE if stack is valid, else FALSE + * + * @update jevering 6/18/98 + * @param aURLRef if the current URL reference (for debugger) + * @return */ - virtual PRBool VerifyContextVector(void) const; + virtual void SetURLRef(char * aURLRef); /** * - * @update gess5/18/98 - * @param + * @update jevering 6/18/98 + * @param aParserDebug created debug parser object * @return */ - virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats); + virtual void SetParserDebug(nsIParserDebug * aParserDebug); /** * This method tries to design a context map (without actually @@ -232,7 +227,7 @@ class COtherDTD : public nsIDTD { * @param aChild -- tag type of child * @return True if closure was achieved -- other false */ - virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const; + virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag); /** * This method tries to design a context map (without actually @@ -701,7 +696,8 @@ protected: PRBool mHasOpenForm; PRBool mHasOpenMap; nsDeque mTokenDeque; - + char* mURLRef; + nsIParserDebug* mParserDebug; }; diff --git a/mozilla/htmlparser/src/Makefile b/mozilla/htmlparser/src/Makefile index 37a8ca552ce..243a5c4aa98 100644 --- a/mozilla/htmlparser/src/Makefile +++ b/mozilla/htmlparser/src/Makefile @@ -24,6 +24,7 @@ DEFINES = -D_IMPL_NS_HTMLPARS CPPSRCS = \ nsHTMLContentSink.cpp \ nsParserNode.cpp \ + nsParserDebug.cpp \ nsScanner.cpp \ nsToken.cpp \ nsTokenHandler.cpp \ @@ -41,6 +42,8 @@ EXPORTS = \ nsHTMLTokens.h \ nsIParserNode.h \ nsIParser.h \ + nsIParserDebug.h \ + nsIParserFilter.h \ nsToken.h \ $(NULL) diff --git a/mozilla/htmlparser/src/makefile.win b/mozilla/htmlparser/src/makefile.win index 6f479747931..940db9e04f2 100644 --- a/mozilla/htmlparser/src/makefile.win +++ b/mozilla/htmlparser/src/makefile.win @@ -31,7 +31,8 @@ 
CPPSRCS=nsHTMLContentSink.cpp \ nsHTMLParser.cpp prstrm.cpp EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \ - nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h + nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h \ + nsIParserDebug.h nsIParserFilter.h CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \ .\$(OBJDIR)\CNavDTD.obj \ @@ -39,7 +40,7 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \ .\$(OBJDIR)\nsHTMLParser.obj \ .\$(OBJDIR)\nsHTMLTokens.obj .\$(OBJDIR)\nsParserNode.obj \ .\$(OBJDIR)\nsScanner.obj .\$(OBJDIR)\nsToken.obj \ - .\$(OBJDIR)\nsTokenHandler.obj \ + .\$(OBJDIR)\nsTokenHandler.obj .\$(OBJDIR)\nsParserDebug.obj \ .\$(OBJDIR)\prstrm.obj LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib diff --git a/mozilla/htmlparser/src/nsHTMLParser.cpp b/mozilla/htmlparser/src/nsHTMLParser.cpp index 99b1d81a340..d361ceed444 100644 --- a/mozilla/htmlparser/src/nsHTMLParser.cpp +++ b/mozilla/htmlparser/src/nsHTMLParser.cpp @@ -30,6 +30,7 @@ #include "prstrm.h" #include #include "nsIInputStream.h" +#include "nsIParserFilter.h" /* UNCOMMENT THIS IF STUFF STOPS WORKING... #ifdef XP_PC @@ -47,10 +48,7 @@ static const char* kNullURL = "Error: Null URL given"; static const char* kNullFilename= "Error: Null filename given"; static const char* kNullTokenizer = "Error: Unable to construct tokenizer"; -static char* gVerificationOutputDir=0; -static PRBool gRecordingStatistics=PR_TRUE; static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream -static char* gURLRef=0; //#define DEBUG_SAVE_SOURCE_DOC 1 #ifdef DEBUG_SAVE_SOURCE_DOC @@ -58,17 +56,6 @@ fstream* gTempStream=0; #endif -extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir) -{ - gVerificationOutputDir = verify_dir; -} - - -extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval) -{ - gRecordingStatistics = bval; -} - /** * This method is defined in nsIParser. It is used to * cause the COM-like construction of an nsHTMLParser. 
@@ -107,6 +94,7 @@ CTokenDeallocator gTokenKiller; */ nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) { NS_INIT_REFCNT(); + mParserFilter = nsnull; mListener = nsnull; mTransferBuffer=0; mSink=0; @@ -125,11 +113,6 @@ nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) { * @return */ nsHTMLParser::~nsHTMLParser() { - if (gURLRef) - { - PL_strfree(gURLRef); - gURLRef = 0; - } NS_IF_RELEASE(mListener); if(mTransferBuffer) delete [] mTransferBuffer; @@ -139,7 +122,7 @@ nsHTMLParser::~nsHTMLParser() { delete mCurrentPos; mCurrentPos=0; if(mDTD) - delete mDTD; + NS_RELEASE(mDTD); mDTD=0; if(mScanner) delete mScanner; @@ -185,6 +168,18 @@ nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr) return NS_OK; } +nsIParserFilter * nsHTMLParser::SetParserFilter(nsIParserFilter * aFilter) +{ // Installs aFilter as the parser's filter, dropping the reference held on the previous one. + nsIParserFilter* old=mParserFilter; + if(old) + NS_RELEASE(old); + mParserFilter=aFilter; // rebind unconditionally: a null aFilter must clear the member rather than leave it pointing at the filter released above + if(aFilter) { + NS_ADDREF(aFilter); + } + return old; // NOTE(review): the previous filter has already been released here, so callers must not treat the result as an owned reference +} + /** * This method gets called in order to set the content * sink for this parser to dump nodes to. 
@@ -217,6 +212,10 @@ void nsHTMLParser::SetDTD(nsIDTD* aDTD) { mDTD=aDTD; } +nsIDTD * nsHTMLParser::GetDTD(void) { + return mDTD; +} + /** * * @@ -287,7 +286,7 @@ eParseMode DetermineParseMode() { * @param * @return */ -nsIDTD* GetDTD(eParseMode aMode) { +nsIDTD* NewDTD(eParseMode aMode) { nsIDTD* aDTD=0; switch(aMode) { case eParseMode_navigator: @@ -297,6 +296,8 @@ nsIDTD* GetDTD(eParseMode aMode) { default: break; } + if (aDTD) + aDTD->AddRef(); return aDTD; } @@ -364,11 +365,6 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){ nsString theBuffer; const int kLocalBufSize=10; - if (gURLRef) - PL_strfree(gURLRef); - if (aFilename) - gURLRef = PL_strdup(aFilename); - mMajorIteration=-1; mMinorIteration=-1; @@ -417,22 +413,20 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){ * @param aFilename -- const char* containing file to be parsed.
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise. */ -PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){ +PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug){ NS_PRECONDITION(0!=aFilename,kNullFilename); PRInt32 status=kBadFilename; mIncremental=aIncremental; if(aFilename) { - if (gURLRef) - PL_strfree(gURLRef); - gURLRef = PL_strdup(aFilename); - mParseMode=DetermineParseMode(); - mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD; + mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD; if(mDTD) { mDTD->SetParser(this); mDTD->SetContentSink(mSink); + mDTD->SetURLRef((char *)aFilename); + mDTD->SetParserDebug(aDebug); } WillBuildModel(); @@ -466,7 +460,8 @@ PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){ */ PRInt32 nsHTMLParser::Parse(nsIURL* aURL, nsIStreamListener* aListener, - PRBool aIncremental) { + PRBool aIncremental, + nsIParserDebug * aDebug) { NS_PRECONDITION(0!=aURL,kNullURL); PRInt32 status=kBadURL; @@ -485,19 +480,13 @@ PRInt32 nsHTMLParser::Parse(nsIURL* aURL, if(aURL) { - if (gURLRef) - { - PL_strfree(gURLRef); - gURLRef = 0; - } - if (aURL->GetSpec()) - gURLRef = PL_strdup(aURL->GetSpec()); - mParseMode=DetermineParseMode(); - mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD; + mDTD=(0==mDTD) ? 
NewDTD(mParseMode) : mDTD; if(mDTD) { mDTD->SetParser(this); mDTD->SetContentSink(mSink); + mDTD->SetURLRef((char *)aURL->GetSpec()); + mDTD->SetParserDebug(aDebug); } WillBuildModel(); @@ -689,6 +678,9 @@ nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length) } #endif + if (mParserFilter) + mParserFilter->RawBuffer(mTransferBuffer, &len); + mScanner->Append(&mTransferBuffer[offset],len); } //if diff --git a/mozilla/htmlparser/src/nsHTMLParser.h b/mozilla/htmlparser/src/nsHTMLParser.h index 1b1c8470aa3..d8638dd3b32 100644 --- a/mozilla/htmlparser/src/nsHTMLParser.h +++ b/mozilla/htmlparser/src/nsHTMLParser.h @@ -73,6 +73,8 @@ class nsIHTMLContentSink; class nsIURL; class nsIDTD; class CScanner; +class nsIParserFilter; +class nsIParserDebug; class nsHTMLParser : public nsIParser, public nsIStreamListener { @@ -103,8 +105,12 @@ friend class CTokenHandler; * @return old sink, or NULL */ virtual nsIContentSink* SetContentSink(nsIContentSink* aSink); + + virtual nsIParserFilter* SetParserFilter(nsIParserFilter* aFilter); virtual void SetDTD(nsIDTD* aDTD); + + virtual nsIDTD * GetDTD(void); /** * @@ -124,7 +130,8 @@ friend class CTokenHandler; */ virtual PRInt32 Parse(nsIURL* aURL, nsIStreamListener* aListener, - PRBool aIncremental=PR_TRUE); + PRBool aIncremental=PR_TRUE, + nsIParserDebug * aDebug = 0); /** * Cause parser to parse input from given file in given mode @@ -133,7 +140,7 @@ friend class CTokenHandler; * @param aMode is the desired parser mode (Nav, other, etc.) 
* @return TRUE if all went well -- FALSE otherwise */ - virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental); + virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0); /** * @update gess5/11/98 @@ -290,7 +297,8 @@ protected: //********************************************* nsIStreamListener* mListener; - nsIContentSink* mSink; + nsIContentSink* mSink; + nsIParserFilter* mParserFilter; nsDequeIterator* mCurrentPos; nsDequeIterator* mMarkPos; diff --git a/mozilla/htmlparser/src/nsIDTD.h b/mozilla/htmlparser/src/nsIDTD.h index 926ee926943..77ae7f9868c 100644 --- a/mozilla/htmlparser/src/nsIDTD.h +++ b/mozilla/htmlparser/src/nsIDTD.h @@ -37,6 +37,7 @@ class nsIParser; class CToken; class nsIContentSink; +class nsIParserDebug; class nsIDTD : public nsISupports { @@ -115,12 +116,28 @@ class nsIDTD : public nsISupports { /** * - * @update gess5/18/98 - * @param + * @update jevering 6/18/98 + * @param aURLRef if the current URL reference (for debugger) * @return */ - virtual PRInt32 Verify(const char* anOutputDir,PRBool aRecordStats)=0; + virtual void SetURLRef(char * aURLRef) = 0; + /** + * + * @update jevering 6/18/98 + * @param aParent parent tag + * @param aChild child tag + * @return PR_TRUE if valid container + */ + virtual PRBool CanContain(PRInt32 aParent, PRInt32 aChild) = 0; + + /** + * + * @update jevering 6/18/98 + * @param aParserDebug created debug parser object + * @return + */ + virtual void SetParserDebug(nsIParserDebug * aParserDebug) = 0; }; diff --git a/mozilla/htmlparser/src/nsIParser.h b/mozilla/htmlparser/src/nsIParser.h index f509e1c909e..a4ffd45bc27 100644 --- a/mozilla/htmlparser/src/nsIParser.h +++ b/mozilla/htmlparser/src/nsIParser.h @@ -34,6 +34,7 @@ class nsString; class CToken; class nsIURL; class nsIDTD; +class nsIParserDebug; /** * This class defines the iparser interface. 
This XPCOM @@ -60,9 +61,10 @@ class nsIParser : public nsISupports { virtual PRInt32 Parse(nsIURL* aURL, nsIStreamListener* aListener, - PRBool aIncremental=PR_TRUE) = 0; + PRBool aIncremental=PR_TRUE, + nsIParserDebug * aDebug = 0) = 0; - virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0; + virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0)=0; virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0; diff --git a/mozilla/htmlparser/src/nsIParserDebug.h b/mozilla/htmlparser/src/nsIParserDebug.h new file mode 100644 index 00000000000..a1e45204291 --- /dev/null +++ b/mozilla/htmlparser/src/nsIParserDebug.h @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "NPL"); you may not use this file except in + * compliance with the NPL. You may obtain a copy of the NPL at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the NPL is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL + * for the specific language governing rights and limitations under the + * NPL. + * + * The Initial Developer of this code under the NPL is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1998 Netscape Communications Corporation. All Rights + * Reserved. 
+ */ + +/** + * MODULE NOTES: + * @update gess 4/8/98 + * + * + */ + +#ifndef NS_IPARSERDEBUG__ +#define NS_IPARSERDEBUG__ + +#include "nsISupports.h" +#include "nsHTMLTokens.h" +#include "prtypes.h" + +#define NS_IPARSERDEBUG_IID \ + {0x7b68c220, 0x0685, 0x11d2, \ + {0xa4, 0xb5, 0x00, 0x80, 0x5f, 0x2a, 0x0e, 0xd2}} + + +class nsIDTD; +class nsHTMLParser; + +class nsIParserDebug : public nsISupports { + +public: + + virtual void SetVerificationDirectory(char * verify_dir) = 0; + + virtual void SetRecordStatistics(PRBool bval) = 0; + + virtual PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef) = 0; + + virtual void DumpVectorRecord(void) = 0; + +}; + +extern NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult); + +#endif /* NS_IPARSERDEBUG__ */ \ No newline at end of file diff --git a/mozilla/htmlparser/src/nsIParserFilter.h b/mozilla/htmlparser/src/nsIParserFilter.h new file mode 100644 index 00000000000..8b257515efc --- /dev/null +++ b/mozilla/htmlparser/src/nsIParserFilter.h @@ -0,0 +1,51 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "NPL"); you may not use this file except in + * compliance with the NPL. You may obtain a copy of the NPL at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the NPL is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL + * for the specific language governing rights and limitations under the + * NPL. + * + * The Initial Developer of this code under the NPL is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1998 Netscape Communications Corporation. All Rights + * Reserved. 
+ */ + +/** + * MODULE NOTES: + * @update jevering 6/17/98 + * + */ + +#ifndef IPARSERFILTER +#define IPARSERFILTER + +#include "nsISupports.h" + +class CToken; + +#define NS_IPARSERFILTER_IID \ + {0x14d6ff0, 0x0610, 0x11d2, \ + {0x8c, 0x3f, 0x00, 0x80, 0x5f, 0x8a, 0x1d, 0xb7}} + + +class nsIParserFilter : public nsISupports { + public: + + NS_IMETHOD RawBuffer(char * buffer, int * buffer_length) = 0; + + NS_IMETHOD WillAddToken(CToken & token) = 0; + + NS_IMETHOD ProcessTokens( /* dont know what goes here yet */ void ) = 0; +}; + +extern nsresult NS_NewParserFilter(nsIParserFilter** aInstancePtrResult); + + +#endif + diff --git a/mozilla/htmlparser/src/nsParserDebug.cpp b/mozilla/htmlparser/src/nsParserDebug.cpp new file mode 100644 index 00000000000..e998807e309 --- /dev/null +++ b/mozilla/htmlparser/src/nsParserDebug.cpp @@ -0,0 +1,534 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "NPL"); you may not use this file except in + * compliance with the NPL. You may obtain a copy of the NPL at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the NPL is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL + * for the specific language governing rights and limitations under the + * NPL. + * + * The Initial Developer of this code under the NPL is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1998 Netscape Communications Corporation. All Rights + * Reserved. + */ + +/** + * MODULE NOTES: + * @update jevering 06/18/98 + * + * This file contains the parser debugger object which aids in + * walking links and reporting statistic information, reporting + * bad vectors. 
+ */ + +#include "CNavDTD.h" +#include "nsHTMLTokens.h" +#include "nsHTMLParser.h" +#include "nsIParserDebug.h" +#include "nsCRT.h" +#include "prenv.h" //this is here for debug reasons... +#include "prtypes.h" //this is here for debug reasons... +#include "prio.h" +#include "plstr.h" +#include "prstrm.h" +#include +#include +#include "prmem.h" + +#define CONTEXT_VECTOR_MAP "/vector.map" +#define CONTEXT_VECTOR_STAT "/vector.stat" +#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n" + +// structure to store the vector statistic information + +typedef struct vector_info { + PRInt32 references; // number of occurances counted + PRInt32 count; // number of tags in the vector + PRBool good_vector; // is this a valid vector? + eHTMLTags* vector; // and the vector +} VectorInfo; + +// the statistic vector table grows each time it exceeds this +// stepping value +#define TABLE_SIZE 128 + +class CParserDebug : public nsIParserDebug { +public: + + CParserDebug(char * aVerifyDir = 0); + ~CParserDebug(); + + NS_DECL_ISUPPORTS + + void SetVerificationDirectory(char * verify_dir); + void SetRecordStatistics(PRBool bval); + PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef); + void DumpVectorRecord(void); + + // global table for storing vector statistics and the size + +private: + VectorInfo ** mVectorInfoArray; + PRInt32 mVectorCount; + char * mVerificationDir; + PRBool mRecordingStatistics; + + PRBool DebugRecord(char * path, char * pURLRef, char * filename); + void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector); + void MakeVectorString(char * vector_string, VectorInfo * pInfo); +}; + +static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); +static NS_DEFINE_IID(kIDebugParserIID, NS_IPARSERDEBUG_IID); + +/** + * This method is defined in nsIParser. It is used to + * cause the COM-like construction of an nsHTMLParser. 
+ * + * @update jevering 3/25/98 + * @param nsIParser** ptr to newly instantiated parser + * @return NS_xxx error result + */ + +NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult) +{ + CParserDebug *it = new CParserDebug(); + + if (it == 0) { + return NS_ERROR_OUT_OF_MEMORY; + } + + return it->QueryInterface(kIDebugParserIID, (void **)aInstancePtrResult); +} + +CParserDebug::CParserDebug(char * aVerifyDir) +{ + NS_INIT_REFCNT(); + mVectorInfoArray = 0; + mVectorCount = 0; + if (aVerifyDir) + mVerificationDir = PL_strdup(aVerifyDir); + else { + char * pString = PR_GetEnv("VERIFY_PARSER"); + if (pString) + mVerificationDir = PL_strdup(pString); + else + mVerificationDir = 0; + } + mRecordingStatistics = PR_TRUE; +} + +CParserDebug::~CParserDebug() +{ + if (mVerificationDir) + PL_strfree(mVerificationDir); +} + +/** + * This method gets called as part of our COM-like interfaces. + * Its purpose is to create an interface to parser object + * of some type. + * + * @update gess 4/8/98 + * @param nsIID id of object to discover + * @param aInstancePtr ptr to newly discovered interface + * @return NS_xxx result code + */ +nsresult CParserDebug::QueryInterface(const nsIID& aIID, void** aInstancePtr) +{ + if (NULL == aInstancePtr) { + return NS_ERROR_NULL_POINTER; + } + + if(aIID.Equals(kISupportsIID)) { //do IUnknown... + *aInstancePtr = (nsIParserDebug*)(this); + } + else if(aIID.Equals(kIDebugParserIID)) { //do IParserDebug base class... 
+ *aInstancePtr = (nsIParserDebug*)(this); + } + else { + *aInstancePtr=0; + return NS_NOINTERFACE; + } + ((nsISupports*) *aInstancePtr)->AddRef(); + return NS_OK; +} + +NS_IMPL_ADDREF(CParserDebug) +NS_IMPL_RELEASE(CParserDebug) + +void CParserDebug::SetVerificationDirectory(char * verify_dir) +{ + if (mVerificationDir) { + PL_strfree(mVerificationDir); + mVerificationDir = 0; + } + mVerificationDir = PL_strdup(verify_dir); +} + +void CParserDebug::SetRecordStatistics(PRBool bval) +{ + mRecordingStatistics = bval; +} + +/** + * This debug method records an invalid context vector and it's + * associated context vector and URL in a simple flat file mapping which + * resides in the verification directory and is named context.map + * + * @update jevering 6/06/98 + * @param path is the directory structure indicating the bad context vector + * @param pURLRef is the associated URL + * @param filename to record mapping to if not already recorded + * @return TRUE if it is already record (dont rerecord) + */ + +PRBool CParserDebug::DebugRecord(char * path, char * pURLRef, char * filename) +{ + char recordPath[2048]; + PRIntn oflags = 0; + + // create the record file name from the verification director + // and the default name. 
+ strcpy(recordPath,mVerificationDir); + strcat(recordPath,CONTEXT_VECTOR_MAP); + + // create the file exists, only open for read/write + // otherwise, create it + if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS) + oflags = PR_CREATE_FILE; + oflags |= PR_RDWR; + + // open the record file + PRFileDesc * recordFile = PR_Open(recordPath,oflags,0); + + if (recordFile) { + + char * string = (char *)PR_Malloc(2048); + PRBool found = PR_FALSE; + + // vectors are stored on the format iof "URL vector filename" + // where the vector contains the verification path and + // the filename contains the debug source dump + sprintf(string,"%s %s %s\r\n", pURLRef, path, filename); + + // get the file size, read in the file and parse it line at + // a time to check to see if we have already recorded this + // occurance + + PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END); + if (iSize) { + + char * buffer = (char*)PR_Malloc(iSize); + char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048); + if (buffer!=NULL && stringbuf!=NULL) { // guard the two buffers the scan below actually dereferences + PRInt32 ibufferpos, istringpos; + + // beginning of file for read + PR_Seek(recordFile,0,PR_SEEK_SET); + PR_Read(recordFile,buffer,iSize); + + // run through the file looking for a matching vector + for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++) + { + // compare string once we have hit the end of the line + if (buffer[ibufferpos] == '\r') { + stringbuf[istringpos] = '\0'; + istringpos = 0; + // skip newline and space + ibufferpos++; + + if (PL_strlen(stringbuf)) { + char * space; + // chop of the filename for compare + if ((space = PL_strrchr(stringbuf, ' '))!=NULL) + *space = '\0'; + + // we have already recorded this one, free up, and return + if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) { + PR_Free(buffer); PR_Free(stringbuf); + PR_Free(string); + PR_Close(recordFile); // this early return bypasses the PR_Close at the end of the function, so release the descriptor here + return PR_TRUE; + } + } + } + + // build up the compare string + else + stringbuf[istringpos++] = buffer[ibufferpos]; + } + + // throw away the record file data + 
PR_Free(buffer); + PR_Free(stringbuf); + } + } + + // if this bad vector was not recorded, add it to record file + + if (!found) { + PR_Seek(recordFile,0,PR_SEEK_END); + PR_Write(recordFile,string,PL_strlen(string)); + } + + PR_Close(recordFile); + PR_Free(string); + } + + // vector was not recorded + return PR_FALSE; +} + +/** + * compare function for quick sort. Compares references and + * sorts in decending order + */ + +static int compare( const void *arg1, const void *arg2 ) +{ + VectorInfo ** p1 = (VectorInfo**)arg1; + VectorInfo ** p2 = (VectorInfo**)arg2; + return (*p2)->references - (*p1)->references; +} + +/** + * This debug routines stores statistical information about a + * context vector. The context vector statistics are stored in + * a global array. The table is resorted each time it grows to + * aid in lookup speed. If a vector has already been noted, its + * reference count is bumped, otherwise it is added to the table + * + * @update jevering 6/11/98 + * @param aTags is the tag list (vector) + * @param count is the size of the vector + * @return + */ + +void CParserDebug::NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector) +{ + // if the table doesn't exist, create it + if (!mVectorInfoArray) { + mVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*)); + } + else { + // attempt to look up the vector + for (PRInt32 i = 0; i < mVectorCount; i++) + + // check the vector only if they are the same size, if they + // match then just return without doing further work + if (mVectorInfoArray[i]->count == count) + if (!memcmp(mVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) { + + // bzzzt. and we have a winner.. bump the ref count + mVectorInfoArray[i]->references++; + return; + } + } + + // the context vector hasn't been noted, so allocate it and + // initialize it one.. 
add it to the table + VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo)); + pVectorInfo->references = 1; + pVectorInfo->count = count; + pVectorInfo->good_vector = good_vector; + pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags)); + memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count); + mVectorInfoArray[mVectorCount++] = pVectorInfo; + + // have we maxed out the table? grow it.. sort it.. love it. + if ((mVectorCount % TABLE_SIZE) == 0) { + mVectorInfoArray = (VectorInfo**)PR_Realloc( // table comes from PR_Calloc and is released with PR_Free, so grow it with the matching NSPR allocator + mVectorInfoArray, + (sizeof(VectorInfo*)*((mVectorCount/TABLE_SIZE)+1)*TABLE_SIZE)); + if (mVectorCount) { + qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare); + } + } +} + +void CParserDebug::MakeVectorString(char * vector_string, VectorInfo * pInfo) +{ + sprintf (vector_string, "%6d ", pInfo->references); + for (PRInt32 j = 0; j < pInfo->count; j++) { + PL_strcat(vector_string, "<"); + PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j])); + PL_strcat(vector_string, ">"); + } + PL_strcat(vector_string,"\r\n"); +} + +/** + * This debug routine dumps out the vector statistics to a text + * file in the verification directory and defaults to the name + * "vector.stat". It contains all parsed context vectors and their + * occurrence count sorted in descending order. + * + * @update jevering 6/11/98 + * @param + * @return + */ + +void CParserDebug::DumpVectorRecord(void) +{ + // do we have a table? + if (mVectorCount) { + + // hopefully, they wont exceed 1K. + char vector_string[1024]; + char path[1024]; + + path[0] = '\0'; + + // put in the verification directory.. 
else the root + if (mVerificationDir) + strcpy(path,mVerificationDir); + + strcat(path,CONTEXT_VECTOR_STAT); + + // open the stat file, truncating any existing stat file so no stale tail survives a shorter dump + PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR|PR_TRUNCATE,0); + if (statisticFile) { + + PRInt32 i; + PRofstream ps; + ps.attach(statisticFile); + + // oh what the heck, sort it again + if (mVectorCount) { + qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare); + } + + // cute little header + sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", mVectorCount); + ps << vector_string; + + ps << "Invalid context vector summary (see " CONTEXT_VECTOR_MAP ") for mapping.\r\n"; // the URL/vector mapping is written to the map file by DebugRecord, not to this stat file + ps << VECTOR_TABLE_HEADER; + + // dump out the bad vectors encountered + for (i = 0; i < mVectorCount; i++) { + if (!mVectorInfoArray[i]->good_vector) { + MakeVectorString(vector_string, mVectorInfoArray[i]); + ps << vector_string; + } + } + + ps << "\r\n\r\nValid context vector summary\r\n"; + ps << VECTOR_TABLE_HEADER; + + // take a big vector table dump (good vectors) + for (i = 0; i < mVectorCount; i++) { + if (mVectorInfoArray[i]->good_vector) { + MakeVectorString(vector_string, mVectorInfoArray[i]); + ps << vector_string; + } + // free em up. they mean nothing to me now (I'm such a user) + + if (mVectorInfoArray[i]->vector) + PR_Free(mVectorInfoArray[i]->vector); + PR_Free(mVectorInfoArray[i]); + } + } + + // ok, we are done with the table, free it up as well + PR_Free(mVectorInfoArray); + mVectorInfoArray = 0; + mVectorCount = 0; + if (statisticFile) PR_Close(statisticFile); // PR_Open may have failed above; never hand a null descriptor to PR_Close + } +} + + +/** + * This debug method allows us to determine whether or not + * we've seen (and can handle) the given context vector. 
+ * + * @update gess4/22/98 + * @param tags is an array of eHTMLTags + * @param count represents the number of items in the tags array + * @param aDTD is the DTD we plan to ask for verification + * @return TRUE if we know how to handle it, else false + */ + +PRBool CParserDebug::Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int aContextStackPos, eHTMLTags aContextStack[], char * aURLRef) +{ + PRBool result=PR_TRUE; + + //ok, now see if we understand this vector + + if(0!=mVerificationDir || mRecordingStatistics) { + + if(aDTD && aContextStackPos>1) { + for (int i = 0; i < aContextStackPos-1; i++) + if (!aDTD->CanContain(aContextStack[i],aContextStack[i+1])) { + result = PR_FALSE; + break; + } + } + } + + if (mRecordingStatistics) { + NoteVector(aContextStack,aContextStackPos,result); + } + + if(0!=mVerificationDir) { + char path[2048]; + strcpy(path,mVerificationDir); + + int i=0; + for(i=0;iDebugDumpSource(ps); + PR_Close(debugFile); + } + } + } + } + + return result; +} diff --git a/mozilla/htmlparser/src/nsTokenizer.cpp b/mozilla/htmlparser/src/nsTokenizer.cpp new file mode 100644 index 00000000000..dacfe58838c --- /dev/null +++ b/mozilla/htmlparser/src/nsTokenizer.cpp @@ -0,0 +1,327 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "NPL"); you may not use this file except in + * compliance with the NPL. You may obtain a copy of the NPL at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the NPL is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL + * for the specific language governing rights and limitations under the + * NPL. + * + * The Initial Developer of this code under the NPL is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1998 Netscape Communications Corporation. All Rights + * Reserved. 
+ */ + + +#include +#include "nsTokenizer.h" +#include "nsToken.h" +#include "nsScanner.h" +#include "nsIParserFilter.h" +#include "nsIURL.h" + +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} + +/** + * Default constructor + * + * @update gess 3/25/98 + * @param aFilename -- name of file to be tokenized + * @param aDelegate -- ref to delegate to be used to tokenize + * @return + */ +CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) : + mTokenDeque(PR_TRUE,TokenFreeProc) { + mParserFilter = aIFilter; + mDelegate=aDelegate; + mScanner=new CScanner(aURL,aMode); + mParseMode=aMode; +} + +/** + * Default constructor + * + * @update gess 3/25/98 + * @param aFilename -- name of file to be tokenized + * @param aDelegate -- ref to delegate to be used to tokenize + * @return + */ +CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) : + mTokenDeque(PR_TRUE,TokenFreeProc) { + mParserFilter = aIFilter; + mDelegate=aDelegate; + mScanner=new CScanner(aFilename,aMode); + mParseMode=aMode; +} + +/** + * Default constructor + * + * @update gess 3/25/98 + * @param aFilename -- name of file to be tokenized + * @param aDelegate -- ref to delegate to be used to tokenize + * @return + */ +CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) : + mTokenDeque(PR_TRUE,TokenFreeProc) { + mParserFilter = aIFilter; + mDelegate=aDelegate; + mScanner=new CScanner(aMode); + mParseMode=aMode; +} + +/** + * default destructor + * + * @update gess 3/25/98 + * @param + * @return + */ +CTokenizer::~CTokenizer() { + delete mScanner; + mDelegate->Destroy(); + mScanner=0; +} + + +/** + * + * + * @update gess 5/13/98 + * @param + * @return + */ +PRBool CTokenizer::Append(nsString& aBuffer) { + if(mScanner) + return mScanner->Append(aBuffer); + return 
PR_FALSE; +} + + +/** + * + * + * @update gess 5/21/98 + * @param + * @return + */ +PRBool CTokenizer::Append(const char* aBuffer, PRInt32 aLen){ + if(mScanner) + return mScanner->Append(aBuffer,aLen); + return PR_FALSE; +} + +/** + * Retrieve a reference to the internal token deque. + * + * @update gess 4/20/98 + * @return deque reference + */ +nsDeque& CTokenizer::GetDeque(void) { + return mTokenDeque; +} + +/** + * Cause the tokenizer to consume the next token, and + * return an error result. + * + * @update gess 3/25/98 + * @param anError -- ref to error code + * @return new token or null + */ +PRInt32 CTokenizer::GetToken(CToken*& aToken) { + PRInt32 result=mDelegate->GetToken(*mScanner,aToken); + return result; +} + +/** + * Retrieve the number of elements in the deque + * + * @update gess 3/25/98 + * @param + * @return int containing element count + */ +PRInt32 CTokenizer::GetSize(void) { + return mTokenDeque.GetSize(); +} + + +/** + * Part of the code sandwich, this gets called right before + * the tokenization process begins. The main reason for + * this call is to allow the delegate to do initialization. 
+ * + * @update gess 3/25/98 + * @param + * @return TRUE if it's ok to proceed + */ +PRBool CTokenizer::WillTokenize(PRBool aIncremental){ + PRBool result=PR_TRUE; + result=mDelegate->WillTokenize(aIncremental); + return result; +} + +/** + * + * @update gess 3/25/98 + * @return TRUE if it's ok to proceed + */ +PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){ + CToken* theToken=0; + PRInt32 result=kNoError; + + WillTokenize(PR_TRUE); + + while(kNoError==result) { + result=GetToken(theToken); + if(theToken && (kNoError==result)) { + +#ifdef VERBOSE_DEBUG + theToken->DebugDumpToken(cout); +#endif + + PRBool bWillAdd = PR_TRUE; + if (mParserFilter) + bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken); + if(bWillAdd && mDelegate->WillAddToken(*theToken)) { + mTokenDeque.Push(theToken); + } + } + else if (theToken) + delete theToken; + } + if(kEOF==result) + result=kNoError; + DidTokenize(PR_TRUE); + return result; +} + +/** + * This is the primary control routine. It iteratively + * consumes tokens until an error occurs or you run out + * of data. + * + * @update gess 3/25/98 + * @return error code + */ +PRInt32 CTokenizer::Tokenize(int anIteration) { + CToken* theToken=0; + PRInt32 result=kNoError; + PRBool done=(0==anIteration) ? 
(!WillTokenize(PR_TRUE)) : PR_FALSE; + + + while((PR_FALSE==done) && (kNoError==result)) { + mScanner->Mark(); + result=GetToken(theToken); + if(kNoError==result) { + if(theToken) { + + #ifdef VERBOSE_DEBUG + theToken->DebugDumpToken(cout); + #endif + + PRBool bWillAdd = PR_TRUE; + if (mParserFilter) + bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken); + if(bWillAdd && mDelegate->WillAddToken(*theToken)) { + mTokenDeque.Push(theToken); + } + } + + } + else { + if(theToken) + delete theToken; + mScanner->RewindToMark(); + } + } + if((PR_TRUE==done) && (kInterrupted!=result)) + DidTokenize(PR_TRUE); + return result; +} + +/** + * This is the tail-end of the code sandwich for the + * tokenization process. It gets called once tokenziation + * has completed. + * + * @update gess 3/25/98 + * @param + * @return TRUE if all went well + */ +PRBool CTokenizer::DidTokenize(PRBool aIncremental) { + PRBool result=mDelegate->DidTokenize(aIncremental); + +#ifdef VERBOSE_DEBUG + DebugDumpTokens(cout); +#endif + + return result; +} + +/** + * This debug routine is used to cause the tokenizer to + * iterate its token list, asking each token to dump its + * contents to the given output stream. + * + * @update gess 3/25/98 + * @param + * @return + */ +void CTokenizer::DebugDumpTokens(ostream& out) { + nsDequeIterator b=mTokenDeque.Begin(); + nsDequeIterator e=mTokenDeque.End(); + + CToken* theToken; + while(b!=e) { + theToken=(CToken*)(b++); + theToken->DebugDumpToken(out); + } +} + + +/** + * This debug routine is used to cause the tokenizer to + * iterate its token list, asking each token to dump its + * contents to the given output stream. 
+ * + * @update gess 3/25/98 + * @param + * @return + */ +void CTokenizer::DebugDumpSource(ostream& out) { + nsDequeIterator b=mTokenDeque.Begin(); + nsDequeIterator e=mTokenDeque.End(); + + CToken* theToken; + while(b!=e) { + theToken=(CToken*)(b++); + theToken->DebugDumpSource(out); + } + +} + + +/** + * + * + * @update gess 3/25/98 + * @param + * @return + */ +void CTokenizer::SelfTest(void) { +#ifdef _DEBUG +#endif +} + + diff --git a/mozilla/htmlparser/src/nsTokenizer.h b/mozilla/htmlparser/src/nsTokenizer.h new file mode 100644 index 00000000000..7d54555ef6f --- /dev/null +++ b/mozilla/htmlparser/src/nsTokenizer.h @@ -0,0 +1,185 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "NPL"); you may not use this file except in + * compliance with the NPL. You may obtain a copy of the NPL at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the NPL is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL + * for the specific language governing rights and limitations under the + * NPL. + * + * The Initial Developer of this code under the NPL is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1998 Netscape Communications Corporation. All Rights + * Reserved. + */ + +/** + * MODULE NOTES: + * LAST MODS: gess 28Feb98 + * + * This file declares the basic tokenizer class. The + * central theme of this class is to control and + * coordinate a tokenization process. Note that this + * class is grammer-neutral: this class doesn't care + * at all what the underlying stream consists of. + * + * The main purpose of this class is to iterate over an + * input stream with the help of a given scanner and a + * given type-specific tokenizer-Delegate. 
+ * + * The primary method here is the tokenize() method, which + * simple loops calling getToken() until an EOF condition + * (or some other error) occurs. + * + */ + + +#ifndef TOKENIZER +#define TOKENIZER + +#include "nsToken.h" +#include "nsITokenizerDelegate.h" +#include "nsDeque.h" +#include + +class CScanner; +class nsIURL; +class nsIParserFilter; + +class CTokenizer { + public: + + CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0); + CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0); + CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0); + + ~CTokenizer(); + + /** + * This method incrementally tokenizes as much content as + * it can get its hands on. + * + * @update gess 3/25/98 + * @return TRUE if it's ok to proceed + */ + PRInt32 Tokenize(int anIteration); //your friendly incremental version + + /** + * + * @update gess 3/25/98 + * @return TRUE if it's ok to proceed + */ + PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens=PR_TRUE); + + /** + * Cause the tokenizer to consume the next token, and + * return an error result. + * + * @update gess 3/25/98 + * @param anError -- ref to error code + * @return new token or null + */ + PRInt32 GetToken(CToken*& aToken); + + /** + * Retrieve the number of elements in the deque + * + * @update gess 3/25/98 + * @return int containing element count + */ + PRInt32 GetSize(void); + + /** + * Retrieve a reference to the internal token deque. 
+ * + * @update gess 4/20/98 + * @return deque reference + */ + nsDeque& GetDeque(void); + + /** + * + * @update gess 4/20/98 + * @return deque reference + */ + PRBool Append(nsString& aBuffer); + + /** + * + * @update gess 4/20/98 + * @return deque reference + */ + PRBool Append(const char* aBuffer, PRInt32 aLen); + + + /** + * + * + * @update gess 5/13/98 + * @param + * @return + */ + PRBool SetBuffer(nsString& aBuffer); + + /** + * This debug routine is used to cause the tokenizer to + * iterate its token list, asking each token to dump its + * contents to the given output stream. + * + * @update gess 3/25/98 + * @param + * @return + */ + void DebugDumpSource(ostream& out); + + /** + * This debug routine is used to cause the tokenizer to + * iterate its token list, asking each token to dump its + * contents to the given output stream. + * + * @update gess 3/25/98 + * @param + * @return + */ + void DebugDumpTokens(ostream& out); + + static void SelfTest(); + + protected: + + /** + * This is the front-end of the code sandwich for the + * tokenization process. It gets called once just before + * tokenziation begins. + * + * @update gess 3/25/98 + * @param aIncremental tells us if tokenization is incremental + * @return TRUE if all went well + */ + PRBool WillTokenize(PRBool aIncremental); + + + /** + * This is the tail-end of the code sandwich for the + * tokenization process. It gets called once tokenziation + * has completed. 
+ * + * @update gess 3/25/98 + * @param aIncremental tells us if tokenization was incremental + * @return TRUE if all went well + */ + PRBool DidTokenize(PRBool aIncremental); + + ITokenizerDelegate* mDelegate; + CScanner* mScanner; + nsDeque mTokenDeque; + eParseMode mParseMode; + nsIParserFilter* mParserFilter; +}; + +#endif + + diff --git a/mozilla/parser/htmlparser/src/CNavDTD.cpp b/mozilla/parser/htmlparser/src/CNavDTD.cpp index ea0844b5856..4547755d1fb 100644 --- a/mozilla/parser/htmlparser/src/CNavDTD.cpp +++ b/mozilla/parser/htmlparser/src/CNavDTD.cpp @@ -31,6 +31,7 @@ * */ +#include "nsIParserDebug.h" #include "CNavDTD.h" #include "nsHTMLTokens.h" #include "nsCRT.h" @@ -43,13 +44,10 @@ #include "prtypes.h" //this is here for debug reasons... #include "prio.h" #include "plstr.h" -#include "prstrm.h" -#include #ifdef XP_PC #include //this is here for debug reasons... #endif -#include #include "prmem.h" @@ -63,8 +61,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer"; static const char* kNullToken = "Error: Null token given"; static const char* kInvalidTagStackPos = "Error: invalid tag stack position"; -static char* gVerificationOutputDir=0; -static char* gURLRef=0; static nsAutoString gEmpty; static char formElementTags[]= { @@ -234,17 +230,18 @@ static CNavTokenDeallocator gTokenKiller; * @return */ CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) { + NS_INIT_REFCNT(); mParser=0; + mURLRef=0; + mParserDebug=0; nsCRT::zero(mLeafBits,sizeof(mLeafBits)); nsCRT::zero(mContextStack,sizeof(mContextStack)); nsCRT::zero(mStyleStack,sizeof(mStyleStack)); nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers)); mContextStackPos=0; mStyleStackPos=0; - gURLRef = 0; mHasOpenForm=PR_FALSE; mHasOpenMap=PR_FALSE; - gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER"); InitializeDefaultTokenHandlers(); } @@ -257,11 +254,10 @@ CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) { */ CNavDTD::~CNavDTD(){ DeleteTokenHandlers(); - if (gURLRef) 
- { - PL_strfree(gURLRef); - gURLRef = 0; - } + if (mURLRef) + PL_strfree(mURLRef); + if (mParserDebug) + NS_RELEASE(mParserDebug); // NS_RELEASE(mSink); } @@ -321,7 +317,8 @@ PRInt32 CNavDTD::HandleToken(CToken* aToken){ if(aHandler) { result=(*aHandler)(theToken,this); - Verify("xxx",PR_TRUE); + if (mParserDebug) + mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef); } }//if @@ -807,7 +804,7 @@ PRBool CNavDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) const * @param aChild -- tag enum of child container * @return PR_TRUE if parent can contain child */ -PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const { +PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) { PRBool result=PR_FALSE; @@ -884,11 +881,11 @@ PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const { //handle form elements (this is very much a WIP!!!) if(0!=strchr(formElementTags,aChild)){ - return CanContainFormElement(aParent,aChild); + return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild); } - switch(aParent) { + switch((eHTMLTags)aParent) { case eHTMLTag_a: case eHTMLTag_acronym: result=PRBool(0!=strchr(gTagSet1,aChild)); break; @@ -1475,7 +1472,7 @@ eHTMLTags CNavDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{ * @param aChild -- tag type of child * @return TRUE if propagation closes; false otherwise */ -PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const { +PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) { PRBool result=PR_FALSE; switch(aParentTag) { @@ -1490,7 +1487,7 @@ PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTag //otherwise, intentionally fall through... 
case eHTMLTag_tr: - if(PR_TRUE==CanContain(eHTMLTag_td,aChildTag)) { + if(PR_TRUE==CanContain((PRInt32)eHTMLTag_td,(PRInt32)aChildTag)) { aVector.Append((PRUnichar)eHTMLTag_td); result=BackwardPropagate(aVector,aParentTag,eHTMLTag_td); // result=PR_TRUE; @@ -2723,433 +2720,19 @@ void CNavDTD::WillInterruptParse(void){ return; } - -/************************************************************************ - Here's a bunch of stuff JEvering put into the parser to do debugging. - ************************************************************************/ - -/** - * This debug method records an invalid context vector and it's - * associated context vector and URL in a simple flat file mapping which - * resides in the verification directory and is named context.map - * - * @update jevering 6/06/98 - * @param path is the directory structure indicating the bad context vector - * @param pURLRef is the associated URL - * @param filename to record mapping to if not already recorded - * @return TRUE if it is already record (dont rerecord) - */ - -#define CONTEXT_VECTOR_MAP "/vector.map" -#define CONTEXT_VECTOR_STAT "/vector.stat" -#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n" -static PRBool DebugRecord(char * path, char * pURLRef, char * filename) -{ - char recordPath[2048]; - PRIntn oflags = 0; - - // create the record file name from the verification director - // and the default name. 
- strcpy(recordPath,gVerificationOutputDir); - strcat(recordPath,CONTEXT_VECTOR_MAP); - - // create the file exists, only open for read/write - // otherwise, create it - if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS) - oflags = PR_CREATE_FILE; - oflags |= PR_RDWR; - - // open the record file - PRFileDesc * recordFile = PR_Open(recordPath,oflags,0); - - if (recordFile) { - - char * string = (char *)PR_Malloc(2048); - PRBool found = PR_FALSE; - - // vectors are stored on the format iof "URL vector filename" - // where the vector contains the verification path and - // the filename contains the debug source dump - sprintf(string,"%s %s %s\r\n", pURLRef, path, filename); - - // get the file size, read in the file and parse it line at - // a time to check to see if we have already recorded this - // occurance - - PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END); - if (iSize) { - - char * buffer = (char*)PR_Malloc(iSize); - char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048); - if (buffer!=NULL && string!=NULL) { - PRInt32 ibufferpos, istringpos; - - // beginning of file for read - PR_Seek(recordFile,0,PR_SEEK_SET); - PR_Read(recordFile,buffer,iSize); - - // run through the file looking for a matching vector - for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++) - { - // compare string once we have hit the end of the line - if (buffer[ibufferpos] == '\r') { - stringbuf[istringpos] = '\0'; - istringpos = 0; - // skip newline and space - ibufferpos++; - - if (PL_strlen(stringbuf)) { - char * space; - // chop of the filename for compare - if ((space = PL_strrchr(stringbuf, ' '))!=NULL) - *space = '\0'; - - // we have already recorded this one, free up, and return - if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) { - PR_Free(buffer); - PR_Free(stringbuf); - PR_Free(string); - return PR_TRUE; - } - } - } - - // build up the compare string - else - stringbuf[istringpos++] = buffer[ibufferpos]; - } - - // throw away the record file data - 
PR_Free(buffer); - PR_Free(stringbuf); - } - } - - // if this bad vector was not recorded, add it to record file - - if (!found) { - PR_Seek(recordFile,0,PR_SEEK_END); - PR_Write(recordFile,string,PL_strlen(string)); - } - - PR_Close(recordFile); - PR_Free(string); +void CNavDTD::SetURLRef(char * aURLRef){ + if (mURLRef) { + PL_strfree(mURLRef); + mURLRef=0; } - - // vector was not recorded - return PR_FALSE; + if (aURLRef) + mURLRef = PL_strdup(aURLRef); } -// structure to store the vector statistic information - -typedef struct vector_info { - PRInt32 references; // number of occurances counted - PRInt32 count; // number of tags in the vector - PRBool good_vector; // is this a valid vector? - eHTMLTags* vector; // and the vector -} VectorInfo; - -// global table for storing vector statistics and the size -static VectorInfo ** gVectorInfoArray = 0; -static PRInt32 gVectorCount = 0; - -// the statistic vector table grows each time it exceeds this -// stepping value -#define TABLE_SIZE 128 - -// compare function for quick sort. Compares references and -// sorts in decending order - -static int compare( const void *arg1, const void *arg2 ) +void CNavDTD::SetParserDebug(nsIParserDebug * aParserDebug) { - VectorInfo ** p1 = (VectorInfo**)arg1; - VectorInfo ** p2 = (VectorInfo**)arg2; - return (*p2)->references - (*p1)->references; -} - - -/** - * This debug routines stores statistical information about a - * context vector. The context vector statistics are stored in - * a global array. The table is resorted each time it grows to - * aid in lookup speed. 
If a vector has already been noted, its - * reference count is bumped, otherwise it is added to the table - * - * @update jevering 6/11/98 - * @param aTags is the tag list (vector) - * @param count is the size of the vector - * @return - */ - -static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector) -{ - // if the table doesn't exist, create it - if (!gVectorInfoArray) { - gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*)); - } - else { - // attempt to look up the vector - for (PRInt32 i = 0; i < gVectorCount; i++) - - // check the vector only if they are the same size, if they - // match then just return without doing further work - if (gVectorInfoArray[i]->count == count) - if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) { - - // bzzzt. and we have a winner.. bump the ref count - gVectorInfoArray[i]->references++; - return; - } - } - - // the context vector hasn't been noted, so allocate it and - // initialize it one.. add it to the table - VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo)); - pVectorInfo->references = 1; - pVectorInfo->count = count; - pVectorInfo->good_vector = good_vector; - pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags)); - memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count); - gVectorInfoArray[gVectorCount++] = pVectorInfo; - - // have we maxed out the table? grow it.. sort it.. love it. 
- if ((gVectorCount % TABLE_SIZE) == 0) { - gVectorInfoArray = (VectorInfo**)realloc( - gVectorInfoArray, - (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE)); - if (gVectorCount) { - qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare); - } - } -} - -static void MakeVectorString(char * vector_string, VectorInfo * pInfo) -{ - sprintf (vector_string, "%6d ", pInfo->references); - for (PRInt32 j = 0; j < pInfo->count; j++) { - PL_strcat(vector_string, "<"); - PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j])); - PL_strcat(vector_string, ">"); - } - PL_strcat(vector_string,"\r\n"); -} - -/** - * This debug routine dumps out the vector statistics to a text - * file in the verification directory and defaults to the name - * "vector.stat". It contains all parsed context vectors and there - * occurance count sorted in decending order. - * - * @update jevering 6/11/98 - * @param - * @return - */ - -extern "C" NS_EXPORT void DumpVectorRecord(void) -{ - // do we have a table? - if (gVectorCount) { - - // hopefully, they wont exceed 1K. - char vector_string[1024]; - char path[1024]; - - path[0] = '\0'; - - // put in the verification directory.. else the root - if (gVerificationOutputDir) - strcpy(path,gVerificationOutputDir); - - strcat(path,CONTEXT_VECTOR_STAT); - - // open the stat file creaming any existing stat file - PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0); - if (statisticFile) { - - PRInt32 i; - PRofstream ps; - ps.attach(statisticFile); - - // oh what the heck, sort it again - if (gVectorCount) { - qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare); - } - - // cute little header - sprintf(vector_string,"Context vector occurance results. 
Processed %d unique vectors.\r\n\r\n", gVectorCount); - ps << vector_string; - - ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n"; - ps << VECTOR_TABLE_HEADER; - - // dump out the bad vectors encountered - for (i = 0; i < gVectorCount; i++) { - if (!gVectorInfoArray[i]->good_vector) { - MakeVectorString(vector_string, gVectorInfoArray[i]); - ps << vector_string; - } - } - - ps << "\r\n\r\nValid context vector summary\r\n"; - ps << VECTOR_TABLE_HEADER; - - // take a big vector table dump (good vectors) - for (i = 0; i < gVectorCount; i++) { - if (gVectorInfoArray[i]->good_vector) { - MakeVectorString(vector_string, gVectorInfoArray[i]); - ps << vector_string; - } - // free em up. they mean nothing to me now (I'm such a user) - - if (gVectorInfoArray[i]->vector) - PR_Free(gVectorInfoArray[i]->vector); - PR_Free(gVectorInfoArray[i]); - } - } - - // ok, we are done with the table, free it up as well - PR_Free(gVectorInfoArray); - gVectorInfoArray = 0; - gVectorCount = 0; - PR_Close(statisticFile); + if (aParserDebug) { + mParserDebug = aParserDebug; + NS_ADDREF(mParserDebug); } } - - -/** - * This debug method allows us to determine whether or not - * we've seen (and can handle) the given context vector. 
- * - * @update gess4/22/98 - * @param tags is an array of eHTMLTags - * @param count represents the number of items in the tags array - * @param aDTD is the DTD we plan to ask for verification - * @return TRUE if we know how to handle it, else false - */ -PRBool CNavDTD::VerifyContextVector(void) const { - - PRBool result=PR_TRUE; - - if(0!=gVerificationOutputDir) { - -#ifdef XP_PC - char path[_MAX_PATH+1]; - strcpy(path,gVerificationOutputDir); -#endif - - int i=0; - for(i=0;iDebugDumpSource(ps); - PR_Close(debugFile); - } - } - } - } - - return result; -} diff --git a/mozilla/parser/htmlparser/src/CNavDTD.h b/mozilla/parser/htmlparser/src/CNavDTD.h index 9883a4483e6..cdaa93d9dd9 100644 --- a/mozilla/parser/htmlparser/src/CNavDTD.h +++ b/mozilla/parser/htmlparser/src/CNavDTD.h @@ -42,6 +42,7 @@ class nsHTMLParser; class nsIHTMLContentSink; +class nsIParserDebug; class CNavDTD : public nsIDTD { @@ -141,11 +142,11 @@ class CNavDTD : public nsIDTD { * of one type can contain a tag of another type. * * @update gess 3/25/98 - * @param aParent -- tag enum of parent container - * @param aChild -- tag enum of child container + * @param aParent -- int tag of parent container + * @param aChild -- int tag of child container * @return PR_TRUE if parent can contain child */ - virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const; + virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild); /** * This method is called to determine whether or not a tag @@ -199,26 +200,21 @@ class CNavDTD : public nsIDTD { */ virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const; - /** - * This method gets called at various times by the parser - * whenever we want to verify a valid context stack. This - * method also gives us a hook to add debugging metrics. 
- * - * @update gess4/6/98 - * @param aStack[] array of ints (tokens) - * @param aCount number of elements in given array - * @return TRUE if stack is valid, else FALSE + * + * @update jevering 6/18/98 + * @param aURLRef if the current URL reference (for debugger) + * @return */ - virtual PRBool VerifyContextVector(void) const; + virtual void SetURLRef(char * aURLRef); /** * - * @update gess5/18/98 - * @param + * @update jevering 6/18/98 + * @param aParserDebug created debug parser object * @return */ - virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats); + virtual void SetParserDebug(nsIParserDebug * aParserDebug); /** * This method tries to design a context map (without actually @@ -230,7 +226,7 @@ class CNavDTD : public nsIDTD { * @param aChild -- tag type of child * @return True if closure was achieved -- other false */ - virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const; + virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag); /** * This method tries to design a context map (without actually @@ -699,7 +695,8 @@ protected: PRBool mHasOpenForm; PRBool mHasOpenMap; nsDeque mTokenDeque; - + char* mURLRef; + nsIParserDebug* mParserDebug; }; diff --git a/mozilla/parser/htmlparser/src/COtherDTD.cpp b/mozilla/parser/htmlparser/src/COtherDTD.cpp index 47ecc728678..ccbf97c725a 100644 --- a/mozilla/parser/htmlparser/src/COtherDTD.cpp +++ b/mozilla/parser/htmlparser/src/COtherDTD.cpp @@ -31,6 +31,7 @@ * */ +#include "nsIParserDebug.h" #include "COtherDTD.h" #include "nsHTMLTokens.h" #include "nsCRT.h" @@ -63,8 +64,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer"; static const char* kNullToken = "Error: Null token given"; static const char* kInvalidTagStackPos = "Error: invalid tag stack position"; -static char* gVerificationOutputDir=0; -static char* gURLRef=0; static nsAutoString gEmpty; static char formElementTags[]= { @@ -235,17 +234,18 @@ 
static COtherTokenDeallocator gTokenKiller; * @return */ COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) { + NS_INIT_REFCNT(); mParser=0; + mURLRef=0; + mParserDebug=0; nsCRT::zero(mLeafBits,sizeof(mLeafBits)); nsCRT::zero(mContextStack,sizeof(mContextStack)); nsCRT::zero(mStyleStack,sizeof(mStyleStack)); nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers)); mContextStackPos=0; mStyleStackPos=0; - gURLRef = 0; mHasOpenForm=PR_FALSE; mHasOpenMap=PR_FALSE; - gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER"); InitializeDefaultTokenHandlers(); } @@ -258,11 +258,10 @@ COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) { */ COtherDTD::~COtherDTD(){ DeleteTokenHandlers(); - if (gURLRef) - { - PL_strfree(gURLRef); - gURLRef = 0; - } + if (mURLRef) + PL_strfree(mURLRef); + if (mParserDebug) + NS_RELEASE(mParserDebug); // NS_RELEASE(mSink); } @@ -321,7 +320,8 @@ PRInt32 COtherDTD::HandleToken(CToken* aToken){ if(aHandler) { result=(*aHandler)(theToken,this); - Verify("xxx",PR_TRUE); + if (mParserDebug) + mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef); } }//if @@ -807,7 +807,7 @@ PRBool COtherDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) cons * @param aChild -- tag enum of child container * @return PR_TRUE if parent can contain child */ -PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const { +PRBool COtherDTD::CanContain(PRInt32 aParent,PRInt32 aChild) { PRBool result=PR_FALSE; @@ -884,11 +884,11 @@ PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const { //handle form elements (this is very much a WIP!!!) 
if(0!=strchr(formElementTags,aChild)){ - return CanContainFormElement(aParent,aChild); + return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild); } - switch(aParent) { + switch((eHTMLTags)aParent) { case eHTMLTag_a: case eHTMLTag_acronym: result=PRBool(0!=strchr(gTagSet1,aChild)); break; @@ -1475,7 +1475,7 @@ eHTMLTags COtherDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{ * @param aChild -- tag type of child * @return TRUE if propagation closes; false otherwise */ -PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const { +PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) { PRBool result=PR_FALSE; switch(aParentTag) { @@ -2723,442 +2723,19 @@ void COtherDTD::WillInterruptParse(void){ return; } - -/************************************************************************ - Here's a bunch of stuff JEvering put into the parser to do debugging. - ************************************************************************/ - -/** - * This debug method records an invalid context vector and it's - * associated context vector and URL in a simple flat file mapping which - * resides in the verification directory and is named context.map - * - * @update jevering 6/06/98 - * @param path is the directory structure indicating the bad context vector - * @param pURLRef is the associated URL - * @param filename to record mapping to if not already recorded - * @return TRUE if it is already record (dont rerecord) - */ - -#define CONTEXT_VECTOR_MAP "/vector.map" -#define CONTEXT_VECTOR_STAT "/vector.stat" -#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n" -static PRBool DebugRecord(char * path, char * pURLRef, char * filename) -{ - char recordPath[2048]; - PRIntn oflags = 0; - - // create the record file name from the verification director - // and the default name. 
- strcpy(recordPath,gVerificationOutputDir); - strcat(recordPath,CONTEXT_VECTOR_MAP); - - // create the file exists, only open for read/write - // otherwise, create it - if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS) - oflags = PR_CREATE_FILE; - oflags |= PR_RDWR; - - // open the record file - PRFileDesc * recordFile = PR_Open(recordPath,oflags,0); - - if (recordFile) { - - char * string = (char *)PR_Malloc(2048); - PRBool found = PR_FALSE; - - // vectors are stored on the format iof "URL vector filename" - // where the vector contains the verification path and - // the filename contains the debug source dump - sprintf(string,"%s %s %s\r\n", pURLRef, path, filename); - - // get the file size, read in the file and parse it line at - // a time to check to see if we have already recorded this - // occurance - - PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END); - if (iSize) { - - char * buffer = (char*)PR_Malloc(iSize); - char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048); - if (buffer!=NULL && string!=NULL) { - PRInt32 ibufferpos, istringpos; - - // beginning of file for read - PR_Seek(recordFile,0,PR_SEEK_SET); - PR_Read(recordFile,buffer,iSize); - - // run through the file looking for a matching vector - for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++) - { - // compare string once we have hit the end of the line - if (buffer[ibufferpos] == '\r') { - stringbuf[istringpos] = '\0'; - istringpos = 0; - // skip newline and space - ibufferpos++; - - if (PL_strlen(stringbuf)) { - char * space; - // chop of the filename for compare - if ((space = PL_strrchr(stringbuf, ' '))!=NULL) - *space = '\0'; - - // we have already recorded this one, free up, and return - if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) { - PR_Free(buffer); - PR_Free(stringbuf); - PR_Free(string); - return PR_TRUE; - } - } - } - - // build up the compare string - else - stringbuf[istringpos++] = buffer[ibufferpos]; - } - - // throw away the record file data - 
PR_Free(buffer); - PR_Free(stringbuf); - } - } - - // if this bad vector was not recorded, add it to record file - - if (!found) { - PR_Seek(recordFile,0,PR_SEEK_END); - PR_Write(recordFile,string,PL_strlen(string)); - } - - PR_Close(recordFile); - PR_Free(string); +void COtherDTD::SetURLRef(char * aURLRef){ + if (mURLRef) { + PL_strfree(mURLRef); + mURLRef=0; } - - // vector was not recorded - return PR_FALSE; + if (aURLRef) + mURLRef = PL_strdup(aURLRef); } -// structure to store the vector statistic information - -typedef struct vector_info { - PRInt32 references; // number of occurances counted - PRInt32 count; // number of tags in the vector - PRBool good_vector; // is this a valid vector? - eHTMLTags* vector; // and the vector -} VectorInfo; - -// global table for storing vector statistics and the size -static VectorInfo ** gVectorInfoArray = 0; -static PRInt32 gVectorCount = 0; - -// the statistic vector table grows each time it exceeds this -// stepping value -#define TABLE_SIZE 128 - -// compare function for quick sort. Compares references and -// sorts in decending order - -static int compare( const void *arg1, const void *arg2 ) +void COtherDTD::SetParserDebug(nsIParserDebug * aParserDebug) { - VectorInfo ** p1 = (VectorInfo**)arg1; - VectorInfo ** p2 = (VectorInfo**)arg2; - return (*p2)->references - (*p1)->references; -} - -/** - * quick sort the statistic array causing the most frequently - * used vectors to be at the top (this makes it a little speedier - * when looking them up) - */ -static void SortVectorRecord(void) { - // of course, sort it only if there is something to sort - if (gVectorCount) { - qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare); - } -} - - -/** - * This debug routines stores statistical information about a - * context vector. The context vector statistics are stored in - * a global array. The table is resorted each time it grows to - * aid in lookup speed. 
If a vector has already been noted, its - * reference count is bumped, otherwise it is added to the table - * - * @update jevering 6/11/98 - * @param aTags is the tag list (vector) - * @param count is the size of the vector - * @return - */ - -static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector) -{ - // if the table doesn't exist, create it - if (!gVectorInfoArray) { - gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*)); - } - else { - // attempt to look up the vector - for (PRInt32 i = 0; i < gVectorCount; i++) - - // check the vector only if they are the same size, if they - // match then just return without doing further work - if (gVectorInfoArray[i]->count == count) - if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) { - - // bzzzt. and we have a winner.. bump the ref count - gVectorInfoArray[i]->references++; - return; - } - } - - // the context vector hasn't been noted, so allocate it and - // initialize it one.. add it to the table - VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo)); - pVectorInfo->references = 1; - pVectorInfo->count = count; - pVectorInfo->good_vector = good_vector; - pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags)); - memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count); - gVectorInfoArray[gVectorCount++] = pVectorInfo; - - // have we maxed out the table? grow it.. sort it.. love it. 
- if ((gVectorCount % TABLE_SIZE) == 0) { - gVectorInfoArray = (VectorInfo**)realloc( - gVectorInfoArray, - (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE)); - SortVectorRecord(); - } -} - -static void MakeVectorString(char * vector_string, VectorInfo * pInfo) -{ - sprintf (vector_string, "%6d ", pInfo->references); - for (PRInt32 j = 0; j < pInfo->count; j++) { - PL_strcat(vector_string, "<"); - PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j])); - PL_strcat(vector_string, ">"); - } - PL_strcat(vector_string,"\r\n"); -} - -/** - * This debug routine dumps out the vector statistics to a text - * file in the verification directory and defaults to the name - * "vector.stat". It contains all parsed context vectors and there - * occurance count sorted in decending order. - * - * @update jevering 6/11/98 - * @param - * @return - */ - -extern "C" NS_EXPORT void DumpVectorRecord_other(void) -{ - // do we have a table? - if (gVectorCount) { - - // hopefully, they wont exceed 1K. - char vector_string[1024]; - char path[1024]; - - path[0] = '\0'; - - // put in the verification directory.. else the root - if (gVerificationOutputDir) - strcpy(path,gVerificationOutputDir); - - strcat(path,CONTEXT_VECTOR_STAT); - - // open the stat file creaming any existing stat file - PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0); - if (statisticFile) { - - PRInt32 i; - PRofstream ps; - ps.attach(statisticFile); - - // oh what the heck, sort it again - SortVectorRecord(); - - // cute little header - sprintf(vector_string,"Context vector occurance results. 
Processed %d unique vectors.\r\n\r\n", gVectorCount); - ps << vector_string; - - ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n"; - ps << VECTOR_TABLE_HEADER; - - // dump out the bad vectors encountered - for (i = 0; i < gVectorCount; i++) { - if (!gVectorInfoArray[i]->good_vector) { - MakeVectorString(vector_string, gVectorInfoArray[i]); - ps << vector_string; - } - } - - ps << "\r\n\r\nValid context vector summary\r\n"; - ps << VECTOR_TABLE_HEADER; - - // take a big vector table dump (good vectors) - for (i = 0; i < gVectorCount; i++) { - if (gVectorInfoArray[i]->good_vector) { - MakeVectorString(vector_string, gVectorInfoArray[i]); - ps << vector_string; - } - // free em up. they mean nothing to me now (I'm such a user) - - if (gVectorInfoArray[i]->vector) - PR_Free(gVectorInfoArray[i]->vector); - PR_Free(gVectorInfoArray[i]); - } - } - - // ok, we are done with the table, free it up as well - PR_Free(gVectorInfoArray); - gVectorInfoArray = 0; - gVectorCount = 0; - PR_Close(statisticFile); + if (aParserDebug) { + mParserDebug = aParserDebug; + NS_ADDREF(mParserDebug); } } - - -/** - * This debug method allows us to determine whether or not - * we've seen (and can handle) the given context vector. 
- * - * @update gess4/22/98 - * @param tags is an array of eHTMLTags - * @param count represents the number of items in the tags array - * @param aDTD is the DTD we plan to ask for verification - * @return TRUE if we know how to handle it, else false - */ -PRBool COtherDTD::VerifyContextVector(void) const { - - PRBool result=PR_TRUE; - - if(0!=gVerificationOutputDir) { - -#ifdef XP_PC - char path[_MAX_PATH+1]; - strcpy(path,gVerificationOutputDir); -#endif - - int i=0; - for(i=0;iDebugDumpSource(ps); - PR_Close(debugFile); - } - } - } - } - - return result; -} - diff --git a/mozilla/parser/htmlparser/src/COtherDTD.h b/mozilla/parser/htmlparser/src/COtherDTD.h index 21d2346eafd..7a74866cf13 100644 --- a/mozilla/parser/htmlparser/src/COtherDTD.h +++ b/mozilla/parser/htmlparser/src/COtherDTD.h @@ -34,7 +34,6 @@ #include "nsDeque.h" - #define NS_IOtherHTML_DTD_IID \ {0x8a5e89c0, 0xd16d, 0x11d1, \ {0x80, 0x22, 0x00, 0x60, 0x8, 0x14, 0x98, 0x89}} @@ -42,6 +41,7 @@ class nsIParser; class nsIHTMLContentSink; +class nsIParserDebug; class COtherDTD : public nsIDTD { @@ -143,11 +143,11 @@ class COtherDTD : public nsIDTD { * of one type can contain a tag of another type. * * @update gess 3/25/98 - * @param aParent -- tag enum of parent container - * @param aChild -- tag enum of child container + * @param aParent -- int tag of parent container + * @param aChild -- int tag of child container * @return PR_TRUE if parent can contain child */ - virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const; + virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild); /** * This method is called to determine whether or not a tag @@ -201,26 +201,21 @@ class COtherDTD : public nsIDTD { */ virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const; - /** - * This method gets called at various times by the parser - * whenever we want to verify a valid context stack. This - * method also gives us a hook to add debugging metrics. 
- * - * @update gess4/6/98 - * @param aStack[] array of ints (tokens) - * @param aCount number of elements in given array - * @return TRUE if stack is valid, else FALSE + * + * @update jevering 6/18/98 + * @param aURLRef if the current URL reference (for debugger) + * @return */ - virtual PRBool VerifyContextVector(void) const; + virtual void SetURLRef(char * aURLRef); /** * - * @update gess5/18/98 - * @param + * @update jevering 6/18/98 + * @param aParserDebug created debug parser object * @return */ - virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats); + virtual void SetParserDebug(nsIParserDebug * aParserDebug); /** * This method tries to design a context map (without actually @@ -232,7 +227,7 @@ class COtherDTD : public nsIDTD { * @param aChild -- tag type of child * @return True if closure was achieved -- other false */ - virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const; + virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag); /** * This method tries to design a context map (without actually @@ -701,7 +696,8 @@ protected: PRBool mHasOpenForm; PRBool mHasOpenMap; nsDeque mTokenDeque; - + char* mURLRef; + nsIParserDebug* mParserDebug; }; diff --git a/mozilla/parser/htmlparser/src/Makefile b/mozilla/parser/htmlparser/src/Makefile index 37a8ca552ce..243a5c4aa98 100644 --- a/mozilla/parser/htmlparser/src/Makefile +++ b/mozilla/parser/htmlparser/src/Makefile @@ -24,6 +24,7 @@ DEFINES = -D_IMPL_NS_HTMLPARS CPPSRCS = \ nsHTMLContentSink.cpp \ nsParserNode.cpp \ + nsParserDebug.cpp \ nsScanner.cpp \ nsToken.cpp \ nsTokenHandler.cpp \ @@ -41,6 +42,8 @@ EXPORTS = \ nsHTMLTokens.h \ nsIParserNode.h \ nsIParser.h \ + nsIParserDebug.h \ + nsIParserFilter.h \ nsToken.h \ $(NULL) diff --git a/mozilla/parser/htmlparser/src/makefile.win b/mozilla/parser/htmlparser/src/makefile.win index 6f479747931..940db9e04f2 100644 --- a/mozilla/parser/htmlparser/src/makefile.win +++ 
b/mozilla/parser/htmlparser/src/makefile.win @@ -31,7 +31,8 @@ CPPSRCS=nsHTMLContentSink.cpp \ nsHTMLParser.cpp prstrm.cpp EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \ - nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h + nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h \ + nsIParserDebug.h nsIParserFilter.h CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \ .\$(OBJDIR)\CNavDTD.obj \ @@ -39,7 +40,7 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \ .\$(OBJDIR)\nsHTMLParser.obj \ .\$(OBJDIR)\nsHTMLTokens.obj .\$(OBJDIR)\nsParserNode.obj \ .\$(OBJDIR)\nsScanner.obj .\$(OBJDIR)\nsToken.obj \ - .\$(OBJDIR)\nsTokenHandler.obj \ + .\$(OBJDIR)\nsTokenHandler.obj .\$(OBJDIR)\nsParserDebug.obj \ .\$(OBJDIR)\prstrm.obj LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib diff --git a/mozilla/parser/htmlparser/src/nsHTMLParser.cpp b/mozilla/parser/htmlparser/src/nsHTMLParser.cpp index 99b1d81a340..d361ceed444 100644 --- a/mozilla/parser/htmlparser/src/nsHTMLParser.cpp +++ b/mozilla/parser/htmlparser/src/nsHTMLParser.cpp @@ -30,6 +30,7 @@ #include "prstrm.h" #include #include "nsIInputStream.h" +#include "nsIParserFilter.h" /* UNCOMMENT THIS IF STUFF STOPS WORKING... 
#ifdef XP_PC @@ -47,10 +48,7 @@ static const char* kNullURL = "Error: Null URL given"; static const char* kNullFilename= "Error: Null filename given"; static const char* kNullTokenizer = "Error: Unable to construct tokenizer"; -static char* gVerificationOutputDir=0; -static PRBool gRecordingStatistics=PR_TRUE; static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream -static char* gURLRef=0; //#define DEBUG_SAVE_SOURCE_DOC 1 #ifdef DEBUG_SAVE_SOURCE_DOC @@ -58,17 +56,6 @@ fstream* gTempStream=0; #endif -extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir) -{ - gVerificationOutputDir = verify_dir; -} - - -extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval) -{ - gRecordingStatistics = bval; -} - /** * This method is defined in nsIParser. It is used to * cause the COM-like construction of an nsHTMLParser. @@ -107,6 +94,7 @@ CTokenDeallocator gTokenKiller; */ nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) { NS_INIT_REFCNT(); + mParserFilter = nsnull; mListener = nsnull; mTransferBuffer=0; mSink=0; @@ -125,11 +113,6 @@ nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) { * @return */ nsHTMLParser::~nsHTMLParser() { - if (gURLRef) - { - PL_strfree(gURLRef); - gURLRef = 0; - } NS_IF_RELEASE(mListener); if(mTransferBuffer) delete [] mTransferBuffer; @@ -139,7 +122,7 @@ nsHTMLParser::~nsHTMLParser() { delete mCurrentPos; mCurrentPos=0; if(mDTD) - delete mDTD; + NS_RELEASE(mDTD); mDTD=0; if(mScanner) delete mScanner; @@ -185,6 +168,18 @@ nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr) return NS_OK; } +nsIParserFilter * nsHTMLParser::SetParserFilter(nsIParserFilter * aFilter) +{ + nsIParserFilter* old=mParserFilter; + if(old) + NS_RELEASE(old); + if(aFilter) { + mParserFilter=aFilter; + NS_ADDREF(aFilter); + } + return old; +} + /** * This method gets called in order to set the content * sink for this parser to dump nodes to. 
@@ -217,6 +212,10 @@ void nsHTMLParser::SetDTD(nsIDTD* aDTD) { mDTD=aDTD; } +nsIDTD * nsHTMLParser::GetDTD(void) { + return mDTD; +} + /** * * @@ -287,7 +286,7 @@ eParseMode DetermineParseMode() { * @param * @return */ -nsIDTD* GetDTD(eParseMode aMode) { +nsIDTD* NewDTD(eParseMode aMode) { nsIDTD* aDTD=0; switch(aMode) { case eParseMode_navigator: @@ -297,6 +296,8 @@ nsIDTD* GetDTD(eParseMode aMode) { default: break; } + if (aDTD) + aDTD->AddRef(); return aDTD; } @@ -364,11 +365,6 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){ nsString theBuffer; const int kLocalBufSize=10; - if (gURLRef) - PL_strfree(gURLRef); - if (aFilename) - gURLRef = PL_strdup(aFilename); - mMajorIteration=-1; mMinorIteration=-1; @@ -417,22 +413,20 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){ * @param aFilename -- const char* containing file to be parsed. * @return PR_TRUE if parse succeeded, PR_FALSE otherwise. */ -PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){ +PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug){ NS_PRECONDITION(0!=aFilename,kNullFilename); PRInt32 status=kBadFilename; mIncremental=aIncremental; if(aFilename) { - if (gURLRef) - PL_strfree(gURLRef); - gURLRef = PL_strdup(aFilename); - mParseMode=DetermineParseMode(); - mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD; + mDTD=(0==mDTD) ? 
NewDTD(mParseMode) : mDTD; if(mDTD) { mDTD->SetParser(this); mDTD->SetContentSink(mSink); + mDTD->SetURLRef((char *)aFilename); + mDTD->SetParserDebug(aDebug); } WillBuildModel(); @@ -466,7 +460,8 @@ PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){ */ PRInt32 nsHTMLParser::Parse(nsIURL* aURL, nsIStreamListener* aListener, - PRBool aIncremental) { + PRBool aIncremental, + nsIParserDebug * aDebug) { NS_PRECONDITION(0!=aURL,kNullURL); PRInt32 status=kBadURL; @@ -485,19 +480,13 @@ PRInt32 nsHTMLParser::Parse(nsIURL* aURL, if(aURL) { - if (gURLRef) - { - PL_strfree(gURLRef); - gURLRef = 0; - } - if (aURL->GetSpec()) - gURLRef = PL_strdup(aURL->GetSpec()); - mParseMode=DetermineParseMode(); - mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD; + mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD; if(mDTD) { mDTD->SetParser(this); mDTD->SetContentSink(mSink); + mDTD->SetURLRef((char *)aURL->GetSpec()); + mDTD->SetParserDebug(aDebug); } WillBuildModel(); @@ -689,6 +678,9 @@ nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length) } #endif + if (mParserFilter) + mParserFilter->RawBuffer(mTransferBuffer, &len); + mScanner->Append(&mTransferBuffer[offset],len); } //if diff --git a/mozilla/parser/htmlparser/src/nsHTMLParser.h b/mozilla/parser/htmlparser/src/nsHTMLParser.h index 1b1c8470aa3..d8638dd3b32 100644 --- a/mozilla/parser/htmlparser/src/nsHTMLParser.h +++ b/mozilla/parser/htmlparser/src/nsHTMLParser.h @@ -73,6 +73,8 @@ class nsIHTMLContentSink; class nsIURL; class nsIDTD; class CScanner; +class nsIParserFilter; +class nsIParserDebug; class nsHTMLParser : public nsIParser, public nsIStreamListener { @@ -103,8 +105,12 @@ friend class CTokenHandler; * @return old sink, or NULL */ virtual nsIContentSink* SetContentSink(nsIContentSink* aSink); + + virtual nsIParserFilter* SetParserFilter(nsIParserFilter* aFilter); virtual void SetDTD(nsIDTD* aDTD); + + virtual nsIDTD * GetDTD(void); /** * @@ -124,7 +130,8 @@ friend class CTokenHandler; */ 
virtual PRInt32 Parse(nsIURL* aURL, nsIStreamListener* aListener, - PRBool aIncremental=PR_TRUE); + PRBool aIncremental=PR_TRUE, + nsIParserDebug * aDebug = 0); /** * Cause parser to parse input from given file in given mode @@ -133,7 +140,7 @@ friend class CTokenHandler; * @param aMode is the desired parser mode (Nav, other, etc.) * @return TRUE if all went well -- FALSE otherwise */ - virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental); + virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0); /** * @update gess5/11/98 @@ -290,7 +297,8 @@ protected: //********************************************* nsIStreamListener* mListener; - nsIContentSink* mSink; + nsIContentSink* mSink; + nsIParserFilter* mParserFilter; nsDequeIterator* mCurrentPos; nsDequeIterator* mMarkPos; diff --git a/mozilla/parser/htmlparser/src/nsIDTD.h b/mozilla/parser/htmlparser/src/nsIDTD.h index 926ee926943..77ae7f9868c 100644 --- a/mozilla/parser/htmlparser/src/nsIDTD.h +++ b/mozilla/parser/htmlparser/src/nsIDTD.h @@ -37,6 +37,7 @@ class nsIParser; class CToken; class nsIContentSink; +class nsIParserDebug; class nsIDTD : public nsISupports { @@ -115,12 +116,28 @@ class nsIDTD : public nsISupports { /** * - * @update gess5/18/98 - * @param + * @update jevering 6/18/98 + * @param aURLRef if the current URL reference (for debugger) * @return */ - virtual PRInt32 Verify(const char* anOutputDir,PRBool aRecordStats)=0; + virtual void SetURLRef(char * aURLRef) = 0; + /** + * + * @update jevering 6/18/98 + * @param aParent parent tag + * @param aChild child tag + * @return PR_TRUE if valid container + */ + virtual PRBool CanContain(PRInt32 aParent, PRInt32 aChild) = 0; + + /** + * + * @update jevering 6/18/98 + * @param aParserDebug created debug parser object + * @return + */ + virtual void SetParserDebug(nsIParserDebug * aParserDebug) = 0; }; diff --git a/mozilla/parser/htmlparser/src/nsIParser.h b/mozilla/parser/htmlparser/src/nsIParser.h index 
f509e1c909e..a4ffd45bc27 100644 --- a/mozilla/parser/htmlparser/src/nsIParser.h +++ b/mozilla/parser/htmlparser/src/nsIParser.h @@ -34,6 +34,7 @@ class nsString; class CToken; class nsIURL; class nsIDTD; +class nsIParserDebug; /** * This class defines the iparser interface. This XPCOM @@ -60,9 +61,10 @@ class nsIParser : public nsISupports { virtual PRInt32 Parse(nsIURL* aURL, nsIStreamListener* aListener, - PRBool aIncremental=PR_TRUE) = 0; + PRBool aIncremental=PR_TRUE, + nsIParserDebug * aDebug = 0) = 0; - virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0; + virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0)=0; virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0; diff --git a/mozilla/parser/htmlparser/src/nsIParserDebug.h b/mozilla/parser/htmlparser/src/nsIParserDebug.h new file mode 100644 index 00000000000..a1e45204291 --- /dev/null +++ b/mozilla/parser/htmlparser/src/nsIParserDebug.h @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "NPL"); you may not use this file except in + * compliance with the NPL. You may obtain a copy of the NPL at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the NPL is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL + * for the specific language governing rights and limitations under the + * NPL. + * + * The Initial Developer of this code under the NPL is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1998 Netscape Communications Corporation. All Rights + * Reserved. 
+ */ + +/** + * MODULE NOTES: + * @update gess 4/8/98 + * + * + */ + +#ifndef NS_IPARSERDEBUG__ +#define NS_IPARSERDEBUG__ + +#include "nsISupports.h" +#include "nsHTMLTokens.h" +#include "prtypes.h" + +#define NS_IPARSERDEBUG_IID \ + {0x7b68c220, 0x0685, 0x11d2, \ + {0xa4, 0xb5, 0x00, 0x80, 0x5f, 0x2a, 0x0e, 0xd2}} + + +class nsIDTD; +class nsHTMLParser; + +class nsIParserDebug : public nsISupports { + +public: + + virtual void SetVerificationDirectory(char * verify_dir) = 0; + + virtual void SetRecordStatistics(PRBool bval) = 0; + + virtual PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef) = 0; + + virtual void DumpVectorRecord(void) = 0; + +}; + +extern NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult); + +#endif /* NS_IPARSERDEBUG__ */ \ No newline at end of file diff --git a/mozilla/parser/htmlparser/src/nsIParserFilter.h b/mozilla/parser/htmlparser/src/nsIParserFilter.h new file mode 100644 index 00000000000..8b257515efc --- /dev/null +++ b/mozilla/parser/htmlparser/src/nsIParserFilter.h @@ -0,0 +1,51 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "NPL"); you may not use this file except in + * compliance with the NPL. You may obtain a copy of the NPL at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the NPL is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL + * for the specific language governing rights and limitations under the + * NPL. + * + * The Initial Developer of this code under the NPL is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1998 Netscape Communications Corporation. All Rights + * Reserved. 
+ */ + +/** + * MODULE NOTES: + * @update jevering 6/17/98 + * + */ + +#ifndef IPARSERFILTER +#define IPARSERFILTER + +#include "nsISupports.h" + +class CToken; + +#define NS_IPARSERFILTER_IID \ + {0x14d6ff0, 0x0610, 0x11d2, \ + {0x8c, 0x3f, 0x00, 0x80, 0x5f, 0x8a, 0x1d, 0xb7}} + + +class nsIParserFilter : public nsISupports { + public: + + NS_IMETHOD RawBuffer(char * buffer, int * buffer_length) = 0; + + NS_IMETHOD WillAddToken(CToken & token) = 0; + + NS_IMETHOD ProcessTokens( /* dont know what goes here yet */ void ) = 0; +}; + +extern nsresult NS_NewParserFilter(nsIParserFilter** aInstancePtrResult); + + +#endif + diff --git a/mozilla/parser/htmlparser/src/nsParserDebug.cpp b/mozilla/parser/htmlparser/src/nsParserDebug.cpp new file mode 100644 index 00000000000..e998807e309 --- /dev/null +++ b/mozilla/parser/htmlparser/src/nsParserDebug.cpp @@ -0,0 +1,534 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "NPL"); you may not use this file except in + * compliance with the NPL. You may obtain a copy of the NPL at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the NPL is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL + * for the specific language governing rights and limitations under the + * NPL. + * + * The Initial Developer of this code under the NPL is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1998 Netscape Communications Corporation. All Rights + * Reserved. + */ + +/** + * MODULE NOTES: + * @update jevering 06/18/98 + * + * This file contains the parser debugger object which aids in + * walking links and reporting statistic information, reporting + * bad vectors. 
+ */ + +#include "CNavDTD.h" +#include "nsHTMLTokens.h" +#include "nsHTMLParser.h" +#include "nsIParserDebug.h" +#include "nsCRT.h" +#include "prenv.h" //this is here for debug reasons... +#include "prtypes.h" //this is here for debug reasons... +#include "prio.h" +#include "plstr.h" +#include "prstrm.h" +#include +#include +#include "prmem.h" + +#define CONTEXT_VECTOR_MAP "/vector.map" +#define CONTEXT_VECTOR_STAT "/vector.stat" +#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n" + +// structure to store the vector statistic information + +typedef struct vector_info { + PRInt32 references; // number of occurances counted + PRInt32 count; // number of tags in the vector + PRBool good_vector; // is this a valid vector? + eHTMLTags* vector; // and the vector +} VectorInfo; + +// the statistic vector table grows each time it exceeds this +// stepping value +#define TABLE_SIZE 128 + +class CParserDebug : public nsIParserDebug { +public: + + CParserDebug(char * aVerifyDir = 0); + ~CParserDebug(); + + NS_DECL_ISUPPORTS + + void SetVerificationDirectory(char * verify_dir); + void SetRecordStatistics(PRBool bval); + PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef); + void DumpVectorRecord(void); + + // global table for storing vector statistics and the size + +private: + VectorInfo ** mVectorInfoArray; + PRInt32 mVectorCount; + char * mVerificationDir; + PRBool mRecordingStatistics; + + PRBool DebugRecord(char * path, char * pURLRef, char * filename); + void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector); + void MakeVectorString(char * vector_string, VectorInfo * pInfo); +}; + +static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); +static NS_DEFINE_IID(kIDebugParserIID, NS_IPARSERDEBUG_IID); + +/** + * This method is defined in nsIParser. It is used to + * cause the COM-like construction of an nsHTMLParser. 
+ * + * @update jevering 3/25/98 + * @param nsIParser** ptr to newly instantiated parser + * @return NS_xxx error result + */ + +NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult) +{ + CParserDebug *it = new CParserDebug(); + + if (it == 0) { + return NS_ERROR_OUT_OF_MEMORY; + } + + return it->QueryInterface(kIDebugParserIID, (void **)aInstancePtrResult); +} + +CParserDebug::CParserDebug(char * aVerifyDir) +{ + NS_INIT_REFCNT(); + mVectorInfoArray = 0; + mVectorCount = 0; + if (aVerifyDir) + mVerificationDir = PL_strdup(aVerifyDir); + else { + char * pString = PR_GetEnv("VERIFY_PARSER"); + if (pString) + mVerificationDir = PL_strdup(pString); + else + mVerificationDir = 0; + } + mRecordingStatistics = PR_TRUE; +} + +CParserDebug::~CParserDebug() +{ + if (mVerificationDir) + PL_strfree(mVerificationDir); +} + +/** + * This method gets called as part of our COM-like interfaces. + * Its purpose is to create an interface to parser object + * of some type. + * + * @update gess 4/8/98 + * @param nsIID id of object to discover + * @param aInstancePtr ptr to newly discovered interface + * @return NS_xxx result code + */ +nsresult CParserDebug::QueryInterface(const nsIID& aIID, void** aInstancePtr) +{ + if (NULL == aInstancePtr) { + return NS_ERROR_NULL_POINTER; + } + + if(aIID.Equals(kISupportsIID)) { //do IUnknown... + *aInstancePtr = (nsIParserDebug*)(this); + } + else if(aIID.Equals(kIDebugParserIID)) { //do IParserDebug base class... 
+ *aInstancePtr = (nsIParserDebug*)(this); + } + else { + *aInstancePtr=0; + return NS_NOINTERFACE; + } + ((nsISupports*) *aInstancePtr)->AddRef(); + return NS_OK; +} + +NS_IMPL_ADDREF(CParserDebug) +NS_IMPL_RELEASE(CParserDebug) + +void CParserDebug::SetVerificationDirectory(char * verify_dir) +{ + if (mVerificationDir) { + PL_strfree(mVerificationDir); + mVerificationDir = 0; + } + mVerificationDir = PL_strdup(verify_dir); +} + +void CParserDebug::SetRecordStatistics(PRBool bval) +{ + mRecordingStatistics = bval; +} + +/** + * This debug method records an invalid context vector and it's + * associated context vector and URL in a simple flat file mapping which + * resides in the verification directory and is named context.map + * + * @update jevering 6/06/98 + * @param path is the directory structure indicating the bad context vector + * @param pURLRef is the associated URL + * @param filename to record mapping to if not already recorded + * @return TRUE if it is already record (dont rerecord) + */ + +PRBool CParserDebug::DebugRecord(char * path, char * pURLRef, char * filename) +{ + char recordPath[2048]; + PRIntn oflags = 0; + + // create the record file name from the verification director + // and the default name. 
+ strcpy(recordPath,mVerificationDir); + strcat(recordPath,CONTEXT_VECTOR_MAP); + + // create the file exists, only open for read/write + // otherwise, create it + if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS) + oflags = PR_CREATE_FILE; + oflags |= PR_RDWR; + + // open the record file + PRFileDesc * recordFile = PR_Open(recordPath,oflags,0); + + if (recordFile) { + + char * string = (char *)PR_Malloc(2048); + PRBool found = PR_FALSE; + + // vectors are stored on the format iof "URL vector filename" + // where the vector contains the verification path and + // the filename contains the debug source dump + sprintf(string,"%s %s %s\r\n", pURLRef, path, filename); + + // get the file size, read in the file and parse it line at + // a time to check to see if we have already recorded this + // occurance + + PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END); + if (iSize) { + + char * buffer = (char*)PR_Malloc(iSize); + char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048); + if (buffer!=NULL && string!=NULL) { + PRInt32 ibufferpos, istringpos; + + // beginning of file for read + PR_Seek(recordFile,0,PR_SEEK_SET); + PR_Read(recordFile,buffer,iSize); + + // run through the file looking for a matching vector + for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++) + { + // compare string once we have hit the end of the line + if (buffer[ibufferpos] == '\r') { + stringbuf[istringpos] = '\0'; + istringpos = 0; + // skip newline and space + ibufferpos++; + + if (PL_strlen(stringbuf)) { + char * space; + // chop of the filename for compare + if ((space = PL_strrchr(stringbuf, ' '))!=NULL) + *space = '\0'; + + // we have already recorded this one, free up, and return + if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) { + PR_Free(buffer); + PR_Free(stringbuf); + PR_Free(string); + return PR_TRUE; + } + } + } + + // build up the compare string + else + stringbuf[istringpos++] = buffer[ibufferpos]; + } + + // throw away the record file data + 
PR_Free(buffer); + PR_Free(stringbuf); + } + } + + // if this bad vector was not recorded, add it to record file + + if (!found) { + PR_Seek(recordFile,0,PR_SEEK_END); + PR_Write(recordFile,string,PL_strlen(string)); + } + + PR_Close(recordFile); + PR_Free(string); + } + + // vector was not recorded + return PR_FALSE; +} + +/** + * compare function for quick sort. Compares references and + * sorts in decending order + */ + +static int compare( const void *arg1, const void *arg2 ) +{ + VectorInfo ** p1 = (VectorInfo**)arg1; + VectorInfo ** p2 = (VectorInfo**)arg2; + return (*p2)->references - (*p1)->references; +} + +/** + * This debug routines stores statistical information about a + * context vector. The context vector statistics are stored in + * a global array. The table is resorted each time it grows to + * aid in lookup speed. If a vector has already been noted, its + * reference count is bumped, otherwise it is added to the table + * + * @update jevering 6/11/98 + * @param aTags is the tag list (vector) + * @param count is the size of the vector + * @return + */ + +void CParserDebug::NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector) +{ + // if the table doesn't exist, create it + if (!mVectorInfoArray) { + mVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*)); + } + else { + // attempt to look up the vector + for (PRInt32 i = 0; i < mVectorCount; i++) + + // check the vector only if they are the same size, if they + // match then just return without doing further work + if (mVectorInfoArray[i]->count == count) + if (!memcmp(mVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) { + + // bzzzt. and we have a winner.. bump the ref count + mVectorInfoArray[i]->references++; + return; + } + } + + // the context vector hasn't been noted, so allocate it and + // initialize it one.. 
add it to the table + VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo)); + pVectorInfo->references = 1; + pVectorInfo->count = count; + pVectorInfo->good_vector = good_vector; + pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags)); + memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count); + mVectorInfoArray[mVectorCount++] = pVectorInfo; + + // have we maxed out the table? grow it.. sort it.. love it. + if ((mVectorCount % TABLE_SIZE) == 0) { + mVectorInfoArray = (VectorInfo**)realloc( + mVectorInfoArray, + (sizeof(VectorInfo*)*((mVectorCount/TABLE_SIZE)+1)*TABLE_SIZE)); + if (mVectorCount) { + qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare); + } + } +} + +void CParserDebug::MakeVectorString(char * vector_string, VectorInfo * pInfo) +{ + sprintf (vector_string, "%6d ", pInfo->references); + for (PRInt32 j = 0; j < pInfo->count; j++) { + PL_strcat(vector_string, "<"); + PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j])); + PL_strcat(vector_string, ">"); + } + PL_strcat(vector_string,"\r\n"); +} + +/** + * This debug routine dumps out the vector statistics to a text + * file in the verification directory and defaults to the name + * "vector.stat". It contains all parsed context vectors and there + * occurance count sorted in decending order. + * + * @update jevering 6/11/98 + * @param + * @return + */ + +void CParserDebug::DumpVectorRecord(void) +{ + // do we have a table? + if (mVectorCount) { + + // hopefully, they wont exceed 1K. + char vector_string[1024]; + char path[1024]; + + path[0] = '\0'; + + // put in the verification directory.. 
else the root + if (mVerificationDir) + strcpy(path,mVerificationDir); + + strcat(path,CONTEXT_VECTOR_STAT); + + // open the stat file creaming any existing stat file + PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0); + if (statisticFile) { + + PRInt32 i; + PRofstream ps; + ps.attach(statisticFile); + + // oh what the heck, sort it again + if (mVectorCount) { + qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare); + } + + // cute little header + sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", mVectorCount); + ps << vector_string; + + ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n"; + ps << VECTOR_TABLE_HEADER; + + // dump out the bad vectors encountered + for (i = 0; i < mVectorCount; i++) { + if (!mVectorInfoArray[i]->good_vector) { + MakeVectorString(vector_string, mVectorInfoArray[i]); + ps << vector_string; + } + } + + ps << "\r\n\r\nValid context vector summary\r\n"; + ps << VECTOR_TABLE_HEADER; + + // take a big vector table dump (good vectors) + for (i = 0; i < mVectorCount; i++) { + if (mVectorInfoArray[i]->good_vector) { + MakeVectorString(vector_string, mVectorInfoArray[i]); + ps << vector_string; + } + // free em up. they mean nothing to me now (I'm such a user) + + if (mVectorInfoArray[i]->vector) + PR_Free(mVectorInfoArray[i]->vector); + PR_Free(mVectorInfoArray[i]); + } + } + + // ok, we are done with the table, free it up as well + PR_Free(mVectorInfoArray); + mVectorInfoArray = 0; + mVectorCount = 0; + PR_Close(statisticFile); + } +} + + +/** + * This debug method allows us to determine whether or not + * we've seen (and can handle) the given context vector. 
+ * + * @update gess4/22/98 + * @param tags is an array of eHTMLTags + * @param count represents the number of items in the tags array + * @param aDTD is the DTD we plan to ask for verification + * @return TRUE if we know how to handle it, else false + */ + +PRBool CParserDebug::Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int aContextStackPos, eHTMLTags aContextStack[], char * aURLRef) +{ + PRBool result=PR_TRUE; + + //ok, now see if we understand this vector + + if(0!=mVerificationDir || mRecordingStatistics) { + + if(aDTD && aContextStackPos>1) { + for (int i = 0; i < aContextStackPos-1; i++) + if (!aDTD->CanContain(aContextStack[i],aContextStack[i+1])) { + result = PR_FALSE; + break; + } + } + } + + if (mRecordingStatistics) { + NoteVector(aContextStack,aContextStackPos,result); + } + + if(0!=mVerificationDir) { + char path[2048]; + strcpy(path,mVerificationDir); + + int i=0; + for(i=0;iDebugDumpSource(ps); + PR_Close(debugFile); + } + } + } + } + + return result; +} diff --git a/mozilla/parser/htmlparser/src/nsTokenizer.cpp b/mozilla/parser/htmlparser/src/nsTokenizer.cpp new file mode 100644 index 00000000000..dacfe58838c --- /dev/null +++ b/mozilla/parser/htmlparser/src/nsTokenizer.cpp @@ -0,0 +1,327 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "NPL"); you may not use this file except in + * compliance with the NPL. You may obtain a copy of the NPL at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the NPL is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL + * for the specific language governing rights and limitations under the + * NPL. + * + * The Initial Developer of this code under the NPL is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1998 Netscape Communications Corporation. 
All Rights + * Reserved. + */ + + +#include +#include "nsTokenizer.h" +#include "nsToken.h" +#include "nsScanner.h" +#include "nsIParserFilter.h" +#include "nsIURL.h" + +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} + +/** + * Default constructor + * + * @update gess 3/25/98 + * @param aFilename -- name of file to be tokenized + * @param aDelegate -- ref to delegate to be used to tokenize + * @return + */ +CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) : + mTokenDeque(PR_TRUE,TokenFreeProc) { + mParserFilter = aIFilter; + mDelegate=aDelegate; + mScanner=new CScanner(aURL,aMode); + mParseMode=aMode; +} + +/** + * Default constructor + * + * @update gess 3/25/98 + * @param aFilename -- name of file to be tokenized + * @param aDelegate -- ref to delegate to be used to tokenize + * @return + */ +CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) : + mTokenDeque(PR_TRUE,TokenFreeProc) { + mParserFilter = aIFilter; + mDelegate=aDelegate; + mScanner=new CScanner(aFilename,aMode); + mParseMode=aMode; +} + +/** + * Default constructor + * + * @update gess 3/25/98 + * @param aFilename -- name of file to be tokenized + * @param aDelegate -- ref to delegate to be used to tokenize + * @return + */ +CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) : + mTokenDeque(PR_TRUE,TokenFreeProc) { + mParserFilter = aIFilter; + mDelegate=aDelegate; + mScanner=new CScanner(aMode); + mParseMode=aMode; +} + +/** + * default destructor + * + * @update gess 3/25/98 + * @param + * @return + */ +CTokenizer::~CTokenizer() { + delete mScanner; + mDelegate->Destroy(); + mScanner=0; +} + + +/** + * + * + * @update gess 5/13/98 + * @param + * @return + */ +PRBool CTokenizer::Append(nsString& aBuffer) { + if(mScanner) + return 
mScanner->Append(aBuffer); + return PR_FALSE; +} + + +/** + * + * + * @update gess 5/21/98 + * @param + * @return + */ +PRBool CTokenizer::Append(const char* aBuffer, PRInt32 aLen){ + if(mScanner) + return mScanner->Append(aBuffer,aLen); + return PR_FALSE; +} + +/** + * Retrieve a reference to the internal token deque. + * + * @update gess 4/20/98 + * @return deque reference + */ +nsDeque& CTokenizer::GetDeque(void) { + return mTokenDeque; +} + +/** + * Cause the tokenizer to consume the next token, and + * return an error result. + * + * @update gess 3/25/98 + * @param anError -- ref to error code + * @return new token or null + */ +PRInt32 CTokenizer::GetToken(CToken*& aToken) { + PRInt32 result=mDelegate->GetToken(*mScanner,aToken); + return result; +} + +/** + * Retrieve the number of elements in the deque + * + * @update gess 3/25/98 + * @param + * @return int containing element count + */ +PRInt32 CTokenizer::GetSize(void) { + return mTokenDeque.GetSize(); +} + + +/** + * Part of the code sandwich, this gets called right before + * the tokenization process begins. The main reason for + * this call is to allow the delegate to do initialization. 
+ * + * @update gess 3/25/98 + * @param + * @return TRUE if it's ok to proceed + */ +PRBool CTokenizer::WillTokenize(PRBool aIncremental){ + PRBool result=PR_TRUE; + result=mDelegate->WillTokenize(aIncremental); + return result; +} + +/** + * + * @update gess 3/25/98 + * @return TRUE if it's ok to proceed + */ +PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){ + CToken* theToken=0; + PRInt32 result=kNoError; + + WillTokenize(PR_TRUE); + + while(kNoError==result) { + result=GetToken(theToken); + if(theToken && (kNoError==result)) { + +#ifdef VERBOSE_DEBUG + theToken->DebugDumpToken(cout); +#endif + + PRBool bWillAdd = PR_TRUE; + if (mParserFilter) + bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken); + if(bWillAdd && mDelegate->WillAddToken(*theToken)) { + mTokenDeque.Push(theToken); + } + } + else if (theToken) + delete theToken; + } + if(kEOF==result) + result=kNoError; + DidTokenize(PR_TRUE); + return result; +} + +/** + * This is the primary control routine. It iteratively + * consumes tokens until an error occurs or you run out + * of data. + * + * @update gess 3/25/98 + * @return error code + */ +PRInt32 CTokenizer::Tokenize(int anIteration) { + CToken* theToken=0; + PRInt32 result=kNoError; + PRBool done=(0==anIteration) ? 
(!WillTokenize(PR_TRUE)) : PR_FALSE; + + + while((PR_FALSE==done) && (kNoError==result)) { + mScanner->Mark(); + result=GetToken(theToken); + if(kNoError==result) { + if(theToken) { + + #ifdef VERBOSE_DEBUG + theToken->DebugDumpToken(cout); + #endif + + PRBool bWillAdd = PR_TRUE; + if (mParserFilter) + bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken); + if(bWillAdd && mDelegate->WillAddToken(*theToken)) { + mTokenDeque.Push(theToken); + } + } + + } + else { + if(theToken) + delete theToken; + mScanner->RewindToMark(); + } + } + if((PR_TRUE==done) && (kInterrupted!=result)) + DidTokenize(PR_TRUE); + return result; +} + +/** + * This is the tail-end of the code sandwich for the + * tokenization process. It gets called once tokenziation + * has completed. + * + * @update gess 3/25/98 + * @param + * @return TRUE if all went well + */ +PRBool CTokenizer::DidTokenize(PRBool aIncremental) { + PRBool result=mDelegate->DidTokenize(aIncremental); + +#ifdef VERBOSE_DEBUG + DebugDumpTokens(cout); +#endif + + return result; +} + +/** + * This debug routine is used to cause the tokenizer to + * iterate its token list, asking each token to dump its + * contents to the given output stream. + * + * @update gess 3/25/98 + * @param + * @return + */ +void CTokenizer::DebugDumpTokens(ostream& out) { + nsDequeIterator b=mTokenDeque.Begin(); + nsDequeIterator e=mTokenDeque.End(); + + CToken* theToken; + while(b!=e) { + theToken=(CToken*)(b++); + theToken->DebugDumpToken(out); + } +} + + +/** + * This debug routine is used to cause the tokenizer to + * iterate its token list, asking each token to dump its + * contents to the given output stream. 
+ * + * @update gess 3/25/98 + * @param + * @return + */ +void CTokenizer::DebugDumpSource(ostream& out) { + nsDequeIterator b=mTokenDeque.Begin(); + nsDequeIterator e=mTokenDeque.End(); + + CToken* theToken; + while(b!=e) { + theToken=(CToken*)(b++); + theToken->DebugDumpSource(out); + } + +} + + +/** + * + * + * @update gess 3/25/98 + * @param + * @return + */ +void CTokenizer::SelfTest(void) { +#ifdef _DEBUG +#endif +} + + diff --git a/mozilla/parser/htmlparser/src/nsTokenizer.h b/mozilla/parser/htmlparser/src/nsTokenizer.h new file mode 100644 index 00000000000..7d54555ef6f --- /dev/null +++ b/mozilla/parser/htmlparser/src/nsTokenizer.h @@ -0,0 +1,185 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "NPL"); you may not use this file except in + * compliance with the NPL. You may obtain a copy of the NPL at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the NPL is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL + * for the specific language governing rights and limitations under the + * NPL. + * + * The Initial Developer of this code under the NPL is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1998 Netscape Communications Corporation. All Rights + * Reserved. + */ + +/** + * MODULE NOTES: + * LAST MODS: gess 28Feb98 + * + * This file declares the basic tokenizer class. The + * central theme of this class is to control and + * coordinate a tokenization process. Note that this + * class is grammer-neutral: this class doesn't care + * at all what the underlying stream consists of. + * + * The main purpose of this class is to iterate over an + * input stream with the help of a given scanner and a + * given type-specific tokenizer-Delegate. 
 *
 * The primary method here is the tokenize() method, which
 * simply loops calling getToken() until an EOF condition
 * (or some other error) occurs.
 *
 */


#ifndef TOKENIZER
#define TOKENIZER

#include "nsToken.h"
#include "nsITokenizerDelegate.h"
#include "nsDeque.h"
// NOTE(review): the header name of the #include below was lost when this text
// was extracted -- presumably the stream header that declares ostream;
// restore it from the original change before building.
#include

class CScanner;
class nsIURL;
class nsIParserFilter;

/**
 * Drives tokenization: asks an ITokenizerDelegate for tokens,
 * handing it a CScanner, and collects the accepted tokens in
 * a deque.
 */
class CTokenizer {
  public:

    /**
     * Each constructor stores the delegate, parse mode and filter
     * and creates a CScanner: from the parse mode alone, from a
     * filename, or from a URL respectively.
     *
     * @param   aDelegate -- delegate used to produce and vet tokens
     * @param   aMode -- parse mode; also passed to the scanner
     * @param   aIFilter -- parser filter consulted before a token is queued; may be 0
     */
    CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
    CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
    CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);

    /**
     * Deletes the scanner and calls Destroy() on the delegate.
     */
    ~CTokenizer();

    /**
     * This method incrementally tokenizes as much content as
     * it can get its hands on.
     *
     * @update  gess 3/25/98
     * @param   anIteration -- 0 makes the call run WillTokenize() first
     * @return  error code (the result that ended the token loop)
     */
    PRInt32 Tokenize(int anIteration); //your friendly incremental version

    /**
     * Tokenizes until GetToken() stops returning kNoError; a
     * kEOF result is reported as kNoError. Neither argument is
     * read by the implementation in nsTokenizer.cpp.
     *
     * @update  gess 3/25/98
     * @param   aSourceBuffer -- currently unused
     * @param   appendTokens -- currently unused
     * @return  error code
     */
    PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens=PR_TRUE);

    /**
     * Cause the tokenizer to consume the next token, and
     * return an error result.
     *
     * @update  gess 3/25/98
     * @param   aToken -- out-param handed to the delegate's GetToken()
     * @return  the delegate's result code
     */
    PRInt32 GetToken(CToken*& aToken);

    /**
     * Retrieve the number of elements in the deque
     *
     * @update  gess 3/25/98
     * @return  int containing element count
     */
    PRInt32 GetSize(void);

    /**
     * Retrieve a reference to the internal token deque.
     *
     * @update  gess 4/20/98
     * @return  deque reference
     */
    nsDeque& GetDeque(void);

    /**
     * Hand a string buffer to the scanner.
     *
     * @update  gess 4/20/98
     * @param   aBuffer -- data forwarded to the scanner's Append()
     * @return  the scanner's result, or PR_FALSE when there is no scanner
     */
    PRBool Append(nsString& aBuffer);

    /**
     * Hand a raw character buffer to the scanner.
     *
     * @update  gess 4/20/98
     * @param   aBuffer -- data forwarded to the scanner's Append()
     * @param   aLen -- length argument forwarded to the scanner's Append()
     * @return  the scanner's result, or PR_FALSE when there is no scanner
     */
    PRBool Append(const char* aBuffer, PRInt32 aLen);


    /**
     * NOTE(review): no definition of this method is visible next to
     * the other CTokenizer methods in this change -- confirm it is
     * implemented somewhere before calling it.
     *
     * @update  gess 5/13/98
     * @param   aBuffer
     * @return
     */
    PRBool SetBuffer(nsString& aBuffer);

    /**
     * This debug routine is used to cause the tokenizer to
     * iterate its token list, asking each token to dump its
     * source (via DebugDumpSource) to the given output stream.
     *
     * @update  gess 3/25/98
     * @param   out -- stream handed to each token
     * @return
     */
    void DebugDumpSource(ostream& out);

    /**
     * This debug routine is used to cause the tokenizer to
     * iterate its token list, asking each token to dump its
     * contents (via DebugDumpToken) to the given output stream.
     *
     * @update  gess 3/25/98
     * @param   out -- stream handed to each token
     * @return
     */
    void DebugDumpTokens(ostream& out);

    /**
     * Self-test hook; the implementation is currently empty.
     */
    static void SelfTest();

  protected:

    /**
     * This is the front-end of the code sandwich for the
     * tokenization process. It gets called once just before
     * tokenization begins.
     *
     * @update  gess 3/25/98
     * @param   aIncremental tells us if tokenization is incremental
     * @return  TRUE if all went well
     */
    PRBool WillTokenize(PRBool aIncremental);


    /**
     * This is the tail-end of the code sandwich for the
     * tokenization process. It gets called once tokenization
     * has completed.
     *
     * @update  gess 3/25/98
     * @param   aIncremental tells us if tokenization was incremental
     * @return  TRUE if all went well
     */
    PRBool DidTokenize(PRBool aIncremental);

    ITokenizerDelegate* mDelegate;      // produces and vets tokens; Destroy()ed in the destructor
    CScanner* mScanner;                 // created in the constructors, deleted in the destructor
    nsDeque mTokenDeque;                // accepted tokens; constructed with a token-freeing callback
    eParseMode mParseMode;              // mode given at construction
    nsIParserFilter* mParserFilter;     // filter consulted before a token is queued; may be 0
};

#endif
