Major whacking of the debug robot code.
The Debug robot has now been moved into its own class (xp_com object) and is fairly well separated from the rest of the parser. An argument to pass the ParserDebug object into the parsing process has been added to the Parse member. This should clean up Unix a bit as well as provide the ability to run multiple simultaneous robots. This also cleaned up the global variable hackage. git-svn-id: svn://10.0.0.236/trunk@4092 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
parent
b2fb00f735
commit
ccd797086b
@ -31,6 +31,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "nsIParserDebug.h"
|
||||
#include "CNavDTD.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsCRT.h"
|
||||
@ -43,13 +44,10 @@
|
||||
#include "prtypes.h" //this is here for debug reasons...
|
||||
#include "prio.h"
|
||||
#include "plstr.h"
|
||||
#include "prstrm.h"
|
||||
#include <fstream.h>
|
||||
|
||||
#ifdef XP_PC
|
||||
#include <direct.h> //this is here for debug reasons...
|
||||
#endif
|
||||
#include <time.h>
|
||||
#include "prmem.h"
|
||||
|
||||
|
||||
@ -63,8 +61,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
||||
static const char* kNullToken = "Error: Null token given";
|
||||
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static char* gURLRef=0;
|
||||
static nsAutoString gEmpty;
|
||||
|
||||
static char formElementTags[]= {
|
||||
@ -234,17 +230,18 @@ static CNavTokenDeallocator gTokenKiller;
|
||||
* @return
|
||||
*/
|
||||
CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
NS_INIT_REFCNT();
|
||||
mParser=0;
|
||||
mURLRef=0;
|
||||
mParserDebug=0;
|
||||
nsCRT::zero(mLeafBits,sizeof(mLeafBits));
|
||||
nsCRT::zero(mContextStack,sizeof(mContextStack));
|
||||
nsCRT::zero(mStyleStack,sizeof(mStyleStack));
|
||||
nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
|
||||
mContextStackPos=0;
|
||||
mStyleStackPos=0;
|
||||
gURLRef = 0;
|
||||
mHasOpenForm=PR_FALSE;
|
||||
mHasOpenMap=PR_FALSE;
|
||||
gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
|
||||
InitializeDefaultTokenHandlers();
|
||||
}
|
||||
|
||||
@ -257,11 +254,10 @@ CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
*/
|
||||
CNavDTD::~CNavDTD(){
|
||||
DeleteTokenHandlers();
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
if (mURLRef)
|
||||
PL_strfree(mURLRef);
|
||||
if (mParserDebug)
|
||||
NS_RELEASE(mParserDebug);
|
||||
// NS_RELEASE(mSink);
|
||||
}
|
||||
|
||||
@ -321,7 +317,8 @@ PRInt32 CNavDTD::HandleToken(CToken* aToken){
|
||||
|
||||
if(aHandler) {
|
||||
result=(*aHandler)(theToken,this);
|
||||
Verify("xxx",PR_TRUE);
|
||||
if (mParserDebug)
|
||||
mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
|
||||
}
|
||||
|
||||
}//if
|
||||
@ -807,7 +804,7 @@ PRBool CNavDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) const
|
||||
* @param aChild -- tag enum of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
||||
PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {
|
||||
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
@ -884,11 +881,11 @@ PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
||||
|
||||
//handle form elements (this is very much a WIP!!!)
|
||||
if(0!=strchr(formElementTags,aChild)){
|
||||
return CanContainFormElement(aParent,aChild);
|
||||
return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
|
||||
}
|
||||
|
||||
|
||||
switch(aParent) {
|
||||
switch((eHTMLTags)aParent) {
|
||||
case eHTMLTag_a:
|
||||
case eHTMLTag_acronym:
|
||||
result=PRBool(0!=strchr(gTagSet1,aChild)); break;
|
||||
@ -1475,7 +1472,7 @@ eHTMLTags CNavDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
|
||||
* @param aChild -- tag type of child
|
||||
* @return TRUE if propagation closes; false otherwise
|
||||
*/
|
||||
PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
|
||||
PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
switch(aParentTag) {
|
||||
@ -1490,7 +1487,7 @@ PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTag
|
||||
//otherwise, intentionally fall through...
|
||||
|
||||
case eHTMLTag_tr:
|
||||
if(PR_TRUE==CanContain(eHTMLTag_td,aChildTag)) {
|
||||
if(PR_TRUE==CanContain((PRInt32)eHTMLTag_td,(PRInt32)aChildTag)) {
|
||||
aVector.Append((PRUnichar)eHTMLTag_td);
|
||||
result=BackwardPropagate(aVector,aParentTag,eHTMLTag_td);
|
||||
// result=PR_TRUE;
|
||||
@ -2723,433 +2720,19 @@ void CNavDTD::WillInterruptParse(void){
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
Here's a bunch of stuff JEvering put into the parser to do debugging.
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* This debug method records an invalid context vector and it's
|
||||
* associated context vector and URL in a simple flat file mapping which
|
||||
* resides in the verification directory and is named context.map
|
||||
*
|
||||
* @update jevering 6/06/98
|
||||
* @param path is the directory structure indicating the bad context vector
|
||||
* @param pURLRef is the associated URL
|
||||
* @param filename to record mapping to if not already recorded
|
||||
* @return TRUE if it is already record (dont rerecord)
|
||||
*/
|
||||
|
||||
#define CONTEXT_VECTOR_MAP "/vector.map"
|
||||
#define CONTEXT_VECTOR_STAT "/vector.stat"
|
||||
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
|
||||
static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
|
||||
{
|
||||
char recordPath[2048];
|
||||
PRIntn oflags = 0;
|
||||
|
||||
// create the record file name from the verification director
|
||||
// and the default name.
|
||||
strcpy(recordPath,gVerificationOutputDir);
|
||||
strcat(recordPath,CONTEXT_VECTOR_MAP);
|
||||
|
||||
// create the file exists, only open for read/write
|
||||
// otherwise, create it
|
||||
if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
|
||||
oflags = PR_CREATE_FILE;
|
||||
oflags |= PR_RDWR;
|
||||
|
||||
// open the record file
|
||||
PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
|
||||
|
||||
if (recordFile) {
|
||||
|
||||
char * string = (char *)PR_Malloc(2048);
|
||||
PRBool found = PR_FALSE;
|
||||
|
||||
// vectors are stored on the format iof "URL vector filename"
|
||||
// where the vector contains the verification path and
|
||||
// the filename contains the debug source dump
|
||||
sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
|
||||
|
||||
// get the file size, read in the file and parse it line at
|
||||
// a time to check to see if we have already recorded this
|
||||
// occurance
|
||||
|
||||
PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
if (iSize) {
|
||||
|
||||
char * buffer = (char*)PR_Malloc(iSize);
|
||||
char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
|
||||
if (buffer!=NULL && string!=NULL) {
|
||||
PRInt32 ibufferpos, istringpos;
|
||||
|
||||
// beginning of file for read
|
||||
PR_Seek(recordFile,0,PR_SEEK_SET);
|
||||
PR_Read(recordFile,buffer,iSize);
|
||||
|
||||
// run through the file looking for a matching vector
|
||||
for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
|
||||
{
|
||||
// compare string once we have hit the end of the line
|
||||
if (buffer[ibufferpos] == '\r') {
|
||||
stringbuf[istringpos] = '\0';
|
||||
istringpos = 0;
|
||||
// skip newline and space
|
||||
ibufferpos++;
|
||||
|
||||
if (PL_strlen(stringbuf)) {
|
||||
char * space;
|
||||
// chop of the filename for compare
|
||||
if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
|
||||
*space = '\0';
|
||||
|
||||
// we have already recorded this one, free up, and return
|
||||
if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
PR_Free(string);
|
||||
return PR_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build up the compare string
|
||||
else
|
||||
stringbuf[istringpos++] = buffer[ibufferpos];
|
||||
}
|
||||
|
||||
// throw away the record file data
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// if this bad vector was not recorded, add it to record file
|
||||
|
||||
if (!found) {
|
||||
PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
PR_Write(recordFile,string,PL_strlen(string));
|
||||
}
|
||||
|
||||
PR_Close(recordFile);
|
||||
PR_Free(string);
|
||||
void CNavDTD::SetURLRef(char * aURLRef){
|
||||
if (mURLRef) {
|
||||
PL_strfree(mURLRef);
|
||||
mURLRef=0;
|
||||
}
|
||||
|
||||
// vector was not recorded
|
||||
return PR_FALSE;
|
||||
if (aURLRef)
|
||||
mURLRef = PL_strdup(aURLRef);
|
||||
}
|
||||
|
||||
// structure to store the vector statistic information
|
||||
|
||||
typedef struct vector_info {
|
||||
PRInt32 references; // number of occurances counted
|
||||
PRInt32 count; // number of tags in the vector
|
||||
PRBool good_vector; // is this a valid vector?
|
||||
eHTMLTags* vector; // and the vector
|
||||
} VectorInfo;
|
||||
|
||||
// global table for storing vector statistics and the size
|
||||
static VectorInfo ** gVectorInfoArray = 0;
|
||||
static PRInt32 gVectorCount = 0;
|
||||
|
||||
// the statistic vector table grows each time it exceeds this
|
||||
// stepping value
|
||||
#define TABLE_SIZE 128
|
||||
|
||||
// compare function for quick sort. Compares references and
|
||||
// sorts in decending order
|
||||
|
||||
static int compare( const void *arg1, const void *arg2 )
|
||||
void CNavDTD::SetParserDebug(nsIParserDebug * aParserDebug)
|
||||
{
|
||||
VectorInfo ** p1 = (VectorInfo**)arg1;
|
||||
VectorInfo ** p2 = (VectorInfo**)arg2;
|
||||
return (*p2)->references - (*p1)->references;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug routines stores statistical information about a
|
||||
* context vector. The context vector statistics are stored in
|
||||
* a global array. The table is resorted each time it grows to
|
||||
* aid in lookup speed. If a vector has already been noted, its
|
||||
* reference count is bumped, otherwise it is added to the table
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param aTags is the tag list (vector)
|
||||
* @param count is the size of the vector
|
||||
* @return
|
||||
*/
|
||||
|
||||
static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
|
||||
{
|
||||
// if the table doesn't exist, create it
|
||||
if (!gVectorInfoArray) {
|
||||
gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
|
||||
}
|
||||
else {
|
||||
// attempt to look up the vector
|
||||
for (PRInt32 i = 0; i < gVectorCount; i++)
|
||||
|
||||
// check the vector only if they are the same size, if they
|
||||
// match then just return without doing further work
|
||||
if (gVectorInfoArray[i]->count == count)
|
||||
if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
|
||||
|
||||
// bzzzt. and we have a winner.. bump the ref count
|
||||
gVectorInfoArray[i]->references++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// the context vector hasn't been noted, so allocate it and
|
||||
// initialize it one.. add it to the table
|
||||
VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
|
||||
pVectorInfo->references = 1;
|
||||
pVectorInfo->count = count;
|
||||
pVectorInfo->good_vector = good_vector;
|
||||
pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
|
||||
memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
|
||||
gVectorInfoArray[gVectorCount++] = pVectorInfo;
|
||||
|
||||
// have we maxed out the table? grow it.. sort it.. love it.
|
||||
if ((gVectorCount % TABLE_SIZE) == 0) {
|
||||
gVectorInfoArray = (VectorInfo**)realloc(
|
||||
gVectorInfoArray,
|
||||
(sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
|
||||
if (gVectorCount) {
|
||||
qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
|
||||
{
|
||||
sprintf (vector_string, "%6d ", pInfo->references);
|
||||
for (PRInt32 j = 0; j < pInfo->count; j++) {
|
||||
PL_strcat(vector_string, "<");
|
||||
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
|
||||
PL_strcat(vector_string, ">");
|
||||
}
|
||||
PL_strcat(vector_string,"\r\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine dumps out the vector statistics to a text
|
||||
* file in the verification directory and defaults to the name
|
||||
* "vector.stat". It contains all parsed context vectors and there
|
||||
* occurance count sorted in decending order.
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
|
||||
extern "C" NS_EXPORT void DumpVectorRecord(void)
|
||||
{
|
||||
// do we have a table?
|
||||
if (gVectorCount) {
|
||||
|
||||
// hopefully, they wont exceed 1K.
|
||||
char vector_string[1024];
|
||||
char path[1024];
|
||||
|
||||
path[0] = '\0';
|
||||
|
||||
// put in the verification directory.. else the root
|
||||
if (gVerificationOutputDir)
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
|
||||
strcat(path,CONTEXT_VECTOR_STAT);
|
||||
|
||||
// open the stat file creaming any existing stat file
|
||||
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (statisticFile) {
|
||||
|
||||
PRInt32 i;
|
||||
PRofstream ps;
|
||||
ps.attach(statisticFile);
|
||||
|
||||
// oh what the heck, sort it again
|
||||
if (gVectorCount) {
|
||||
qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
|
||||
// cute little header
|
||||
sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
|
||||
ps << vector_string;
|
||||
|
||||
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// dump out the bad vectors encountered
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (!gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
}
|
||||
|
||||
ps << "\r\n\r\nValid context vector summary\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// take a big vector table dump (good vectors)
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
// free em up. they mean nothing to me now (I'm such a user)
|
||||
|
||||
if (gVectorInfoArray[i]->vector)
|
||||
PR_Free(gVectorInfoArray[i]->vector);
|
||||
PR_Free(gVectorInfoArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, we are done with the table, free it up as well
|
||||
PR_Free(gVectorInfoArray);
|
||||
gVectorInfoArray = 0;
|
||||
gVectorCount = 0;
|
||||
PR_Close(statisticFile);
|
||||
if (aParserDebug) {
|
||||
mParserDebug = aParserDebug;
|
||||
NS_ADDREF(mParserDebug);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool CNavDTD::VerifyContextVector(void) const {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
if(0!=gVerificationOutputDir) {
|
||||
|
||||
#ifdef XP_PC
|
||||
char path[_MAX_PATH+1];
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
#endif
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
|
||||
#ifdef NS_WIN32
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
mkdir(path);
|
||||
#endif
|
||||
}
|
||||
|
||||
//**************************************************
|
||||
//Add code here to see if we understand this vector
|
||||
//**************************************************
|
||||
|
||||
if(PR_FALSE==result){
|
||||
#ifdef NS_WIN32
|
||||
// save file to directory indicated by bad context vector
|
||||
int iCount = 1;
|
||||
char filename[_MAX_PATH];
|
||||
do {
|
||||
sprintf(filename,"%s/html%04d.dbg", path, iCount++);
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (debugFile) {
|
||||
PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
|
||||
PR_Write(debugFile,"\n",PL_strlen("\n"));
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
#endif
|
||||
//add debugging code here to record the fact that we just encountered
|
||||
//a context vector we don't know how to handle.
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool CNavDTD::Verify(const char* anOutputDir,PRBool aRecordStats) {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
//ok, now see if we understand this vector
|
||||
|
||||
if(0!=anOutputDir || aRecordStats)
|
||||
result=VerifyContextVector();
|
||||
|
||||
if (aRecordStats) {
|
||||
NoteVector(mContextStack,mContextStackPos,result);
|
||||
}
|
||||
|
||||
if(0!=anOutputDir) {
|
||||
char path[2048];
|
||||
strcpy(path,anOutputDir);
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
PR_MkDir(path,0);
|
||||
}
|
||||
if(PR_FALSE==result){
|
||||
static PRBool rnd_initialized = PR_FALSE;
|
||||
|
||||
if (!rnd_initialized) {
|
||||
// seed randomn number generator to aid in temp file
|
||||
// creation.
|
||||
rnd_initialized = PR_TRUE;
|
||||
srand((unsigned)time(NULL));
|
||||
}
|
||||
|
||||
// generate a filename to dump the html source into
|
||||
char filename[1024];
|
||||
do {
|
||||
// use system time to generate a temporary file name
|
||||
time_t ltime;
|
||||
time (&ltime);
|
||||
// add in random number so that we can create uniques names
|
||||
// faster than simply every second.
|
||||
ltime += (time_t)rand();
|
||||
sprintf(filename,"%s/%lX.html", path, ltime);
|
||||
// try until we find one we can create
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
|
||||
// check to see if we already recorded an instance of this particular
|
||||
// bad vector.
|
||||
if (!DebugRecord(path,gURLRef, filename))
|
||||
{
|
||||
// save file to directory indicated by bad context vector
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
// if we were able to open the debug file, then
|
||||
// write the true URL at the top of the file.
|
||||
if (debugFile) {
|
||||
// dump the html source into the newly created file.
|
||||
PRofstream ps;
|
||||
ps.attach(debugFile);
|
||||
mParser->DebugDumpSource(ps);
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -42,6 +42,7 @@
|
||||
|
||||
class nsHTMLParser;
|
||||
class nsIHTMLContentSink;
|
||||
class nsIParserDebug;
|
||||
|
||||
class CNavDTD : public nsIDTD {
|
||||
|
||||
@ -141,11 +142,11 @@ class CNavDTD : public nsIDTD {
|
||||
* of one type can contain a tag of another type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aParent -- tag enum of parent container
|
||||
* @param aChild -- tag enum of child container
|
||||
* @param aParent -- int tag of parent container
|
||||
* @param aChild -- int tag of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
|
||||
virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
|
||||
|
||||
/**
|
||||
* This method is called to determine whether or not a tag
|
||||
@ -199,26 +200,21 @@ class CNavDTD : public nsIDTD {
|
||||
*/
|
||||
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
|
||||
|
||||
|
||||
/**
|
||||
* This method gets called at various times by the parser
|
||||
* whenever we want to verify a valid context stack. This
|
||||
* method also gives us a hook to add debugging metrics.
|
||||
*
|
||||
* @update gess4/6/98
|
||||
* @param aStack[] array of ints (tokens)
|
||||
* @param aCount number of elements in given array
|
||||
* @return TRUE if stack is valid, else FALSE
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aURLRef if the current URL reference (for debugger)
|
||||
* @return
|
||||
*/
|
||||
virtual PRBool VerifyContextVector(void) const;
|
||||
virtual void SetURLRef(char * aURLRef);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @update jevering 6/18/98
|
||||
* @param aParserDebug created debug parser object
|
||||
* @return
|
||||
*/
|
||||
virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
|
||||
virtual void SetParserDebug(nsIParserDebug * aParserDebug);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
@ -230,7 +226,7 @@ class CNavDTD : public nsIDTD {
|
||||
* @param aChild -- tag type of child
|
||||
* @return True if closure was achieved -- other false
|
||||
*/
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
@ -699,7 +695,8 @@ protected:
|
||||
PRBool mHasOpenForm;
|
||||
PRBool mHasOpenMap;
|
||||
nsDeque mTokenDeque;
|
||||
|
||||
char* mURLRef;
|
||||
nsIParserDebug* mParserDebug;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -31,6 +31,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "nsIParserDebug.h"
|
||||
#include "COtherDTD.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsCRT.h"
|
||||
@ -63,8 +64,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
||||
static const char* kNullToken = "Error: Null token given";
|
||||
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static char* gURLRef=0;
|
||||
static nsAutoString gEmpty;
|
||||
|
||||
static char formElementTags[]= {
|
||||
@ -235,17 +234,18 @@ static COtherTokenDeallocator gTokenKiller;
|
||||
* @return
|
||||
*/
|
||||
COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
NS_INIT_REFCNT();
|
||||
mParser=0;
|
||||
mURLRef=0;
|
||||
mParserDebug=0;
|
||||
nsCRT::zero(mLeafBits,sizeof(mLeafBits));
|
||||
nsCRT::zero(mContextStack,sizeof(mContextStack));
|
||||
nsCRT::zero(mStyleStack,sizeof(mStyleStack));
|
||||
nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
|
||||
mContextStackPos=0;
|
||||
mStyleStackPos=0;
|
||||
gURLRef = 0;
|
||||
mHasOpenForm=PR_FALSE;
|
||||
mHasOpenMap=PR_FALSE;
|
||||
gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
|
||||
InitializeDefaultTokenHandlers();
|
||||
}
|
||||
|
||||
@ -258,11 +258,10 @@ COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
*/
|
||||
COtherDTD::~COtherDTD(){
|
||||
DeleteTokenHandlers();
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
if (mURLRef)
|
||||
PL_strfree(mURLRef);
|
||||
if (mParserDebug)
|
||||
NS_RELEASE(mParserDebug);
|
||||
// NS_RELEASE(mSink);
|
||||
}
|
||||
|
||||
@ -321,7 +320,8 @@ PRInt32 COtherDTD::HandleToken(CToken* aToken){
|
||||
|
||||
if(aHandler) {
|
||||
result=(*aHandler)(theToken,this);
|
||||
Verify("xxx",PR_TRUE);
|
||||
if (mParserDebug)
|
||||
mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
|
||||
}
|
||||
|
||||
}//if
|
||||
@ -807,7 +807,7 @@ PRBool COtherDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) cons
|
||||
* @param aChild -- tag enum of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
||||
PRBool COtherDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {
|
||||
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
@ -884,11 +884,11 @@ PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
||||
|
||||
//handle form elements (this is very much a WIP!!!)
|
||||
if(0!=strchr(formElementTags,aChild)){
|
||||
return CanContainFormElement(aParent,aChild);
|
||||
return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
|
||||
}
|
||||
|
||||
|
||||
switch(aParent) {
|
||||
switch((eHTMLTags)aParent) {
|
||||
case eHTMLTag_a:
|
||||
case eHTMLTag_acronym:
|
||||
result=PRBool(0!=strchr(gTagSet1,aChild)); break;
|
||||
@ -1475,7 +1475,7 @@ eHTMLTags COtherDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
|
||||
* @param aChild -- tag type of child
|
||||
* @return TRUE if propagation closes; false otherwise
|
||||
*/
|
||||
PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
|
||||
PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
switch(aParentTag) {
|
||||
@ -2723,442 +2723,19 @@ void COtherDTD::WillInterruptParse(void){
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
Here's a bunch of stuff JEvering put into the parser to do debugging.
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* This debug method records an invalid context vector and it's
|
||||
* associated context vector and URL in a simple flat file mapping which
|
||||
* resides in the verification directory and is named context.map
|
||||
*
|
||||
* @update jevering 6/06/98
|
||||
* @param path is the directory structure indicating the bad context vector
|
||||
* @param pURLRef is the associated URL
|
||||
* @param filename to record mapping to if not already recorded
|
||||
* @return TRUE if it is already record (dont rerecord)
|
||||
*/
|
||||
|
||||
#define CONTEXT_VECTOR_MAP "/vector.map"
|
||||
#define CONTEXT_VECTOR_STAT "/vector.stat"
|
||||
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
|
||||
static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
|
||||
{
|
||||
char recordPath[2048];
|
||||
PRIntn oflags = 0;
|
||||
|
||||
// create the record file name from the verification director
|
||||
// and the default name.
|
||||
strcpy(recordPath,gVerificationOutputDir);
|
||||
strcat(recordPath,CONTEXT_VECTOR_MAP);
|
||||
|
||||
// create the file exists, only open for read/write
|
||||
// otherwise, create it
|
||||
if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
|
||||
oflags = PR_CREATE_FILE;
|
||||
oflags |= PR_RDWR;
|
||||
|
||||
// open the record file
|
||||
PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
|
||||
|
||||
if (recordFile) {
|
||||
|
||||
char * string = (char *)PR_Malloc(2048);
|
||||
PRBool found = PR_FALSE;
|
||||
|
||||
// vectors are stored on the format iof "URL vector filename"
|
||||
// where the vector contains the verification path and
|
||||
// the filename contains the debug source dump
|
||||
sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
|
||||
|
||||
// get the file size, read in the file and parse it line at
|
||||
// a time to check to see if we have already recorded this
|
||||
// occurance
|
||||
|
||||
PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
if (iSize) {
|
||||
|
||||
char * buffer = (char*)PR_Malloc(iSize);
|
||||
char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
|
||||
if (buffer!=NULL && string!=NULL) {
|
||||
PRInt32 ibufferpos, istringpos;
|
||||
|
||||
// beginning of file for read
|
||||
PR_Seek(recordFile,0,PR_SEEK_SET);
|
||||
PR_Read(recordFile,buffer,iSize);
|
||||
|
||||
// run through the file looking for a matching vector
|
||||
for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
|
||||
{
|
||||
// compare string once we have hit the end of the line
|
||||
if (buffer[ibufferpos] == '\r') {
|
||||
stringbuf[istringpos] = '\0';
|
||||
istringpos = 0;
|
||||
// skip newline and space
|
||||
ibufferpos++;
|
||||
|
||||
if (PL_strlen(stringbuf)) {
|
||||
char * space;
|
||||
// chop of the filename for compare
|
||||
if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
|
||||
*space = '\0';
|
||||
|
||||
// we have already recorded this one, free up, and return
|
||||
if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
PR_Free(string);
|
||||
return PR_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build up the compare string
|
||||
else
|
||||
stringbuf[istringpos++] = buffer[ibufferpos];
|
||||
}
|
||||
|
||||
// throw away the record file data
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// if this bad vector was not recorded, add it to record file
|
||||
|
||||
if (!found) {
|
||||
PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
PR_Write(recordFile,string,PL_strlen(string));
|
||||
}
|
||||
|
||||
PR_Close(recordFile);
|
||||
PR_Free(string);
|
||||
void COtherDTD::SetURLRef(char * aURLRef){
|
||||
if (mURLRef) {
|
||||
PL_strfree(mURLRef);
|
||||
mURLRef=0;
|
||||
}
|
||||
|
||||
// vector was not recorded
|
||||
return PR_FALSE;
|
||||
if (aURLRef)
|
||||
mURLRef = PL_strdup(aURLRef);
|
||||
}
|
||||
|
||||
// structure to store the vector statistic information
|
||||
|
||||
typedef struct vector_info {
|
||||
PRInt32 references; // number of occurances counted
|
||||
PRInt32 count; // number of tags in the vector
|
||||
PRBool good_vector; // is this a valid vector?
|
||||
eHTMLTags* vector; // and the vector
|
||||
} VectorInfo;
|
||||
|
||||
// global table for storing vector statistics and the size
|
||||
static VectorInfo ** gVectorInfoArray = 0;
|
||||
static PRInt32 gVectorCount = 0;
|
||||
|
||||
// the statistic vector table grows each time it exceeds this
|
||||
// stepping value
|
||||
#define TABLE_SIZE 128
|
||||
|
||||
// compare function for quick sort. Compares references and
|
||||
// sorts in decending order
|
||||
|
||||
static int compare( const void *arg1, const void *arg2 )
|
||||
void COtherDTD::SetParserDebug(nsIParserDebug * aParserDebug)
|
||||
{
|
||||
VectorInfo ** p1 = (VectorInfo**)arg1;
|
||||
VectorInfo ** p2 = (VectorInfo**)arg2;
|
||||
return (*p2)->references - (*p1)->references;
|
||||
}
|
||||
|
||||
/**
|
||||
* quick sort the statistic array causing the most frequently
|
||||
* used vectors to be at the top (this makes it a little speedier
|
||||
* when looking them up)
|
||||
*/
|
||||
static void SortVectorRecord(void) {
|
||||
// of course, sort it only if there is something to sort
|
||||
if (gVectorCount) {
|
||||
qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug routine stores statistical information about a
|
||||
* context vector. The context vector statistics are stored in
|
||||
* a global array. The table is resorted each time it grows to
|
||||
* aid in lookup speed. If a vector has already been noted, its
|
||||
* reference count is bumped, otherwise it is added to the table
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param aTags is the tag list (vector)
|
||||
* @param count is the size of the vector
|
||||
* @return
|
||||
*/
|
||||
|
||||
/**
 * Records one occurrence of a context vector in the global statistics
 * table. If an identical vector (same length, same tags) is already in
 * the table its reference count is bumped; otherwise a new entry holding
 * a private copy of the tags is appended. Whenever the entry count
 * reaches a multiple of TABLE_SIZE the table is grown by another
 * TABLE_SIZE slots and re-sorted.
 *
 * Statistics are best-effort: if any allocation fails the vector is
 * simply not recorded and the table is left in a consistent state.
 *
 * @param   aTags        the tag list (vector); 'count' entries are read
 * @param   count        number of tags in aTags
 * @param   good_vector  whether the vector verified as valid; stored only
 *                       when a new entry is created
 */
static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
{
  if (!gVectorInfoArray) {
    // if the table doesn't exist, create it
    gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
    if (!gVectorInfoArray)
      return;
  }
  else {
    // attempt to look up the vector
    for (PRInt32 i = 0; i < gVectorCount; i++) {
      VectorInfo * pKnown = gVectorInfoArray[i];

      // only vectors of the same size can match
      if (pKnown->count != count)
        continue;

      // an empty vector has no tag storage, so never hand it to memcmp
      if (count <= 0 ||
          !memcmp(pKnown->vector, aTags, sizeof(eHTMLTags)*count)) {
        // bzzzt. and we have a winner.. bump the ref count
        pKnown->references++;
        return;
      }
    }
  }

  // the context vector hasn't been noted, so allocate an entry,
  // initialize its count to one, and add it to the table
  VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
  if (!pVectorInfo)
    return;

  pVectorInfo->references = 1;
  pVectorInfo->count = count;
  pVectorInfo->good_vector = good_vector;
  pVectorInfo->vector = 0;

  if (count > 0) {
    pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
    if (!pVectorInfo->vector) {
      PR_Free(pVectorInfo);
      return;
    }
    memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
  }

  gVectorInfoArray[gVectorCount++] = pVectorInfo;

  // have we maxed out the table? grow it, then sort it
  if ((gVectorCount % TABLE_SIZE) == 0) {
    // The table came from PR_Calloc and is released with PR_Free, so it
    // must be resized with PR_Realloc rather than the C runtime realloc.
    // The result goes to a temporary so a failure cannot lose the table.
    VectorInfo ** pGrown = (VectorInfo**)PR_Realloc(
      gVectorInfoArray,
      (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));

    if (!pGrown) {
      // no room for a further entry: take the new one back out so the
      // next call retries the growth instead of writing past the end
      gVectorInfoArray[--gVectorCount] = 0;
      if (pVectorInfo->vector)
        PR_Free(pVectorInfo->vector);
      PR_Free(pVectorInfo);
      return;
    }

    gVectorInfoArray = pGrown;
    SortVectorRecord();
  }
}
|
||||
|
||||
static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
|
||||
{
|
||||
sprintf (vector_string, "%6d ", pInfo->references);
|
||||
for (PRInt32 j = 0; j < pInfo->count; j++) {
|
||||
PL_strcat(vector_string, "<");
|
||||
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
|
||||
PL_strcat(vector_string, ">");
|
||||
}
|
||||
PL_strcat(vector_string,"\r\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine dumps out the vector statistics to a text
|
||||
* file in the verification directory and defaults to the name
|
||||
* "vector.stat". It contains all parsed context vectors and their
|
||||
* occurrence count sorted in descending order.
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
|
||||
extern "C" NS_EXPORT void DumpVectorRecord_other(void)
|
||||
{
|
||||
// do we have a table?
|
||||
if (gVectorCount) {
|
||||
|
||||
// hopefully, they wont exceed 1K.
|
||||
char vector_string[1024];
|
||||
char path[1024];
|
||||
|
||||
path[0] = '\0';
|
||||
|
||||
// put in the verification directory.. else the root
|
||||
if (gVerificationOutputDir)
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
|
||||
strcat(path,CONTEXT_VECTOR_STAT);
|
||||
|
||||
// open the stat file creaming any existing stat file
|
||||
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (statisticFile) {
|
||||
|
||||
PRInt32 i;
|
||||
PRofstream ps;
|
||||
ps.attach(statisticFile);
|
||||
|
||||
// oh what the heck, sort it again
|
||||
SortVectorRecord();
|
||||
|
||||
// cute little header
|
||||
sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
|
||||
ps << vector_string;
|
||||
|
||||
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// dump out the bad vectors encountered
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (!gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
}
|
||||
|
||||
ps << "\r\n\r\nValid context vector summary\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// take a big vector table dump (good vectors)
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
// free em up. they mean nothing to me now (I'm such a user)
|
||||
|
||||
if (gVectorInfoArray[i]->vector)
|
||||
PR_Free(gVectorInfoArray[i]->vector);
|
||||
PR_Free(gVectorInfoArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, we are done with the table, free it up as well
|
||||
PR_Free(gVectorInfoArray);
|
||||
gVectorInfoArray = 0;
|
||||
gVectorCount = 0;
|
||||
PR_Close(statisticFile);
|
||||
if (aParserDebug) {
|
||||
mParserDebug = aParserDebug;
|
||||
NS_ADDREF(mParserDebug);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool COtherDTD::VerifyContextVector(void) const {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
if(0!=gVerificationOutputDir) {
|
||||
|
||||
#ifdef XP_PC
|
||||
char path[_MAX_PATH+1];
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
#endif
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
|
||||
#ifdef NS_WIN32
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
mkdir(path);
|
||||
#endif
|
||||
}
|
||||
|
||||
//**************************************************
|
||||
//Add code here to see if we understand this vector
|
||||
//**************************************************
|
||||
|
||||
if(PR_FALSE==result){
|
||||
#ifdef NS_WIN32
|
||||
// save file to directory indicated by bad context vector
|
||||
int iCount = 1;
|
||||
char filename[_MAX_PATH];
|
||||
do {
|
||||
sprintf(filename,"%s/html%04d.dbg", path, iCount++);
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (debugFile) {
|
||||
PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
|
||||
PR_Write(debugFile,"\n",PL_strlen("\n"));
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
#endif
|
||||
//add debugging code here to record the fact that we just encountered
|
||||
//a context vector we don't know how to handle.
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
/**
 * Debug hook that checks the current context stack and optionally
 * records it.
 *
 * When either an output directory or statistics recording is requested,
 * the context vector is checked with VerifyContextVector(). With
 * aRecordStats set, the vector and its verdict are passed to
 * NoteVector(). With an output directory, a directory chain named after
 * the tags on the context stack is created below it; if the vector
 * failed verification and DebugRecord() reports that it was not already
 * recorded, the parser's source is dumped into a newly named .html file
 * in that directory.
 *
 * @param   anOutputDir   root directory for debug output, or 0 for none
 * @param   aRecordStats  PR_TRUE to note the vector in the statistics
 * @return  the result of VerifyContextVector(), or PR_TRUE when no
 *          verification was requested
 */
PRBool COtherDTD::Verify(const char* anOutputDir,PRBool aRecordStats) {

  PRBool result=PR_TRUE;

  //ok, now see if we understand this vector
  if(0!=anOutputDir || aRecordStats)
    result=VerifyContextVector();

  if (aRecordStats) {
    NoteVector(mContextStack,mContextStackPos,result);
  }

  if(0!=anOutputDir) {
    char path[2048];

    // an output directory that does not fit the buffer cannot be used
    if(PL_strlen(anOutputDir) >= sizeof(path))
      return result;
    strcpy(path,anOutputDir);

    int i=0;
    for(i=0;i<mContextStackPos;i++){
      const char* name=GetTagName(mContextStack[i]);
      // stop descending rather than write past the end of 'path'
      if(PL_strlen(path)+1+PL_strlen(name) >= sizeof(path))
        break;
      strcat(path,"/");
      strcat(path,name);
      PR_MkDir(path,0);
    }

    if(PR_FALSE==result){
      static PRBool rnd_initialized = PR_FALSE;

      if (!rnd_initialized) {
        // seed the random number generator once to aid in temp file
        // creation.
        rnd_initialized = PR_TRUE;
        srand((unsigned)time(NULL));
      }

      // generate a filename to dump the html source into; the buffer
      // holds the whole path plus "/", up to 16 hex digits and ".html"
      char filename[sizeof(path)+32];
      do {
        // use system time to generate a temporary file name
        time_t ltime;
        time(&ltime);
        // add in a random number so that we can create unique names
        // faster than simply every second.
        ltime += (time_t)rand();
        // %lX expects an unsigned long, which time_t need not be
        sprintf(filename,"%s/%lX.html", path, (unsigned long)ltime);
        // try until we find one we can create
      } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);

      // check to see if we already recorded an instance of this particular
      // bad vector.
      if (!DebugRecord(path,gURLRef, filename))
      {
        // save file to directory indicated by bad context vector
        PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
        if (debugFile) {
          // dump the html source into the newly created file.
          PRofstream ps;
          ps.attach(debugFile);
          mParser->DebugDumpSource(ps);
          PR_Close(debugFile);
        }
      }
    }
  }

  return result;
}
|
||||
|
||||
|
||||
@ -34,7 +34,6 @@
|
||||
#include "nsDeque.h"
|
||||
|
||||
|
||||
|
||||
#define NS_IOtherHTML_DTD_IID \
|
||||
{0x8a5e89c0, 0xd16d, 0x11d1, \
|
||||
{0x80, 0x22, 0x00, 0x60, 0x8, 0x14, 0x98, 0x89}}
|
||||
@ -42,6 +41,7 @@
|
||||
|
||||
class nsIParser;
|
||||
class nsIHTMLContentSink;
|
||||
class nsIParserDebug;
|
||||
|
||||
class COtherDTD : public nsIDTD {
|
||||
|
||||
@ -143,11 +143,11 @@ class COtherDTD : public nsIDTD {
|
||||
* of one type can contain a tag of another type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aParent -- tag enum of parent container
|
||||
* @param aChild -- tag enum of child container
|
||||
* @param aParent -- int tag of parent container
|
||||
* @param aChild -- int tag of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
|
||||
virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
|
||||
|
||||
/**
|
||||
* This method is called to determine whether or not a tag
|
||||
@ -201,26 +201,21 @@ class COtherDTD : public nsIDTD {
|
||||
*/
|
||||
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
|
||||
|
||||
|
||||
/**
|
||||
* This method gets called at various times by the parser
|
||||
* whenever we want to verify a valid context stack. This
|
||||
* method also gives us a hook to add debugging metrics.
|
||||
*
|
||||
* @update gess4/6/98
|
||||
* @param aStack[] array of ints (tokens)
|
||||
* @param aCount number of elements in given array
|
||||
* @return TRUE if stack is valid, else FALSE
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aURLRef if the current URL reference (for debugger)
|
||||
* @return
|
||||
*/
|
||||
virtual PRBool VerifyContextVector(void) const;
|
||||
virtual void SetURLRef(char * aURLRef);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @update jevering 6/18/98
|
||||
* @param aParserDebug created debug parser object
|
||||
* @return
|
||||
*/
|
||||
virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
|
||||
virtual void SetParserDebug(nsIParserDebug * aParserDebug);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
@ -232,7 +227,7 @@ class COtherDTD : public nsIDTD {
|
||||
* @param aChild -- tag type of child
|
||||
* @return True if closure was achieved -- other false
|
||||
*/
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
@ -701,7 +696,8 @@ protected:
|
||||
PRBool mHasOpenForm;
|
||||
PRBool mHasOpenMap;
|
||||
nsDeque mTokenDeque;
|
||||
|
||||
char* mURLRef;
|
||||
nsIParserDebug* mParserDebug;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -24,6 +24,7 @@ DEFINES = -D_IMPL_NS_HTMLPARS
|
||||
CPPSRCS = \
|
||||
nsHTMLContentSink.cpp \
|
||||
nsParserNode.cpp \
|
||||
nsParserDebug.cpp \
|
||||
nsScanner.cpp \
|
||||
nsToken.cpp \
|
||||
nsTokenHandler.cpp \
|
||||
@ -41,6 +42,8 @@ EXPORTS = \
|
||||
nsHTMLTokens.h \
|
||||
nsIParserNode.h \
|
||||
nsIParser.h \
|
||||
nsIParserDebug.h \
|
||||
nsIParserFilter.h \
|
||||
nsToken.h \
|
||||
$(NULL)
|
||||
|
||||
|
||||
@ -31,7 +31,8 @@ CPPSRCS=nsHTMLContentSink.cpp \
|
||||
nsHTMLParser.cpp prstrm.cpp
|
||||
|
||||
EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \
|
||||
nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h
|
||||
nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h \
|
||||
nsIParserDebug.h nsIParserFilter.h
|
||||
|
||||
CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
|
||||
.\$(OBJDIR)\CNavDTD.obj \
|
||||
@ -39,7 +40,7 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
|
||||
.\$(OBJDIR)\nsHTMLParser.obj \
|
||||
.\$(OBJDIR)\nsHTMLTokens.obj .\$(OBJDIR)\nsParserNode.obj \
|
||||
.\$(OBJDIR)\nsScanner.obj .\$(OBJDIR)\nsToken.obj \
|
||||
.\$(OBJDIR)\nsTokenHandler.obj \
|
||||
.\$(OBJDIR)\nsTokenHandler.obj .\$(OBJDIR)\nsParserDebug.obj \
|
||||
.\$(OBJDIR)\prstrm.obj
|
||||
|
||||
LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib
|
||||
|
||||
@ -30,6 +30,7 @@
|
||||
#include "prstrm.h"
|
||||
#include <fstream.h>
|
||||
#include "nsIInputStream.h"
|
||||
#include "nsIParserFilter.h"
|
||||
|
||||
/* UNCOMMENT THIS IF STUFF STOPS WORKING...
|
||||
#ifdef XP_PC
|
||||
@ -47,10 +48,7 @@ static const char* kNullURL = "Error: Null URL given";
|
||||
static const char* kNullFilename= "Error: Null filename given";
|
||||
static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static PRBool gRecordingStatistics=PR_TRUE;
|
||||
static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream
|
||||
static char* gURLRef=0;
|
||||
|
||||
//#define DEBUG_SAVE_SOURCE_DOC 1
|
||||
#ifdef DEBUG_SAVE_SOURCE_DOC
|
||||
@ -58,17 +56,6 @@ fstream* gTempStream=0;
|
||||
#endif
|
||||
|
||||
|
||||
extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir)
|
||||
{
|
||||
gVerificationOutputDir = verify_dir;
|
||||
}
|
||||
|
||||
|
||||
extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval)
|
||||
{
|
||||
gRecordingStatistics = bval;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is defined in nsIParser. It is used to
|
||||
* cause the COM-like construction of an nsHTMLParser.
|
||||
@ -107,6 +94,7 @@ CTokenDeallocator gTokenKiller;
|
||||
*/
|
||||
nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
|
||||
NS_INIT_REFCNT();
|
||||
mParserFilter = nsnull;
|
||||
mListener = nsnull;
|
||||
mTransferBuffer=0;
|
||||
mSink=0;
|
||||
@ -125,11 +113,6 @@ nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
|
||||
* @return
|
||||
*/
|
||||
nsHTMLParser::~nsHTMLParser() {
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
NS_IF_RELEASE(mListener);
|
||||
if(mTransferBuffer)
|
||||
delete [] mTransferBuffer;
|
||||
@ -139,7 +122,7 @@ nsHTMLParser::~nsHTMLParser() {
|
||||
delete mCurrentPos;
|
||||
mCurrentPos=0;
|
||||
if(mDTD)
|
||||
delete mDTD;
|
||||
NS_RELEASE(mDTD);
|
||||
mDTD=0;
|
||||
if(mScanner)
|
||||
delete mScanner;
|
||||
@ -185,6 +168,18 @@ nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr)
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
/**
 * Installs a new parser filter, replacing any filter set earlier.
 * The parser holds one reference on the installed filter and drops the
 * reference it held on the previous one.
 *
 * @param   aFilter  the filter to install, or 0 to remove the current one
 * @return  the local copy of the previous filter pointer after it has
 *          been passed to NS_RELEASE
 *          NOTE(review): the parser's reference on the previous filter is
 *          already gone at that point, so callers should not dereference
 *          the result -- confirm the intended ownership contract.
 */
nsIParserFilter * nsHTMLParser::SetParserFilter(nsIParserFilter * aFilter)
{
  nsIParserFilter* old=mParserFilter;

  // take the new reference first, so that re-installing the current
  // filter can never drop its reference count to zero in between
  if(aFilter) {
    NS_ADDREF(aFilter);
  }

  // always update the member, including to null: otherwise it would keep
  // pointing at the filter released below
  mParserFilter=aFilter;

  if(old) {
    NS_RELEASE(old);
  }
  return old;
}
|
||||
|
||||
/**
|
||||
* This method gets called in order to set the content
|
||||
* sink for this parser to dump nodes to.
|
||||
@ -217,6 +212,10 @@ void nsHTMLParser::SetDTD(nsIDTD* aDTD) {
|
||||
mDTD=aDTD;
|
||||
}
|
||||
|
||||
/**
 * Accessor for the DTD currently held by this parser.
 *
 * @return  the value of mDTD, returned as-is; no reference is added here
 */
nsIDTD * nsHTMLParser::GetDTD(void) {
|
||||
return mDTD;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
@ -287,7 +286,7 @@ eParseMode DetermineParseMode() {
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsIDTD* GetDTD(eParseMode aMode) {
|
||||
nsIDTD* NewDTD(eParseMode aMode) {
|
||||
nsIDTD* aDTD=0;
|
||||
switch(aMode) {
|
||||
case eParseMode_navigator:
|
||||
@ -297,6 +296,8 @@ nsIDTD* GetDTD(eParseMode aMode) {
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (aDTD)
|
||||
aDTD->AddRef();
|
||||
return aDTD;
|
||||
}
|
||||
|
||||
@ -364,11 +365,6 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
|
||||
nsString theBuffer;
|
||||
const int kLocalBufSize=10;
|
||||
|
||||
if (gURLRef)
|
||||
PL_strfree(gURLRef);
|
||||
if (aFilename)
|
||||
gURLRef = PL_strdup(aFilename);
|
||||
|
||||
mMajorIteration=-1;
|
||||
mMinorIteration=-1;
|
||||
|
||||
@ -417,22 +413,20 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
|
||||
* @param aFilename -- const char* containing file to be parsed.
|
||||
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
||||
*/
|
||||
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
|
||||
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug){
|
||||
NS_PRECONDITION(0!=aFilename,kNullFilename);
|
||||
PRInt32 status=kBadFilename;
|
||||
mIncremental=aIncremental;
|
||||
|
||||
if(aFilename) {
|
||||
|
||||
if (gURLRef)
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = PL_strdup(aFilename);
|
||||
|
||||
mParseMode=DetermineParseMode();
|
||||
mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
|
||||
mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
|
||||
if(mDTD) {
|
||||
mDTD->SetParser(this);
|
||||
mDTD->SetContentSink(mSink);
|
||||
mDTD->SetURLRef((char *)aFilename);
|
||||
mDTD->SetParserDebug(aDebug);
|
||||
}
|
||||
|
||||
WillBuildModel();
|
||||
@ -466,7 +460,8 @@ PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
|
||||
*/
|
||||
PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
|
||||
nsIStreamListener* aListener,
|
||||
PRBool aIncremental) {
|
||||
PRBool aIncremental,
|
||||
nsIParserDebug * aDebug) {
|
||||
NS_PRECONDITION(0!=aURL,kNullURL);
|
||||
|
||||
PRInt32 status=kBadURL;
|
||||
@ -485,19 +480,13 @@ PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
|
||||
|
||||
if(aURL) {
|
||||
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
if (aURL->GetSpec())
|
||||
gURLRef = PL_strdup(aURL->GetSpec());
|
||||
|
||||
mParseMode=DetermineParseMode();
|
||||
mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
|
||||
mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
|
||||
if(mDTD) {
|
||||
mDTD->SetParser(this);
|
||||
mDTD->SetContentSink(mSink);
|
||||
mDTD->SetURLRef((char *)aURL->GetSpec());
|
||||
mDTD->SetParserDebug(aDebug);
|
||||
}
|
||||
|
||||
WillBuildModel();
|
||||
@ -689,6 +678,9 @@ nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length)
|
||||
}
|
||||
#endif
|
||||
|
||||
if (mParserFilter)
|
||||
mParserFilter->RawBuffer(mTransferBuffer, &len);
|
||||
|
||||
mScanner->Append(&mTransferBuffer[offset],len);
|
||||
|
||||
} //if
|
||||
|
||||
@ -73,6 +73,8 @@ class nsIHTMLContentSink;
|
||||
class nsIURL;
|
||||
class nsIDTD;
|
||||
class CScanner;
|
||||
class nsIParserFilter;
|
||||
class nsIParserDebug;
|
||||
|
||||
|
||||
class nsHTMLParser : public nsIParser, public nsIStreamListener {
|
||||
@ -103,8 +105,12 @@ friend class CTokenHandler;
|
||||
* @return old sink, or NULL
|
||||
*/
|
||||
virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);
|
||||
|
||||
virtual nsIParserFilter* SetParserFilter(nsIParserFilter* aFilter);
|
||||
|
||||
virtual void SetDTD(nsIDTD* aDTD);
|
||||
|
||||
virtual nsIDTD * GetDTD(void);
|
||||
|
||||
/**
|
||||
*
|
||||
@ -124,7 +130,8 @@ friend class CTokenHandler;
|
||||
*/
|
||||
virtual PRInt32 Parse(nsIURL* aURL,
|
||||
nsIStreamListener* aListener,
|
||||
PRBool aIncremental=PR_TRUE);
|
||||
PRBool aIncremental=PR_TRUE,
|
||||
nsIParserDebug * aDebug = 0);
|
||||
|
||||
/**
|
||||
* Cause parser to parse input from given file in given mode
|
||||
@ -133,7 +140,7 @@ friend class CTokenHandler;
|
||||
* @param aMode is the desired parser mode (Nav, other, etc.)
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental);
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0);
|
||||
|
||||
/**
|
||||
* @update gess5/11/98
|
||||
@ -290,7 +297,8 @@ protected:
|
||||
//*********************************************
|
||||
|
||||
nsIStreamListener* mListener;
|
||||
nsIContentSink* mSink;
|
||||
nsIContentSink* mSink;
|
||||
nsIParserFilter* mParserFilter;
|
||||
|
||||
nsDequeIterator* mCurrentPos;
|
||||
nsDequeIterator* mMarkPos;
|
||||
|
||||
@ -37,6 +37,7 @@
|
||||
class nsIParser;
|
||||
class CToken;
|
||||
class nsIContentSink;
|
||||
class nsIParserDebug;
|
||||
|
||||
class nsIDTD : public nsISupports {
|
||||
|
||||
@ -115,12 +116,28 @@ class nsIDTD : public nsISupports {
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @update jevering 6/18/98
|
||||
* @param aURLRef if the current URL reference (for debugger)
|
||||
* @return
|
||||
*/
|
||||
virtual PRInt32 Verify(const char* anOutputDir,PRBool aRecordStats)=0;
|
||||
virtual void SetURLRef(char * aURLRef) = 0;
|
||||
|
||||
/**
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aParent parent tag
|
||||
* @param aChild child tag
|
||||
* @return PR_TRUE if valid container
|
||||
*/
|
||||
virtual PRBool CanContain(PRInt32 aParent, PRInt32 aChild) = 0;
|
||||
|
||||
/**
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aParserDebug created debug parser object
|
||||
* @return
|
||||
*/
|
||||
virtual void SetParserDebug(nsIParserDebug * aParserDebug) = 0;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -34,6 +34,7 @@ class nsString;
|
||||
class CToken;
|
||||
class nsIURL;
|
||||
class nsIDTD;
|
||||
class nsIParserDebug;
|
||||
|
||||
/**
|
||||
* This class defines the iparser interface. This XPCOM
|
||||
@ -60,9 +61,10 @@ class nsIParser : public nsISupports {
|
||||
|
||||
virtual PRInt32 Parse(nsIURL* aURL,
|
||||
nsIStreamListener* aListener,
|
||||
PRBool aIncremental=PR_TRUE) = 0;
|
||||
PRBool aIncremental=PR_TRUE,
|
||||
nsIParserDebug * aDebug = 0) = 0;
|
||||
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0;
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0)=0;
|
||||
|
||||
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;
|
||||
|
||||
|
||||
57
mozilla/htmlparser/src/nsIParserDebug.h
Normal file
57
mozilla/htmlparser/src/nsIParserDebug.h
Normal file
@ -0,0 +1,57 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update gess 4/8/98
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef NS_IPARSERDEBUG__
|
||||
#define NS_IPARSERDEBUG__
|
||||
|
||||
#include "nsISupports.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "prtypes.h"
|
||||
|
||||
#define NS_IPARSERDEBUG_IID \
|
||||
{0x7b68c220, 0x0685, 0x11d2, \
|
||||
{0xa4, 0xb5, 0x00, 0x80, 0x5f, 0x2a, 0x0e, 0xd2}}
|
||||
|
||||
|
||||
class nsIDTD;
|
||||
class nsHTMLParser;
|
||||
|
||||
/**
 * Abstract interface of the parser debug object. Every method is pure
 * virtual; an instance is obtained through NS_NewParserDebug(), declared
 * below this class.
 */
class nsIParserDebug : public nsISupports {
|
||||
|
||||
public:
|
||||
|
||||
// Sets the directory used for verification output.
virtual void SetVerificationDirectory(char * verify_dir) = 0;
|
||||
|
||||
// Turns the recording of vector statistics on or off.
virtual void SetRecordStatistics(PRBool bval) = 0;
|
||||
|
||||
// Verifies a context stack of ContextStackPos tags on behalf of aDTD and
// aParser; aURLRef names the document being parsed.
// NOTE(review): the meaning of the return value is defined by the
// implementation -- presumably PR_TRUE for a valid vector; confirm.
virtual PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef) = 0;
|
||||
|
||||
// Dumps the recorded vector statistics.
virtual void DumpVectorRecord(void) = 0;
|
||||
|
||||
};
|
||||
|
||||
extern NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult);
|
||||
|
||||
#endif /* NS_IPARSERDEBUG__ */
|
||||
51
mozilla/htmlparser/src/nsIParserFilter.h
Normal file
51
mozilla/htmlparser/src/nsIParserFilter.h
Normal file
@ -0,0 +1,51 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update jevering 6/17/98
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef IPARSERFILTER
|
||||
#define IPARSERFILTER
|
||||
|
||||
#include "nsISupports.h"
|
||||
|
||||
class CToken;
|
||||
|
||||
#define NS_IPARSERFILTER_IID \
|
||||
{0x14d6ff0, 0x0610, 0x11d2, \
|
||||
{0x8c, 0x3f, 0x00, 0x80, 0x5f, 0x8a, 0x1d, 0xb7}}
|
||||
|
||||
|
||||
/**
 * Abstract interface for an object that can be installed on the parser
 * with SetParserFilter(). Every method is pure virtual.
 */
class nsIParserFilter : public nsISupports {
|
||||
public:
|
||||
|
||||
// Receives a raw character buffer and a pointer to its length.
// NOTE(review): presumably the filter may rewrite the buffer and update
// *buffer_length in place -- confirm against an implementation.
NS_IMETHOD RawBuffer(char * buffer, int * buffer_length) = 0;
|
||||
|
||||
// Hook taking a token by reference.
// NOTE(review): the name suggests it runs before the token is added to
// the parser's token list -- confirm at the call site.
NS_IMETHOD WillAddToken(CToken & token) = 0;
|
||||
|
||||
// Placeholder: the argument list has not been decided yet.
NS_IMETHOD ProcessTokens( /* dont know what goes here yet */ void ) = 0;
|
||||
};
|
||||
|
||||
extern nsresult NS_NewParserFilter(nsIParserFilter** aInstancePtrResult);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
534
mozilla/htmlparser/src/nsParserDebug.cpp
Normal file
534
mozilla/htmlparser/src/nsParserDebug.cpp
Normal file
@ -0,0 +1,534 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update jevering 06/18/98
|
||||
*
|
||||
* This file contains the parser debugger object which aids in
|
||||
* walking links and reporting statistic information, reporting
|
||||
* bad vectors.
|
||||
*/
|
||||
|
||||
#include "CNavDTD.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsHTMLParser.h"
|
||||
#include "nsIParserDebug.h"
|
||||
#include "nsCRT.h"
|
||||
#include "prenv.h" //this is here for debug reasons...
|
||||
#include "prtypes.h" //this is here for debug reasons...
|
||||
#include "prio.h"
|
||||
#include "plstr.h"
|
||||
#include "prstrm.h"
|
||||
#include <fstream.h>
|
||||
#include <time.h>
|
||||
#include "prmem.h"
|
||||
|
||||
#define CONTEXT_VECTOR_MAP "/vector.map"
|
||||
#define CONTEXT_VECTOR_STAT "/vector.stat"
|
||||
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
|
||||
|
||||
// structure to store the vector statistic information
|
||||
|
||||
// One entry of the context-vector statistics table: a recorded tag vector
// together with how often it was seen and whether it verified as valid.
typedef struct vector_info {
|
||||
PRInt32 references; // number of occurrences counted
|
||||
PRInt32 count; // number of tags in the vector
|
||||
PRBool good_vector; // is this a valid vector?
|
||||
eHTMLTags* vector; // the tags themselves; 'count' entries
|
||||
} VectorInfo;
|
||||
|
||||
// the statistic vector table grows each time it exceeds this
|
||||
// stepping value
|
||||
#define TABLE_SIZE 128
|
||||
|
||||
/**
 * Concrete implementation of nsIParserDebug. Each instance carries its
 * own vector statistics table, verification directory and recording
 * flag as member data.
 */
class CParserDebug : public nsIParserDebug {
|
||||
public:
|
||||
|
||||
// aVerifyDir: verification directory; when 0 the constructor falls back
// to the VERIFY_PARSER environment variable.
CParserDebug(char * aVerifyDir = 0);
|
||||
~CParserDebug();
|
||||
|
||||
NS_DECL_ISUPPORTS
|
||||
|
||||
// nsIParserDebug methods
void SetVerificationDirectory(char * verify_dir);
|
||||
void SetRecordStatistics(PRBool bval);
|
||||
PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef);
|
||||
void DumpVectorRecord(void);
|
||||
|
||||
// per-instance table for storing vector statistics, and its size
|
||||
|
||||
private:
|
||||
VectorInfo ** mVectorInfoArray; // statistics table
|
||||
PRInt32 mVectorCount; // number of entries in the table
|
||||
char * mVerificationDir; // privately owned copy of the directory, or 0
|
||||
PRBool mRecordingStatistics; // set by SetRecordStatistics
|
||||
|
||||
// private helpers
PRBool DebugRecord(char * path, char * pURLRef, char * filename);
|
||||
void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector);
|
||||
void MakeVectorString(char * vector_string, VectorInfo * pInfo);
|
||||
};
|
||||
|
||||
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
||||
static NS_DEFINE_IID(kIDebugParserIID, NS_IPARSERDEBUG_IID);
|
||||
|
||||
/**
|
||||
* This method is declared in nsIParserDebug.h. It is used to
|
||||
* cause the COM-like construction of a CParserDebug.
|
||||
*
|
||||
* @update jevering 3/25/98
|
||||
* @param nsIParserDebug** ptr to newly instantiated parser debug object
|
||||
* @return NS_xxx error result
|
||||
*/
|
||||
|
||||
/**
 * Factory for the parser debug object. Creates a CParserDebug and hands
 * it out through its nsIParserDebug interface; QueryInterface adds the
 * reference that the caller then owns.
 *
 * @param   aInstancePtrResult  receives the new object; must not be null
 * @return  NS_ERROR_NULL_POINTER if aInstancePtrResult is null,
 *          NS_ERROR_OUT_OF_MEMORY if the object cannot be allocated,
 *          otherwise the result of QueryInterface
 */
NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult)
{
  // reject a null out-pointer before allocating, so that nothing is
  // created that nobody could ever receive
  if (0 == aInstancePtrResult) {
    return NS_ERROR_NULL_POINTER;
  }

  CParserDebug *it = new CParserDebug();

  if (it == 0) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  nsresult rv = it->QueryInterface(kIDebugParserIID, (void **)aInstancePtrResult);
  if (NS_OK != rv) {
    // nobody took a reference, so the object would otherwise leak
    delete it;
  }
  return rv;
}
|
||||
|
||||
/**
 * Constructs a debug object with an empty statistics table and with
 * statistics recording switched on.
 *
 * @param   aVerifyDir  verification directory; when null, the value of
 *                      the VERIFY_PARSER environment variable is used
 *                      instead, and when that is unset too, no
 *                      directory is stored. The string is copied.
 */
CParserDebug::CParserDebug(char * aVerifyDir)
{
  NS_INIT_REFCNT();
  mVectorInfoArray = 0;
  mVectorCount = 0;
  mRecordingStatistics = PR_TRUE;

  // an explicit argument wins over the environment
  const char * dirSource = aVerifyDir ? aVerifyDir : PR_GetEnv("VERIFY_PARSER");
  mVerificationDir = dirSource ? PL_strdup(dirSource) : 0;
}
|
||||
|
||||
/**
 *  Destructor. Releases the verification directory string and any
 *  statistics entries that are still held in the table.
 */
CParserDebug::~CParserDebug()
{
  if (mVerificationDir)
    PL_strfree(mVerificationDir);

  // the table is still populated when the statistics were never dumped
  if (mVectorInfoArray) {
    for (PRInt32 theIndex = 0; theIndex < mVectorCount; theIndex++) {
      VectorInfo * theInfo = mVectorInfoArray[theIndex];
      if (theInfo) {
        if (theInfo->vector)
          PR_Free(theInfo->vector);
        PR_Free(theInfo);
      }
    }
    PR_Free(mVectorInfoArray);
    mVectorInfoArray = 0;
    mVectorCount = 0;
  }
}
|
||||
|
||||
/**
|
||||
* This method gets called as part of our COM-like interfaces.
|
||||
* Its purpose is to create an interface to parser object
|
||||
* of some type.
|
||||
*
|
||||
* @update gess 4/8/98
|
||||
* @param nsIID id of object to discover
|
||||
* @param aInstancePtr ptr to newly discovered interface
|
||||
* @return NS_xxx result code
|
||||
*/
|
||||
nsresult CParserDebug::QueryInterface(const nsIID& aIID, void** aInstancePtr)
|
||||
{
|
||||
if (NULL == aInstancePtr) {
|
||||
return NS_ERROR_NULL_POINTER;
|
||||
}
|
||||
|
||||
if(aIID.Equals(kISupportsIID)) { //do IUnknown...
|
||||
*aInstancePtr = (nsIParserDebug*)(this);
|
||||
}
|
||||
else if(aIID.Equals(kIDebugParserIID)) { //do IParserDebug base class...
|
||||
*aInstancePtr = (nsIParserDebug*)(this);
|
||||
}
|
||||
else {
|
||||
*aInstancePtr=0;
|
||||
return NS_NOINTERFACE;
|
||||
}
|
||||
((nsISupports*) *aInstancePtr)->AddRef();
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMPL_ADDREF(CParserDebug)
|
||||
NS_IMPL_RELEASE(CParserDebug)
|
||||
|
||||
void CParserDebug::SetVerificationDirectory(char * verify_dir)
|
||||
{
|
||||
if (mVerificationDir) {
|
||||
PL_strfree(mVerificationDir);
|
||||
mVerificationDir = 0;
|
||||
}
|
||||
mVerificationDir = PL_strdup(verify_dir);
|
||||
}
|
||||
|
||||
/**
 *  Switches collection of context vector statistics on or off.
 *
 *  @param   bval PR_TRUE to have Verify() note each vector it sees
 */
void CParserDebug::SetRecordStatistics(PRBool bval)
{
  mRecordingStatistics = bval;
}
|
||||
|
||||
/**
|
||||
* This debug method records an invalid context vector and it's
|
||||
* associated context vector and URL in a simple flat file mapping which
|
||||
* resides in the verification directory and is named context.map
|
||||
*
|
||||
* @update jevering 6/06/98
|
||||
* @param path is the directory structure indicating the bad context vector
|
||||
* @param pURLRef is the associated URL
|
||||
* @param filename to record mapping to if not already recorded
|
||||
* @return TRUE if it is already record (dont rerecord)
|
||||
*/
|
||||
|
||||
PRBool CParserDebug::DebugRecord(char * path, char * pURLRef, char * filename)
|
||||
{
|
||||
char recordPath[2048];
|
||||
PRIntn oflags = 0;
|
||||
|
||||
// create the record file name from the verification director
|
||||
// and the default name.
|
||||
strcpy(recordPath,mVerificationDir);
|
||||
strcat(recordPath,CONTEXT_VECTOR_MAP);
|
||||
|
||||
// create the file exists, only open for read/write
|
||||
// otherwise, create it
|
||||
if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
|
||||
oflags = PR_CREATE_FILE;
|
||||
oflags |= PR_RDWR;
|
||||
|
||||
// open the record file
|
||||
PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
|
||||
|
||||
if (recordFile) {
|
||||
|
||||
char * string = (char *)PR_Malloc(2048);
|
||||
PRBool found = PR_FALSE;
|
||||
|
||||
// vectors are stored on the format iof "URL vector filename"
|
||||
// where the vector contains the verification path and
|
||||
// the filename contains the debug source dump
|
||||
sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
|
||||
|
||||
// get the file size, read in the file and parse it line at
|
||||
// a time to check to see if we have already recorded this
|
||||
// occurance
|
||||
|
||||
PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
if (iSize) {
|
||||
|
||||
char * buffer = (char*)PR_Malloc(iSize);
|
||||
char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
|
||||
if (buffer!=NULL && string!=NULL) {
|
||||
PRInt32 ibufferpos, istringpos;
|
||||
|
||||
// beginning of file for read
|
||||
PR_Seek(recordFile,0,PR_SEEK_SET);
|
||||
PR_Read(recordFile,buffer,iSize);
|
||||
|
||||
// run through the file looking for a matching vector
|
||||
for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
|
||||
{
|
||||
// compare string once we have hit the end of the line
|
||||
if (buffer[ibufferpos] == '\r') {
|
||||
stringbuf[istringpos] = '\0';
|
||||
istringpos = 0;
|
||||
// skip newline and space
|
||||
ibufferpos++;
|
||||
|
||||
if (PL_strlen(stringbuf)) {
|
||||
char * space;
|
||||
// chop of the filename for compare
|
||||
if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
|
||||
*space = '\0';
|
||||
|
||||
// we have already recorded this one, free up, and return
|
||||
if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
PR_Free(string);
|
||||
return PR_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build up the compare string
|
||||
else
|
||||
stringbuf[istringpos++] = buffer[ibufferpos];
|
||||
}
|
||||
|
||||
// throw away the record file data
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// if this bad vector was not recorded, add it to record file
|
||||
|
||||
if (!found) {
|
||||
PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
PR_Write(recordFile,string,PL_strlen(string));
|
||||
}
|
||||
|
||||
PR_Close(recordFile);
|
||||
PR_Free(string);
|
||||
}
|
||||
|
||||
// vector was not recorded
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* compare function for quick sort. Compares references and
|
||||
* sorts in decending order
|
||||
*/
|
||||
|
||||
static int compare( const void *arg1, const void *arg2 )
|
||||
{
|
||||
VectorInfo ** p1 = (VectorInfo**)arg1;
|
||||
VectorInfo ** p2 = (VectorInfo**)arg2;
|
||||
return (*p2)->references - (*p1)->references;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routines stores statistical information about a
|
||||
* context vector. The context vector statistics are stored in
|
||||
* a global array. The table is resorted each time it grows to
|
||||
* aid in lookup speed. If a vector has already been noted, its
|
||||
* reference count is bumped, otherwise it is added to the table
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param aTags is the tag list (vector)
|
||||
* @param count is the size of the vector
|
||||
* @return
|
||||
*/
|
||||
|
||||
void CParserDebug::NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
|
||||
{
|
||||
// if the table doesn't exist, create it
|
||||
if (!mVectorInfoArray) {
|
||||
mVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
|
||||
}
|
||||
else {
|
||||
// attempt to look up the vector
|
||||
for (PRInt32 i = 0; i < mVectorCount; i++)
|
||||
|
||||
// check the vector only if they are the same size, if they
|
||||
// match then just return without doing further work
|
||||
if (mVectorInfoArray[i]->count == count)
|
||||
if (!memcmp(mVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
|
||||
|
||||
// bzzzt. and we have a winner.. bump the ref count
|
||||
mVectorInfoArray[i]->references++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// the context vector hasn't been noted, so allocate it and
|
||||
// initialize it one.. add it to the table
|
||||
VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
|
||||
pVectorInfo->references = 1;
|
||||
pVectorInfo->count = count;
|
||||
pVectorInfo->good_vector = good_vector;
|
||||
pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
|
||||
memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
|
||||
mVectorInfoArray[mVectorCount++] = pVectorInfo;
|
||||
|
||||
// have we maxed out the table? grow it.. sort it.. love it.
|
||||
if ((mVectorCount % TABLE_SIZE) == 0) {
|
||||
mVectorInfoArray = (VectorInfo**)realloc(
|
||||
mVectorInfoArray,
|
||||
(sizeof(VectorInfo*)*((mVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
|
||||
if (mVectorCount) {
|
||||
qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CParserDebug::MakeVectorString(char * vector_string, VectorInfo * pInfo)
|
||||
{
|
||||
sprintf (vector_string, "%6d ", pInfo->references);
|
||||
for (PRInt32 j = 0; j < pInfo->count; j++) {
|
||||
PL_strcat(vector_string, "<");
|
||||
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
|
||||
PL_strcat(vector_string, ">");
|
||||
}
|
||||
PL_strcat(vector_string,"\r\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine dumps out the vector statistics to a text
|
||||
* file in the verification directory and defaults to the name
|
||||
* "vector.stat". It contains all parsed context vectors and there
|
||||
* occurance count sorted in decending order.
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
|
||||
void CParserDebug::DumpVectorRecord(void)
|
||||
{
|
||||
// do we have a table?
|
||||
if (mVectorCount) {
|
||||
|
||||
// hopefully, they wont exceed 1K.
|
||||
char vector_string[1024];
|
||||
char path[1024];
|
||||
|
||||
path[0] = '\0';
|
||||
|
||||
// put in the verification directory.. else the root
|
||||
if (mVerificationDir)
|
||||
strcpy(path,mVerificationDir);
|
||||
|
||||
strcat(path,CONTEXT_VECTOR_STAT);
|
||||
|
||||
// open the stat file creaming any existing stat file
|
||||
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (statisticFile) {
|
||||
|
||||
PRInt32 i;
|
||||
PRofstream ps;
|
||||
ps.attach(statisticFile);
|
||||
|
||||
// oh what the heck, sort it again
|
||||
if (mVectorCount) {
|
||||
qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
|
||||
// cute little header
|
||||
sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", mVectorCount);
|
||||
ps << vector_string;
|
||||
|
||||
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// dump out the bad vectors encountered
|
||||
for (i = 0; i < mVectorCount; i++) {
|
||||
if (!mVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, mVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
}
|
||||
|
||||
ps << "\r\n\r\nValid context vector summary\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// take a big vector table dump (good vectors)
|
||||
for (i = 0; i < mVectorCount; i++) {
|
||||
if (mVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, mVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
// free em up. they mean nothing to me now (I'm such a user)
|
||||
|
||||
if (mVectorInfoArray[i]->vector)
|
||||
PR_Free(mVectorInfoArray[i]->vector);
|
||||
PR_Free(mVectorInfoArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, we are done with the table, free it up as well
|
||||
PR_Free(mVectorInfoArray);
|
||||
mVectorInfoArray = 0;
|
||||
mVectorCount = 0;
|
||||
PR_Close(statisticFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
|
||||
PRBool CParserDebug::Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int aContextStackPos, eHTMLTags aContextStack[], char * aURLRef)
|
||||
{
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
//ok, now see if we understand this vector
|
||||
|
||||
if(0!=mVerificationDir || mRecordingStatistics) {
|
||||
|
||||
if(aDTD && aContextStackPos>1) {
|
||||
for (int i = 0; i < aContextStackPos-1; i++)
|
||||
if (!aDTD->CanContain(aContextStack[i],aContextStack[i+1])) {
|
||||
result = PR_FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mRecordingStatistics) {
|
||||
NoteVector(aContextStack,aContextStackPos,result);
|
||||
}
|
||||
|
||||
if(0!=mVerificationDir) {
|
||||
char path[2048];
|
||||
strcpy(path,mVerificationDir);
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<aContextStackPos;i++){
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(aContextStack[i]);
|
||||
strcat(path,name);
|
||||
PR_MkDir(path,0);
|
||||
}
|
||||
if(PR_FALSE==result){
|
||||
static PRBool rnd_initialized = PR_FALSE;
|
||||
|
||||
if (!rnd_initialized) {
|
||||
// seed randomn number generator to aid in temp file
|
||||
// creation.
|
||||
rnd_initialized = PR_TRUE;
|
||||
srand((unsigned)time(NULL));
|
||||
}
|
||||
|
||||
// generate a filename to dump the html source into
|
||||
char filename[1024];
|
||||
do {
|
||||
// use system time to generate a temporary file name
|
||||
time_t ltime;
|
||||
time (<ime);
|
||||
// add in random number so that we can create uniques names
|
||||
// faster than simply every second.
|
||||
ltime += (time_t)rand();
|
||||
sprintf(filename,"%s/%lX.html", path, ltime);
|
||||
// try until we find one we can create
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
|
||||
// check to see if we already recorded an instance of this particular
|
||||
// bad vector.
|
||||
if (!DebugRecord(path, aURLRef, filename))
|
||||
{
|
||||
// save file to directory indicated by bad context vector
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
// if we were able to open the debug file, then
|
||||
// write the true URL at the top of the file.
|
||||
if (debugFile) {
|
||||
// dump the html source into the newly created file.
|
||||
PRofstream ps;
|
||||
ps.attach(debugFile);
|
||||
if (aParser)
|
||||
aParser->DebugDumpSource(ps);
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
327
mozilla/htmlparser/src/nsTokenizer.cpp
Normal file
327
mozilla/htmlparser/src/nsTokenizer.cpp
Normal file
@ -0,0 +1,327 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
|
||||
#include <fstream.h>
|
||||
#include "nsTokenizer.h"
|
||||
#include "nsToken.h"
|
||||
#include "nsScanner.h"
|
||||
#include "nsIParserFilter.h"
|
||||
#include "nsIURL.h"
|
||||
|
||||
static void TokenFreeProc(void * pToken)
|
||||
{
|
||||
if (pToken!=NULL) {
|
||||
CToken * pCToken = (CToken*)pToken;
|
||||
delete pCToken;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
|
||||
mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
mParserFilter = aIFilter;
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aURL,aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
|
||||
mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
mParserFilter = aIFilter;
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aFilename,aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
|
||||
mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
mParserFilter = aIFilter;
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* default destructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::~CTokenizer() {
|
||||
delete mScanner;
|
||||
mDelegate->Destroy();
|
||||
mScanner=0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool CTokenizer::Append(nsString& aBuffer) {
|
||||
if(mScanner)
|
||||
return mScanner->Append(aBuffer);
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/21/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool CTokenizer::Append(const char* aBuffer, PRInt32 aLen){
|
||||
if(mScanner)
|
||||
return mScanner->Append(aBuffer,aLen);
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve a reference to the internal token deque.
|
||||
*
|
||||
* @update gess 4/20/98
|
||||
* @return deque reference
|
||||
*/
|
||||
nsDeque& CTokenizer::GetDeque(void) {
|
||||
return mTokenDeque;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cause the tokenizer to consume the next token, and
|
||||
* return an error result.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param anError -- ref to error code
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CTokenizer::GetToken(CToken*& aToken) {
|
||||
PRInt32 result=mDelegate->GetToken(*mScanner,aToken);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the number of elements in the deque
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return int containing element count
|
||||
*/
|
||||
PRInt32 CTokenizer::GetSize(void) {
|
||||
return mTokenDeque.GetSize();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Part of the code sandwich, this gets called right before
|
||||
* the tokenization process begins. The main reason for
|
||||
* this call is to allow the delegate to do initialization.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRBool CTokenizer::WillTokenize(PRBool aIncremental){
|
||||
PRBool result=PR_TRUE;
|
||||
result=mDelegate->WillTokenize(aIncremental);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){
|
||||
CToken* theToken=0;
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
WillTokenize(PR_TRUE);
|
||||
|
||||
while(kNoError==result) {
|
||||
result=GetToken(theToken);
|
||||
if(theToken && (kNoError==result)) {
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
theToken->DebugDumpToken(cout);
|
||||
#endif
|
||||
|
||||
PRBool bWillAdd = PR_TRUE;
|
||||
if (mParserFilter)
|
||||
bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken);
|
||||
if(bWillAdd && mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
}
|
||||
else if (theToken)
|
||||
delete theToken;
|
||||
}
|
||||
if(kEOF==result)
|
||||
result=kNoError;
|
||||
DidTokenize(PR_TRUE);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the primary control routine. It iteratively
|
||||
* consumes tokens until an error occurs or you run out
|
||||
* of data.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 CTokenizer::Tokenize(int anIteration) {
|
||||
CToken* theToken=0;
|
||||
PRInt32 result=kNoError;
|
||||
PRBool done=(0==anIteration) ? (!WillTokenize(PR_TRUE)) : PR_FALSE;
|
||||
|
||||
|
||||
while((PR_FALSE==done) && (kNoError==result)) {
|
||||
mScanner->Mark();
|
||||
result=GetToken(theToken);
|
||||
if(kNoError==result) {
|
||||
if(theToken) {
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
theToken->DebugDumpToken(cout);
|
||||
#endif
|
||||
|
||||
PRBool bWillAdd = PR_TRUE;
|
||||
if (mParserFilter)
|
||||
bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken);
|
||||
if(bWillAdd && mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
if(theToken)
|
||||
delete theToken;
|
||||
mScanner->RewindToMark();
|
||||
}
|
||||
}
|
||||
if((PR_TRUE==done) && (kInterrupted!=result))
|
||||
DidTokenize(PR_TRUE);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the tail-end of the code sandwich for the
|
||||
* tokenization process. It gets called once tokenziation
|
||||
* has completed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return TRUE if all went well
|
||||
*/
|
||||
PRBool CTokenizer::DidTokenize(PRBool aIncremental) {
|
||||
PRBool result=mDelegate->DidTokenize(aIncremental);
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
DebugDumpTokens(cout);
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void CTokenizer::DebugDumpTokens(ostream& out) {
|
||||
nsDequeIterator b=mTokenDeque.Begin();
|
||||
nsDequeIterator e=mTokenDeque.End();
|
||||
|
||||
CToken* theToken;
|
||||
while(b!=e) {
|
||||
theToken=(CToken*)(b++);
|
||||
theToken->DebugDumpToken(out);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void CTokenizer::DebugDumpSource(ostream& out) {
|
||||
nsDequeIterator b=mTokenDeque.Begin();
|
||||
nsDequeIterator e=mTokenDeque.End();
|
||||
|
||||
CToken* theToken;
|
||||
while(b!=e) {
|
||||
theToken=(CToken*)(b++);
|
||||
theToken->DebugDumpSource(out);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void CTokenizer::SelfTest(void) {
|
||||
#ifdef _DEBUG
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
185
mozilla/htmlparser/src/nsTokenizer.h
Normal file
185
mozilla/htmlparser/src/nsTokenizer.h
Normal file
@ -0,0 +1,185 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* LAST MODS: gess 28Feb98
|
||||
*
|
||||
* This file declares the basic tokenizer class. The
|
||||
* central theme of this class is to control and
|
||||
* coordinate a tokenization process. Note that this
|
||||
 *  class is grammar-neutral: this class doesn't care
|
||||
* at all what the underlying stream consists of.
|
||||
*
|
||||
* The main purpose of this class is to iterate over an
|
||||
* input stream with the help of a given scanner and a
|
||||
* given type-specific tokenizer-Delegate.
|
||||
*
|
||||
* The primary method here is the tokenize() method, which
|
||||
 *  simply loops calling getToken() until an EOF condition
|
||||
* (or some other error) occurs.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef TOKENIZER
|
||||
#define TOKENIZER
|
||||
|
||||
#include "nsToken.h"
|
||||
#include "nsITokenizerDelegate.h"
|
||||
#include "nsDeque.h"
|
||||
#include <iostream.h>
|
||||
|
||||
class CScanner;
|
||||
class nsIURL;
|
||||
class nsIParserFilter;
|
||||
|
||||
class CTokenizer {
|
||||
public:
|
||||
|
||||
CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
|
||||
CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
|
||||
CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
|
||||
|
||||
~CTokenizer();
|
||||
|
||||
/**
|
||||
* This method incrementally tokenizes as much content as
|
||||
* it can get its hands on.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 Tokenize(int anIteration); //your friendly incremental version
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens=PR_TRUE);
|
||||
|
||||
/**
|
||||
* Cause the tokenizer to consume the next token, and
|
||||
* return an error result.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param anError -- ref to error code
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 GetToken(CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve the number of elements in the deque
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return int containing element count
|
||||
*/
|
||||
PRInt32 GetSize(void);
|
||||
|
||||
/**
|
||||
* Retrieve a reference to the internal token deque.
|
||||
*
|
||||
* @update gess 4/20/98
|
||||
* @return deque reference
|
||||
*/
|
||||
nsDeque& GetDeque(void);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 4/20/98
|
||||
* @return deque reference
|
||||
*/
|
||||
PRBool Append(nsString& aBuffer);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 4/20/98
|
||||
* @return deque reference
|
||||
*/
|
||||
PRBool Append(const char* aBuffer, PRInt32 aLen);
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool SetBuffer(nsString& aBuffer);
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void DebugDumpSource(ostream& out);
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void DebugDumpTokens(ostream& out);
|
||||
|
||||
static void SelfTest();
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* This is the front-end of the code sandwich for the
|
||||
* tokenization process. It gets called once just before
|
||||
* tokenziation begins.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aIncremental tells us if tokenization is incremental
|
||||
* @return TRUE if all went well
|
||||
*/
|
||||
PRBool WillTokenize(PRBool aIncremental);
|
||||
|
||||
|
||||
/**
|
||||
* This is the tail-end of the code sandwich for the
|
||||
* tokenization process. It gets called once tokenziation
|
||||
* has completed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aIncremental tells us if tokenization was incremental
|
||||
* @return TRUE if all went well
|
||||
*/
|
||||
PRBool DidTokenize(PRBool aIncremental);
|
||||
|
||||
ITokenizerDelegate* mDelegate;
|
||||
CScanner* mScanner;
|
||||
nsDeque mTokenDeque;
|
||||
eParseMode mParseMode;
|
||||
nsIParserFilter* mParserFilter;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -31,6 +31,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "nsIParserDebug.h"
|
||||
#include "CNavDTD.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsCRT.h"
|
||||
@ -43,13 +44,10 @@
|
||||
#include "prtypes.h" //this is here for debug reasons...
|
||||
#include "prio.h"
|
||||
#include "plstr.h"
|
||||
#include "prstrm.h"
|
||||
#include <fstream.h>
|
||||
|
||||
#ifdef XP_PC
|
||||
#include <direct.h> //this is here for debug reasons...
|
||||
#endif
|
||||
#include <time.h>
|
||||
#include "prmem.h"
|
||||
|
||||
|
||||
@ -63,8 +61,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
||||
static const char* kNullToken = "Error: Null token given";
|
||||
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static char* gURLRef=0;
|
||||
static nsAutoString gEmpty;
|
||||
|
||||
static char formElementTags[]= {
|
||||
@ -234,17 +230,18 @@ static CNavTokenDeallocator gTokenKiller;
|
||||
* @return
|
||||
*/
|
||||
CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
NS_INIT_REFCNT();
|
||||
mParser=0;
|
||||
mURLRef=0;
|
||||
mParserDebug=0;
|
||||
nsCRT::zero(mLeafBits,sizeof(mLeafBits));
|
||||
nsCRT::zero(mContextStack,sizeof(mContextStack));
|
||||
nsCRT::zero(mStyleStack,sizeof(mStyleStack));
|
||||
nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
|
||||
mContextStackPos=0;
|
||||
mStyleStackPos=0;
|
||||
gURLRef = 0;
|
||||
mHasOpenForm=PR_FALSE;
|
||||
mHasOpenMap=PR_FALSE;
|
||||
gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
|
||||
InitializeDefaultTokenHandlers();
|
||||
}
|
||||
|
||||
@ -257,11 +254,10 @@ CNavDTD::CNavDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
*/
|
||||
CNavDTD::~CNavDTD(){
|
||||
DeleteTokenHandlers();
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
if (mURLRef)
|
||||
PL_strfree(mURLRef);
|
||||
if (mParserDebug)
|
||||
NS_RELEASE(mParserDebug);
|
||||
// NS_RELEASE(mSink);
|
||||
}
|
||||
|
||||
@ -321,7 +317,8 @@ PRInt32 CNavDTD::HandleToken(CToken* aToken){
|
||||
|
||||
if(aHandler) {
|
||||
result=(*aHandler)(theToken,this);
|
||||
Verify("xxx",PR_TRUE);
|
||||
if (mParserDebug)
|
||||
mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
|
||||
}
|
||||
|
||||
}//if
|
||||
@ -807,7 +804,7 @@ PRBool CNavDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) const
|
||||
* @param aChild -- tag enum of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
||||
PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {
|
||||
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
@ -884,11 +881,11 @@ PRBool CNavDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
||||
|
||||
//handle form elements (this is very much a WIP!!!)
|
||||
if(0!=strchr(formElementTags,aChild)){
|
||||
return CanContainFormElement(aParent,aChild);
|
||||
return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
|
||||
}
|
||||
|
||||
|
||||
switch(aParent) {
|
||||
switch((eHTMLTags)aParent) {
|
||||
case eHTMLTag_a:
|
||||
case eHTMLTag_acronym:
|
||||
result=PRBool(0!=strchr(gTagSet1,aChild)); break;
|
||||
@ -1475,7 +1472,7 @@ eHTMLTags CNavDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
|
||||
* @param aChild -- tag type of child
|
||||
* @return TRUE if propagation closes; false otherwise
|
||||
*/
|
||||
PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
|
||||
PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
switch(aParentTag) {
|
||||
@ -1490,7 +1487,7 @@ PRBool CNavDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTag
|
||||
//otherwise, intentionally fall through...
|
||||
|
||||
case eHTMLTag_tr:
|
||||
if(PR_TRUE==CanContain(eHTMLTag_td,aChildTag)) {
|
||||
if(PR_TRUE==CanContain((PRInt32)eHTMLTag_td,(PRInt32)aChildTag)) {
|
||||
aVector.Append((PRUnichar)eHTMLTag_td);
|
||||
result=BackwardPropagate(aVector,aParentTag,eHTMLTag_td);
|
||||
// result=PR_TRUE;
|
||||
@ -2723,433 +2720,19 @@ void CNavDTD::WillInterruptParse(void){
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
Here's a bunch of stuff JEvering put into the parser to do debugging.
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* This debug method records an invalid context vector and its
|
||||
* associated context vector and URL in a simple flat file mapping which
|
||||
* resides in the verification directory and is named context.map
|
||||
*
|
||||
* @update jevering 6/06/98
|
||||
* @param path is the directory structure indicating the bad context vector
|
||||
* @param pURLRef is the associated URL
|
||||
* @param filename to record mapping to if not already recorded
|
||||
* @return TRUE if it is already recorded (don't re-record)
|
||||
*/
|
||||
|
||||
#define CONTEXT_VECTOR_MAP "/vector.map"
|
||||
#define CONTEXT_VECTOR_STAT "/vector.stat"
|
||||
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
|
||||
static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
|
||||
{
|
||||
char recordPath[2048];
|
||||
PRIntn oflags = 0;
|
||||
|
||||
// create the record file name from the verification directory
|
||||
// and the default name.
|
||||
strcpy(recordPath,gVerificationOutputDir);
|
||||
strcat(recordPath,CONTEXT_VECTOR_MAP);
|
||||
|
||||
// if the file exists, only open for read/write
|
||||
// otherwise, create it
|
||||
if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
|
||||
oflags = PR_CREATE_FILE;
|
||||
oflags |= PR_RDWR;
|
||||
|
||||
// open the record file
|
||||
PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
|
||||
|
||||
if (recordFile) {
|
||||
|
||||
char * string = (char *)PR_Malloc(2048);
|
||||
PRBool found = PR_FALSE;
|
||||
|
||||
// vectors are stored in the format of "URL vector filename"
|
||||
// where the vector contains the verification path and
|
||||
// the filename contains the debug source dump
|
||||
sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
|
||||
|
||||
// get the file size, read in the file and parse it line at
|
||||
// a time to check to see if we have already recorded this
|
||||
// occurrence
|
||||
|
||||
PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
if (iSize) {
|
||||
|
||||
char * buffer = (char*)PR_Malloc(iSize);
|
||||
char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
|
||||
if (buffer!=NULL && string!=NULL) {
|
||||
PRInt32 ibufferpos, istringpos;
|
||||
|
||||
// beginning of file for read
|
||||
PR_Seek(recordFile,0,PR_SEEK_SET);
|
||||
PR_Read(recordFile,buffer,iSize);
|
||||
|
||||
// run through the file looking for a matching vector
|
||||
for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
|
||||
{
|
||||
// compare string once we have hit the end of the line
|
||||
if (buffer[ibufferpos] == '\r') {
|
||||
stringbuf[istringpos] = '\0';
|
||||
istringpos = 0;
|
||||
// skip newline and space
|
||||
ibufferpos++;
|
||||
|
||||
if (PL_strlen(stringbuf)) {
|
||||
char * space;
|
||||
// chop off the filename for compare
|
||||
if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
|
||||
*space = '\0';
|
||||
|
||||
// we have already recorded this one, free up, and return
|
||||
if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
PR_Free(string);
|
||||
return PR_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build up the compare string
|
||||
else
|
||||
stringbuf[istringpos++] = buffer[ibufferpos];
|
||||
}
|
||||
|
||||
// throw away the record file data
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// if this bad vector was not recorded, add it to record file
|
||||
|
||||
if (!found) {
|
||||
PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
PR_Write(recordFile,string,PL_strlen(string));
|
||||
}
|
||||
|
||||
PR_Close(recordFile);
|
||||
PR_Free(string);
|
||||
void CNavDTD::SetURLRef(char * aURLRef){
|
||||
if (mURLRef) {
|
||||
PL_strfree(mURLRef);
|
||||
mURLRef=0;
|
||||
}
|
||||
|
||||
// vector was not recorded
|
||||
return PR_FALSE;
|
||||
if (aURLRef)
|
||||
mURLRef = PL_strdup(aURLRef);
|
||||
}
|
||||
|
||||
// structure to store the vector statistic information
|
||||
|
||||
typedef struct vector_info {
|
||||
PRInt32 references; // number of occurances counted
|
||||
PRInt32 count; // number of tags in the vector
|
||||
PRBool good_vector; // is this a valid vector?
|
||||
eHTMLTags* vector; // and the vector
|
||||
} VectorInfo;
|
||||
|
||||
// global table for storing vector statistics and the size
|
||||
static VectorInfo ** gVectorInfoArray = 0;
|
||||
static PRInt32 gVectorCount = 0;
|
||||
|
||||
// the statistic vector table grows each time it exceeds this
|
||||
// stepping value
|
||||
#define TABLE_SIZE 128
|
||||
|
||||
// compare function for quick sort. Compares references and
|
||||
// sorts in descending order
|
||||
|
||||
static int compare( const void *arg1, const void *arg2 )
|
||||
void CNavDTD::SetParserDebug(nsIParserDebug * aParserDebug)
|
||||
{
|
||||
VectorInfo ** p1 = (VectorInfo**)arg1;
|
||||
VectorInfo ** p2 = (VectorInfo**)arg2;
|
||||
return (*p2)->references - (*p1)->references;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug routine stores statistical information about a
|
||||
* context vector. The context vector statistics are stored in
|
||||
* a global array. The table is resorted each time it grows to
|
||||
* aid in lookup speed. If a vector has already been noted, its
|
||||
* reference count is bumped, otherwise it is added to the table
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param aTags is the tag list (vector)
|
||||
* @param count is the size of the vector
|
||||
* @return
|
||||
*/
|
||||
|
||||
static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
|
||||
{
|
||||
// if the table doesn't exist, create it
|
||||
if (!gVectorInfoArray) {
|
||||
gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
|
||||
}
|
||||
else {
|
||||
// attempt to look up the vector
|
||||
for (PRInt32 i = 0; i < gVectorCount; i++)
|
||||
|
||||
// check the vector only if they are the same size, if they
|
||||
// match then just return without doing further work
|
||||
if (gVectorInfoArray[i]->count == count)
|
||||
if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
|
||||
|
||||
// bzzzt. and we have a winner.. bump the ref count
|
||||
gVectorInfoArray[i]->references++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// the context vector hasn't been noted, so allocate it and
|
||||
// initialize it one.. add it to the table
|
||||
VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
|
||||
pVectorInfo->references = 1;
|
||||
pVectorInfo->count = count;
|
||||
pVectorInfo->good_vector = good_vector;
|
||||
pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
|
||||
memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
|
||||
gVectorInfoArray[gVectorCount++] = pVectorInfo;
|
||||
|
||||
// have we maxed out the table? grow it.. sort it.. love it.
|
||||
if ((gVectorCount % TABLE_SIZE) == 0) {
|
||||
gVectorInfoArray = (VectorInfo**)realloc(
|
||||
gVectorInfoArray,
|
||||
(sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
|
||||
if (gVectorCount) {
|
||||
qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
|
||||
{
|
||||
sprintf (vector_string, "%6d ", pInfo->references);
|
||||
for (PRInt32 j = 0; j < pInfo->count; j++) {
|
||||
PL_strcat(vector_string, "<");
|
||||
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
|
||||
PL_strcat(vector_string, ">");
|
||||
}
|
||||
PL_strcat(vector_string,"\r\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine dumps out the vector statistics to a text
|
||||
* file in the verification directory and defaults to the name
|
||||
* "vector.stat". It contains all parsed context vectors and their
|
||||
* occurrence count sorted in descending order.
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
|
||||
extern "C" NS_EXPORT void DumpVectorRecord(void)
|
||||
{
|
||||
// do we have a table?
|
||||
if (gVectorCount) {
|
||||
|
||||
// hopefully, they won't exceed 1K.
|
||||
char vector_string[1024];
|
||||
char path[1024];
|
||||
|
||||
path[0] = '\0';
|
||||
|
||||
// put in the verification directory.. else the root
|
||||
if (gVerificationOutputDir)
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
|
||||
strcat(path,CONTEXT_VECTOR_STAT);
|
||||
|
||||
// open the stat file creaming any existing stat file
|
||||
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (statisticFile) {
|
||||
|
||||
PRInt32 i;
|
||||
PRofstream ps;
|
||||
ps.attach(statisticFile);
|
||||
|
||||
// oh what the heck, sort it again
|
||||
if (gVectorCount) {
|
||||
qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
|
||||
// cute little header
|
||||
sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
|
||||
ps << vector_string;
|
||||
|
||||
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// dump out the bad vectors encountered
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (!gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
}
|
||||
|
||||
ps << "\r\n\r\nValid context vector summary\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// take a big vector table dump (good vectors)
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
// free em up. they mean nothing to me now (I'm such a user)
|
||||
|
||||
if (gVectorInfoArray[i]->vector)
|
||||
PR_Free(gVectorInfoArray[i]->vector);
|
||||
PR_Free(gVectorInfoArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, we are done with the table, free it up as well
|
||||
PR_Free(gVectorInfoArray);
|
||||
gVectorInfoArray = 0;
|
||||
gVectorCount = 0;
|
||||
PR_Close(statisticFile);
|
||||
if (aParserDebug) {
|
||||
mParserDebug = aParserDebug;
|
||||
NS_ADDREF(mParserDebug);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool CNavDTD::VerifyContextVector(void) const {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
if(0!=gVerificationOutputDir) {
|
||||
|
||||
#ifdef XP_PC
|
||||
char path[_MAX_PATH+1];
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
#endif
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
|
||||
#ifdef NS_WIN32
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
mkdir(path);
|
||||
#endif
|
||||
}
|
||||
|
||||
//**************************************************
|
||||
//Add code here to see if we understand this vector
|
||||
//**************************************************
|
||||
|
||||
if(PR_FALSE==result){
|
||||
#ifdef NS_WIN32
|
||||
// save file to directory indicated by bad context vector
|
||||
int iCount = 1;
|
||||
char filename[_MAX_PATH];
|
||||
do {
|
||||
sprintf(filename,"%s/html%04d.dbg", path, iCount++);
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (debugFile) {
|
||||
PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
|
||||
PR_Write(debugFile,"\n",PL_strlen("\n"));
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
#endif
|
||||
//add debugging code here to record the fact that we just encountered
|
||||
//a context vector we don't know how to handle.
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool CNavDTD::Verify(const char* anOutputDir,PRBool aRecordStats) {
|
||||
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
//ok, now see if we understand this vector
|
||||
|
||||
if(0!=anOutputDir || aRecordStats)
|
||||
result=VerifyContextVector();
|
||||
|
||||
if (aRecordStats) {
|
||||
NoteVector(mContextStack,mContextStackPos,result);
|
||||
}
|
||||
|
||||
if(0!=anOutputDir) {
|
||||
char path[2048];
|
||||
strcpy(path,anOutputDir);
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<mContextStackPos;i++){
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(mContextStack[i]);
|
||||
strcat(path,name);
|
||||
PR_MkDir(path,0);
|
||||
}
|
||||
if(PR_FALSE==result){
|
||||
static PRBool rnd_initialized = PR_FALSE;
|
||||
|
||||
if (!rnd_initialized) {
|
||||
// seed random number generator to aid in temp file
|
||||
// creation.
|
||||
rnd_initialized = PR_TRUE;
|
||||
srand((unsigned)time(NULL));
|
||||
}
|
||||
|
||||
// generate a filename to dump the html source into
|
||||
char filename[1024];
|
||||
do {
|
||||
// use system time to generate a temporary file name
|
||||
time_t ltime;
|
||||
time (&ltime);
|
||||
// add in a random number so that we can create unique names
|
||||
// faster than simply every second.
|
||||
ltime += (time_t)rand();
|
||||
sprintf(filename,"%s/%lX.html", path, ltime);
|
||||
// try until we find one we can create
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
|
||||
// check to see if we already recorded an instance of this particular
|
||||
// bad vector.
|
||||
if (!DebugRecord(path,gURLRef, filename))
|
||||
{
|
||||
// save file to directory indicated by bad context vector
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
// if we were able to open the debug file, then
|
||||
// write the true URL at the top of the file.
|
||||
if (debugFile) {
|
||||
// dump the html source into the newly created file.
|
||||
PRofstream ps;
|
||||
ps.attach(debugFile);
|
||||
mParser->DebugDumpSource(ps);
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -42,6 +42,7 @@
|
||||
|
||||
class nsHTMLParser;
|
||||
class nsIHTMLContentSink;
|
||||
class nsIParserDebug;
|
||||
|
||||
class CNavDTD : public nsIDTD {
|
||||
|
||||
@ -141,11 +142,11 @@ class CNavDTD : public nsIDTD {
|
||||
* of one type can contain a tag of another type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aParent -- tag enum of parent container
|
||||
* @param aChild -- tag enum of child container
|
||||
* @param aParent -- int tag of parent container
|
||||
* @param aChild -- int tag of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
|
||||
virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
|
||||
|
||||
/**
|
||||
* This method is called to determine whether or not a tag
|
||||
@ -199,26 +200,21 @@ class CNavDTD : public nsIDTD {
|
||||
*/
|
||||
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
|
||||
|
||||
|
||||
/**
|
||||
* This method gets called at various times by the parser
|
||||
* whenever we want to verify a valid context stack. This
|
||||
* method also gives us a hook to add debugging metrics.
|
||||
*
|
||||
* @update gess4/6/98
|
||||
* @param aStack[] array of ints (tokens)
|
||||
* @param aCount number of elements in given array
|
||||
* @return TRUE if stack is valid, else FALSE
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aURLRef is the current URL reference (for debugger)
|
||||
* @return
|
||||
*/
|
||||
virtual PRBool VerifyContextVector(void) const;
|
||||
virtual void SetURLRef(char * aURLRef);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @update jevering 6/18/98
|
||||
* @param aParserDebug created debug parser object
|
||||
* @return
|
||||
*/
|
||||
virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
|
||||
virtual void SetParserDebug(nsIParserDebug * aParserDebug);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
@ -230,7 +226,7 @@ class CNavDTD : public nsIDTD {
|
||||
* @param aChild -- tag type of child
|
||||
* @return True if closure was achieved -- other false
|
||||
*/
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
@ -699,7 +695,8 @@ protected:
|
||||
PRBool mHasOpenForm;
|
||||
PRBool mHasOpenMap;
|
||||
nsDeque mTokenDeque;
|
||||
|
||||
char* mURLRef;
|
||||
nsIParserDebug* mParserDebug;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -31,6 +31,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "nsIParserDebug.h"
|
||||
#include "COtherDTD.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsCRT.h"
|
||||
@ -63,8 +64,6 @@ static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
||||
static const char* kNullToken = "Error: Null token given";
|
||||
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static char* gURLRef=0;
|
||||
static nsAutoString gEmpty;
|
||||
|
||||
static char formElementTags[]= {
|
||||
@ -235,17 +234,18 @@ static COtherTokenDeallocator gTokenKiller;
|
||||
* @return
|
||||
*/
|
||||
COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
NS_INIT_REFCNT();
|
||||
mParser=0;
|
||||
mURLRef=0;
|
||||
mParserDebug=0;
|
||||
nsCRT::zero(mLeafBits,sizeof(mLeafBits));
|
||||
nsCRT::zero(mContextStack,sizeof(mContextStack));
|
||||
nsCRT::zero(mStyleStack,sizeof(mStyleStack));
|
||||
nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers));
|
||||
mContextStackPos=0;
|
||||
mStyleStackPos=0;
|
||||
gURLRef = 0;
|
||||
mHasOpenForm=PR_FALSE;
|
||||
mHasOpenMap=PR_FALSE;
|
||||
gVerificationOutputDir = PR_GetEnv("VERIFY_PARSER");
|
||||
InitializeDefaultTokenHandlers();
|
||||
}
|
||||
|
||||
@ -258,11 +258,10 @@ COtherDTD::COtherDTD() : nsIDTD(), mTokenDeque(gTokenKiller) {
|
||||
*/
|
||||
COtherDTD::~COtherDTD(){
|
||||
DeleteTokenHandlers();
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
if (mURLRef)
|
||||
PL_strfree(mURLRef);
|
||||
if (mParserDebug)
|
||||
NS_RELEASE(mParserDebug);
|
||||
// NS_RELEASE(mSink);
|
||||
}
|
||||
|
||||
@ -321,7 +320,8 @@ PRInt32 COtherDTD::HandleToken(CToken* aToken){
|
||||
|
||||
if(aHandler) {
|
||||
result=(*aHandler)(theToken,this);
|
||||
Verify("xxx",PR_TRUE);
|
||||
if (mParserDebug)
|
||||
mParserDebug->Verify(this, mParser, mContextStackPos, mContextStack, mURLRef);
|
||||
}
|
||||
|
||||
}//if
|
||||
@ -807,7 +807,7 @@ PRBool COtherDTD::CanContainFormElement(eHTMLTags aParent,eHTMLTags aChild) cons
|
||||
* @param aChild -- tag enum of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
||||
PRBool COtherDTD::CanContain(PRInt32 aParent,PRInt32 aChild) {
|
||||
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
@ -884,11 +884,11 @@ PRBool COtherDTD::CanContain(eHTMLTags aParent,eHTMLTags aChild) const {
|
||||
|
||||
//handle form elements (this is very much a WIP!!!)
|
||||
if(0!=strchr(formElementTags,aChild)){
|
||||
return CanContainFormElement(aParent,aChild);
|
||||
return CanContainFormElement((eHTMLTags)aParent,(eHTMLTags)aChild);
|
||||
}
|
||||
|
||||
|
||||
switch(aParent) {
|
||||
switch((eHTMLTags)aParent) {
|
||||
case eHTMLTag_a:
|
||||
case eHTMLTag_acronym:
|
||||
result=PRBool(0!=strchr(gTagSet1,aChild)); break;
|
||||
@ -1475,7 +1475,7 @@ eHTMLTags COtherDTD::GetDefaultParentTagFor(eHTMLTags aTag) const{
|
||||
* @param aChild -- tag type of child
|
||||
* @return TRUE if propagation closes; false otherwise
|
||||
*/
|
||||
PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const {
|
||||
PRBool COtherDTD::ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) {
|
||||
PRBool result=PR_FALSE;
|
||||
|
||||
switch(aParentTag) {
|
||||
@ -2723,442 +2723,19 @@ void COtherDTD::WillInterruptParse(void){
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
Here's a bunch of stuff JEvering put into the parser to do debugging.
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* This debug method records an invalid context vector and its
|
||||
* associated context vector and URL in a simple flat file mapping which
|
||||
* resides in the verification directory and is named context.map
|
||||
*
|
||||
* @update jevering 6/06/98
|
||||
* @param path is the directory structure indicating the bad context vector
|
||||
* @param pURLRef is the associated URL
|
||||
* @param filename to record mapping to if not already recorded
|
||||
* @return TRUE if it is already recorded (don't re-record)
|
||||
*/
|
||||
|
||||
#define CONTEXT_VECTOR_MAP "/vector.map"
|
||||
#define CONTEXT_VECTOR_STAT "/vector.stat"
|
||||
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
|
||||
static PRBool DebugRecord(char * path, char * pURLRef, char * filename)
|
||||
{
|
||||
char recordPath[2048];
|
||||
PRIntn oflags = 0;
|
||||
|
||||
// create the record file name from the verification directory
|
||||
// and the default name.
|
||||
strcpy(recordPath,gVerificationOutputDir);
|
||||
strcat(recordPath,CONTEXT_VECTOR_MAP);
|
||||
|
||||
// if the file exists, only open for read/write
|
||||
// otherwise, create it
|
||||
if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
|
||||
oflags = PR_CREATE_FILE;
|
||||
oflags |= PR_RDWR;
|
||||
|
||||
// open the record file
|
||||
PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
|
||||
|
||||
if (recordFile) {
|
||||
|
||||
char * string = (char *)PR_Malloc(2048);
|
||||
PRBool found = PR_FALSE;
|
||||
|
||||
// vectors are stored in the format of "URL vector filename"
|
||||
// where the vector contains the verification path and
|
||||
// the filename contains the debug source dump
|
||||
sprintf(string,"%s %s %s\r\n", pURLRef, path, filename);
|
||||
|
||||
// get the file size, read in the file and parse it line at
|
||||
// a time to check to see if we have already recorded this
|
||||
// occurrence
|
||||
|
||||
PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
if (iSize) {
|
||||
|
||||
char * buffer = (char*)PR_Malloc(iSize);
|
||||
char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048);
|
||||
if (buffer!=NULL && string!=NULL) {
|
||||
PRInt32 ibufferpos, istringpos;
|
||||
|
||||
// beginning of file for read
|
||||
PR_Seek(recordFile,0,PR_SEEK_SET);
|
||||
PR_Read(recordFile,buffer,iSize);
|
||||
|
||||
// run through the file looking for a matching vector
|
||||
for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++)
|
||||
{
|
||||
// compare string once we have hit the end of the line
|
||||
if (buffer[ibufferpos] == '\r') {
|
||||
stringbuf[istringpos] = '\0';
|
||||
istringpos = 0;
|
||||
// skip newline and space
|
||||
ibufferpos++;
|
||||
|
||||
if (PL_strlen(stringbuf)) {
|
||||
char * space;
|
||||
// chop off the filename for compare
|
||||
if ((space = PL_strrchr(stringbuf, ' '))!=NULL)
|
||||
*space = '\0';
|
||||
|
||||
// we have already recorded this one, free up, and return
|
||||
if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) {
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
PR_Free(string);
|
||||
return PR_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// build up the compare string
|
||||
else
|
||||
stringbuf[istringpos++] = buffer[ibufferpos];
|
||||
}
|
||||
|
||||
// throw away the record file data
|
||||
PR_Free(buffer);
|
||||
PR_Free(stringbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// if this bad vector was not recorded, add it to record file
|
||||
|
||||
if (!found) {
|
||||
PR_Seek(recordFile,0,PR_SEEK_END);
|
||||
PR_Write(recordFile,string,PL_strlen(string));
|
||||
}
|
||||
|
||||
PR_Close(recordFile);
|
||||
PR_Free(string);
|
||||
void COtherDTD::SetURLRef(char * aURLRef){
|
||||
if (mURLRef) {
|
||||
PL_strfree(mURLRef);
|
||||
mURLRef=0;
|
||||
}
|
||||
|
||||
// vector was not recorded
|
||||
return PR_FALSE;
|
||||
if (aURLRef)
|
||||
mURLRef = PL_strdup(aURLRef);
|
||||
}
|
||||
|
||||
// structure to store the vector statistic information
|
||||
|
||||
typedef struct vector_info {
|
||||
PRInt32 references; // number of occurances counted
|
||||
PRInt32 count; // number of tags in the vector
|
||||
PRBool good_vector; // is this a valid vector?
|
||||
eHTMLTags* vector; // and the vector
|
||||
} VectorInfo;
|
||||
|
||||
// global table for storing vector statistics and the size
|
||||
static VectorInfo ** gVectorInfoArray = 0;
|
||||
static PRInt32 gVectorCount = 0;
|
||||
|
||||
// the statistic vector table grows each time it exceeds this
|
||||
// stepping value
|
||||
#define TABLE_SIZE 128
|
||||
|
||||
// compare function for quick sort. Compares references and
|
||||
// sorts in descending order
|
||||
|
||||
static int compare( const void *arg1, const void *arg2 )
|
||||
void COtherDTD::SetParserDebug(nsIParserDebug * aParserDebug)
|
||||
{
|
||||
VectorInfo ** p1 = (VectorInfo**)arg1;
|
||||
VectorInfo ** p2 = (VectorInfo**)arg2;
|
||||
return (*p2)->references - (*p1)->references;
|
||||
}
|
||||
|
||||
/**
|
||||
* quick sort the statistic array causing the most frequently
|
||||
* used vectors to be at the top (this makes it a little speedier
|
||||
* when looking them up)
|
||||
*/
|
||||
static void SortVectorRecord(void) {
|
||||
// of course, sort it only if there is something to sort
|
||||
if (gVectorCount) {
|
||||
qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug routine stores statistical information about a
|
||||
* context vector. The context vector statistics are stored in
|
||||
* a global array. The table is resorted each time it grows to
|
||||
* aid in lookup speed. If a vector has already been noted, its
|
||||
* reference count is bumped, otherwise it is added to the table
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param aTags is the tag list (vector)
|
||||
* @param count is the size of the vector
|
||||
* @return
|
||||
*/
|
||||
|
||||
static void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
|
||||
{
|
||||
// if the table doesn't exist, create it
|
||||
if (!gVectorInfoArray) {
|
||||
gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
|
||||
}
|
||||
else {
|
||||
// attempt to look up the vector
|
||||
for (PRInt32 i = 0; i < gVectorCount; i++)
|
||||
|
||||
// check the vector only if they are the same size, if they
|
||||
// match then just return without doing further work
|
||||
if (gVectorInfoArray[i]->count == count)
|
||||
if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
|
||||
|
||||
// bzzzt. and we have a winner.. bump the ref count
|
||||
gVectorInfoArray[i]->references++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// the context vector hasn't been noted, so allocate it and
|
||||
// initialize it one.. add it to the table
|
||||
VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
|
||||
pVectorInfo->references = 1;
|
||||
pVectorInfo->count = count;
|
||||
pVectorInfo->good_vector = good_vector;
|
||||
pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
|
||||
memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
|
||||
gVectorInfoArray[gVectorCount++] = pVectorInfo;
|
||||
|
||||
// have we maxed out the table? grow it.. sort it.. love it.
|
||||
if ((gVectorCount % TABLE_SIZE) == 0) {
|
||||
gVectorInfoArray = (VectorInfo**)realloc(
|
||||
gVectorInfoArray,
|
||||
(sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
|
||||
SortVectorRecord();
|
||||
}
|
||||
}
|
||||
|
||||
static void MakeVectorString(char * vector_string, VectorInfo * pInfo)
|
||||
{
|
||||
sprintf (vector_string, "%6d ", pInfo->references);
|
||||
for (PRInt32 j = 0; j < pInfo->count; j++) {
|
||||
PL_strcat(vector_string, "<");
|
||||
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
|
||||
PL_strcat(vector_string, ">");
|
||||
}
|
||||
PL_strcat(vector_string,"\r\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine dumps out the vector statistics to a text
|
||||
* file in the verification directory and defaults to the name
|
||||
* "vector.stat". It contains all parsed context vectors and their
|
||||
* occurrence count sorted in descending order.
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
|
||||
extern "C" NS_EXPORT void DumpVectorRecord_other(void)
|
||||
{
|
||||
// do we have a table?
|
||||
if (gVectorCount) {
|
||||
|
||||
// hopefully, they won't exceed 1K.
|
||||
char vector_string[1024];
|
||||
char path[1024];
|
||||
|
||||
path[0] = '\0';
|
||||
|
||||
// put in the verification directory.. else the root
|
||||
if (gVerificationOutputDir)
|
||||
strcpy(path,gVerificationOutputDir);
|
||||
|
||||
strcat(path,CONTEXT_VECTOR_STAT);
|
||||
|
||||
// open the stat file creaming any existing stat file
|
||||
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (statisticFile) {
|
||||
|
||||
PRInt32 i;
|
||||
PRofstream ps;
|
||||
ps.attach(statisticFile);
|
||||
|
||||
// oh what the heck, sort it again
|
||||
SortVectorRecord();
|
||||
|
||||
// cute little header
|
||||
sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", gVectorCount);
|
||||
ps << vector_string;
|
||||
|
||||
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// dump out the bad vectors encountered
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (!gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
}
|
||||
|
||||
ps << "\r\n\r\nValid context vector summary\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// take a big vector table dump (good vectors)
|
||||
for (i = 0; i < gVectorCount; i++) {
|
||||
if (gVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, gVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
// free em up. they mean nothing to me now (I'm such a user)
|
||||
|
||||
if (gVectorInfoArray[i]->vector)
|
||||
PR_Free(gVectorInfoArray[i]->vector);
|
||||
PR_Free(gVectorInfoArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, we are done with the table, free it up as well
|
||||
PR_Free(gVectorInfoArray);
|
||||
gVectorInfoArray = 0;
|
||||
gVectorCount = 0;
|
||||
PR_Close(statisticFile);
|
||||
if (aParserDebug) {
|
||||
mParserDebug = aParserDebug;
|
||||
NS_ADDREF(mParserDebug);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool COtherDTD::VerifyContextVector(void) const {

  // No real check is implemented yet, so the vector is always
  // reported as understood.
  PRBool isKnownVector=PR_TRUE;

  // Nothing to write unless a verification directory is configured.
  if(0==gVerificationOutputDir)
    return isKnownVector;

#ifdef XP_PC
  char path[_MAX_PATH+1];
  strcpy(path,gVerificationOutputDir);
#endif

  // Create one nested directory per tag on the context stack.
  int depth=0;
  while(depth<mContextStackPos){
#ifdef NS_WIN32
    strcat(path,"/");
    const char* tagName=GetTagName(mContextStack[depth]);
    strcat(path,tagName);
    mkdir(path);
#endif
    depth++;
  }

  //**************************************************
  //Add code here to see if we understand this vector
  //**************************************************

  if(!isKnownVector){
#ifdef NS_WIN32
    // save file to directory indicated by bad context vector;
    // probe html0001.dbg, html0002.dbg, ... for the first unused name
    int iCount = 1;
    char filename[_MAX_PATH];
    do {
      sprintf(filename,"%s/html%04d.dbg", path, iCount++);
    } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);

    PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
    if (debugFile) {
      // the debug file holds just the URL being parsed
      PR_Write(debugFile,gURLRef,PL_strlen(gURLRef));
      PR_Write(debugFile,"\n",PL_strlen("\n"));
      PR_Close(debugFile);
    }
#endif
    //add debugging code here to record the fact that we just encountered
    //a context vector we don't know how to handle.
  }

  return isKnownVector;
}
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
PRBool COtherDTD::Verify(const char* anOutputDir,PRBool aRecordStats) {
  // Verifies the current context stack and optionally records it.
  //
  // @param  anOutputDir   root of the verification directory tree, or
  //                       null to skip the directory/source dump
  // @param  aRecordStats  when set, the vector is passed to NoteVector()
  // @return the result of VerifyContextVector(), or PR_TRUE when neither
  //         an output directory nor statistics were requested

  PRBool result=PR_TRUE;

  //ok, now see if we understand this vector
  if(0!=anOutputDir || aRecordStats)
    result=VerifyContextVector();

  if (aRecordStats) {
    NoteVector(mContextStack,mContextStackPos,result);
  }

  if(0!=anOutputDir) {
    char path[2048];

    // Never write past the end of path: when the directory name (or the
    // nested tag path built below) does not fit, the dump is skipped.
    size_t pathLen=strlen(anOutputDir);
    if(pathLen>=sizeof(path))
      return result;
    strcpy(path,anOutputDir);

    // one nested directory per tag on the context stack
    int i=0;
    for(i=0;i<mContextStackPos;i++){
      const char* name=GetTagName(mContextStack[i]);
      size_t nameLen=strlen(name);
      if(pathLen+1+nameLen>=sizeof(path))
        return result;
      strcat(path,"/");
      strcat(path,name);
      pathLen+=1+nameLen;
      PR_MkDir(path,0);
    }

    if(PR_FALSE==result){
      static PRBool rnd_initialized = PR_FALSE;

      if (!rnd_initialized) {
        // seed random number generator to aid in temp file
        // creation.
        rnd_initialized = PR_TRUE;
        srand((unsigned)time(NULL));
      }

      // generate a filename to dump the html source into; the buffer
      // holds the whole path plus "/", the hex stamp and ".html"
      char filename[sizeof(path)+32];
      do {
        // use system time to generate a temporary file name
        time_t ltime;
        time(&ltime);
        // add in random number so that we can create unique names
        // faster than simply every second.
        ltime += (time_t)rand();
        // time_t is not guaranteed to be a long; convert for %lX
        sprintf(filename,"%s/%lX.html", path, (unsigned long)ltime);
        // try until we find one we can create
      } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);

      // check to see if we already recorded an instance of this particular
      // bad vector.
      if (!DebugRecord(path,gURLRef, filename))
      {
        // save file to directory indicated by bad context vector
        PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
        if (debugFile) {
          // dump the html source into the newly created file.
          PRofstream ps;
          ps.attach(debugFile);
          mParser->DebugDumpSource(ps);
          PR_Close(debugFile);
        }
      }
    }
  }

  return result;
}
|
||||
|
||||
|
||||
@ -34,7 +34,6 @@
|
||||
#include "nsDeque.h"
|
||||
|
||||
|
||||
|
||||
#define NS_IOtherHTML_DTD_IID \
|
||||
{0x8a5e89c0, 0xd16d, 0x11d1, \
|
||||
{0x80, 0x22, 0x00, 0x60, 0x8, 0x14, 0x98, 0x89}}
|
||||
@ -42,6 +41,7 @@
|
||||
|
||||
class nsIParser;
|
||||
class nsIHTMLContentSink;
|
||||
class nsIParserDebug;
|
||||
|
||||
class COtherDTD : public nsIDTD {
|
||||
|
||||
@ -143,11 +143,11 @@ class COtherDTD : public nsIDTD {
|
||||
* of one type can contain a tag of another type.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aParent -- tag enum of parent container
|
||||
* @param aChild -- tag enum of child container
|
||||
* @param aParent -- int tag of parent container
|
||||
* @param aChild -- int tag of child container
|
||||
* @return PR_TRUE if parent can contain child
|
||||
*/
|
||||
virtual PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild) const;
|
||||
virtual PRBool CanContain(PRInt32 aParent,PRInt32 aChild);
|
||||
|
||||
/**
|
||||
* This method is called to determine whether or not a tag
|
||||
@ -201,26 +201,21 @@ class COtherDTD : public nsIDTD {
|
||||
*/
|
||||
virtual eHTMLTags GetDefaultParentTagFor(eHTMLTags aTag) const;
|
||||
|
||||
|
||||
/**
|
||||
* This method gets called at various times by the parser
|
||||
* whenever we want to verify a valid context stack. This
|
||||
* method also gives us a hook to add debugging metrics.
|
||||
*
|
||||
* @update gess4/6/98
|
||||
* @param aStack[] array of ints (tokens)
|
||||
* @param aCount number of elements in given array
|
||||
* @return TRUE if stack is valid, else FALSE
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aURLRef is the current URL reference (for debugger)
|
||||
* @return
|
||||
*/
|
||||
virtual PRBool VerifyContextVector(void) const;
|
||||
virtual void SetURLRef(char * aURLRef);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @update jevering 6/18/98
|
||||
* @param aParserDebug created debug parser object
|
||||
* @return
|
||||
*/
|
||||
virtual PRBool Verify(const char* anOutputDir,PRBool aRecordStats);
|
||||
virtual void SetParserDebug(nsIParserDebug * aParserDebug);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
@ -232,7 +227,7 @@ class COtherDTD : public nsIDTD {
|
||||
* @param aChild -- tag type of child
|
||||
* @return True if closure was achieved -- other false
|
||||
*/
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag) const;
|
||||
virtual PRBool ForwardPropagate(nsString& aVector,eHTMLTags aParentTag,eHTMLTags aChildTag);
|
||||
|
||||
/**
|
||||
* This method tries to design a context map (without actually
|
||||
@ -701,7 +696,8 @@ protected:
|
||||
PRBool mHasOpenForm;
|
||||
PRBool mHasOpenMap;
|
||||
nsDeque mTokenDeque;
|
||||
|
||||
char* mURLRef;
|
||||
nsIParserDebug* mParserDebug;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -24,6 +24,7 @@ DEFINES = -D_IMPL_NS_HTMLPARS
|
||||
CPPSRCS = \
|
||||
nsHTMLContentSink.cpp \
|
||||
nsParserNode.cpp \
|
||||
nsParserDebug.cpp \
|
||||
nsScanner.cpp \
|
||||
nsToken.cpp \
|
||||
nsTokenHandler.cpp \
|
||||
@ -41,6 +42,8 @@ EXPORTS = \
|
||||
nsHTMLTokens.h \
|
||||
nsIParserNode.h \
|
||||
nsIParser.h \
|
||||
nsIParserDebug.h \
|
||||
nsIParserFilter.h \
|
||||
nsToken.h \
|
||||
$(NULL)
|
||||
|
||||
|
||||
@ -31,7 +31,8 @@ CPPSRCS=nsHTMLContentSink.cpp \
|
||||
nsHTMLParser.cpp prstrm.cpp
|
||||
|
||||
EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \
|
||||
nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h
|
||||
nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h \
|
||||
nsIParserDebug.h nsIParserFilter.h
|
||||
|
||||
CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
|
||||
.\$(OBJDIR)\CNavDTD.obj \
|
||||
@ -39,7 +40,7 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \
|
||||
.\$(OBJDIR)\nsHTMLParser.obj \
|
||||
.\$(OBJDIR)\nsHTMLTokens.obj .\$(OBJDIR)\nsParserNode.obj \
|
||||
.\$(OBJDIR)\nsScanner.obj .\$(OBJDIR)\nsToken.obj \
|
||||
.\$(OBJDIR)\nsTokenHandler.obj \
|
||||
.\$(OBJDIR)\nsTokenHandler.obj .\$(OBJDIR)\nsParserDebug.obj \
|
||||
.\$(OBJDIR)\prstrm.obj
|
||||
|
||||
LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib
|
||||
|
||||
@ -30,6 +30,7 @@
|
||||
#include "prstrm.h"
|
||||
#include <fstream.h>
|
||||
#include "nsIInputStream.h"
|
||||
#include "nsIParserFilter.h"
|
||||
|
||||
/* UNCOMMENT THIS IF STUFF STOPS WORKING...
|
||||
#ifdef XP_PC
|
||||
@ -47,10 +48,7 @@ static const char* kNullURL = "Error: Null URL given";
|
||||
static const char* kNullFilename= "Error: Null filename given";
|
||||
static const char* kNullTokenizer = "Error: Unable to construct tokenizer";
|
||||
|
||||
static char* gVerificationOutputDir=0;
|
||||
static PRBool gRecordingStatistics=PR_TRUE;
|
||||
static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream
|
||||
static char* gURLRef=0;
|
||||
|
||||
//#define DEBUG_SAVE_SOURCE_DOC 1
|
||||
#ifdef DEBUG_SAVE_SOURCE_DOC
|
||||
@ -58,17 +56,6 @@ fstream* gTempStream=0;
|
||||
#endif
|
||||
|
||||
|
||||
extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir)
|
||||
{
|
||||
gVerificationOutputDir = verify_dir;
|
||||
}
|
||||
|
||||
|
||||
extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval)
|
||||
{
|
||||
gRecordingStatistics = bval;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is defined in nsIParser. It is used to
|
||||
* cause the COM-like construction of an nsHTMLParser.
|
||||
@ -107,6 +94,7 @@ CTokenDeallocator gTokenKiller;
|
||||
*/
|
||||
nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
|
||||
NS_INIT_REFCNT();
|
||||
mParserFilter = nsnull;
|
||||
mListener = nsnull;
|
||||
mTransferBuffer=0;
|
||||
mSink=0;
|
||||
@ -125,11 +113,6 @@ nsHTMLParser::nsHTMLParser() : mTokenDeque(gTokenKiller) {
|
||||
* @return
|
||||
*/
|
||||
nsHTMLParser::~nsHTMLParser() {
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
NS_IF_RELEASE(mListener);
|
||||
if(mTransferBuffer)
|
||||
delete [] mTransferBuffer;
|
||||
@ -139,7 +122,7 @@ nsHTMLParser::~nsHTMLParser() {
|
||||
delete mCurrentPos;
|
||||
mCurrentPos=0;
|
||||
if(mDTD)
|
||||
delete mDTD;
|
||||
NS_RELEASE(mDTD);
|
||||
mDTD=0;
|
||||
if(mScanner)
|
||||
delete mScanner;
|
||||
@ -185,6 +168,18 @@ nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr)
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
nsIParserFilter * nsHTMLParser::SetParserFilter(nsIParserFilter * aFilter)
{
  // Installs aFilter as the parser's raw-data filter, replacing any
  // previous one. Passing null removes the current filter.
  //
  // @param  aFilter  new filter (a reference is added), or null
  // @return the local copy of the previous filter pointer after the
  //         parser's reference to it has been dropped.
  //         NOTE(review): if NS_IF_RELEASE clears its argument this is
  //         always null -- confirm before relying on the value.

  nsIParserFilter* old=mParserFilter;

  // Take the new reference before dropping the old one so that
  // re-installing the current filter cannot destroy it in between.
  if(aFilter) {
    NS_ADDREF(aFilter);
  }

  // Always overwrite the member; otherwise a null aFilter would leave
  // mParserFilter pointing at the filter released below.
  mParserFilter=aFilter;

  NS_IF_RELEASE(old);
  return old;
}
|
||||
|
||||
/**
|
||||
* This method gets called in order to set the content
|
||||
* sink for this parser to dump nodes to.
|
||||
@ -217,6 +212,10 @@ void nsHTMLParser::SetDTD(nsIDTD* aDTD) {
|
||||
mDTD=aDTD;
|
||||
}
|
||||
|
||||
// Returns the DTD pointer currently held by this parser (may be null).
// No reference is added for the caller here.
nsIDTD * nsHTMLParser::GetDTD(void) {
|
||||
return mDTD;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
@ -287,7 +286,7 @@ eParseMode DetermineParseMode() {
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
nsIDTD* GetDTD(eParseMode aMode) {
|
||||
nsIDTD* NewDTD(eParseMode aMode) {
|
||||
nsIDTD* aDTD=0;
|
||||
switch(aMode) {
|
||||
case eParseMode_navigator:
|
||||
@ -297,6 +296,8 @@ nsIDTD* GetDTD(eParseMode aMode) {
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (aDTD)
|
||||
aDTD->AddRef();
|
||||
return aDTD;
|
||||
}
|
||||
|
||||
@ -364,11 +365,6 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
|
||||
nsString theBuffer;
|
||||
const int kLocalBufSize=10;
|
||||
|
||||
if (gURLRef)
|
||||
PL_strfree(gURLRef);
|
||||
if (aFilename)
|
||||
gURLRef = PL_strdup(aFilename);
|
||||
|
||||
mMajorIteration=-1;
|
||||
mMinorIteration=-1;
|
||||
|
||||
@ -417,22 +413,20 @@ PRInt32 nsHTMLParser::ParseFileIncrementally(const char* aFilename){
|
||||
* @param aFilename -- const char* containing file to be parsed.
|
||||
* @return PR_TRUE if parse succeeded, PR_FALSE otherwise.
|
||||
*/
|
||||
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
|
||||
PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug){
|
||||
NS_PRECONDITION(0!=aFilename,kNullFilename);
|
||||
PRInt32 status=kBadFilename;
|
||||
mIncremental=aIncremental;
|
||||
|
||||
if(aFilename) {
|
||||
|
||||
if (gURLRef)
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = PL_strdup(aFilename);
|
||||
|
||||
mParseMode=DetermineParseMode();
|
||||
mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
|
||||
mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
|
||||
if(mDTD) {
|
||||
mDTD->SetParser(this);
|
||||
mDTD->SetContentSink(mSink);
|
||||
mDTD->SetURLRef((char *)aFilename);
|
||||
mDTD->SetParserDebug(aDebug);
|
||||
}
|
||||
|
||||
WillBuildModel();
|
||||
@ -466,7 +460,8 @@ PRBool nsHTMLParser::Parse(const char* aFilename,PRBool aIncremental){
|
||||
*/
|
||||
PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
|
||||
nsIStreamListener* aListener,
|
||||
PRBool aIncremental) {
|
||||
PRBool aIncremental,
|
||||
nsIParserDebug * aDebug) {
|
||||
NS_PRECONDITION(0!=aURL,kNullURL);
|
||||
|
||||
PRInt32 status=kBadURL;
|
||||
@ -485,19 +480,13 @@ PRInt32 nsHTMLParser::Parse(nsIURL* aURL,
|
||||
|
||||
if(aURL) {
|
||||
|
||||
if (gURLRef)
|
||||
{
|
||||
PL_strfree(gURLRef);
|
||||
gURLRef = 0;
|
||||
}
|
||||
if (aURL->GetSpec())
|
||||
gURLRef = PL_strdup(aURL->GetSpec());
|
||||
|
||||
mParseMode=DetermineParseMode();
|
||||
mDTD=(0==mDTD) ? GetDTD(mParseMode) : mDTD;
|
||||
mDTD=(0==mDTD) ? NewDTD(mParseMode) : mDTD;
|
||||
if(mDTD) {
|
||||
mDTD->SetParser(this);
|
||||
mDTD->SetContentSink(mSink);
|
||||
mDTD->SetURLRef((char *)aURL->GetSpec());
|
||||
mDTD->SetParserDebug(aDebug);
|
||||
}
|
||||
|
||||
WillBuildModel();
|
||||
@ -689,6 +678,9 @@ nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length)
|
||||
}
|
||||
#endif
|
||||
|
||||
if (mParserFilter)
|
||||
mParserFilter->RawBuffer(mTransferBuffer, &len);
|
||||
|
||||
mScanner->Append(&mTransferBuffer[offset],len);
|
||||
|
||||
} //if
|
||||
|
||||
@ -73,6 +73,8 @@ class nsIHTMLContentSink;
|
||||
class nsIURL;
|
||||
class nsIDTD;
|
||||
class CScanner;
|
||||
class nsIParserFilter;
|
||||
class nsIParserDebug;
|
||||
|
||||
|
||||
class nsHTMLParser : public nsIParser, public nsIStreamListener {
|
||||
@ -103,8 +105,12 @@ friend class CTokenHandler;
|
||||
* @return old sink, or NULL
|
||||
*/
|
||||
virtual nsIContentSink* SetContentSink(nsIContentSink* aSink);
|
||||
|
||||
virtual nsIParserFilter* SetParserFilter(nsIParserFilter* aFilter);
|
||||
|
||||
virtual void SetDTD(nsIDTD* aDTD);
|
||||
|
||||
virtual nsIDTD * GetDTD(void);
|
||||
|
||||
/**
|
||||
*
|
||||
@ -124,7 +130,8 @@ friend class CTokenHandler;
|
||||
*/
|
||||
virtual PRInt32 Parse(nsIURL* aURL,
|
||||
nsIStreamListener* aListener,
|
||||
PRBool aIncremental=PR_TRUE);
|
||||
PRBool aIncremental=PR_TRUE,
|
||||
nsIParserDebug * aDebug = 0);
|
||||
|
||||
/**
|
||||
* Cause parser to parse input from given file in given mode
|
||||
@ -133,7 +140,7 @@ friend class CTokenHandler;
|
||||
* @param aMode is the desired parser mode (Nav, other, etc.)
|
||||
* @return TRUE if all went well -- FALSE otherwise
|
||||
*/
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental);
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0);
|
||||
|
||||
/**
|
||||
* @update gess5/11/98
|
||||
@ -290,7 +297,8 @@ protected:
|
||||
//*********************************************
|
||||
|
||||
nsIStreamListener* mListener;
|
||||
nsIContentSink* mSink;
|
||||
nsIContentSink* mSink;
|
||||
nsIParserFilter* mParserFilter;
|
||||
|
||||
nsDequeIterator* mCurrentPos;
|
||||
nsDequeIterator* mMarkPos;
|
||||
|
||||
@ -37,6 +37,7 @@
|
||||
class nsIParser;
|
||||
class CToken;
|
||||
class nsIContentSink;
|
||||
class nsIParserDebug;
|
||||
|
||||
class nsIDTD : public nsISupports {
|
||||
|
||||
@ -115,12 +116,28 @@ class nsIDTD : public nsISupports {
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess5/18/98
|
||||
* @param
|
||||
* @update jevering 6/18/98
|
||||
* @param aURLRef is the current URL reference (for debugger)
|
||||
* @return
|
||||
*/
|
||||
virtual PRInt32 Verify(const char* anOutputDir,PRBool aRecordStats)=0;
|
||||
virtual void SetURLRef(char * aURLRef) = 0;
|
||||
|
||||
/**
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aParent parent tag
|
||||
* @param aChild child tag
|
||||
* @return PR_TRUE if valid container
|
||||
*/
|
||||
virtual PRBool CanContain(PRInt32 aParent, PRInt32 aChild) = 0;
|
||||
|
||||
/**
|
||||
*
|
||||
* @update jevering 6/18/98
|
||||
* @param aParserDebug created debug parser object
|
||||
* @return
|
||||
*/
|
||||
virtual void SetParserDebug(nsIParserDebug * aParserDebug) = 0;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -34,6 +34,7 @@ class nsString;
|
||||
class CToken;
|
||||
class nsIURL;
|
||||
class nsIDTD;
|
||||
class nsIParserDebug;
|
||||
|
||||
/**
|
||||
* This class defines the iparser interface. This XPCOM
|
||||
@ -60,9 +61,10 @@ class nsIParser : public nsISupports {
|
||||
|
||||
virtual PRInt32 Parse(nsIURL* aURL,
|
||||
nsIStreamListener* aListener,
|
||||
PRBool aIncremental=PR_TRUE) = 0;
|
||||
PRBool aIncremental=PR_TRUE,
|
||||
nsIParserDebug * aDebug = 0) = 0;
|
||||
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental)=0;
|
||||
virtual PRInt32 Parse(const char* aFilename,PRBool aIncremental, nsIParserDebug * aDebug = 0)=0;
|
||||
|
||||
virtual PRInt32 Parse(nsString& anHTMLString,PRBool appendTokens)=0;
|
||||
|
||||
|
||||
57
mozilla/parser/htmlparser/src/nsIParserDebug.h
Normal file
57
mozilla/parser/htmlparser/src/nsIParserDebug.h
Normal file
@ -0,0 +1,57 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update gess 4/8/98
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef NS_IPARSERDEBUG__
|
||||
#define NS_IPARSERDEBUG__
|
||||
|
||||
#include "nsISupports.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "prtypes.h"
|
||||
|
||||
#define NS_IPARSERDEBUG_IID \
|
||||
{0x7b68c220, 0x0685, 0x11d2, \
|
||||
{0xa4, 0xb5, 0x00, 0x80, 0x5f, 0x2a, 0x0e, 0xd2}}
|
||||
|
||||
|
||||
class nsIDTD;
|
||||
class nsHTMLParser;
|
||||
|
||||
// Abstract interface of the parser debug ("robot") object that a caller
// may hand to the parser's Parse() methods. Instances are created with
// NS_NewParserDebug().
class nsIParserDebug : public nsISupports {
|
||||
|
||||
public:
|
||||
|
||||
// Sets the directory under which verification output is written.
virtual void SetVerificationDirectory(char * verify_dir) = 0;
|
||||
|
||||
// Turns the recording of vector statistics on or off.
virtual void SetRecordStatistics(PRBool bval) = 0;
|
||||
|
||||
// Verifies the given context stack (ContextStackPos entries of
// aContextStack) for the document at aURLRef.
// NOTE(review): return value presumably means "vector is valid" -- confirm
// against the implementation.
virtual PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef) = 0;
|
||||
|
||||
// Dumps the collected vector statistics.
// NOTE(review): output location is decided by the implementation -- confirm.
virtual void DumpVectorRecord(void) = 0;
|
||||
|
||||
};
|
||||
|
||||
extern NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult);
|
||||
|
||||
#endif /* NS_IPARSERDEBUG__ */
|
||||
51
mozilla/parser/htmlparser/src/nsIParserFilter.h
Normal file
51
mozilla/parser/htmlparser/src/nsIParserFilter.h
Normal file
@ -0,0 +1,51 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update jevering 6/17/98
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef IPARSERFILTER
|
||||
#define IPARSERFILTER
|
||||
|
||||
#include "nsISupports.h"
|
||||
|
||||
class CToken;
|
||||
|
||||
#define NS_IPARSERFILTER_IID \
|
||||
{0x14d6ff0, 0x0610, 0x11d2, \
|
||||
{0x8c, 0x3f, 0x00, 0x80, 0x5f, 0x8a, 0x1d, 0xb7}}
|
||||
|
||||
|
||||
// Abstract interface of a filter that can be installed on the parser
// with SetParserFilter().
class nsIParserFilter : public nsISupports {
|
||||
public:
|
||||
|
||||
// Called with a raw data buffer and a pointer to its length; both are
// passed by address.
// NOTE(review): presumably the filter may edit the data in place and
// update the length -- confirm.
NS_IMETHOD RawBuffer(char * buffer, int * buffer_length) = 0;
|
||||
|
||||
// NOTE(review): presumably invoked before a token is added to the
// parser's token list -- no caller is visible here, confirm.
NS_IMETHOD WillAddToken(CToken & token) = 0;
|
||||
|
||||
// Signature is a placeholder (see the inline remark in the parameter list).
NS_IMETHOD ProcessTokens( /* dont know what goes here yet */ void ) = 0;
|
||||
};
|
||||
|
||||
extern nsresult NS_NewParserFilter(nsIParserFilter** aInstancePtrResult);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
534
mozilla/parser/htmlparser/src/nsParserDebug.cpp
Normal file
534
mozilla/parser/htmlparser/src/nsParserDebug.cpp
Normal file
@ -0,0 +1,534 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* @update jevering 06/18/98
|
||||
*
|
||||
* This file contains the parser debugger object which aids in
|
||||
* walking links and reporting statistic information, reporting
|
||||
* bad vectors.
|
||||
*/
|
||||
|
||||
#include "CNavDTD.h"
|
||||
#include "nsHTMLTokens.h"
|
||||
#include "nsHTMLParser.h"
|
||||
#include "nsIParserDebug.h"
|
||||
#include "nsCRT.h"
|
||||
#include "prenv.h" //this is here for debug reasons...
|
||||
#include "prtypes.h" //this is here for debug reasons...
|
||||
#include "prio.h"
|
||||
#include "plstr.h"
|
||||
#include "prstrm.h"
|
||||
#include <fstream.h>
|
||||
#include <time.h>
|
||||
#include "prmem.h"
|
||||
|
||||
#define CONTEXT_VECTOR_MAP "/vector.map"
|
||||
#define CONTEXT_VECTOR_STAT "/vector.stat"
|
||||
#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n"
|
||||
|
||||
// structure to store the vector statistic information
|
||||
|
||||
// One entry of the vector statistics table: a context vector (an array
// of tags) together with how often it was seen and whether it was valid.
typedef struct vector_info {
|
||||
PRInt32 references; // number of occurrences counted
|
||||
PRInt32 count; // number of tags in the vector
|
||||
PRBool good_vector; // is this a valid vector?
|
||||
eHTMLTags* vector; // and the vector
|
||||
} VectorInfo;
|
||||
|
||||
// the statistic vector table grows each time it exceeds this
|
||||
// stepping value
|
||||
#define TABLE_SIZE 128
|
||||
|
||||
// Concrete implementation of nsIParserDebug. Each instance keeps its own
// vector statistics table, verification directory and recording flag.
class CParserDebug : public nsIParserDebug {
|
||||
public:
|
||||
|
||||
// aVerifyDir: verification directory; when null the VERIFY_PARSER
// environment variable is consulted instead.
CParserDebug(char * aVerifyDir = 0);
|
||||
~CParserDebug();
|
||||
|
||||
NS_DECL_ISUPPORTS
|
||||
|
||||
// nsIParserDebug interface
void SetVerificationDirectory(char * verify_dir);
|
||||
void SetRecordStatistics(PRBool bval);
|
||||
PRBool Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int ContextStackPos, eHTMLTags aContextStack[], char * aURLRef);
|
||||
void DumpVectorRecord(void);
|
||||
|
||||
// per-instance table for storing vector statistics and its size
|
||||
|
||||
private:
|
||||
VectorInfo ** mVectorInfoArray; // table of vector records
|
||||
PRInt32 mVectorCount; // number of records in the table
|
||||
char * mVerificationDir; // owned copy of the verification directory, or null
|
||||
PRBool mRecordingStatistics; // whether statistics are recorded
|
||||
|
||||
// Records a bad vector in the map file; returns PR_TRUE if it was already there.
PRBool DebugRecord(char * path, char * pURLRef, char * filename);
|
||||
void NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector);
|
||||
void MakeVectorString(char * vector_string, VectorInfo * pInfo);
|
||||
};
|
||||
|
||||
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
||||
static NS_DEFINE_IID(kIDebugParserIID, NS_IPARSERDEBUG_IID);
|
||||
|
||||
/**
|
||||
* This function is declared in nsIParserDebug.h. It is used to
|
||||
* cause the COM-like construction of a CParserDebug object.
|
||||
*
|
||||
* @update jevering 3/25/98
|
||||
* @param nsIParserDebug** ptr to newly instantiated parser debug object
|
||||
* @return NS_xxx error result
|
||||
*/
|
||||
|
||||
NS_EXPORT nsresult NS_NewParserDebug(nsIParserDebug** aInstancePtrResult)
{
  // Creates a CParserDebug and hands it back through aInstancePtrResult
  // with one reference held for the caller (added by QueryInterface).
  //
  // @return NS_ERROR_NULL_POINTER for a null out-parameter,
  //         NS_ERROR_OUT_OF_MEMORY when allocation fails, otherwise the
  //         QueryInterface result.

  // Reject a null out-parameter before allocating: QueryInterface would
  // fail on it and the fresh object would be leaked.
  if (0 == aInstancePtrResult) {
    return NS_ERROR_NULL_POINTER;
  }

  CParserDebug *it = new CParserDebug();

  if (it == 0) {
    *aInstancePtrResult = 0;
    return NS_ERROR_OUT_OF_MEMORY;
  }

  nsresult rv = it->QueryInterface(kIDebugParserIID, (void **)aInstancePtrResult);
  if (NS_OK != rv) {
    // nobody took a reference, so the object is still ours to destroy
    delete it;
  }
  return rv;
}
|
||||
|
||||
CParserDebug::CParserDebug(char * aVerifyDir)
{
  NS_INIT_REFCNT();

  // start with an empty statistics table and recording switched on
  mVectorInfoArray = 0;
  mVectorCount = 0;
  mRecordingStatistics = PR_TRUE;

  // An explicit directory wins; otherwise fall back to the
  // VERIFY_PARSER environment variable. The object keeps its own copy.
  const char * dirSource = aVerifyDir;
  if (!dirSource)
    dirSource = PR_GetEnv("VERIFY_PARSER");

  mVerificationDir = dirSource ? PL_strdup(dirSource) : 0;
}
|
||||
|
||||
// Releases the owned copy of the verification directory string.
// NOTE(review): mVectorInfoArray is not freed here -- presumably
// DumpVectorRecord() releases it; verify, otherwise the table leaks.
CParserDebug::~CParserDebug()
|
||||
{
|
||||
if (mVerificationDir)
|
||||
PL_strfree(mVerificationDir);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method gets called as part of our COM-like interfaces.
|
||||
* Its purpose is to create an interface to parser object
|
||||
* of some type.
|
||||
*
|
||||
* @update gess 4/8/98
|
||||
* @param nsIID id of object to discover
|
||||
* @param aInstancePtr ptr to newly discovered interface
|
||||
* @return NS_xxx result code
|
||||
*/
|
||||
nsresult CParserDebug::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
  if (NULL == aInstancePtr)
    return NS_ERROR_NULL_POINTER;

  // nsISupports and nsIParserDebug are the only interfaces offered,
  // and both resolve to the same pointer.
  const PRBool isSupported = aIID.Equals(kISupportsIID) ||
                             aIID.Equals(kIDebugParserIID);
  if (!isSupported) {
    *aInstancePtr=0;
    return NS_NOINTERFACE;
  }

  *aInstancePtr = (nsIParserDebug*)(this);

  // the caller receives its own reference
  ((nsISupports*) *aInstancePtr)->AddRef();
  return NS_OK;
}
|
||||
|
||||
NS_IMPL_ADDREF(CParserDebug)
|
||||
NS_IMPL_RELEASE(CParserDebug)
|
||||
|
||||
void CParserDebug::SetVerificationDirectory(char * verify_dir)
|
||||
{
|
||||
if (mVerificationDir) {
|
||||
PL_strfree(mVerificationDir);
|
||||
mVerificationDir = 0;
|
||||
}
|
||||
mVerificationDir = PL_strdup(verify_dir);
|
||||
}
|
||||
|
||||
// Stores the flag that says whether vector statistics are recorded.
void CParserDebug::SetRecordStatistics(PRBool bval)
|
||||
{
|
||||
mRecordingStatistics = bval;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug method records an invalid context vector and its
|
||||
* associated context vector and URL in a simple flat file mapping which
|
||||
* resides in the verification directory and is named vector.map
|
||||
*
|
||||
* @update jevering 6/06/98
|
||||
* @param path is the directory structure indicating the bad context vector
|
||||
* @param pURLRef is the associated URL
|
||||
* @param filename to record mapping to if not already recorded
|
||||
* @return TRUE if it is already recorded (don't re-record)
|
||||
*/
|
||||
|
||||
PRBool CParserDebug::DebugRecord(char * path, char * pURLRef, char * filename)
{
  // Without a verification directory or complete arguments there is
  // nothing to look up; report "not recorded".
  if (!mVerificationDir || !path || !pURLRef || !filename)
    return PR_FALSE;

  // create the record file name from the verification directory
  // and the default name; sizeof() of the literal covers the NUL.
  char * recordPath = (char*)PR_Malloc(PL_strlen(mVerificationDir) + sizeof(CONTEXT_VECTOR_MAP));
  if (!recordPath)
    return PR_FALSE;
  strcpy(recordPath,mVerificationDir);
  strcat(recordPath,CONTEXT_VECTOR_MAP);

  // if the file exists, only open for read/write
  // otherwise, create it
  PRIntn oflags = PR_RDWR;
  if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS)
    oflags |= PR_CREATE_FILE;

  // open the record file
  // NOTE(review): a creation mode of 0 is kept from the original; confirm
  // it yields a usable file on platforms that honour the mode bits.
  PRFileDesc * recordFile = PR_Open(recordPath,oflags,0);
  PR_Free(recordPath);
  if (!recordFile)
    return PR_FALSE;

  // vectors are stored in the format "URL vector filename"
  // where the vector contains the verification path and
  // the filename contains the debug source dump. The "URL vector"
  // part is the key that identifies an occurrence.
  PRUint32 keyLen = PL_strlen(pURLRef) + 1 + PL_strlen(path);
  char * entry = (char *)PR_Malloc(keyLen + 1 + PL_strlen(filename) + 2 + 1);
  if (!entry) {
    PR_Close(recordFile);
    return PR_FALSE;
  }
  sprintf(entry,"%s %s %s\r\n", pURLRef, path, filename);

  PRBool found = PR_FALSE;

  // get the file size, read in the file and check it a line at
  // a time to see if we have already recorded this occurrence.
  // PR_Seek reports failure as a negative value, hence "> 0".
  PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END);
  if (iSize > 0) {
    char * buffer = (char*)PR_Malloc(iSize);
    if (buffer) {
      // beginning of file for read
      PR_Seek(recordFile,0,PR_SEEK_SET);
      PRInt32 bytesRead = PR_Read(recordFile,buffer,iSize);

      // Lines end in "\r\n". A line matches when it starts with our key
      // followed by the blank that separates it from the filename.
      PRInt32 lineStart = 0;
      PRInt32 pos;
      for (pos = 0; pos < bytesRead && !found; pos++) {
        if (buffer[pos] != '\r')
          continue;

        PRUint32 lineLen = (PRUint32)(pos - lineStart);
        if (lineLen > keyLen &&
            !PL_strncmp(buffer + lineStart, entry, keyLen + 1))
          found = PR_TRUE;

        // skip the newline that follows the carriage return
        pos++;
        lineStart = pos + 1;
      }

      // throw away the record file data
      PR_Free(buffer);
    }
  }

  // if this bad vector was not recorded, add it to record file
  if (!found) {
    PR_Seek(recordFile,0,PR_SEEK_END);
    PR_Write(recordFile,entry,PL_strlen(entry));
  }

  // every path from here on releases the file and the entry
  PR_Close(recordFile);
  PR_Free(entry);

  return found;
}
|
||||
|
||||
/**
|
||||
* compare function for quick sort. Compares references and
|
||||
* sorts in decending order
|
||||
*/
|
||||
|
||||
static int compare( const void *arg1, const void *arg2 )
|
||||
{
|
||||
VectorInfo ** p1 = (VectorInfo**)arg1;
|
||||
VectorInfo ** p2 = (VectorInfo**)arg2;
|
||||
return (*p2)->references - (*p1)->references;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routines stores statistical information about a
|
||||
* context vector. The context vector statistics are stored in
|
||||
* a global array. The table is resorted each time it grows to
|
||||
* aid in lookup speed. If a vector has already been noted, its
|
||||
* reference count is bumped, otherwise it is added to the table
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param aTags is the tag list (vector)
|
||||
* @param count is the size of the vector
|
||||
* @return
|
||||
*/
|
||||
|
||||
void CParserDebug::NoteVector(eHTMLTags aTags[],PRInt32 count, PRBool good_vector)
|
||||
{
|
||||
// if the table doesn't exist, create it
|
||||
if (!mVectorInfoArray) {
|
||||
mVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*));
|
||||
}
|
||||
else {
|
||||
// attempt to look up the vector
|
||||
for (PRInt32 i = 0; i < mVectorCount; i++)
|
||||
|
||||
// check the vector only if they are the same size, if they
|
||||
// match then just return without doing further work
|
||||
if (mVectorInfoArray[i]->count == count)
|
||||
if (!memcmp(mVectorInfoArray[i]->vector, aTags, sizeof(eHTMLTags)*count)) {
|
||||
|
||||
// bzzzt. and we have a winner.. bump the ref count
|
||||
mVectorInfoArray[i]->references++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// the context vector hasn't been noted, so allocate it and
|
||||
// initialize it one.. add it to the table
|
||||
VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo));
|
||||
pVectorInfo->references = 1;
|
||||
pVectorInfo->count = count;
|
||||
pVectorInfo->good_vector = good_vector;
|
||||
pVectorInfo->vector = (eHTMLTags*)PR_Malloc(count*sizeof(eHTMLTags));
|
||||
memcpy(pVectorInfo->vector,aTags,sizeof(eHTMLTags)*count);
|
||||
mVectorInfoArray[mVectorCount++] = pVectorInfo;
|
||||
|
||||
// have we maxed out the table? grow it.. sort it.. love it.
|
||||
if ((mVectorCount % TABLE_SIZE) == 0) {
|
||||
mVectorInfoArray = (VectorInfo**)realloc(
|
||||
mVectorInfoArray,
|
||||
(sizeof(VectorInfo*)*((mVectorCount/TABLE_SIZE)+1)*TABLE_SIZE));
|
||||
if (mVectorCount) {
|
||||
qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CParserDebug::MakeVectorString(char * vector_string, VectorInfo * pInfo)
|
||||
{
|
||||
sprintf (vector_string, "%6d ", pInfo->references);
|
||||
for (PRInt32 j = 0; j < pInfo->count; j++) {
|
||||
PL_strcat(vector_string, "<");
|
||||
PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j]));
|
||||
PL_strcat(vector_string, ">");
|
||||
}
|
||||
PL_strcat(vector_string,"\r\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine dumps out the vector statistics to a text
|
||||
* file in the verification directory and defaults to the name
|
||||
* "vector.stat". It contains all parsed context vectors and there
|
||||
* occurance count sorted in decending order.
|
||||
*
|
||||
* @update jevering 6/11/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
|
||||
void CParserDebug::DumpVectorRecord(void)
|
||||
{
|
||||
// do we have a table?
|
||||
if (mVectorCount) {
|
||||
|
||||
// hopefully, they wont exceed 1K.
|
||||
char vector_string[1024];
|
||||
char path[1024];
|
||||
|
||||
path[0] = '\0';
|
||||
|
||||
// put in the verification directory.. else the root
|
||||
if (mVerificationDir)
|
||||
strcpy(path,mVerificationDir);
|
||||
|
||||
strcat(path,CONTEXT_VECTOR_STAT);
|
||||
|
||||
// open the stat file creaming any existing stat file
|
||||
PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0);
|
||||
if (statisticFile) {
|
||||
|
||||
PRInt32 i;
|
||||
PRofstream ps;
|
||||
ps.attach(statisticFile);
|
||||
|
||||
// oh what the heck, sort it again
|
||||
if (mVectorCount) {
|
||||
qsort((void*)mVectorInfoArray,(size_t)mVectorCount,sizeof(VectorInfo*),compare);
|
||||
}
|
||||
|
||||
// cute little header
|
||||
sprintf(vector_string,"Context vector occurance results. Processed %d unique vectors.\r\n\r\n", mVectorCount);
|
||||
ps << vector_string;
|
||||
|
||||
ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// dump out the bad vectors encountered
|
||||
for (i = 0; i < mVectorCount; i++) {
|
||||
if (!mVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, mVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
}
|
||||
|
||||
ps << "\r\n\r\nValid context vector summary\r\n";
|
||||
ps << VECTOR_TABLE_HEADER;
|
||||
|
||||
// take a big vector table dump (good vectors)
|
||||
for (i = 0; i < mVectorCount; i++) {
|
||||
if (mVectorInfoArray[i]->good_vector) {
|
||||
MakeVectorString(vector_string, mVectorInfoArray[i]);
|
||||
ps << vector_string;
|
||||
}
|
||||
// free em up. they mean nothing to me now (I'm such a user)
|
||||
|
||||
if (mVectorInfoArray[i]->vector)
|
||||
PR_Free(mVectorInfoArray[i]->vector);
|
||||
PR_Free(mVectorInfoArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, we are done with the table, free it up as well
|
||||
PR_Free(mVectorInfoArray);
|
||||
mVectorInfoArray = 0;
|
||||
mVectorCount = 0;
|
||||
PR_Close(statisticFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug method allows us to determine whether or not
|
||||
* we've seen (and can handle) the given context vector.
|
||||
*
|
||||
* @update gess4/22/98
|
||||
* @param tags is an array of eHTMLTags
|
||||
* @param count represents the number of items in the tags array
|
||||
* @param aDTD is the DTD we plan to ask for verification
|
||||
* @return TRUE if we know how to handle it, else false
|
||||
*/
|
||||
|
||||
PRBool CParserDebug::Verify(nsIDTD * aDTD, nsHTMLParser * aParser, int aContextStackPos, eHTMLTags aContextStack[], char * aURLRef)
|
||||
{
|
||||
PRBool result=PR_TRUE;
|
||||
|
||||
//ok, now see if we understand this vector
|
||||
|
||||
if(0!=mVerificationDir || mRecordingStatistics) {
|
||||
|
||||
if(aDTD && aContextStackPos>1) {
|
||||
for (int i = 0; i < aContextStackPos-1; i++)
|
||||
if (!aDTD->CanContain(aContextStack[i],aContextStack[i+1])) {
|
||||
result = PR_FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mRecordingStatistics) {
|
||||
NoteVector(aContextStack,aContextStackPos,result);
|
||||
}
|
||||
|
||||
if(0!=mVerificationDir) {
|
||||
char path[2048];
|
||||
strcpy(path,mVerificationDir);
|
||||
|
||||
int i=0;
|
||||
for(i=0;i<aContextStackPos;i++){
|
||||
strcat(path,"/");
|
||||
const char* name=GetTagName(aContextStack[i]);
|
||||
strcat(path,name);
|
||||
PR_MkDir(path,0);
|
||||
}
|
||||
if(PR_FALSE==result){
|
||||
static PRBool rnd_initialized = PR_FALSE;
|
||||
|
||||
if (!rnd_initialized) {
|
||||
// seed randomn number generator to aid in temp file
|
||||
// creation.
|
||||
rnd_initialized = PR_TRUE;
|
||||
srand((unsigned)time(NULL));
|
||||
}
|
||||
|
||||
// generate a filename to dump the html source into
|
||||
char filename[1024];
|
||||
do {
|
||||
// use system time to generate a temporary file name
|
||||
time_t ltime;
|
||||
time (<ime);
|
||||
// add in random number so that we can create uniques names
|
||||
// faster than simply every second.
|
||||
ltime += (time_t)rand();
|
||||
sprintf(filename,"%s/%lX.html", path, ltime);
|
||||
// try until we find one we can create
|
||||
} while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS);
|
||||
|
||||
// check to see if we already recorded an instance of this particular
|
||||
// bad vector.
|
||||
if (!DebugRecord(path, aURLRef, filename))
|
||||
{
|
||||
// save file to directory indicated by bad context vector
|
||||
PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0);
|
||||
// if we were able to open the debug file, then
|
||||
// write the true URL at the top of the file.
|
||||
if (debugFile) {
|
||||
// dump the html source into the newly created file.
|
||||
PRofstream ps;
|
||||
ps.attach(debugFile);
|
||||
if (aParser)
|
||||
aParser->DebugDumpSource(ps);
|
||||
PR_Close(debugFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
327
mozilla/parser/htmlparser/src/nsTokenizer.cpp
Normal file
327
mozilla/parser/htmlparser/src/nsTokenizer.cpp
Normal file
@ -0,0 +1,327 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
|
||||
#include <fstream.h>
|
||||
#include "nsTokenizer.h"
|
||||
#include "nsToken.h"
|
||||
#include "nsScanner.h"
|
||||
#include "nsIParserFilter.h"
|
||||
#include "nsIURL.h"
|
||||
|
||||
static void TokenFreeProc(void * pToken)
|
||||
{
|
||||
if (pToken!=NULL) {
|
||||
CToken * pCToken = (CToken*)pToken;
|
||||
delete pCToken;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
|
||||
mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
mParserFilter = aIFilter;
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aURL,aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
|
||||
mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
mParserFilter = aIFilter;
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aFilename,aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aFilename -- name of file to be tokenized
|
||||
* @param aDelegate -- ref to delegate to be used to tokenize
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode, nsIParserFilter * aIFilter) :
|
||||
mTokenDeque(PR_TRUE,TokenFreeProc) {
|
||||
mParserFilter = aIFilter;
|
||||
mDelegate=aDelegate;
|
||||
mScanner=new CScanner(aMode);
|
||||
mParseMode=aMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* default destructor
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
CTokenizer::~CTokenizer() {
|
||||
delete mScanner;
|
||||
mDelegate->Destroy();
|
||||
mScanner=0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool CTokenizer::Append(nsString& aBuffer) {
|
||||
if(mScanner)
|
||||
return mScanner->Append(aBuffer);
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/21/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool CTokenizer::Append(const char* aBuffer, PRInt32 aLen){
|
||||
if(mScanner)
|
||||
return mScanner->Append(aBuffer,aLen);
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve a reference to the internal token deque.
|
||||
*
|
||||
* @update gess 4/20/98
|
||||
* @return deque reference
|
||||
*/
|
||||
nsDeque& CTokenizer::GetDeque(void) {
|
||||
return mTokenDeque;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cause the tokenizer to consume the next token, and
|
||||
* return an error result.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param anError -- ref to error code
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 CTokenizer::GetToken(CToken*& aToken) {
|
||||
PRInt32 result=mDelegate->GetToken(*mScanner,aToken);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the number of elements in the deque
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return int containing element count
|
||||
*/
|
||||
PRInt32 CTokenizer::GetSize(void) {
|
||||
return mTokenDeque.GetSize();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Part of the code sandwich, this gets called right before
|
||||
* the tokenization process begins. The main reason for
|
||||
* this call is to allow the delegate to do initialization.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRBool CTokenizer::WillTokenize(PRBool aIncremental){
|
||||
PRBool result=PR_TRUE;
|
||||
result=mDelegate->WillTokenize(aIncremental);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 CTokenizer::Tokenize(nsString& aSourceBuffer,PRBool appendTokens){
|
||||
CToken* theToken=0;
|
||||
PRInt32 result=kNoError;
|
||||
|
||||
WillTokenize(PR_TRUE);
|
||||
|
||||
while(kNoError==result) {
|
||||
result=GetToken(theToken);
|
||||
if(theToken && (kNoError==result)) {
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
theToken->DebugDumpToken(cout);
|
||||
#endif
|
||||
|
||||
PRBool bWillAdd = PR_TRUE;
|
||||
if (mParserFilter)
|
||||
bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken);
|
||||
if(bWillAdd && mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
}
|
||||
else if (theToken)
|
||||
delete theToken;
|
||||
}
|
||||
if(kEOF==result)
|
||||
result=kNoError;
|
||||
DidTokenize(PR_TRUE);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the primary control routine. It iteratively
|
||||
* consumes tokens until an error occurs or you run out
|
||||
* of data.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return error code
|
||||
*/
|
||||
PRInt32 CTokenizer::Tokenize(int anIteration) {
|
||||
CToken* theToken=0;
|
||||
PRInt32 result=kNoError;
|
||||
PRBool done=(0==anIteration) ? (!WillTokenize(PR_TRUE)) : PR_FALSE;
|
||||
|
||||
|
||||
while((PR_FALSE==done) && (kNoError==result)) {
|
||||
mScanner->Mark();
|
||||
result=GetToken(theToken);
|
||||
if(kNoError==result) {
|
||||
if(theToken) {
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
theToken->DebugDumpToken(cout);
|
||||
#endif
|
||||
|
||||
PRBool bWillAdd = PR_TRUE;
|
||||
if (mParserFilter)
|
||||
bWillAdd = (PRBool)mParserFilter->WillAddToken(*theToken);
|
||||
if(bWillAdd && mDelegate->WillAddToken(*theToken)) {
|
||||
mTokenDeque.Push(theToken);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
if(theToken)
|
||||
delete theToken;
|
||||
mScanner->RewindToMark();
|
||||
}
|
||||
}
|
||||
if((PR_TRUE==done) && (kInterrupted!=result))
|
||||
DidTokenize(PR_TRUE);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the tail-end of the code sandwich for the
|
||||
* tokenization process. It gets called once tokenziation
|
||||
* has completed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return TRUE if all went well
|
||||
*/
|
||||
PRBool CTokenizer::DidTokenize(PRBool aIncremental) {
|
||||
PRBool result=mDelegate->DidTokenize(aIncremental);
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
DebugDumpTokens(cout);
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void CTokenizer::DebugDumpTokens(ostream& out) {
|
||||
nsDequeIterator b=mTokenDeque.Begin();
|
||||
nsDequeIterator e=mTokenDeque.End();
|
||||
|
||||
CToken* theToken;
|
||||
while(b!=e) {
|
||||
theToken=(CToken*)(b++);
|
||||
theToken->DebugDumpToken(out);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void CTokenizer::DebugDumpSource(ostream& out) {
|
||||
nsDequeIterator b=mTokenDeque.Begin();
|
||||
nsDequeIterator e=mTokenDeque.End();
|
||||
|
||||
CToken* theToken;
|
||||
while(b!=e) {
|
||||
theToken=(CToken*)(b++);
|
||||
theToken->DebugDumpSource(out);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void CTokenizer::SelfTest(void) {
|
||||
#ifdef _DEBUG
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
185
mozilla/parser/htmlparser/src/nsTokenizer.h
Normal file
185
mozilla/parser/htmlparser/src/nsTokenizer.h
Normal file
@ -0,0 +1,185 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.0 (the "NPL"); you may not use this file except in
|
||||
* compliance with the NPL. You may obtain a copy of the NPL at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
||||
* for the specific language governing rights and limitations under the
|
||||
* NPL.
|
||||
*
|
||||
* The Initial Developer of this code under the NPL is Netscape
|
||||
* Communications Corporation. Portions created by Netscape are
|
||||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* MODULE NOTES:
|
||||
* LAST MODS: gess 28Feb98
|
||||
*
|
||||
* This file declares the basic tokenizer class. The
|
||||
* central theme of this class is to control and
|
||||
* coordinate a tokenization process. Note that this
|
||||
* class is grammar-neutral: this class doesn't care
|
||||
* at all what the underlying stream consists of.
|
||||
*
|
||||
* The main purpose of this class is to iterate over an
|
||||
* input stream with the help of a given scanner and a
|
||||
* given type-specific tokenizer-Delegate.
|
||||
*
|
||||
* The primary method here is the tokenize() method, which
|
||||
* simply loops calling GetToken() until an EOF condition
|
||||
* (or some other error) occurs.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef TOKENIZER
|
||||
#define TOKENIZER
|
||||
|
||||
#include "nsToken.h"
|
||||
#include "nsITokenizerDelegate.h"
|
||||
#include "nsDeque.h"
|
||||
#include <iostream.h>
|
||||
|
||||
class CScanner;
|
||||
class nsIURL;
|
||||
class nsIParserFilter;
|
||||
|
||||
class CTokenizer {
|
||||
public:
|
||||
|
||||
CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
|
||||
CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
|
||||
CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode=eParseMode_navigator, nsIParserFilter * aIFilter = 0);
|
||||
|
||||
~CTokenizer();
|
||||
|
||||
/**
|
||||
* This method incrementally tokenizes as much content as
|
||||
* it can get its hands on.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 Tokenize(int anIteration); //your friendly incremental version
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return TRUE if it's ok to proceed
|
||||
*/
|
||||
PRInt32 Tokenize(nsString& aSourceBuffer,PRBool appendTokens=PR_TRUE);
|
||||
|
||||
/**
|
||||
* Cause the tokenizer to consume the next token, and
|
||||
* return an error result.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param anError -- ref to error code
|
||||
* @return new token or null
|
||||
*/
|
||||
PRInt32 GetToken(CToken*& aToken);
|
||||
|
||||
/**
|
||||
* Retrieve the number of elements in the deque
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @return int containing element count
|
||||
*/
|
||||
PRInt32 GetSize(void);
|
||||
|
||||
/**
|
||||
* Retrieve a reference to the internal token deque.
|
||||
*
|
||||
* @update gess 4/20/98
|
||||
* @return deque reference
|
||||
*/
|
||||
nsDeque& GetDeque(void);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 4/20/98
|
||||
* @return deque reference
|
||||
*/
|
||||
PRBool Append(nsString& aBuffer);
|
||||
|
||||
/**
|
||||
*
|
||||
* @update gess 4/20/98
|
||||
* @return deque reference
|
||||
*/
|
||||
PRBool Append(const char* aBuffer, PRInt32 aLen);
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @update gess 5/13/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
PRBool SetBuffer(nsString& aBuffer);
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void DebugDumpSource(ostream& out);
|
||||
|
||||
/**
|
||||
* This debug routine is used to cause the tokenizer to
|
||||
* iterate its token list, asking each token to dump its
|
||||
* contents to the given output stream.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void DebugDumpTokens(ostream& out);
|
||||
|
||||
static void SelfTest();
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* This is the front-end of the code sandwich for the
|
||||
* tokenization process. It gets called once just before
|
||||
* tokenziation begins.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aIncremental tells us if tokenization is incremental
|
||||
* @return TRUE if all went well
|
||||
*/
|
||||
PRBool WillTokenize(PRBool aIncremental);
|
||||
|
||||
|
||||
/**
|
||||
* This is the tail-end of the code sandwich for the
|
||||
* tokenization process. It gets called once tokenziation
|
||||
* has completed.
|
||||
*
|
||||
* @update gess 3/25/98
|
||||
* @param aIncremental tells us if tokenization was incremental
|
||||
* @return TRUE if all went well
|
||||
*/
|
||||
PRBool DidTokenize(PRBool aIncremental);
|
||||
|
||||
ITokenizerDelegate* mDelegate;
|
||||
CScanner* mScanner;
|
||||
nsDeque mTokenDeque;
|
||||
eParseMode mParseMode;
|
||||
nsIParserFilter* mParserFilter;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user