diff --git a/mozilla/htmlparser/src/CNavDTD.cpp b/mozilla/htmlparser/src/CNavDTD.cpp index ecfe599d4bb..e3301a0e259 100644 --- a/mozilla/htmlparser/src/CNavDTD.cpp +++ b/mozilla/htmlparser/src/CNavDTD.cpp @@ -842,8 +842,12 @@ PRInt32 CNavDTD::GetDefaultParentTagFor(PRInt32 aTag) const{ PRBool CNavDTD::VerifyContextVector(PRInt32* aVector,PRInt32 aCount) const { PRBool result=PR_TRUE; - if(aCount>0) { - + if(aCount>1) { + for (int i = 0; i < aCount-1; i++) + if (!CanContain(aVector[i],aVector[i+1])) { + result = PR_FALSE; + break; + } } return result; } diff --git a/mozilla/htmlparser/src/CNavDelegate.cpp b/mozilla/htmlparser/src/CNavDelegate.cpp index 0eb2be51551..44044201c98 100644 --- a/mozilla/htmlparser/src/CNavDelegate.cpp +++ b/mozilla/htmlparser/src/CNavDelegate.cpp @@ -31,6 +31,13 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} /** * Default constructor @@ -40,7 +47,7 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY * @return */ CNavDelegate::CNavDelegate() : - ITokenizerDelegate(), mTokenDeque() { + ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) { } /** diff --git a/mozilla/htmlparser/src/COtherDelegate.cpp b/mozilla/htmlparser/src/COtherDelegate.cpp index 1728b49dee3..321c2ee1c0b 100644 --- a/mozilla/htmlparser/src/COtherDelegate.cpp +++ b/mozilla/htmlparser/src/COtherDelegate.cpp @@ -32,6 +32,14 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} + /** * Default constructor * @@ -40,7 +48,7 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY * @return */ COtherDelegate::COtherDelegate() : - ITokenizerDelegate(), mTokenDeque() { + 
ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) { } /** diff --git a/mozilla/htmlparser/src/makefile.win b/mozilla/htmlparser/src/makefile.win index 348e3a4f1ca..58213d2e54c 100644 --- a/mozilla/htmlparser/src/makefile.win +++ b/mozilla/htmlparser/src/makefile.win @@ -28,7 +28,7 @@ CPPSRCS=nsHTMLContentSink.cpp \ nsToken.cpp nsTokenizer.cpp nsTokenHandler.cpp \ CNavDTD.cpp CNavDelegate.cpp \ COtherDTD.cpp COtherDelegate.cpp \ - nsHTMLParser.cpp + nsHTMLParser.cpp prstrm.cpp EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \ nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h @@ -39,7 +39,8 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \ .\$(OBJDIR)\nsHTMLParser.obj \ .\$(OBJDIR)\nsHTMLTokens.obj .\$(OBJDIR)\nsParserNode.obj \ .\$(OBJDIR)\nsScanner.obj .\$(OBJDIR)\nsToken.obj \ - .\$(OBJDIR)\nsTokenizer.obj .\$(OBJDIR)\nsTokenHandler.obj + .\$(OBJDIR)\nsTokenizer.obj .\$(OBJDIR)\nsTokenHandler.obj \ + .\$(OBJDIR)\prstrm.obj LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib diff --git a/mozilla/htmlparser/src/nsHTMLParser.cpp b/mozilla/htmlparser/src/nsHTMLParser.cpp index 59eaf1d06f6..ba375429ef9 100644 --- a/mozilla/htmlparser/src/nsHTMLParser.cpp +++ b/mozilla/htmlparser/src/nsHTMLParser.cpp @@ -33,10 +33,13 @@ #include "prio.h" #include "plstr.h" #include +#include "prstrm.h" #include "nsIInputStream.h" #ifdef XP_PC #include //this is here for debug reasons... 
#endif +#include +#include "prmem.h" static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID); @@ -49,10 +52,20 @@ static const char* kNullToken = "Error: Null token given"; static const char* kInvalidTagStackPos = "Error: invalid tag stack position"; static char* gVerificationOutputDir=0; +static PRBool gRecordingStatistics=PR_TRUE; static char* gURLRef=0; static int rickGDebug=0; static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream +extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir) +{ + gVerificationOutputDir = verify_dir; +} + +extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval) +{ + gRecordingStatistics = bval; +} /** * This method is defined in nsIParser. It is used to @@ -73,7 +86,6 @@ NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult) return it->QueryInterface(kIParserIID, (void **) aInstancePtrResult); } - /** * This big dispatch method is used to route token handler calls to the right place. * What's wrong with it? 
This table, and the dispatch methods themselves need to be @@ -367,6 +379,298 @@ nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) { return old; } +/** + * This debug method records an invalid context vector and it's + * associated context vector and URL in a simple flat file mapping which + * resides in the verification directory and is named context.map + * + * @update jevering 6/06/98 + * @param path is the directory structure indicating the bad context vector + * @param pURLRef is the associated URL + * @param filename to record mapping to if not already recorded + * @return TRUE if it is already record (dont rerecord) + */ + +#define CONTEXT_VECTOR_MAP "/vector.map" +#define CONTEXT_VECTOR_STAT "/vector.stat" +#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n" +PRBool DebugRecord(char * path, char * pURLRef, char * filename) +{ + char recordPath[2048]; + PRIntn oflags = 0; + + // create the record file name from the verification director + // and the default name. 
+ strcpy(recordPath,gVerificationOutputDir); + strcat(recordPath,CONTEXT_VECTOR_MAP); + + // create the file exists, only open for read/write + // otherwise, create it + if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS) + oflags = PR_CREATE_FILE; + oflags |= PR_RDWR; + + // open the record file + PRFileDesc * recordFile = PR_Open(recordPath,oflags,0); + + if (recordFile) { + + char * string = (char *)PR_Malloc(2048); + PRBool found = PR_FALSE; + + // vectors are stored on the format iof "URL vector filename" + // where the vector contains the verification path and + // the filename contains the debug source dump + sprintf(string,"%s %s %s\r\n", pURLRef, path, filename); + + // get the file size, read in the file and parse it line at + // a time to check to see if we have already recorded this + // occurance + + PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END); + if (iSize) { + + char * buffer = (char*)PR_Malloc(iSize); + char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048); + if (buffer!=NULL && string!=NULL) { + PRInt32 ibufferpos, istringpos; + + // beginning of file for read + PR_Seek(recordFile,0,PR_SEEK_SET); + PR_Read(recordFile,buffer,iSize); + + // run through the file looking for a matching vector + for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++) + { + // compare string once we have hit the end of the line + if (buffer[ibufferpos] == '\r') { + stringbuf[istringpos] = '\0'; + istringpos = 0; + // skip newline and space + ibufferpos++; + + if (PL_strlen(stringbuf)) { + char * space; + // chop of the filename for compare + if ((space = PL_strrchr(stringbuf, ' '))!=NULL) + *space = '\0'; + + // we have already recorded this one, free up, and return + if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) { + PR_Free(buffer); + PR_Free(stringbuf); + PR_Free(string); + return PR_TRUE; + } + } + } + + // build up the compare string + else + stringbuf[istringpos++] = buffer[ibufferpos]; + } + + // throw away the record file data + 
PR_Free(buffer); + PR_Free(stringbuf); + } + } + + // if this bad vector was not recorded, add it to record file + + if (!found) { + PR_Seek(recordFile,0,PR_SEEK_END); + PR_Write(recordFile,string,PL_strlen(string)); + } + + PR_Close(recordFile); + PR_Free(string); + } + + // vector was not recorded + return PR_FALSE; +} + +// structure to store the vector statistic information + +typedef struct vector_info { + PRInt32 references; // number of occurances counted + PRInt32 count; // number of tags in the vector + PRBool good_vector; // is this a valid vector? + PRInt32 * vector; // and the vector +} VectorInfo; + +// global table for storing vector statistics and the size +static VectorInfo ** gVectorInfoArray = 0; +static PRInt32 gVectorCount = 0; + +// the statistic vector table grows each time it exceeds this +// stepping value +#define TABLE_SIZE 128 + +// compare function for quick sort. Compares references and +// sorts in decending order + +static int compare( const void *arg1, const void *arg2 ) +{ + VectorInfo ** p1 = (VectorInfo**)arg1; + VectorInfo ** p2 = (VectorInfo**)arg2; + return (*p2)->references - (*p1)->references; +} + +/** + * quick sort the statistic array causing the most frequently + * used vectors to be at the top (this makes it a little speedier + * when looking them up) + */ + +void SortVectorRecord(void) +{ + // of course, sort it only if there is something to sort + if (gVectorCount) { + qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare); + } +} + +/** + * This debug routines stores statistical information about a + * context vector. The context vector statistics are stored in + * a global array. The table is resorted each time it grows to + * aid in lookup speed. 
If a vector has already been noted, its + * reference count is bumped, otherwise it is added to the table + * + * @update jevering 6/11/98 + * @param aTags is the tag list (vector) + * @param count is the size of the vector + * @return + */ + +void NoteVector(PRInt32 aTags[],PRInt32 count, PRBool good_vector) +{ + // if the table doesn't exist, create it + if (!gVectorInfoArray) { + gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*)); + } + else { + // attempt to look up the vector + for (PRInt32 i = 0; i < gVectorCount; i++) + + // check the vector only if they are the same size, if they + // match then just return without doing further work + if (gVectorInfoArray[i]->count == count) + if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(PRInt32)*count)) { + + // bzzzt. and we have a winner.. bump the ref count + gVectorInfoArray[i]->references++; + return; + } + } + + // the context vector hasn't been noted, so allocate it and + // initialize it one.. add it to the table + VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo)); + pVectorInfo->references = 1; + pVectorInfo->count = count; + pVectorInfo->good_vector = good_vector; + pVectorInfo->vector = (PRInt32*)PR_Malloc(count*sizeof(PRInt32)); + memcpy(pVectorInfo->vector,aTags,sizeof(PRInt32)*count); + gVectorInfoArray[gVectorCount++] = pVectorInfo; + + // have we maxed out the table? grow it.. sort it.. love it. 
+ if ((gVectorCount % TABLE_SIZE) == 0) { + gVectorInfoArray = (VectorInfo**)realloc( + gVectorInfoArray, + (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE)); + SortVectorRecord(); + } +} + +void MakeVectorString(char * vector_string, VectorInfo * pInfo) +{ + sprintf (vector_string, "%6d ", pInfo->references); + for (PRInt32 j = 0; j < pInfo->count; j++) { + PL_strcat(vector_string, "<"); + PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j])); + PL_strcat(vector_string, ">"); + } + PL_strcat(vector_string,"\r\n"); +} + +/** + * This debug routine dumps out the vector statistics to a text + * file in the verification directory and defaults to the name + * "vector.stat". It contains all parsed context vectors and there + * occurance count sorted in decending order. + * + * @update jevering 6/11/98 + * @param + * @return + */ + +extern "C" NS_EXPORT void DumpVectorRecord(void) +{ + // do we have a table? + if (gVectorCount) { + + // hopefully, they wont exceed 1K. + char vector_string[1024]; + char path[1024]; + + path[0] = '\0'; + + // put in the verification directory.. else the root + if (gVerificationOutputDir) + strcpy(path,gVerificationOutputDir); + + strcat(path,CONTEXT_VECTOR_STAT); + + // open the stat file creaming any existing stat file + PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0); + if (statisticFile) { + + PRInt32 i; + PRofstream ps; + ps.attach(statisticFile); + + // oh what the heck, sort it again + SortVectorRecord(); + + // cute little header + sprintf(vector_string,"Context vector occurance results. 
Processed %d unique vectors.\r\n\r\n", gVectorCount); + ps << vector_string; + + ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n"; + ps << VECTOR_TABLE_HEADER; + + // dump out the bad vectors encountered + for (i = 0; i < gVectorCount; i++) { + if (!gVectorInfoArray[i]->good_vector) { + MakeVectorString(vector_string, gVectorInfoArray[i]); + ps << vector_string; + } + } + + ps << "\r\n\r\nValid context vector summary\r\n"; + ps << VECTOR_TABLE_HEADER; + + // take a big vector table dump (good vectors) + for (i = 0; i < gVectorCount; i++) { + if (gVectorInfoArray[i]->good_vector) { + MakeVectorString(vector_string, gVectorInfoArray[i]); + ps << vector_string; + } + // free em up. they mean nothing to me now (I'm such a user) + PR_Free(gVectorInfoArray[i]); + } + } + + // ok, we are done with the table, free it up as well + PR_Free(gVectorInfoArray); + gVectorInfoArray = 0; + gVectorCount = 0; + PR_Close(statisticFile); + } +} /** * This debug method allows us to determine whether or not @@ -378,47 +682,72 @@ nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) { * @param aDTD is the DTD we plan to ask for verification * @return TRUE if we know how to handle it, else false */ -PRBool VerifyContextVector(PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) { + +PRBool VerifyContextVector(CTokenizer * tokenizer, PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) { PRBool result=PR_TRUE; + //ok, now see if we understand this vector + + if(0!=gVerificationOutputDir || gRecordingStatistics) + result=aDTD->VerifyContextVector(aTags,count); + + if (gRecordingStatistics) { + NoteVector(aTags,count,result); + } + if(0!=gVerificationOutputDir) { - -#ifdef XP_PC - char path[_MAX_PATH+1]; + char path[2048]; strcpy(path,gVerificationOutputDir); -#endif int i=0; for(i=0;iVerifyContextVector(aTags,count); - if(PR_FALSE==result){ -#ifdef NS_WIN32 - // save file to directory indicated by bad context vector - int iCount = 1; + static PRBool 
rnd_initialized = PR_FALSE; + + if (!rnd_initialized) { + // seed randomn number generator to aid in temp file + // creation. + rnd_initialized = PR_TRUE; + srand((unsigned)time(NULL)); + } + + // generate a filename to dump the html source into char filename[_MAX_PATH]; do { - sprintf(filename,"%s/html%04d.dbg", path, iCount++); + // use system time to generate a temporary file name + time_t ltime; + time (<ime); + // add in random number so that we can create uniques names + // faster than simply every second. + ltime += (time_t)rand(); + sprintf(filename,"%s/%lX.html", path, ltime); + // try until we find one we can create } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS); - PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0); - if (debugFile) { - PR_Write(debugFile,gURLRef,PL_strlen(gURLRef)); - PR_Write(debugFile,"\n",PL_strlen("\n")); - PR_Close(debugFile); + + // check to see if we already recorded an instance of this particular + // bad vector. + if (!DebugRecord(path,gURLRef, filename)) + { + // save file to directory indicated by bad context vector + PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0); + // if we were able to open the debug file, then + // write the true URL at the top of the file. + if (debugFile) { + // dump the html source into the newly created file. + if (tokenizer) { + PRofstream ps; + ps.attach(debugFile); + tokenizer->DebugDumpSource(ps); + } + PR_Close(debugFile); + } } -#endif - //add debugging code here to record the fact that we just encountered - //a context vector we don't know how to handle. 
} } @@ -454,7 +783,7 @@ PRInt32 nsHTMLParser::IterateTokens() { if(aHandler) { theMarkPos=*mCurrentPos; result=(*aHandler)(theToken,this); - VerifyContextVector(mContextStack,mContextStackPos,mDTD); + VerifyContextVector(mTokenizer, mContextStack,mContextStackPos,mDTD); } ++(*mCurrentPos); } diff --git a/mozilla/htmlparser/src/nsTokenizer.cpp b/mozilla/htmlparser/src/nsTokenizer.cpp index 62c6967e78a..8d37467c794 100644 --- a/mozilla/htmlparser/src/nsTokenizer.cpp +++ b/mozilla/htmlparser/src/nsTokenizer.cpp @@ -23,6 +23,13 @@ #include "nsScanner.h" #include "nsIURL.h" +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} /** * Default constructor @@ -33,7 +40,7 @@ * @return */ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode) : - mTokenDeque() { + mTokenDeque(PR_TRUE,TokenFreeProc) { mDelegate=aDelegate; mScanner=new CScanner(aURL,aMode); mParseMode=aMode; @@ -48,7 +55,7 @@ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMo * @return */ CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode) : - mTokenDeque() { + mTokenDeque(PR_TRUE,TokenFreeProc) { mDelegate=aDelegate; mScanner=new CScanner(aFilename,aMode); mParseMode=aMode; @@ -63,7 +70,7 @@ CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,ePars * @return */ CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode) : - mTokenDeque() { + mTokenDeque(PR_TRUE,TokenFreeProc) { mDelegate=aDelegate; mScanner=new CScanner(aMode); mParseMode=aMode; diff --git a/mozilla/htmlparser/src/prstrm.cpp b/mozilla/htmlparser/src/prstrm.cpp new file mode 100644 index 00000000000..0b09b4f938f --- /dev/null +++ b/mozilla/htmlparser/src/prstrm.cpp @@ -0,0 +1,343 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * + * The contents of this file are subject to the Netscape Public License + * 
Version 1.0 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is Netscape Communications + * Corporation. Portions created by Netscape are Copyright (C) 1998 + * Netscape Communications Corporation. All Rights Reserved. + */ +#include "prtypes.h" +#include "prstrm.h" +#include + +const PRIntn STRM_BUFSIZ = 8192; + +PRfilebuf::PRfilebuf(): +_fd(0), +_opened(PR_FALSE), +_allocated(PR_FALSE) +{ +} + +PRfilebuf::PRfilebuf(PRFileDesc *fd): +streambuf(), +_fd(fd), +_opened(PR_FALSE), +_allocated(PR_FALSE) +{ +} + +PRfilebuf::PRfilebuf(PRFileDesc *fd, char * buffptr, int bufflen): +_fd(fd), +_opened(PR_FALSE), +_allocated(PR_FALSE) +{ + PRfilebuf::setbuf(buffptr, bufflen); +} + +PRfilebuf::~PRfilebuf() +{ + if (_opened){ + close(); + }else + sync(); + if (_allocated) + delete base(); +} + +PRfilebuf* +PRfilebuf::open(const char *name, int mode, int flags) +{ + if (_fd != 0) + return 0; // error if already open + PRIntn PRmode = 0; + // translate mode argument + if (!(mode & ios::nocreate)) + PRmode |= PR_CREATE_FILE; + //if (mode & ios::noreplace) + // PRmode |= O_EXCL; + if (mode & ios::app){ + mode |= ios::out; + PRmode |= PR_APPEND; + } + if (mode & ios::trunc){ + mode |= ios::out; // IMPLIED + PRmode |= PR_TRUNCATE; + } + if (mode & ios::out){ + if (mode & ios::in) + PRmode |= PR_RDWR; + else + PRmode |= PR_WRONLY; + if (!(mode & (ios::in|ios::app|ios::ate|ios::noreplace))){ + mode |= ios::trunc; // IMPLIED + PRmode |= PR_TRUNCATE; + } + }else if (mode & ios::in) + PRmode |= PR_RDONLY; + else + return 0; // 
error if not ios:in or ios::out + + + // + // The usual portable across unix crap... + // NT gets a hokey piece of junk layer that prevents + // access to the API. +#ifdef WIN32 + _fd = PR_Open(name, PRmode, PRmode); +#else + _fd = PR_Open(name, PRmode, flags); +#endif + if (_fd == 0) + return 0; + _opened = PR_TRUE; + if ((!unbuffered()) && (!ebuf())){ + char * sbuf = new char[STRM_BUFSIZ]; + if (!sbuf) + unbuffered(1); + else{ + _allocated = PR_TRUE; + streambuf::setb(sbuf,sbuf+STRM_BUFSIZ,0); + } + } + if (mode & ios::ate){ + if (seekoff(0,ios::end,mode)==EOF){ + close(); + return 0; + } + } + return this; +} + +PRfilebuf* +PRfilebuf::attach(PRFileDesc *fd) +{ + _opened = PR_FALSE; + _fd = fd; + return this; +} + +int +PRfilebuf::overflow(int c) +{ + if (allocate()==EOF) // make sure there is a reserve area + return EOF; + if (PRfilebuf::sync()==EOF) // sync before new buffer created below + return EOF; + + if (!unbuffered()) + setp(base(),ebuf()); + + if (c!=EOF){ + if ((!unbuffered()) && (pptr() < epptr())) // guard against recursion + sputc(c); + else{ + if (PR_Write(_fd, &c, 1)!=1) + return(EOF); + } + } + return(1); // return something other than EOF if successful +} + +int +PRfilebuf::underflow() +{ + int count; + unsigned char tbuf; + + if (in_avail()) + return (int)(unsigned char) *gptr(); + + if (allocate()==EOF) // make sure there is a reserve area + return EOF; + if (PRfilebuf::sync()==EOF) + return EOF; + + if (unbuffered()) + { + if (PR_Read(_fd,(void *)&tbuf,1)<=0) + return EOF; + return (int)tbuf; + } + + if ((count=PR_Read(_fd,(void *)base(),blen())) <= 0) + return EOF; // reached EOF + setg(base(),base(),base()+count); + return (int)(unsigned char) *gptr(); +} + +streambuf* +PRfilebuf::setbuf(char *buffptr, int bufflen) +{ + if (is_open() && (ebuf())) + return 0; + if ((!buffptr) || (bufflen <= 0)) + unbuffered(1); + else + setb(buffptr, buffptr+bufflen, 0); + return this; +} + +streampos +PRfilebuf::seekoff(streamoff offset, ios::seek_dir dir, 
int /* mode */) +{ + if (PR_GetDescType(_fd) == PR_DESC_FILE){ + PRSeekWhence fdir; + PRInt32 retpos; + switch (dir) { + case ios::beg : + fdir = PR_SEEK_SET; + break; + case ios::cur : + fdir = PR_SEEK_CUR; + break; + case ios::end : + fdir = PR_SEEK_END; + break; + default: + // error + return(EOF); + } + + if (PRfilebuf::sync()==EOF) + return EOF; + if ((retpos=PR_Seek(_fd, offset, fdir))==-1L) + return (EOF); + return((streampos)retpos); + }else + return (EOF); +} + + +int +PRfilebuf::sync() +{ + PRInt32 count; + + if (_fd==0) + return(EOF); + + if (!unbuffered()){ + // Sync write area + if ((count=out_waiting())!=0){ + PRInt32 nout; + if ((nout =PR_Write(_fd, + (void *) pbase(), + (unsigned int)count)) != count){ + if (nout > 0) { + // should set _pptr -= nout + pbump(-(int)nout); + memmove(pbase(), pbase()+nout, (int)(count-nout)); + } + return(EOF); + } + } + setp(0,0); // empty put area + + if (PR_GetDescType(_fd) == PR_DESC_FILE){ + // Sockets can't seek; don't need this + if ((count=in_avail()) > 0){ + if (PR_Seek(_fd, -count, PR_SEEK_CUR)!=-1L) + { + return (EOF); + } + } + } + setg(0,0,0); // empty get area + } + return(0); +} + +PRfilebuf * +PRfilebuf::close() +{ + int retval; + if (_fd==0) + return 0; + + retval = sync(); + + if ((PR_Close(_fd)==0) || (retval==EOF)) + return 0; + _fd = 0; + return this; +} + +PRofstream::PRofstream(): +ostream(new PRfilebuf) +{ + _PRSTR_DELBUF(0); +} + +PRofstream::PRofstream(PRFileDesc *fd): +ostream(new PRfilebuf(fd)) +{ + _PRSTR_DELBUF(0); +} + +PRofstream::PRofstream(PRFileDesc *fd, char *buff, int bufflen): +ostream(new PRfilebuf(fd, buff, bufflen)) +{ + _PRSTR_DELBUF(0); +} + +PRofstream::PRofstream(const char *name, int mode, int flags): +ostream(new PRfilebuf) +{ + _PRSTR_DELBUF(0); + if (!rdbuf()->open(name, (mode|ios::out), flags)) + clear(rdstate() | ios::failbit); +} + +PRofstream::~PRofstream() +{ + flush(); + + delete rdbuf(); +#ifdef _PRSTR_BP + _PRSTR_BP = 0; +#endif +} + +streambuf * 
+PRofstream::setbuf(char * ptr, int len) +{ + if ((is_open()) || (!(rdbuf()->setbuf(ptr, len)))){ + clear(rdstate() | ios::failbit); + return 0; + } + return rdbuf(); +} + +void +PRofstream::attach(PRFileDesc *fd) +{ + if (!(rdbuf()->attach(fd))) + clear(rdstate() | ios::failbit); +} + +void +PRofstream::open(const char * name, int mode, int flags) +{ + if (is_open() || !(rdbuf()->open(name, (mode|ios::out), flags))) + clear(rdstate() | ios::failbit); +} + +void +PRofstream::close() +{ + clear((rdbuf()->close()) ? 0 : (rdstate() | ios::failbit)); +} + + + diff --git a/mozilla/htmlparser/src/prstrm.h b/mozilla/htmlparser/src/prstrm.h new file mode 100644 index 00000000000..d19957889ea --- /dev/null +++ b/mozilla/htmlparser/src/prstrm.h @@ -0,0 +1,94 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is Netscape Communications + * Corporation. Portions created by Netscape are Copyright (C) 1998 + * Netscape Communications Corporation. All Rights Reserved. + */ + +// The originals are in: nsprpub/lib/pstreams/ +// currently not being built into nspr.. these files will go away. 
+ +#ifndef __PRSTRM +#define __PRSTRM + +#include "prtypes.h" +#include "prio.h" +#include + +#if defined(__GNUC__) +#define _PRSTR_BP _strbuf +#define _PRSTR_DELBUF(x) /* as nothing */ +#define _PRSTR_DELBUF_C(c, x) /* as nothing */ +#elif defined(WIN32) +#define _PRSTR_BP bp +#define _PRSTR_DELBUF(x) delbuf(x) +#define _PRSTR_DELBUF_C(c, x) c::_PRSTR_DELBUF(x) +#elif defined(OSF1) +#define _PRSTR_BP m_psb +#define _PRSTR_DELBUF(x) /* as nothing */ +#define _PRSTR_DELBUF_C(c, x) /* as nothing */ +#else +#define _PRSTR_BP bp +// Unix compilers don't believe in encapsulation +// At least on Solaris this is also ignored +#define _PRSTR_DELBUF(x) delbuf = x +#define _PRSTR_DELBUF_C(c, x) c::_PRSTR_DELBUF(x) +#endif + +class PR_IMPLEMENT(PRfilebuf): public streambuf +{ +public: + PRfilebuf(); + PRfilebuf(PRFileDesc *fd); + PRfilebuf(PRFileDesc *fd, char * buffptr, int bufflen); + ~PRfilebuf(); + virtual int overflow(int=EOF); + virtual int underflow(); + virtual streambuf *setbuf(char *buff, int bufflen); + virtual streampos seekoff(streamoff, ios::seek_dir, int); + virtual int sync(); + PRfilebuf *open(const char *name, int mode, int flags); + PRfilebuf *attach(PRFileDesc *fd); + PRfilebuf *close(); + int is_open() const {return (_fd != 0);} + PRFileDesc *fd(){return _fd;} + +private: + PRFileDesc * _fd; + PRBool _opened; + PRBool _allocated; +}; + + +class PR_IMPLEMENT(PRofstream) : public ostream { +public: + PRofstream(); + PRofstream(const char *, int mode=ios::out, int flags = 0); + PRofstream(PRFileDesc *); + PRofstream(PRFileDesc *, char *, int); + ~PRofstream(); + + streambuf * setbuf(char *, int); + PRfilebuf* rdbuf() { return (PRfilebuf*) ios::rdbuf(); } + + void attach(PRFileDesc *); + PRFileDesc *fd() {return rdbuf()->fd();} + + int is_open(){return rdbuf()->is_open();} + void open(const char *, int =ios::out, int = 0); + void close(); +}; + +#endif /* __PRSTRM */ \ No newline at end of file diff --git a/mozilla/parser/htmlparser/src/CNavDTD.cpp 
b/mozilla/parser/htmlparser/src/CNavDTD.cpp index ecfe599d4bb..e3301a0e259 100644 --- a/mozilla/parser/htmlparser/src/CNavDTD.cpp +++ b/mozilla/parser/htmlparser/src/CNavDTD.cpp @@ -842,8 +842,12 @@ PRInt32 CNavDTD::GetDefaultParentTagFor(PRInt32 aTag) const{ PRBool CNavDTD::VerifyContextVector(PRInt32* aVector,PRInt32 aCount) const { PRBool result=PR_TRUE; - if(aCount>0) { - + if(aCount>1) { + for (int i = 0; i < aCount-1; i++) + if (!CanContain(aVector[i],aVector[i+1])) { + result = PR_FALSE; + break; + } } return result; } diff --git a/mozilla/parser/htmlparser/src/CNavDelegate.cpp b/mozilla/parser/htmlparser/src/CNavDelegate.cpp index 0eb2be51551..44044201c98 100644 --- a/mozilla/parser/htmlparser/src/CNavDelegate.cpp +++ b/mozilla/parser/htmlparser/src/CNavDelegate.cpp @@ -31,6 +31,13 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} /** * Default constructor @@ -40,7 +47,7 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY * @return */ CNavDelegate::CNavDelegate() : - ITokenizerDelegate(), mTokenDeque() { + ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) { } /** diff --git a/mozilla/parser/htmlparser/src/COtherDelegate.cpp b/mozilla/parser/htmlparser/src/COtherDelegate.cpp index 1728b49dee3..321c2ee1c0b 100644 --- a/mozilla/parser/htmlparser/src/COtherDelegate.cpp +++ b/mozilla/parser/htmlparser/src/COtherDelegate.cpp @@ -32,6 +32,14 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} + /** * Default constructor * @@ -40,7 +48,7 @@ static char gIdentChars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY * @return */ COtherDelegate::COtherDelegate() : - 
ITokenizerDelegate(), mTokenDeque() { + ITokenizerDelegate(), mTokenDeque(PR_TRUE,TokenFreeProc) { } /** diff --git a/mozilla/parser/htmlparser/src/makefile.win b/mozilla/parser/htmlparser/src/makefile.win index 348e3a4f1ca..58213d2e54c 100644 --- a/mozilla/parser/htmlparser/src/makefile.win +++ b/mozilla/parser/htmlparser/src/makefile.win @@ -28,7 +28,7 @@ CPPSRCS=nsHTMLContentSink.cpp \ nsToken.cpp nsTokenizer.cpp nsTokenHandler.cpp \ CNavDTD.cpp CNavDelegate.cpp \ COtherDTD.cpp COtherDelegate.cpp \ - nsHTMLParser.cpp + nsHTMLParser.cpp prstrm.cpp EXPORTS=nshtmlpars.h nsIContentSink.h nsIHTMLContentSink.h \ nsHTMLTokens.h nsIParserNode.h nsIParser.h nsToken.h @@ -39,7 +39,8 @@ CPP_OBJS=.\$(OBJDIR)\nsHTMLContentSink.obj \ .\$(OBJDIR)\nsHTMLParser.obj \ .\$(OBJDIR)\nsHTMLTokens.obj .\$(OBJDIR)\nsParserNode.obj \ .\$(OBJDIR)\nsScanner.obj .\$(OBJDIR)\nsToken.obj \ - .\$(OBJDIR)\nsTokenizer.obj .\$(OBJDIR)\nsTokenHandler.obj + .\$(OBJDIR)\nsTokenizer.obj .\$(OBJDIR)\nsTokenHandler.obj \ + .\$(OBJDIR)\prstrm.obj LINCS=-I$(PUBLIC)\xpcom -I$(PUBLIC)\raptor -I$(PUBLIC)\netlib diff --git a/mozilla/parser/htmlparser/src/nsHTMLParser.cpp b/mozilla/parser/htmlparser/src/nsHTMLParser.cpp index 59eaf1d06f6..ba375429ef9 100644 --- a/mozilla/parser/htmlparser/src/nsHTMLParser.cpp +++ b/mozilla/parser/htmlparser/src/nsHTMLParser.cpp @@ -33,10 +33,13 @@ #include "prio.h" #include "plstr.h" #include +#include "prstrm.h" #include "nsIInputStream.h" #ifdef XP_PC #include //this is here for debug reasons... 
#endif +#include +#include "prmem.h" static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID); @@ -49,10 +52,20 @@ static const char* kNullToken = "Error: Null token given"; static const char* kInvalidTagStackPos = "Error: invalid tag stack position"; static char* gVerificationOutputDir=0; +static PRBool gRecordingStatistics=PR_TRUE; static char* gURLRef=0; static int rickGDebug=0; static const int gTransferBufferSize=4096; //size of the buffer used in moving data from iistream +extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir) +{ + gVerificationOutputDir = verify_dir; +} + +extern "C" NS_EXPORT void SetRecordStatistics(PRBool bval) +{ + gRecordingStatistics = bval; +} /** * This method is defined in nsIParser. It is used to @@ -73,7 +86,6 @@ NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult) return it->QueryInterface(kIParserIID, (void **) aInstancePtrResult); } - /** * This big dispatch method is used to route token handler calls to the right place. * What's wrong with it? 
This table, and the dispatch methods themselves need to be @@ -367,6 +379,298 @@ nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) { return old; } +/** + * This debug method records an invalid context vector and its + * associated context vector and URL in a simple flat file mapping which + * resides in the verification directory and is named vector.map + * + * @update jevering 6/06/98 + * @param path is the directory structure indicating the bad context vector + * @param pURLRef is the associated URL + * @param filename to record mapping to if not already recorded + * @return TRUE if it is already recorded (don't re-record) + */ + +#define CONTEXT_VECTOR_MAP "/vector.map" +#define CONTEXT_VECTOR_STAT "/vector.stat" +#define VECTOR_TABLE_HEADER "count vector\r\n====== =============================================\r\n" +PRBool DebugRecord(char * path, char * pURLRef, char * filename) +{ + char recordPath[2048]; + PRIntn oflags = 0; + + // create the record file name from the verification directory + // and the default name. 
+ strcpy(recordPath,gVerificationOutputDir); + strcat(recordPath,CONTEXT_VECTOR_MAP); + + // if the file exists, only open for read/write + // otherwise, create it + if(PR_Access(recordPath,PR_ACCESS_EXISTS) != PR_SUCCESS) + oflags = PR_CREATE_FILE; + oflags |= PR_RDWR; + + // open the record file + PRFileDesc * recordFile = PR_Open(recordPath,oflags,0); + + if (recordFile) { + + char * string = (char *)PR_Malloc(2048); + PRBool found = PR_FALSE; + + // vectors are stored in the format of "URL vector filename" + // where the vector contains the verification path and + // the filename contains the debug source dump + sprintf(string,"%s %s %s\r\n", pURLRef, path, filename); + + // get the file size, read in the file and parse it line at + // a time to check to see if we have already recorded this + // occurrence + + PRInt32 iSize = PR_Seek(recordFile,0,PR_SEEK_END); + if (iSize) { + + char * buffer = (char*)PR_Malloc(iSize); + char * stringbuf = (char*)PR_Calloc(sizeof(char*),2048); + if (buffer!=NULL && string!=NULL) { + PRInt32 ibufferpos, istringpos; + + // beginning of file for read + PR_Seek(recordFile,0,PR_SEEK_SET); + PR_Read(recordFile,buffer,iSize); + + // run through the file looking for a matching vector + for (ibufferpos = istringpos = 0; ibufferpos < iSize; ibufferpos++) + { + // compare string once we have hit the end of the line + if (buffer[ibufferpos] == '\r') { + stringbuf[istringpos] = '\0'; + istringpos = 0; + // skip newline and space + ibufferpos++; + + if (PL_strlen(stringbuf)) { + char * space; + // chop off the filename for compare + if ((space = PL_strrchr(stringbuf, ' '))!=NULL) + *space = '\0'; + + // we have already recorded this one, free up, and return + if (!PL_strncmp(string,stringbuf,PL_strlen(stringbuf))) { + PR_Free(buffer); + PR_Free(stringbuf); + PR_Free(string); + return PR_TRUE; + } + } + } + + // build up the compare string + else + stringbuf[istringpos++] = buffer[ibufferpos]; + } + + // throw away the record file data + 
PR_Free(buffer); + PR_Free(stringbuf); + } + } + + // if this bad vector was not recorded, add it to record file + + if (!found) { + PR_Seek(recordFile,0,PR_SEEK_END); + PR_Write(recordFile,string,PL_strlen(string)); + } + + PR_Close(recordFile); + PR_Free(string); + } + + // vector was not recorded + return PR_FALSE; +} + +// structure to store the vector statistic information + +typedef struct vector_info { + PRInt32 references; // number of occurrences counted + PRInt32 count; // number of tags in the vector + PRBool good_vector; // is this a valid vector? + PRInt32 * vector; // and the vector +} VectorInfo; + +// global table for storing vector statistics and the size +static VectorInfo ** gVectorInfoArray = 0; +static PRInt32 gVectorCount = 0; + +// the statistic vector table grows each time it exceeds this +// stepping value +#define TABLE_SIZE 128 + +// compare function for quick sort. Compares references and +// sorts in descending order + +static int compare( const void *arg1, const void *arg2 ) +{ + VectorInfo ** p1 = (VectorInfo**)arg1; + VectorInfo ** p2 = (VectorInfo**)arg2; + return (*p2)->references - (*p1)->references; +} + +/** + * quick sort the statistic array causing the most frequently + * used vectors to be at the top (this makes it a little speedier + * when looking them up) + */ + +void SortVectorRecord(void) +{ + // of course, sort it only if there is something to sort + if (gVectorCount) { + qsort((void*)gVectorInfoArray,(size_t)gVectorCount,sizeof(VectorInfo*),compare); + } +} + +/** + * This debug routine stores statistical information about a + * context vector. The context vector statistics are stored in + * a global array. The table is resorted each time it grows to + * aid in lookup speed. 
If a vector has already been noted, its + * reference count is bumped, otherwise it is added to the table + * + * @update jevering 6/11/98 + * @param aTags is the tag list (vector) + * @param count is the size of the vector + * @return + */ + +void NoteVector(PRInt32 aTags[],PRInt32 count, PRBool good_vector) +{ + // if the table doesn't exist, create it + if (!gVectorInfoArray) { + gVectorInfoArray = (VectorInfo**)PR_Calloc(TABLE_SIZE,sizeof(VectorInfo*)); + } + else { + // attempt to look up the vector + for (PRInt32 i = 0; i < gVectorCount; i++) + + // check the vector only if they are the same size, if they + // match then just return without doing further work + if (gVectorInfoArray[i]->count == count) + if (!memcmp(gVectorInfoArray[i]->vector, aTags, sizeof(PRInt32)*count)) { + + // bzzzt. and we have a winner.. bump the ref count + gVectorInfoArray[i]->references++; + return; + } + } + + // the context vector hasn't been noted, so allocate it and + // initialize it one.. add it to the table + VectorInfo * pVectorInfo = (VectorInfo*)PR_Malloc(sizeof(VectorInfo)); + pVectorInfo->references = 1; + pVectorInfo->count = count; + pVectorInfo->good_vector = good_vector; + pVectorInfo->vector = (PRInt32*)PR_Malloc(count*sizeof(PRInt32)); + memcpy(pVectorInfo->vector,aTags,sizeof(PRInt32)*count); + gVectorInfoArray[gVectorCount++] = pVectorInfo; + + // have we maxed out the table? grow it.. sort it.. love it. 
+ if ((gVectorCount % TABLE_SIZE) == 0) { + gVectorInfoArray = (VectorInfo**)realloc( + gVectorInfoArray, + (sizeof(VectorInfo*)*((gVectorCount/TABLE_SIZE)+1)*TABLE_SIZE)); + SortVectorRecord(); + } +} + +void MakeVectorString(char * vector_string, VectorInfo * pInfo) +{ + sprintf (vector_string, "%6d ", pInfo->references); + for (PRInt32 j = 0; j < pInfo->count; j++) { + PL_strcat(vector_string, "<"); + PL_strcat(vector_string, (const char *)GetTagName(pInfo->vector[j])); + PL_strcat(vector_string, ">"); + } + PL_strcat(vector_string,"\r\n"); +} + +/** + * This debug routine dumps out the vector statistics to a text + * file in the verification directory and defaults to the name + * "vector.stat". It contains all parsed context vectors and their + * occurrence count sorted in descending order. + * + * @update jevering 6/11/98 + * @param + * @return + */ + +extern "C" NS_EXPORT void DumpVectorRecord(void) +{ + // do we have a table? + if (gVectorCount) { + + // hopefully, they won't exceed 1K. + char vector_string[1024]; + char path[1024]; + + path[0] = '\0'; + + // put in the verification directory.. else the root + if (gVerificationOutputDir) + strcpy(path,gVerificationOutputDir); + + strcat(path,CONTEXT_VECTOR_STAT); + + // open the stat file creaming any existing stat file + PRFileDesc * statisticFile = PR_Open(path,PR_CREATE_FILE|PR_RDWR,0); + if (statisticFile) { + + PRInt32 i; + PRofstream ps; + ps.attach(statisticFile); + + // oh what the heck, sort it again + SortVectorRecord(); + + // cute little header + sprintf(vector_string,"Context vector occurance results. 
Processed %d unique vectors.\r\n\r\n", gVectorCount); + ps << vector_string; + + ps << "Invalid context vector summary (see " CONTEXT_VECTOR_STAT ") for mapping.\r\n"; + ps << VECTOR_TABLE_HEADER; + + // dump out the bad vectors encountered + for (i = 0; i < gVectorCount; i++) { + if (!gVectorInfoArray[i]->good_vector) { + MakeVectorString(vector_string, gVectorInfoArray[i]); + ps << vector_string; + } + } + + ps << "\r\n\r\nValid context vector summary\r\n"; + ps << VECTOR_TABLE_HEADER; + + // take a big vector table dump (good vectors) + for (i = 0; i < gVectorCount; i++) { + if (gVectorInfoArray[i]->good_vector) { + MakeVectorString(vector_string, gVectorInfoArray[i]); + ps << vector_string; + } + // free em up. they mean nothing to me now (I'm such a user) + PR_Free(gVectorInfoArray[i]); + } + } + + // ok, we are done with the table, free it up as well + PR_Free(gVectorInfoArray); + gVectorInfoArray = 0; + gVectorCount = 0; + PR_Close(statisticFile); + } +} /** * This debug method allows us to determine whether or not @@ -378,47 +682,72 @@ nsIContentSink* nsHTMLParser::SetContentSink(nsIContentSink* aSink) { * @param aDTD is the DTD we plan to ask for verification * @return TRUE if we know how to handle it, else false */ -PRBool VerifyContextVector(PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) { + +PRBool VerifyContextVector(CTokenizer * tokenizer, PRInt32 aTags[],PRInt32 count,nsIDTD* aDTD) { PRBool result=PR_TRUE; + //ok, now see if we understand this vector + + if(0!=gVerificationOutputDir || gRecordingStatistics) + result=aDTD->VerifyContextVector(aTags,count); + + if (gRecordingStatistics) { + NoteVector(aTags,count,result); + } + if(0!=gVerificationOutputDir) { - -#ifdef XP_PC - char path[_MAX_PATH+1]; + char path[2048]; strcpy(path,gVerificationOutputDir); -#endif int i=0; for(i=0;iVerifyContextVector(aTags,count); - if(PR_FALSE==result){ -#ifdef NS_WIN32 - // save file to directory indicated by bad context vector - int iCount = 1; + static PRBool 
rnd_initialized = PR_FALSE; + + if (!rnd_initialized) { + // seed random number generator to aid in temp file + // creation. + rnd_initialized = PR_TRUE; + srand((unsigned)time(NULL)); + } + + // generate a filename to dump the html source into char filename[_MAX_PATH]; do { - sprintf(filename,"%s/html%04d.dbg", path, iCount++); + // use system time to generate a temporary file name + time_t ltime; + time (&ltime); + // add in random number so that we can create unique names + // faster than simply every second. + ltime += (time_t)rand(); + sprintf(filename,"%s/%lX.html", path, ltime); + // try until we find one we can create } while (PR_Access(filename,PR_ACCESS_EXISTS) == PR_SUCCESS); - PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0); - if (debugFile) { - PR_Write(debugFile,gURLRef,PL_strlen(gURLRef)); - PR_Write(debugFile,"\n",PL_strlen("\n")); - PR_Close(debugFile); + + // check to see if we already recorded an instance of this particular + // bad vector. + if (!DebugRecord(path,gURLRef, filename)) + { + // save file to directory indicated by bad context vector + PRFileDesc * debugFile = PR_Open(filename,PR_CREATE_FILE|PR_RDWR,0); + // if we were able to open the debug file, then + // write the true URL at the top of the file. + if (debugFile) { + // dump the html source into the newly created file. + if (tokenizer) { + PRofstream ps; + ps.attach(debugFile); + tokenizer->DebugDumpSource(ps); + } + PR_Close(debugFile); + } } -#endif - //add debugging code here to record the fact that we just encountered - //a context vector we don't know how to handle. 
} } @@ -454,7 +783,7 @@ PRInt32 nsHTMLParser::IterateTokens() { if(aHandler) { theMarkPos=*mCurrentPos; result=(*aHandler)(theToken,this); - VerifyContextVector(mContextStack,mContextStackPos,mDTD); + VerifyContextVector(mTokenizer, mContextStack,mContextStackPos,mDTD); } ++(*mCurrentPos); } diff --git a/mozilla/parser/htmlparser/src/nsTokenizer.cpp b/mozilla/parser/htmlparser/src/nsTokenizer.cpp index 62c6967e78a..8d37467c794 100644 --- a/mozilla/parser/htmlparser/src/nsTokenizer.cpp +++ b/mozilla/parser/htmlparser/src/nsTokenizer.cpp @@ -23,6 +23,13 @@ #include "nsScanner.h" #include "nsIURL.h" +static void TokenFreeProc(void * pToken) +{ + if (pToken!=NULL) { + CToken * pCToken = (CToken*)pToken; + delete pCToken; + } +} /** * Default constructor @@ -33,7 +40,7 @@ * @return */ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMode) : - mTokenDeque() { + mTokenDeque(PR_TRUE,TokenFreeProc) { mDelegate=aDelegate; mScanner=new CScanner(aURL,aMode); mParseMode=aMode; @@ -48,7 +55,7 @@ CTokenizer::CTokenizer(nsIURL* aURL,ITokenizerDelegate* aDelegate,eParseMode aMo * @return */ CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,eParseMode aMode) : - mTokenDeque() { + mTokenDeque(PR_TRUE,TokenFreeProc) { mDelegate=aDelegate; mScanner=new CScanner(aFilename,aMode); mParseMode=aMode; @@ -63,7 +70,7 @@ CTokenizer::CTokenizer(const char* aFilename,ITokenizerDelegate* aDelegate,ePars * @return */ CTokenizer::CTokenizer(ITokenizerDelegate* aDelegate,eParseMode aMode) : - mTokenDeque() { + mTokenDeque(PR_TRUE,TokenFreeProc) { mDelegate=aDelegate; mScanner=new CScanner(aMode); mParseMode=aMode; diff --git a/mozilla/parser/htmlparser/src/prstrm.cpp b/mozilla/parser/htmlparser/src/prstrm.cpp new file mode 100644 index 00000000000..0b09b4f938f --- /dev/null +++ b/mozilla/parser/htmlparser/src/prstrm.cpp @@ -0,0 +1,343 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * + * The contents of this file 
are subject to the Netscape Public License + * Version 1.0 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is Netscape Communications + * Corporation. Portions created by Netscape are Copyright (C) 1998 + * Netscape Communications Corporation. All Rights Reserved. + */ +#include "prtypes.h" +#include "prstrm.h" +#include + +const PRIntn STRM_BUFSIZ = 8192; + +PRfilebuf::PRfilebuf(): +_fd(0), +_opened(PR_FALSE), +_allocated(PR_FALSE) +{ +} + +PRfilebuf::PRfilebuf(PRFileDesc *fd): +streambuf(), +_fd(fd), +_opened(PR_FALSE), +_allocated(PR_FALSE) +{ +} + +PRfilebuf::PRfilebuf(PRFileDesc *fd, char * buffptr, int bufflen): +_fd(fd), +_opened(PR_FALSE), +_allocated(PR_FALSE) +{ + PRfilebuf::setbuf(buffptr, bufflen); +} + +PRfilebuf::~PRfilebuf() +{ + if (_opened){ + close(); + }else + sync(); + if (_allocated) + delete base(); +} + +PRfilebuf* +PRfilebuf::open(const char *name, int mode, int flags) +{ + if (_fd != 0) + return 0; // error if already open + PRIntn PRmode = 0; + // translate mode argument + if (!(mode & ios::nocreate)) + PRmode |= PR_CREATE_FILE; + //if (mode & ios::noreplace) + // PRmode |= O_EXCL; + if (mode & ios::app){ + mode |= ios::out; + PRmode |= PR_APPEND; + } + if (mode & ios::trunc){ + mode |= ios::out; // IMPLIED + PRmode |= PR_TRUNCATE; + } + if (mode & ios::out){ + if (mode & ios::in) + PRmode |= PR_RDWR; + else + PRmode |= PR_WRONLY; + if (!(mode & (ios::in|ios::app|ios::ate|ios::noreplace))){ + mode |= ios::trunc; // IMPLIED + PRmode |= PR_TRUNCATE; + } + }else if (mode & ios::in) 
+ PRmode |= PR_RDONLY; + else + return 0; // error if not ios:in or ios::out + + + // + // The usual portable across unix crap... + // NT gets a hokey piece of junk layer that prevents + // access to the API. +#ifdef WIN32 + _fd = PR_Open(name, PRmode, PRmode); +#else + _fd = PR_Open(name, PRmode, flags); +#endif + if (_fd == 0) + return 0; + _opened = PR_TRUE; + if ((!unbuffered()) && (!ebuf())){ + char * sbuf = new char[STRM_BUFSIZ]; + if (!sbuf) + unbuffered(1); + else{ + _allocated = PR_TRUE; + streambuf::setb(sbuf,sbuf+STRM_BUFSIZ,0); + } + } + if (mode & ios::ate){ + if (seekoff(0,ios::end,mode)==EOF){ + close(); + return 0; + } + } + return this; +} + +PRfilebuf* +PRfilebuf::attach(PRFileDesc *fd) +{ + _opened = PR_FALSE; + _fd = fd; + return this; +} + +int +PRfilebuf::overflow(int c) +{ + if (allocate()==EOF) // make sure there is a reserve area + return EOF; + if (PRfilebuf::sync()==EOF) // sync before new buffer created below + return EOF; + + if (!unbuffered()) + setp(base(),ebuf()); + + if (c!=EOF){ + if ((!unbuffered()) && (pptr() < epptr())) // guard against recursion + sputc(c); + else{ + if (PR_Write(_fd, &c, 1)!=1) + return(EOF); + } + } + return(1); // return something other than EOF if successful +} + +int +PRfilebuf::underflow() +{ + int count; + unsigned char tbuf; + + if (in_avail()) + return (int)(unsigned char) *gptr(); + + if (allocate()==EOF) // make sure there is a reserve area + return EOF; + if (PRfilebuf::sync()==EOF) + return EOF; + + if (unbuffered()) + { + if (PR_Read(_fd,(void *)&tbuf,1)<=0) + return EOF; + return (int)tbuf; + } + + if ((count=PR_Read(_fd,(void *)base(),blen())) <= 0) + return EOF; // reached EOF + setg(base(),base(),base()+count); + return (int)(unsigned char) *gptr(); +} + +streambuf* +PRfilebuf::setbuf(char *buffptr, int bufflen) +{ + if (is_open() && (ebuf())) + return 0; + if ((!buffptr) || (bufflen <= 0)) + unbuffered(1); + else + setb(buffptr, buffptr+bufflen, 0); + return this; +} + +streampos 
+PRfilebuf::seekoff(streamoff offset, ios::seek_dir dir, int /* mode */) +{ + if (PR_GetDescType(_fd) == PR_DESC_FILE){ + PRSeekWhence fdir; + PRInt32 retpos; + switch (dir) { + case ios::beg : + fdir = PR_SEEK_SET; + break; + case ios::cur : + fdir = PR_SEEK_CUR; + break; + case ios::end : + fdir = PR_SEEK_END; + break; + default: + // error + return(EOF); + } + + if (PRfilebuf::sync()==EOF) + return EOF; + if ((retpos=PR_Seek(_fd, offset, fdir))==-1L) + return (EOF); + return((streampos)retpos); + }else + return (EOF); +} + + +int +PRfilebuf::sync() +{ + PRInt32 count; + + if (_fd==0) + return(EOF); + + if (!unbuffered()){ + // Sync write area + if ((count=out_waiting())!=0){ + PRInt32 nout; + if ((nout =PR_Write(_fd, + (void *) pbase(), + (unsigned int)count)) != count){ + if (nout > 0) { + // should set _pptr -= nout + pbump(-(int)nout); + memmove(pbase(), pbase()+nout, (int)(count-nout)); + } + return(EOF); + } + } + setp(0,0); // empty put area + + if (PR_GetDescType(_fd) == PR_DESC_FILE){ + // Sockets can't seek; don't need this + if ((count=in_avail()) > 0){ + if (PR_Seek(_fd, -count, PR_SEEK_CUR)!=-1L) + { + return (EOF); + } + } + } + setg(0,0,0); // empty get area + } + return(0); +} + +PRfilebuf * +PRfilebuf::close() +{ + int retval; + if (_fd==0) + return 0; + + retval = sync(); + + if ((PR_Close(_fd)==0) || (retval==EOF)) + return 0; + _fd = 0; + return this; +} + +PRofstream::PRofstream(): +ostream(new PRfilebuf) +{ + _PRSTR_DELBUF(0); +} + +PRofstream::PRofstream(PRFileDesc *fd): +ostream(new PRfilebuf(fd)) +{ + _PRSTR_DELBUF(0); +} + +PRofstream::PRofstream(PRFileDesc *fd, char *buff, int bufflen): +ostream(new PRfilebuf(fd, buff, bufflen)) +{ + _PRSTR_DELBUF(0); +} + +PRofstream::PRofstream(const char *name, int mode, int flags): +ostream(new PRfilebuf) +{ + _PRSTR_DELBUF(0); + if (!rdbuf()->open(name, (mode|ios::out), flags)) + clear(rdstate() | ios::failbit); +} + +PRofstream::~PRofstream() +{ + flush(); + + delete rdbuf(); +#ifdef _PRSTR_BP + 
_PRSTR_BP = 0; +#endif +} + +streambuf * +PRofstream::setbuf(char * ptr, int len) +{ + if ((is_open()) || (!(rdbuf()->setbuf(ptr, len)))){ + clear(rdstate() | ios::failbit); + return 0; + } + return rdbuf(); +} + +void +PRofstream::attach(PRFileDesc *fd) +{ + if (!(rdbuf()->attach(fd))) + clear(rdstate() | ios::failbit); +} + +void +PRofstream::open(const char * name, int mode, int flags) +{ + if (is_open() || !(rdbuf()->open(name, (mode|ios::out), flags))) + clear(rdstate() | ios::failbit); +} + +void +PRofstream::close() +{ + clear((rdbuf()->close()) ? 0 : (rdstate() | ios::failbit)); +} + + + diff --git a/mozilla/parser/htmlparser/src/prstrm.h b/mozilla/parser/htmlparser/src/prstrm.h new file mode 100644 index 00000000000..d19957889ea --- /dev/null +++ b/mozilla/parser/htmlparser/src/prstrm.h @@ -0,0 +1,94 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * + * The contents of this file are subject to the Netscape Public License + * Version 1.0 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/NPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is Netscape Communications + * Corporation. Portions created by Netscape are Copyright (C) 1998 + * Netscape Communications Corporation. All Rights Reserved. + */ + +// The originals are in: nsprpub/lib/pstreams/ +// currently not being built into nspr.. these files will go away. 
+ +#ifndef __PRSTRM +#define __PRSTRM + +#include "prtypes.h" +#include "prio.h" +#include + +#if defined(__GNUC__) +#define _PRSTR_BP _strbuf +#define _PRSTR_DELBUF(x) /* as nothing */ +#define _PRSTR_DELBUF_C(c, x) /* as nothing */ +#elif defined(WIN32) +#define _PRSTR_BP bp +#define _PRSTR_DELBUF(x) delbuf(x) +#define _PRSTR_DELBUF_C(c, x) c::_PRSTR_DELBUF(x) +#elif defined(OSF1) +#define _PRSTR_BP m_psb +#define _PRSTR_DELBUF(x) /* as nothing */ +#define _PRSTR_DELBUF_C(c, x) /* as nothing */ +#else +#define _PRSTR_BP bp +// Unix compilers don't believe in encapsulation +// At least on Solaris this is also ignored +#define _PRSTR_DELBUF(x) delbuf = x +#define _PRSTR_DELBUF_C(c, x) c::_PRSTR_DELBUF(x) +#endif + +class PR_IMPLEMENT(PRfilebuf): public streambuf +{ +public: + PRfilebuf(); + PRfilebuf(PRFileDesc *fd); + PRfilebuf(PRFileDesc *fd, char * buffptr, int bufflen); + ~PRfilebuf(); + virtual int overflow(int=EOF); + virtual int underflow(); + virtual streambuf *setbuf(char *buff, int bufflen); + virtual streampos seekoff(streamoff, ios::seek_dir, int); + virtual int sync(); + PRfilebuf *open(const char *name, int mode, int flags); + PRfilebuf *attach(PRFileDesc *fd); + PRfilebuf *close(); + int is_open() const {return (_fd != 0);} + PRFileDesc *fd(){return _fd;} + +private: + PRFileDesc * _fd; + PRBool _opened; + PRBool _allocated; +}; + + +class PR_IMPLEMENT(PRofstream) : public ostream { +public: + PRofstream(); + PRofstream(const char *, int mode=ios::out, int flags = 0); + PRofstream(PRFileDesc *); + PRofstream(PRFileDesc *, char *, int); + ~PRofstream(); + + streambuf * setbuf(char *, int); + PRfilebuf* rdbuf() { return (PRfilebuf*) ios::rdbuf(); } + + void attach(PRFileDesc *); + PRFileDesc *fd() {return rdbuf()->fd();} + + int is_open(){return rdbuf()->is_open();} + void open(const char *, int =ios::out, int = 0); + void close(); +}; + +#endif /* __PRSTRM */ \ No newline at end of file