66985 - Do not open BODY for INPUT type=hidden. For compatibility 72639 - Discard nested FORMS 2749 - Enable Strict comment parsing. Replace PRBools with bit operation. nsScanner Append() should return nsresult not PRBool. r=heikki sr=jst git-svn-id: svn://10.0.0.236/trunk@90964 18797224-902f-48f8-a5cc-f745e15eee43
372 lines
12 KiB
C++
372 lines
12 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/*
|
|
* The contents of this file are subject to the Netscape Public
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code is mozilla.org code.
|
|
*
|
|
* The Initial Developer of the Original Code is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All
|
|
* Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*/
|
|
|
|
|
|
/**
 * MODULE NOTES:
 * @update  gess 4/1/98
 *
 * The scanner is a low-level service class that knows
 * how to consume characters out of an (internal) stream.
 * This class also offers a series of utility methods
 * that most tokenizers want, such as ReadUntil(),
 * ReadWhile() and SkipWhitespace().
 */
|
|
|
|
|
|
#ifndef SCANNER
|
|
#define SCANNER
|
|
|
|
#include "nsString.h"
|
|
#include "nsIParser.h"
|
|
#include "prtypes.h"
|
|
#include "nsIUnicodeDecoder.h"
|
|
#include "nsFileStream.h"
|
|
#include "nsSlidingString.h"
|
|
|
|
class nsScannerString : public nsSlidingString {
|
|
public:
|
|
nsScannerString(PRUnichar* aStorageStart,
|
|
PRUnichar* aDataEnd,
|
|
PRUnichar* aStorageEnd);
|
|
|
|
virtual void UngetReadable(const nsAReadableString& aReadable, const nsReadingIterator<PRUnichar>& aCurrentPosition) { InsertReadable(aReadable,aCurrentPosition); }
|
|
virtual void ReplaceCharacter(nsReadingIterator<PRUnichar>& aPosition,
|
|
PRUnichar aChar);
|
|
};
|
|
|
|
class nsScanner {
|
|
public:
|
|
|
|
/**
|
|
* Use this constructor if you want i/o to be based on
|
|
* a single string you hand in during construction.
|
|
* This short cut was added for Javascript.
|
|
*
|
|
* @update ftang 3/02/99
|
|
* @param aCharset charset
|
|
* @param aCharsetSource - where the charset info came from
|
|
* @param aMode represents the parser mode (nav, other)
|
|
* @return
|
|
*/
|
|
nsScanner(nsString& anHTMLString, const nsString& aCharset, nsCharsetSource aSource);
|
|
|
|
/**
|
|
* Use this constructor if you want i/o to be based on
|
|
* a file (therefore a stream) or just data you provide via Append().
|
|
*
|
|
* @update ftang 3/02/99
|
|
* @param aCharset charset
|
|
* @param aCharsetSource - where the charset info came from
|
|
* @param aMode represents the parser mode (nav, other)
|
|
* @return
|
|
*/
|
|
nsScanner(nsString& aFilename,PRBool aCreateStream, const nsString& aCharset, nsCharsetSource aSource);
|
|
|
|
/**
|
|
* Use this constructor if you want i/o to be stream based.
|
|
*
|
|
* @update ftang 3/02/99
|
|
* @param aCharset charset
|
|
* @param aCharsetSource - where the charset info came from
|
|
* @param aMode represents the parser mode (nav, other)
|
|
* @return
|
|
*/
|
|
nsScanner(nsString& aFilename, nsInputStream& aStream, const nsString& aCharset, nsCharsetSource aSource);
|
|
|
|
|
|
~nsScanner();
|
|
|
|
/**
|
|
* retrieve next char from internal input stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param ch is the char to accept new value
|
|
* @return error code reflecting read status
|
|
*/
|
|
nsresult GetChar(PRUnichar& ch);
|
|
|
|
/**
|
|
* peek ahead to consume next char from scanner's internal
|
|
* input buffer
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param ch is the char to accept new value
|
|
* @return error code reflecting read status
|
|
*/
|
|
nsresult Peek(PRUnichar& ch, PRUint32 aOffset=0);
|
|
|
|
nsresult Peek(nsAWritableString& aStr, PRInt32 aNumChars);
|
|
|
|
/**
|
|
* Skip over chars as long as they're in aSkipSet
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param set of chars to be skipped
|
|
* @return error code
|
|
*/
|
|
nsresult SkipOver(nsString& SkipChars);
|
|
|
|
/**
|
|
* Skip over chars as long as they equal given char
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param char to be skipped
|
|
* @return error code
|
|
*/
|
|
nsresult SkipOver(PRUnichar aSkipChar);
|
|
|
|
/**
|
|
* Skip over chars until they're in aValidSet
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aValid set contains chars you're looking for
|
|
* @return error code
|
|
*/
|
|
nsresult SkipTo(nsString& aValidSet);
|
|
|
|
/**
|
|
* Skip over chars as long as they're in aSequence
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param contains sequence to be skipped
|
|
* @return error code
|
|
*/
|
|
nsresult SkipPast(nsString& aSequence);
|
|
|
|
/**
|
|
* Skip whitespace on scanner input stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @return error status
|
|
*/
|
|
nsresult SkipWhitespace(void);
|
|
|
|
/**
|
|
* Determine if the scanner has reached EOF.
|
|
* This method can also cause the buffer to be filled
|
|
* if it happens to be empty
|
|
*
|
|
* @update gess 3/25/98
|
|
* @return PR_TRUE upon eof condition
|
|
*/
|
|
nsresult Eof(void);
|
|
|
|
/**
|
|
* Consume characters until you find the terminal char
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aString receives new data from stream
|
|
* @param addTerminal tells us whether to append terminal to aString
|
|
* @return error code
|
|
*/
|
|
nsresult GetIdentifier(nsString& aString,PRBool allowPunct=PR_FALSE);
|
|
nsresult ReadIdentifier(nsString& aString,PRBool allowPunct=PR_FALSE);
|
|
nsresult ReadIdentifier(nsReadingIterator<PRUnichar>& aStart,
|
|
nsReadingIterator<PRUnichar>& aEnd,
|
|
PRBool allowPunct=PR_FALSE);
|
|
nsresult ReadNumber(nsString& aString);
|
|
nsresult ReadNumber(nsReadingIterator<PRUnichar>& aStart,
|
|
nsReadingIterator<PRUnichar>& aEnd);
|
|
nsresult ReadWhitespace(nsString& aString);
|
|
nsresult ReadWhitespace(nsReadingIterator<PRUnichar>& aStart,
|
|
nsReadingIterator<PRUnichar>& aEnd);
|
|
|
|
/**
|
|
* Consume characters until you find the terminal char
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aString receives new data from stream
|
|
* @param aTerminal contains terminating char
|
|
* @param addTerminal tells us whether to append terminal to aString
|
|
* @return error code
|
|
*/
|
|
nsresult ReadUntil(nsString& aString,PRUnichar aTerminal,PRBool addTerminal);
|
|
|
|
/**
|
|
* Consume characters until you find one contained in given
|
|
* terminal set.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aString receives new data from stream
|
|
* @param aTermSet contains set of terminating chars
|
|
* @param addTerminal tells us whether to append terminal to aString
|
|
* @return error code
|
|
*/
|
|
nsresult ReadUntil(nsString& aString,nsString& aTermSet,PRBool addTerminal);
|
|
nsresult ReadUntil(nsString& aString,nsCString& aTermSet,PRBool addTerminal);
|
|
nsresult ReadUntil(nsString& aString,const char* aTermSet,PRBool addTerminal);
|
|
nsresult ReadUntil(nsReadingIterator<PRUnichar>& aStart, nsReadingIterator<PRUnichar>& aEnd, nsString& aTerminalSet,PRBool addTerminal);
|
|
|
|
|
|
/**
|
|
* Consume characters while they're members of anInputSet
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aString receives new data from stream
|
|
* @param anInputSet contains valid chars
|
|
* @param addTerminal tells us whether to append terminal to aString
|
|
* @return error code
|
|
*/
|
|
nsresult ReadWhile(nsString& aString,nsString& anInputSet,PRBool addTerminal);
|
|
|
|
/**
|
|
* Records current offset position in input stream. This allows us
|
|
* to back up to this point if the need should arise, such as when
|
|
* tokenization gets interrupted.
|
|
*
|
|
* @update gess 5/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void Mark(void);
|
|
|
|
/**
|
|
* Resets current offset position of input stream to marked position.
|
|
* This allows us to back up to this point if the need should arise,
|
|
* such as when tokenization gets interrupted.
|
|
* NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
|
|
*
|
|
* @update gess 5/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void RewindToMark(void);
|
|
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update harishd 01/12/99
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRBool UngetReadable(const nsAReadableString& aBuffer);
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 5/13/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult Append(const nsAReadableString& aBuffer);
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 5/21/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult Append(const char* aBuffer, PRUint32 aLen);
|
|
|
|
/**
|
|
* Call this to copy bytes out of the scanner that have not yet been consumed
|
|
* by the tokenization process.
|
|
*
|
|
* @update gess 5/12/98
|
|
* @param aCopyBuffer is where the scanner buffer will be copied to
|
|
* @return nada
|
|
*/
|
|
void CopyUnusedData(nsString& aCopyBuffer);
|
|
|
|
/**
|
|
* Retrieve the name of the file that the scanner is reading from.
|
|
* In some cases, it's just a given name, because the scanner isn't
|
|
* really reading from a file.
|
|
*
|
|
* @update gess 5/12/98
|
|
* @return
|
|
*/
|
|
nsString& GetFilename(void);
|
|
|
|
static void SelfTest();
|
|
|
|
/**
|
|
* Use this setter to change the scanner's unicode decoder
|
|
*
|
|
* @update ftang 3/02/99
|
|
* @param aCharset a normalized (alias resolved) charset name
|
|
* @param aCharsetSource- where the charset info came from
|
|
* @return
|
|
*/
|
|
nsresult SetDocumentCharset(const nsString& aCharset, nsCharsetSource aSource);
|
|
|
|
void BindSubstring(nsSlidingSubstring& aSubstring, const nsReadingIterator<PRUnichar>& aStart, const nsReadingIterator<PRUnichar>& aEnd);
|
|
void CurrentPosition(nsReadingIterator<PRUnichar>& aPosition);
|
|
void EndReading(nsReadingIterator<PRUnichar>& aPosition);
|
|
void SetPosition(nsReadingIterator<PRUnichar>& aPosition,
|
|
PRBool aTruncate = PR_FALSE,
|
|
PRBool aReverse = PR_FALSE);
|
|
void ReplaceCharacter(nsReadingIterator<PRUnichar>& aPosition,
|
|
PRUnichar aChar);
|
|
|
|
/**
|
|
* Internal method used to cause the internal buffer to
|
|
* be filled with data.
|
|
*
|
|
* @update gess4/3/98
|
|
*/
|
|
PRBool IsIncremental(void) {return mIncremental;}
|
|
void SetIncremental(PRBool anIncrValue) {mIncremental=anIncrValue;}
|
|
|
|
PRInt32 GetNewlinesSkipped(void) { return mNewlinesSkipped; }
|
|
|
|
protected:
|
|
|
|
|
|
enum {eBufferSizeThreshold=0x1000}; //4K
|
|
|
|
/**
|
|
* Internal method used to cause the internal buffer to
|
|
* be filled with data.
|
|
*
|
|
* @update gess4/3/98
|
|
*/
|
|
nsresult FillBuffer(void);
|
|
|
|
void AppendToBuffer(PRUnichar* aStorageStart,
|
|
PRUnichar* aDataEnd,
|
|
PRUnichar* aStorageEnd);
|
|
|
|
nsInputStream* mInputStream;
|
|
nsScannerString* mSlidingBuffer;
|
|
nsReadingIterator<PRUnichar> mCurrentPosition; // The position we will next read from in the scanner buffer
|
|
nsReadingIterator<PRUnichar> mMarkPosition; // The position last marked (we may rewind to here)
|
|
nsReadingIterator<PRUnichar> mEndPosition; // The current end of the scanner buffer
|
|
nsString mFilename;
|
|
PRUint32 mCountRemaining; // The number of bytes still to be read
|
|
// from the scanner buffer
|
|
PRUint32 mTotalRead;
|
|
PRBool mOwnsStream;
|
|
PRBool mIncremental;
|
|
nsCharsetSource mCharsetSource;
|
|
nsString mCharset;
|
|
nsIUnicodeDecoder *mUnicodeDecoder;
|
|
PRInt32 mNewlinesSkipped;
|
|
};
|
|
|
|
#endif
|
|
|
|
|