r=dveditz, sr=scc git-svn-id: svn://10.0.0.236/trunk@92666 18797224-902f-48f8-a5cc-f745e15eee43
921 lines
31 KiB
C++
921 lines
31 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/*
|
|
* The contents of this file are subject to the Netscape Public
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code is mozilla.org code.
|
|
*
|
|
* The Initial Developer of the Original Code is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All
|
|
* Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*/
|
|
|
|
|
|
/**
|
|
* MODULE NOTES:
|
|
* @update gess 4/1/98
|
|
*
|
|
*/
|
|
|
|
#include "nsExpatTokenizer.h"
|
|
#include "nsScanner.h"
|
|
#include "nsDTDUtils.h"
|
|
#include "nsParserError.h"
|
|
#include "nsIParser.h"
|
|
#include "prlog.h"
|
|
|
|
#include "prmem.h"
|
|
#include "nsIUnicharInputStream.h"
|
|
#include "nsNetUtil.h"
|
|
#include "nsIServiceManager.h"
|
|
#include "nsCOMPtr.h"
|
|
#include "nsSpecialSystemDirectory.h"
|
|
#include "nsIURL.h"
|
|
|
|
typedef struct _XMLParserState {
|
|
XML_Parser parser;
|
|
nsScanner* scanner;
|
|
const PRUnichar* bufferStart;
|
|
const PRUnichar* bufferEnd;
|
|
nsReadingIterator<PRUnichar> currentIterator;
|
|
nsDeque* tokenDeque;
|
|
nsTokenAllocator* tokenAllocator;
|
|
nsString doctypeText;
|
|
PRBool indoctype;
|
|
nsString cdataText;
|
|
PRBool incdata;
|
|
} XMLParserState;
|
|
|
|
/************************************************************************
|
|
And now for the main class -- nsExpatTokenizer...
|
|
************************************************************************/
|
|
|
|
static NS_DEFINE_IID(kHTMLTokenizerIID, NS_HTMLTOKENIZER_IID);
|
|
static NS_DEFINE_IID(kClassIID, NS_EXPATTOKENIZER_IID);
|
|
|
|
static const char* kDTDDirectory = "dtd/";
|
|
static const char kHTMLNameSpaceURI[] = "http://www.w3.org/1999/xhtml";
|
|
|
|
/**
 * @return the interface identifier of the nsExpatTokenizer class
 */
const nsIID& nsExpatTokenizer::GetIID()
{
  const nsIID& classIID = kClassIID;
  return classIID;
}
|
|
|
|
|
|
/**
 * @return the class identifier (NS_EXPATTOKENIZER_CID) of nsExpatTokenizer
 */
const nsIID& nsExpatTokenizer::GetCID()
{
  // Function-local so the identifier is only materialized for this accessor.
  static NS_DEFINE_IID(sExpatTokenizerCID, NS_EXPATTOKENIZER_CID);
  return sExpatTokenizerCID;
}
|
|
|
|
|
|
/**
|
|
* This method gets called as part of our COM-like interfaces.
|
|
* Its purpose is to create an interface to parser object
|
|
* of some type.
|
|
*
|
|
* @update gess 4/8/98
|
|
* @param nsIID id of object to discover
|
|
* @param aInstancePtr ptr to newly discovered interface
|
|
* @return NS_xxx result code
|
|
*/
|
|
nsresult nsExpatTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr)
|
|
{
|
|
if (NULL == aInstancePtr) {
|
|
return NS_ERROR_NULL_POINTER;
|
|
}
|
|
|
|
if(aIID.Equals(NS_GET_IID(nsISupports))) { //do IUnknown...
|
|
*aInstancePtr = (nsExpatTokenizer*)(this);
|
|
}
|
|
else if(aIID.Equals(NS_GET_IID(nsITokenizer))) { //do ITokenizer base class...
|
|
*aInstancePtr = (nsITokenizer*)(this);
|
|
}
|
|
else if(aIID.Equals(kHTMLTokenizerIID)) { //do nsHTMLTokenizer base class...
|
|
*aInstancePtr = (nsHTMLTokenizer*)(this);
|
|
}
|
|
else if(aIID.Equals(kClassIID)) { //do this class...
|
|
*aInstancePtr = (nsExpatTokenizer*)(this);
|
|
}
|
|
else {
|
|
*aInstancePtr=0;
|
|
return NS_NOINTERFACE;
|
|
}
|
|
NS_ADDREF_THIS();
|
|
return NS_OK;
|
|
}
|
|
|
|
/**
|
|
* This method is defined in nsIParser. It is used to
|
|
* cause the COM-like construction of an nsParser.
|
|
*
|
|
* @update gess 4/8/98
|
|
* @param nsIParser** ptr to newly instantiated parser
|
|
* @return NS_xxx error result
|
|
*/
|
|
NS_HTMLPARS nsresult NS_New_Expat_Tokenizer(nsITokenizer** aInstancePtrResult) {
|
|
nsExpatTokenizer* it = new nsExpatTokenizer();
|
|
if (it == 0) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
return it->QueryInterface(kClassIID, (void **) aInstancePtrResult);
|
|
}
|
|
|
|
|
|
// AddRef() and Release() for nsExpatTokenizer are generated by these macros.
NS_IMPL_ADDREF(nsExpatTokenizer)
NS_IMPL_RELEASE(nsExpatTokenizer)
|
|
|
|
/**
|
|
* Sets up the callbacks and user data for the expat parser
|
|
* @update nra 2/24/99
|
|
* @param none
|
|
* @return none
|
|
*/
|
|
void nsExpatTokenizer::SetupExpatParser(void) {
|
|
if (mExpatParser) {
|
|
// Set up the callbacks
|
|
XML_SetElementHandler(mExpatParser, Tokenizer_HandleStartElement, Tokenizer_HandleEndElement);
|
|
XML_SetCharacterDataHandler(mExpatParser, Tokenizer_HandleCharacterData);
|
|
XML_SetProcessingInstructionHandler(mExpatParser, Tokenizer_HandleProcessingInstruction);
|
|
XML_SetDefaultHandlerExpand(mExpatParser, Tokenizer_HandleDefault);
|
|
XML_SetUnparsedEntityDeclHandler(mExpatParser, Tokenizer_HandleUnparsedEntityDecl);
|
|
XML_SetNotationDeclHandler(mExpatParser, Tokenizer_HandleNotationDecl);
|
|
XML_SetExternalEntityRefHandler(mExpatParser, Tokenizer_HandleExternalEntityRef);
|
|
XML_SetCommentHandler(mExpatParser, Tokenizer_HandleComment);
|
|
XML_SetUnknownEncodingHandler(mExpatParser, Tokenizer_HandleUnknownEncoding, NULL);
|
|
XML_SetCdataSectionHandler(mExpatParser, Tokenizer_HandleStartCdataSection,
|
|
Tokenizer_HandleEndCdataSection);
|
|
|
|
XML_SetDoctypeDeclHandler(mExpatParser, Tokenizer_HandleStartDoctypeDecl, Tokenizer_HandleEndDoctypeDecl);
|
|
|
|
// Set up the user data.
|
|
XML_SetUserData(mExpatParser, (void*) mState);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Default constructor
|
|
*
|
|
* @update gess 4/9/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsExpatTokenizer::nsExpatTokenizer(nsString* aURL) : nsHTMLTokenizer() {
|
|
NS_INIT_REFCNT();
|
|
mBytesParsed = 0;
|
|
mState = new XMLParserState;
|
|
mState->tokenAllocator = nsnull;
|
|
mState->parser = nsnull;
|
|
mState->tokenDeque = nsnull;
|
|
mState->indoctype = PR_FALSE;
|
|
mState->incdata = PR_FALSE;
|
|
|
|
nsAutoString buffer; buffer.AssignWithConversion("UTF-16");
|
|
const PRUnichar* encoding = buffer.GetUnicode();
|
|
if (encoding) {
|
|
mExpatParser = XML_ParserCreate((const XML_Char*) encoding);
|
|
if (mExpatParser) {
|
|
#ifdef XML_DTD
|
|
XML_SetParamEntityParsing(mExpatParser, XML_PARAM_ENTITY_PARSING_ALWAYS);
|
|
#endif
|
|
if (aURL)
|
|
XML_SetBase(mExpatParser, (const XML_Char*) aURL->GetUnicode());
|
|
|
|
SetupExpatParser();
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Destructor
|
|
*
|
|
* @update gess 4/9/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsExpatTokenizer::~nsExpatTokenizer(){
|
|
if (mExpatParser) {
|
|
XML_ParserFree(mExpatParser);
|
|
mExpatParser = nsnull;
|
|
}
|
|
|
|
if (mState)
|
|
delete mState;
|
|
}
|
|
|
|
|
|
/*******************************************************************
|
|
Here begins the real working methods for the tokenizer.
|
|
*******************************************************************/
|
|
|
|
/**
 * Records the token allocator in the callback state, then defers to the
 * nsHTMLTokenizer implementation.
 *
 * @param   aIsFinalChunk    forwarded unchanged to the base class
 * @param   aTokenAllocator  allocator the expat callbacks will use
 * @return  the result of nsHTMLTokenizer::WillTokenize
 */
nsresult nsExpatTokenizer::WillTokenize(PRBool aIsFinalChunk,nsTokenAllocator* aTokenAllocator)
{
  mState->tokenAllocator = aTokenAllocator;
  nsresult baseResult = nsHTMLTokenizer::WillTokenize(aIsFinalChunk, aTokenAllocator);
  return baseResult;
}
|
|
|
|
/*
|
|
* Parameters:
|
|
*
|
|
* aSourceBuffer (in): String buffer.
|
|
* aLength (in): Length of input buffer.
|
|
* aOffset (in): Offset in buffer
|
|
* aLine (out): Line on which the character, aSourceBuffer[aOffset], is located.
|
|
*/
|
|
void nsExpatTokenizer::GetLine(const char* aSourceBuffer, PRUint32 aLength,
|
|
PRUint32 aOffset, nsString& aLine)
|
|
{
|
|
/* Figure out the line inside aSourceBuffer that contains character specified by aOffset.
|
|
Copy it into aLine. */
|
|
NS_ASSERTION(aOffset >= 0 && aOffset < aLength, "?");
|
|
/* Assert that the byteIndex and the length of the buffer is even */
|
|
NS_ASSERTION(aOffset % 2 == 0 && aLength % 2 == 0, "?");
|
|
PRUnichar* start = (PRUnichar* ) &aSourceBuffer[aOffset]; /* Will try to find the start of the line */
|
|
PRUnichar* end = (PRUnichar* ) &aSourceBuffer[aOffset]; /* Will try to find the end of the line */
|
|
PRUint32 startIndex = aOffset / sizeof(PRUnichar); /* Track the position of the 'start' pointer into the buffer */
|
|
PRUint32 endIndex = aOffset / sizeof(PRUnichar); /* Track the position of the 'end' pointer into the buffer */
|
|
PRUint32 numCharsInBuffer = aLength / sizeof(PRUnichar);
|
|
PRBool reachedStart;
|
|
PRBool reachedEnd;
|
|
|
|
|
|
/* Use start to find the first new line before the error position and
|
|
end to find the first new line after the error position */
|
|
reachedStart = (startIndex <= 0 || '\n' == *start || '\r' == *start);
|
|
reachedEnd = (endIndex >= numCharsInBuffer || '\n' == *end || '\r' == *end);
|
|
while (!reachedStart || !reachedEnd) {
|
|
if (!reachedStart) {
|
|
start--;
|
|
startIndex--;
|
|
reachedStart = (startIndex <= 0 || '\n' == *start || '\r' == *start);
|
|
}
|
|
if (!reachedEnd) {
|
|
end++;
|
|
endIndex++;
|
|
reachedEnd = (endIndex >= numCharsInBuffer || '\n' == *end || '\r' == *end);
|
|
}
|
|
}
|
|
|
|
aLine.Truncate(0);
|
|
if (startIndex == endIndex) {
|
|
/* Special case if the error is on a line where the only character is a newline */
|
|
// STRING USE WARNING: I have no idea what this is supposed to do; to me it looks like a no-op
|
|
// ... so I'm not going to delete it but I will fix it to conform to the new standard.
|
|
// aLine.Append("");
|
|
aLine.AppendWithConversion("");
|
|
}
|
|
else {
|
|
NS_ASSERTION(endIndex - startIndex >= sizeof(PRUnichar), "?");
|
|
/* At this point, there are two cases. Either the error is on the first line or
|
|
on subsequent lines. If the error is on the first line, startIndex will decrement
|
|
all the way to zero. If not, startIndex will decrement to the position of the
|
|
newline character on the previous line. So, in the first case, the start position
|
|
of the error line = startIndex (== 0). In the second case, the start position of the
|
|
error line = startIndex + 1. In both cases, the end position of the error line will be
|
|
(endIndex - 1). */
|
|
PRUint32 startPosn = (startIndex <= 0) ? startIndex : startIndex + 1;
|
|
|
|
/* At this point, the substring starting at startPosn and ending at (endIndex - 1),
|
|
is the line on which the error occurred. Copy that substring into the error structure. */
|
|
const PRUnichar* unicodeBuffer = (const PRUnichar*) aSourceBuffer;
|
|
aLine.Append(&unicodeBuffer[startPosn], endIndex - startPosn);
|
|
}
|
|
}
|
|
|
|
|
|
/**
 * Builds the human-readable error message.
 *
 * @param   aError        error to describe; when null only the
 *                        "XML Parsing Error: " prefix is produced
 * @param   aErrorString  receives the message (previous content is replaced)
 * @return  NS_OK
 */
static nsresult
CreateErrorText(const nsParserError* aError, nsString& aErrorString)
{
  aErrorString.AssignWithConversion("XML Parsing Error: ");
  if (!aError) {
    return NS_OK;
  }

  // "<description>\nLine Number <line>, Column <column>:"
  aErrorString.Append(aError->description);
  aErrorString.AppendWithConversion("\nLine Number ");
  aErrorString.AppendInt(aError->lineNumber, 10);
  aErrorString.AppendWithConversion(", Column ");
  aErrorString.AppendInt(aError->colNumber, 10);
  aErrorString.AppendWithConversion(":");

  return NS_OK;
}
|
|
|
|
/**
 * Appends the offending source line followed by a marker line of the form
 * "----^" whose caret sits under column aError->colNumber.
 *
 * @param   aError         error whose sourceLine/colNumber are rendered;
 *                         when null, aSourceString is left untouched (this
 *                         mirrors the null handling of CreateErrorText)
 * @param   aSourceString  string the text is appended to
 * @return  NS_OK
 */
static nsresult
CreateSourceText(const nsParserError* aError, nsString& aSourceString)
{
  if (!aError) {
    return NS_OK;
  }

  const PRInt32 errorPosition = aError->colNumber;

  aSourceString.Append(aError->sourceLine);
  aSourceString.AppendWithConversion("\n");
  // colNumber is one based, so (errorPosition - 1) dashes precede the caret.
  for (PRInt32 i = 0; i < errorPosition - 1; i++)
    aSourceString.AppendWithConversion("-");
  aSourceString.AppendWithConversion("^");

  return NS_OK;
}
|
|
|
|
/* Create and add the tokens in the following order to display the error:
|
|
ParserError start token
|
|
Text token containing error message
|
|
SourceText start token
|
|
Text token containing source text
|
|
SourceText end token
|
|
ParserError end token
|
|
*/
|
|
nsresult
|
|
nsExpatTokenizer::AddErrorMessageTokens(nsParserError* aError)
|
|
{
|
|
nsresult rv = NS_OK;
|
|
CToken* newToken = mState->tokenAllocator->CreateTokenOfType(eToken_start, eHTMLTag_parsererror);
|
|
AddToken(newToken, NS_OK, mState->tokenDeque, mState->tokenAllocator);
|
|
|
|
CAttributeToken* attrToken = (CAttributeToken*)
|
|
mState->tokenAllocator->CreateTokenOfType(eToken_attribute, eHTMLTag_unknown, NS_ConvertASCIItoUCS2(kHTMLNameSpaceURI));
|
|
attrToken->SetKey(NS_LITERAL_STRING("xmlns"));
|
|
newToken->SetAttributeCount(1);
|
|
newToken = (CToken*) attrToken;
|
|
AddToken(newToken, NS_OK, mState->tokenDeque, mState->tokenAllocator);
|
|
|
|
nsAutoString textStr;
|
|
CreateErrorText(aError, textStr);
|
|
newToken = mState->tokenAllocator->CreateTokenOfType(eToken_text, eHTMLTag_unknown, textStr);
|
|
AddToken(newToken, NS_OK, mState->tokenDeque, mState->tokenAllocator);
|
|
|
|
newToken = mState->tokenAllocator->CreateTokenOfType(eToken_start, eHTMLTag_sourcetext);
|
|
AddToken(newToken, NS_OK, mState->tokenDeque, mState->tokenAllocator);
|
|
|
|
textStr.Truncate();
|
|
CreateSourceText(aError, textStr);
|
|
newToken = mState->tokenAllocator->CreateTokenOfType(eToken_text, eHTMLTag_unknown,textStr);
|
|
AddToken(newToken, NS_OK, mState->tokenDeque, mState->tokenAllocator);
|
|
|
|
newToken = mState->tokenAllocator->CreateTokenOfType(eToken_end, eHTMLTag_sourcetext);
|
|
AddToken(newToken, NS_OK, mState->tokenDeque, mState->tokenAllocator);
|
|
|
|
newToken = mState->tokenAllocator->CreateTokenOfType(eToken_end, eHTMLTag_parsererror);
|
|
AddToken(newToken, NS_OK, mState->tokenDeque, mState->tokenAllocator);
|
|
|
|
return rv;
|
|
}
|
|
|
|
/*
|
|
* Called immediately after an error has occurred in expat. Creates
|
|
* tokens to display the error and an error token to the token stream.
|
|
*
|
|
* The error tokens will end up creating the following content model
|
|
* in the content sink:
|
|
*
|
|
* <ParserError>
|
|
* XML Error: "contents of aError->description"
|
|
* Line Number: "contents of aError->lineNumber"
|
|
* <SourceText>
|
|
* "Contents of aError->sourceLine"
|
|
* "^ pointing at the error location"
|
|
* </SourceText>
|
|
* </ParserError>
|
|
*
|
|
*/
|
|
nsresult
|
|
nsExpatTokenizer::PushXMLErrorTokens(const char *aBuffer, PRUint32 aLength, PRBool aIsFinal)
|
|
{
|
|
CErrorToken* errorToken= (CErrorToken *) mState->tokenAllocator->CreateTokenOfType(eToken_error, eHTMLTag_unknown);
|
|
nsParserError *error = new nsParserError;
|
|
nsresult rv = NS_OK;
|
|
|
|
if (error && errorToken) {
|
|
/* Fill in the values of the error token */
|
|
error->code = XML_GetErrorCode(mExpatParser);
|
|
error->lineNumber = XML_GetCurrentLineNumber(mExpatParser);
|
|
// Adjust the column number so that it is one based rather than zero based.
|
|
error->colNumber = XML_GetCurrentColumnNumber(mExpatParser) + 1;
|
|
error->description.AssignWithConversion(XML_ErrorString(error->code));
|
|
if (!aIsFinal) {
|
|
PRInt32 byteIndexRelativeToFile = 0;
|
|
byteIndexRelativeToFile = XML_GetCurrentByteIndex(mExpatParser);
|
|
GetLine(aBuffer, aLength, (byteIndexRelativeToFile - mBytesParsed), error->sourceLine);
|
|
}
|
|
else {
|
|
error->sourceLine.Append(mLastLine);
|
|
}
|
|
|
|
errorToken->SetError(error);
|
|
|
|
|
|
/* Add the error token */
|
|
CToken* newToken = (CToken*) errorToken;
|
|
AddToken(newToken, NS_OK, mState->tokenDeque, mState->tokenAllocator);
|
|
|
|
/* Add the error message tokens */
|
|
AddErrorMessageTokens(error);
|
|
}
|
|
|
|
return rv;
|
|
}
|
|
|
|
/**
 * Feeds one buffer to expat.
 *
 * @param   aBuffer   bytes to parse, or null together with aLength == 0
 * @param   aLength   length of aBuffer in bytes
 * @param   aIsFinal  passed to XML_Parse as its "is final" flag
 * @return  NS_OK when expat accepted the buffer,
 *          NS_ERROR_HTMLPARSER_STOPPARSING when expat reported an error
 *          (error tokens have been queued in that case),
 *          NS_ERROR_FAILURE when there is no expat parser
 */
nsresult nsExpatTokenizer::ParseXMLBuffer(const char* aBuffer, PRUint32 aLength, PRBool aIsFinal)
{
  NS_ASSERTION((aBuffer && aLength) || (aBuffer == nsnull && aLength == 0), "?");

  if (!mExpatParser) {
    return NS_ERROR_FAILURE;
  }

  // Holds a reference to this object for the duration of the parse call.
  nsCOMPtr<nsExpatTokenizer> me=this;

  nsresult result = NS_OK;
  if (XML_Parse(mExpatParser, aBuffer, aLength, aIsFinal)) {
    if (aBuffer && aLength) {
      // Cache the last line in the buffer
      GetLine(aBuffer, aLength, aLength - sizeof(PRUnichar), mLastLine);
    }
  }
  else {
    PushXMLErrorTokens(aBuffer, aLength, aIsFinal);
    result = NS_ERROR_HTMLPARSER_STOPPARSING;
  }

  mBytesParsed += aLength;
  return result;
}
|
|
|
|
|
|
/**
|
|
* This method repeatedly called by the tokenizer.
|
|
* Each time, we determine the kind of token were about to
|
|
* read, and then we call the appropriate method to handle
|
|
* that token type.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @param anErrorCode: arg that will hold error condition
|
|
* @return new token or null
|
|
*/
|
|
nsresult nsExpatTokenizer::ConsumeToken(nsScanner& aScanner,PRBool& aFlushTokens) {
|
|
|
|
// return nsHTMLTokenizer::ConsumeToken(aScanner);
|
|
|
|
// Ask the scanner to send us all the data it has
|
|
// scanned and pass that data to expat.
|
|
nsresult result = NS_OK;
|
|
nsReadingIterator<PRUnichar> start, end;
|
|
aScanner.CurrentPosition(start);
|
|
aScanner.EndReading(end);
|
|
mState->tokenDeque = &mTokenDeque;
|
|
mState->parser = mExpatParser;
|
|
mState->scanner = &aScanner;
|
|
|
|
while (start != end) {
|
|
PRUint32 fragLength = PRUint32(start.size_forward());
|
|
PRUint32 bufLength = fragLength * sizeof(PRUnichar);
|
|
const PRUnichar* expatBuffer = start.get();
|
|
|
|
mState->bufferStart = expatBuffer;
|
|
mState->bufferEnd = expatBuffer + fragLength;
|
|
mState->currentIterator = start;
|
|
result = ParseXMLBuffer((const char *)expatBuffer, bufLength);
|
|
if (NS_FAILED(result)) return result;
|
|
|
|
start.advance(fragLength);
|
|
}
|
|
|
|
aScanner.SetPosition(end, PR_TRUE);
|
|
|
|
if(NS_OK==result)
|
|
result=aScanner.Eof();
|
|
|
|
mState->scanner = nsnull;
|
|
mState->bufferStart = mState->bufferEnd = nsnull;
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
 * Makes an empty parse call so that expat is told whether the chunk just
 * tokenized was the final one.
 *
 * @param   aIsFinalChunk  forwarded as the "is final" flag
 * @return  the result of ParseXMLBuffer
 */
nsresult nsExpatTokenizer::DidTokenize(PRBool aIsFinalChunk)
{
  const char* noBuffer = nsnull;
  nsresult rv = ParseXMLBuffer(noBuffer, 0, aIsFinalChunk);
  return rv;
}
|
|
|
|
/**
|
|
*
|
|
* @update gess12/29/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void nsExpatTokenizer::FrontloadMisplacedContent(nsDeque& aDeque){
|
|
}
|
|
|
|
/***************************************/
|
|
/* Expat Callback Functions start here */
|
|
/***************************************/
|
|
|
|
void Tokenizer_HandleStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
|
|
XMLParserState* state = (XMLParserState*) userData;
|
|
CToken* theToken = state->tokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_unknown, nsLiteralString((PRUnichar*)name));
|
|
if(theToken) {
|
|
// If an ID attribute exists for this element, set it on the start token
|
|
PRInt32 index = XML_GetIdAttributeIndex(state->parser);
|
|
if (index >= 0) {
|
|
nsCOMPtr<nsIAtom> attributeAtom = dont_AddRef(NS_NewAtom((const PRUnichar *) atts[index]));
|
|
CStartToken* startToken = NS_STATIC_CAST(CStartToken*, theToken);
|
|
startToken->SetIDAttributeAtom(attributeAtom);
|
|
}
|
|
|
|
nsExpatTokenizer::AddToken(theToken, NS_OK, state->tokenDeque, state->tokenAllocator);
|
|
|
|
// For each attribute on this element, create and add attribute tokens to the token queue
|
|
int theAttrCount=0;
|
|
while(*atts){
|
|
theAttrCount++;
|
|
CAttributeToken* theAttrToken = (CAttributeToken*)
|
|
state->tokenAllocator->CreateTokenOfType(eToken_attribute, eHTMLTag_unknown, nsLiteralString((PRUnichar*)atts[1]));
|
|
if(theAttrToken){
|
|
PRUnichar* ptr = (PRUnichar*)atts[0];
|
|
if ((ptr >= state->bufferStart) && (ptr < state->bufferEnd)) {
|
|
PRUint32 len = nsCRT::strlen(ptr);
|
|
nsReadingIterator<PRUnichar> start, end;
|
|
start = state->currentIterator;
|
|
start.advance(ptr - state->bufferStart);
|
|
end = start;
|
|
end.advance(len);
|
|
theAttrToken->BindKey(state->scanner, start, end);
|
|
}
|
|
else {
|
|
theAttrToken->SetKey(nsLiteralString(ptr));
|
|
}
|
|
}
|
|
CToken* theTok=(CToken*)theAttrToken;
|
|
nsExpatTokenizer::AddToken(theTok, NS_OK, state->tokenDeque, state->tokenAllocator);
|
|
atts += 2;
|
|
}
|
|
theToken->SetAttributeCount(theAttrCount);
|
|
}
|
|
else{
|
|
//THROW A HUGE ERROR IF WE CANT CREATE A TOKEN!
|
|
}
|
|
}
|
|
|
|
void Tokenizer_HandleEndElement(void *userData, const XML_Char *name) {
|
|
XMLParserState* state = (XMLParserState*) userData;
|
|
CToken* theToken = state->tokenAllocator->CreateTokenOfType(eToken_end,eHTMLTag_unknown, nsLiteralString((PRUnichar *) name));
|
|
if(theToken) {
|
|
nsExpatTokenizer::AddToken(theToken, NS_OK, state->tokenDeque, state->tokenAllocator);
|
|
}
|
|
else{
|
|
//THROW A HUGE ERROR IF WE CANT CREATE A TOKEN!
|
|
}
|
|
}
|
|
|
|
void Tokenizer_HandleCharacterData(void *userData, const XML_Char *s, int len) {
|
|
XMLParserState* state = (XMLParserState*) userData;
|
|
|
|
if (state->incdata) {
|
|
// While we're in a CDATASection, keep appending all strings
|
|
// from expat into it.
|
|
state->cdataText.Append((PRUnichar *) s,len);
|
|
} else {
|
|
CToken* newToken = 0;
|
|
|
|
switch(((PRUnichar*)s)[0]){
|
|
case kNewLine:
|
|
case nsCRT::CR:
|
|
newToken = state->tokenAllocator->CreateTokenOfType(eToken_newline,eHTMLTag_unknown);
|
|
break;
|
|
case kSpace:
|
|
case kTab:
|
|
newToken = state->tokenAllocator->CreateTokenOfType(eToken_whitespace,eHTMLTag_unknown, nsLocalString((PRUnichar*)s, len));
|
|
break;
|
|
default:
|
|
{
|
|
CTextToken* textToken = (CTextToken*)state->tokenAllocator->CreateTokenOfType(eToken_text, eHTMLTag_unknown);
|
|
PRUnichar* ptr = (PRUnichar*)s;
|
|
if ((ptr >= state->bufferStart) && (ptr < state->bufferEnd)) {
|
|
nsReadingIterator<PRUnichar> start, end;
|
|
start = state->currentIterator;
|
|
start.advance(ptr - state->bufferStart);
|
|
end = start;
|
|
end.advance(len);
|
|
textToken->Bind(state->scanner, start, end);
|
|
}
|
|
else {
|
|
textToken->Bind(nsLocalString(ptr, len));
|
|
}
|
|
newToken = textToken;
|
|
}
|
|
}
|
|
|
|
if(newToken) {
|
|
nsExpatTokenizer::AddToken(newToken, NS_OK, state->tokenDeque, state->tokenAllocator);
|
|
}
|
|
else {
|
|
//THROW A HUGE ERROR IF WE CANT CREATE A TOKEN!
|
|
}
|
|
}
|
|
}
|
|
|
|
void Tokenizer_HandleComment(void *userData, const XML_Char *name) {
|
|
XMLParserState* state = (XMLParserState*) userData;
|
|
if (state->indoctype) {
|
|
// We do not want comments popping out of the doctype...
|
|
state->doctypeText.Append(NS_LITERAL_STRING("<!--"));
|
|
state->doctypeText.Append((PRUnichar*)name);
|
|
state->doctypeText.Append(NS_LITERAL_STRING("-->"));
|
|
} else {
|
|
CToken* theToken = state->tokenAllocator->CreateTokenOfType(eToken_comment, eHTMLTag_unknown, nsLiteralString((PRUnichar*)name));
|
|
if(theToken) {
|
|
nsExpatTokenizer::AddToken(theToken, NS_OK, state->tokenDeque, state->tokenAllocator);
|
|
}
|
|
else{
|
|
//THROW A HUGE ERROR IF WE CANT CREATE A TOKEN!
|
|
}
|
|
}
|
|
}
|
|
|
|
void Tokenizer_HandleStartCdataSection(void *userData) {
|
|
XMLParserState* state = (XMLParserState*) userData;
|
|
|
|
state->incdata = PR_TRUE;
|
|
}
|
|
|
|
void Tokenizer_HandleEndCdataSection(void *userData) {
|
|
XMLParserState* state = (XMLParserState*) userData;
|
|
CToken* cdataToken = state->tokenAllocator->CreateTokenOfType(eToken_cdatasection,
|
|
eHTMLTag_unknown,
|
|
state->cdataText);
|
|
|
|
// We've reached the end of the current CDATA section. Push the current
|
|
// CDATA token onto the token queue
|
|
nsExpatTokenizer::AddToken(cdataToken, NS_OK, state->tokenDeque, state->tokenAllocator);
|
|
|
|
state->incdata = PR_FALSE;
|
|
state->cdataText.Truncate();
|
|
}
|
|
|
|
void Tokenizer_HandleProcessingInstruction(void *userData,
|
|
const XML_Char *target,
|
|
const XML_Char *data)
|
|
{
|
|
XMLParserState* state = (XMLParserState*) userData;
|
|
nsAutoString theString;
|
|
theString. AppendWithConversion("<?");
|
|
theString.Append((PRUnichar *) target);
|
|
if(data) {
|
|
theString.AppendWithConversion(" ");
|
|
theString.Append((PRUnichar *) data);
|
|
}
|
|
theString.AppendWithConversion("?>");
|
|
|
|
CToken* theToken = state->tokenAllocator->CreateTokenOfType(eToken_instruction,eHTMLTag_unknown, theString);
|
|
if(theToken) {
|
|
nsExpatTokenizer::AddToken(theToken, NS_OK, state->tokenDeque, state->tokenAllocator);
|
|
}
|
|
else{
|
|
//THROW A HUGE ERROR IF WE CANT CREATE A TOKEN!
|
|
}
|
|
}
|
|
|
|
void Tokenizer_HandleDefault(void *userData, const XML_Char *s, int len) {
|
|
XMLParserState* state = (XMLParserState*) userData;
|
|
if (state->indoctype) {
|
|
state->doctypeText.Append((PRUnichar*)s, len);
|
|
}
|
|
else {
|
|
nsAutoString str((PRUnichar *)s, len);
|
|
PRInt32 offset = -1;
|
|
CToken* newLine = 0;
|
|
|
|
while ((offset = str.FindChar('\n', PR_FALSE, offset + 1)) != -1) {
|
|
newLine = state->tokenAllocator->CreateTokenOfType(eToken_newline, eHTMLTag_unknown);
|
|
nsExpatTokenizer::AddToken(newLine, NS_OK, state->tokenDeque, state->tokenAllocator);
|
|
}
|
|
}
|
|
}
|
|
|
|
void Tokenizer_HandleUnparsedEntityDecl(void *userData,
|
|
const XML_Char *entityName,
|
|
const XML_Char *base,
|
|
const XML_Char *systemId,
|
|
const XML_Char *publicId,
|
|
const XML_Char *notationName) {
|
|
NS_NOTYETIMPLEMENTED("Error: Tokenizer_HandleUnparsedEntityDecl() not yet implemented.");
|
|
}
|
|
|
|
|
|
// aDTD is an in/out parameter. Returns true if the aDTD is a chrome url or if the
|
|
// filename contained within the url exists in the special DTD directory ("dtd"
|
|
// relative to the current process directory). For the latter case, aDTD is set
|
|
// to the file: url that points to the DTD file found in the local DTD directory.
|
|
static PRBool
|
|
IsLoadableDTD(nsCOMPtr<nsIURI>* aDTD)
|
|
{
|
|
PRBool isLoadable = PR_FALSE;
|
|
nsresult res = NS_OK;
|
|
|
|
if (!aDTD || !*aDTD) {
|
|
NS_ASSERTION(0, "Null parameter.");
|
|
return PR_FALSE;
|
|
}
|
|
|
|
// Return true if the url is a chrome url
|
|
res = (*aDTD)->SchemeIs("chrome", &isLoadable);
|
|
|
|
// If the url is not a chrome url, check to see if a DTD file of the same name
|
|
// exists in the special DTD directory
|
|
if (!isLoadable) {
|
|
nsCOMPtr<nsIURL> dtdURL;
|
|
dtdURL = do_QueryInterface(*aDTD, &res);
|
|
if (NS_SUCCEEDED(res)) {
|
|
char* fileName = nsnull;
|
|
res = dtdURL->GetFileName(&fileName);
|
|
if (NS_SUCCEEDED(res) && nsnull != fileName) {
|
|
nsSpecialSystemDirectory dtdPath(nsSpecialSystemDirectory::OS_CurrentProcessDirectory);
|
|
nsString path; path.AssignWithConversion(kDTDDirectory);
|
|
path.AppendWithConversion(fileName);
|
|
dtdPath += path;
|
|
if (dtdPath.Exists()) {
|
|
// The DTD was found in the local DTD directory.
|
|
// Set aDTD to a file: url pointing to the local DTD
|
|
nsFileURL dtdFile(dtdPath);
|
|
nsCOMPtr<nsIURI> dtdURI;
|
|
res = NS_NewURI(getter_AddRefs(dtdURI), dtdFile.GetURLString());
|
|
if (NS_SUCCEEDED(res) && nsnull != dtdURI) {
|
|
*aDTD = dtdURI;
|
|
isLoadable = PR_TRUE;
|
|
}
|
|
}
|
|
nsCRT::free(fileName);
|
|
}
|
|
}
|
|
}
|
|
|
|
return isLoadable;
|
|
}
|
|
|
|
/**
 * Resolves aURLStr against aBaseURL and, if the result is a loadable DTD
 * (see IsLoadableDTD), opens an input stream on it.
 *
 * @param   aURLStr   url to open, possibly relative
 * @param   aBaseURL  base url used to resolve aURLStr
 * @param   in        receives the opened stream
 * @param   aAbsURL   when non-null, the spec of the url that was opened is
 *                    appended to it
 * @return  the result of NS_OpenURI when the url is loadable,
 *          NS_ERROR_NOT_IMPLEMENTED when it is not,
 *          otherwise the result of the failing url construction
 */
nsresult
nsExpatTokenizer::OpenInputStream(const nsString& aURLStr,
                                  const nsString& aBaseURL,
                                  nsIInputStream** in,
                                  nsString* aAbsURL)
{
  nsCOMPtr<nsIURI> baseURI;
  nsresult rv = NS_NewURI(getter_AddRefs(baseURI), aBaseURL);
  if (!NS_SUCCEEDED(rv) || nsnull == baseURI) {
    return rv;
  }

  nsCOMPtr<nsIURI> uri;
  rv = NS_NewURI(getter_AddRefs(uri), aURLStr, baseURI);
  if (!NS_SUCCEEDED(rv) || !uri) {
    return rv;
  }

  // Note: IsLoadableDTD may replace uri with a file: url of a local DTD.
  if (!IsLoadableDTD(address_of(uri))) {
    return NS_ERROR_NOT_IMPLEMENTED;
  }

  rv = NS_OpenURI(in, uri);

  // Report the absolute url that was opened. Both the output string and
  // the spec returned by the uri may be missing, so check before use.
  if (aAbsURL) {
    char* absURL = nsnull;
    nsresult specResult = uri->GetSpec(&absURL);
    if (NS_SUCCEEDED(specResult) && absURL) {
      aAbsURL->AppendWithConversion(absURL);
    }
    if (absURL) {
      nsCRT::free(absURL);
    }
  }

  return rv;
}
|
|
|
|
/**
 * Reads a whole input stream through a UTF-8 converter stream into a newly
 * allocated buffer of PRUnichar characters.
 *
 * @param   in      stream to read
 * @param   uniBuf  receives the buffer (allocated with PR_Malloc/PR_Realloc;
 *                  the caller frees it). Set to null when a failure code is
 *                  returned.
 * @param   retLen  receives the number of characters stored in uniBuf
 * @return  the result of the last read, the result of creating the
 *          converter stream if that failed, or NS_ERROR_OUT_OF_MEMORY
 */
nsresult nsExpatTokenizer::LoadStream(nsIInputStream* in,
                                      PRUnichar*& uniBuf,
                                      PRUint32& retLen)
{
  // Number of characters requested per read; the buffer grows by the same
  // amount after every read so the next read always has room.
  const PRUint32 kCharsPerRead = 1024;

  // Do not depend on the caller having initialized the out-parameters:
  // retLen doubles as the write offset into the buffer.
  uniBuf = nsnull;
  retLen = 0;

  nsIUnicharInputStream *uniIn = nsnull;
  nsAutoString utf8; utf8.AssignWithConversion("UTF-8");

  nsresult res = NS_NewConverterStream(&uniIn,
                                       nsnull,
                                       in,
                                       kCharsPerRead,
                                       &utf8);
  if (NS_FAILED(res)) return res;

  PRUint32 bufsize = kCharsPerRead * sizeof(PRUnichar);
  PRUnichar *buf = (PRUnichar *) PR_Malloc(bufsize);
  if (!buf) {
    NS_RELEASE(uniIn);
    return NS_ERROR_OUT_OF_MEMORY;
  }

  PRUint32 readCount = 0;
  while (NS_OK == (res=uniIn->Read(buf, retLen, kCharsPerRead, &readCount))
         && readCount != 0) {
    retLen += readCount;
    bufsize += kCharsPerRead * sizeof(PRUnichar);
    // Keep the old pointer until the reallocation is known to have worked,
    // otherwise the data read so far would be leaked on failure.
    PRUnichar *grown = (PRUnichar *) PR_Realloc(buf, bufsize);
    if (!grown) {
      res = NS_ERROR_OUT_OF_MEMORY;
      break;
    }
    buf = grown;
  }/* while */

  NS_RELEASE(uniIn);

  if (NS_FAILED(res)) {
    // Callers only look at the buffer on success, so release it here.
    PR_FREEIF(buf);
    retLen = 0;
    return res;
  }

  // Hand the read buffer itself to the caller; it holds retLen valid
  // characters (its capacity may be larger).
  uniBuf = buf;
  return res;
}
|
|
|
|
void Tokenizer_HandleNotationDecl(void *userData,
|
|
const XML_Char *notationName,
|
|
const XML_Char *base,
|
|
const XML_Char *systemId,
|
|
const XML_Char *publicId){
|
|
NS_NOTYETIMPLEMENTED("Error: Tokenizer_HandleNotationDecl() not yet implemented.");
|
|
}
|
|
|
|
/**
 * expat callback for a reference to an external entity. When built with
 * XML_DTD, the entity is loaded (only if OpenInputStream accepts its url)
 * and parsed with an external entity parser derived from |parser|.
 *
 * @param   parser           parser that encountered the reference
 * @param   openEntityNames  not used
 * @param   base             base url used to resolve systemId
 * @param   systemId         system identifier (url) of the entity
 * @param   publicId         not used
 * @return  the result of parsing the entity when it was loaded and parsed,
 *          PR_TRUE otherwise
 */
int Tokenizer_HandleExternalEntityRef(XML_Parser parser,
                                      const XML_Char *openEntityNames,
                                      const XML_Char *base,
                                      const XML_Char *systemId,
                                      const XML_Char *publicId)
{
  int result = PR_TRUE;

#ifdef XML_DTD
  // Load the external entity into a buffer
  nsCOMPtr<nsIInputStream> in = nsnull;
  nsAutoString urlSpec( (const PRUnichar*) systemId );
  nsAutoString baseURL( (const PRUnichar*) base );
  nsAutoString absURL;

  nsresult rv = nsExpatTokenizer::OpenInputStream(urlSpec, baseURL, getter_AddRefs(in), &absURL);

  if (NS_SUCCEEDED(rv) && nsnull != in) {
    PRUint32 retLen = 0;
    PRUnichar *uniBuf = nsnull;
    rv = nsExpatTokenizer::LoadStream(in, uniBuf, retLen);

    // Pass the buffer to expat for parsing
    if (NS_SUCCEEDED(rv) && nsnull != uniBuf) {
      // Create a parser for parsing the external entity
      nsAutoString encoding; encoding.AssignWithConversion("UTF-16");
      XML_Parser entParser = nsnull;

      entParser = XML_ExternalEntityParserCreate(parser, 0,
                                                 (const XML_Char*) encoding.GetUnicode());

      if (nsnull != entParser) {
        XML_SetBase(entParser, (const XML_Char*) absURL.GetUnicode());
        result = XML_Parse(entParser, (char *)uniBuf, retLen * sizeof(PRUnichar), 1);
        XML_ParserFree(entParser);
      }
    }

    // Free the buffer on every path: LoadStream may have produced one even
    // though it reported a failure, and that buffer must not be leaked.
    PR_FREEIF(uniBuf);
  }
#else /* ! XML_DTD */

  NS_NOTYETIMPLEMENTED("Error: Tokenizer_HandleExternalEntityRef() not yet implemented.");

#endif /* XML_DTD */

  return result;
}
|
|
|
|
int Tokenizer_HandleUnknownEncoding(void *encodingHandlerData,
|
|
const XML_Char *name,
|
|
XML_Encoding *info) {
|
|
NS_NOTYETIMPLEMENTED("Error: Tokenizer_HandleUnknownEncoding() not yet implemented.");
|
|
int result=0;
|
|
return result;
|
|
}
|
|
|
|
void Tokenizer_HandleStartDoctypeDecl(void *userData,
|
|
const XML_Char *doctypeName)
|
|
{
|
|
XMLParserState* state = (XMLParserState*) userData;
|
|
state->indoctype = PR_TRUE;
|
|
state->doctypeText.Assign(NS_LITERAL_STRING("<!DOCTYPE "));
|
|
}
|
|
|
|
void Tokenizer_HandleEndDoctypeDecl(void *userData)
|
|
{
|
|
XMLParserState* state = (XMLParserState*) userData;
|
|
|
|
state->doctypeText.AppendWithConversion(">");
|
|
CToken* token = state->tokenAllocator->CreateTokenOfType(eToken_doctypeDecl, eHTMLTag_unknown, state->doctypeText);
|
|
if (token) {
|
|
nsExpatTokenizer::AddToken(token, NS_OK, state->tokenDeque, state->tokenAllocator);
|
|
}
|
|
state->indoctype = PR_FALSE;
|
|
state->doctypeText.Truncate();
|
|
// Do nothing
|
|
}
|