260 lines
7.6 KiB
C++
260 lines
7.6 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * The contents of this file are subject to the Netscape Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is Netscape
 * Communications Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All
 * Rights Reserved.
 *
 * Contributor(s):
 */
|
|
|
|
|
|
/**
 * MODULE NOTES:
 * @update  gess 4/1/98
 *
 */
|
|
|
|
#include "nsXMLTokenizer.h"
|
|
#include "nsParser.h"
|
|
#include "nsScanner.h"
|
|
#include "nsDTDUtils.h"
|
|
#include "nsParser.h"
|
|
|
|
/************************************************************************
  And now for the main class -- nsXMLTokenizer...
 ************************************************************************/
|
|
|
|
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
|
static NS_DEFINE_IID(kITokenizerIID, NS_ITOKENIZER_IID);
|
|
static NS_DEFINE_IID(kHTMLTokenizerIID, NS_HTMLTOKENIZER_IID);
|
|
static NS_DEFINE_IID(kClassIID, NS_XMLTOKENIZER_IID);
|
|
|
|
/**
|
|
* This method gets called as part of our COM-like interfaces.
|
|
* Its purpose is to create an interface to parser object
|
|
* of some type.
|
|
*
|
|
* @update gess 4/8/98
|
|
* @param nsIID id of object to discover
|
|
* @param aInstancePtr ptr to newly discovered interface
|
|
* @return NS_xxx result code
|
|
*/
|
|
nsresult nsXMLTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr)
|
|
{
|
|
if (NULL == aInstancePtr) {
|
|
return NS_ERROR_NULL_POINTER;
|
|
}
|
|
|
|
if(aIID.Equals(kISupportsIID)) { //do IUnknown...
|
|
*aInstancePtr = (nsXMLTokenizer*)(this);
|
|
}
|
|
else if(aIID.Equals(kITokenizerIID)) { //do ITOkenizer base class...
|
|
*aInstancePtr = (nsITokenizer*)(this);
|
|
}
|
|
else if(aIID.Equals(kHTMLTokenizerIID)) { //do nsHTMLTokenizer base class...
|
|
*aInstancePtr = (nsHTMLTokenizer*)(this);
|
|
}
|
|
else if(aIID.Equals(kClassIID)) { //do this class...
|
|
*aInstancePtr = (nsXMLTokenizer*)(this);
|
|
}
|
|
else {
|
|
*aInstancePtr=0;
|
|
return NS_NOINTERFACE;
|
|
}
|
|
NS_ADDREF_THIS();
|
|
return NS_OK;
|
|
}
|
|
|
|
/**
|
|
* This method is defined in nsIParser. It is used to
|
|
* cause the COM-like construction of an nsParser.
|
|
*
|
|
* @update gess 4/8/98
|
|
* @param nsIParser** ptr to newly instantiated parser
|
|
* @return NS_xxx error result
|
|
*/
|
|
NS_HTMLPARS nsresult NS_NewXMLTokenizer(nsITokenizer** aInstancePtrResult){
|
|
nsXMLTokenizer* it = new nsXMLTokenizer();
|
|
|
|
if (it == 0) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
return it->QueryInterface(kClassIID, (void **) aInstancePtrResult);
|
|
}
|
|
|
|
|
|
// Reference-counting members for nsXMLTokenizer, generated by the
// NS_IMPL_ADDREF / NS_IMPL_RELEASE implementation macros.
NS_IMPL_ADDREF(nsXMLTokenizer)
NS_IMPL_RELEASE(nsXMLTokenizer)
|
|
|
|
|
|
/**
|
|
* Default constructor
|
|
*
|
|
* @update gess 4/9/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsXMLTokenizer::nsXMLTokenizer() : nsHTMLTokenizer() {
|
|
NS_INIT_REFCNT();
|
|
mDoXMLEmptyTags=PR_TRUE;
|
|
mDocType=eXMLText;
|
|
}
|
|
|
|
/**
|
|
* Default constructor
|
|
*
|
|
* @update gess 4/9/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsXMLTokenizer::~nsXMLTokenizer(){
|
|
}
|
|
|
|
|
|
/*******************************************************************
  Here begin the real working methods for the tokenizer.
 *******************************************************************/
|
|
|
|
/**
|
|
* This method repeatedly called by the tokenizer.
|
|
* Each time, we determine the kind of token were about to
|
|
* read, and then we call the appropriate method to handle
|
|
* that token type.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @param anErrorCode: arg that will hold error condition
|
|
* @return new token or null
|
|
*/
|
|
nsresult nsXMLTokenizer::ConsumeToken(nsScanner& aScanner,PRBool& aFlushTokens) {
|
|
return nsHTMLTokenizer::ConsumeToken(aScanner,aFlushTokens);
|
|
}
|
|
|
|
|
|
/**
 *  Returns the token allocator by forwarding to
 *  nsHTMLTokenizer::GetTokenAllocator().
 *
 *  @return  whatever the base class implementation returns; callers in
 *           this file check the pointer for null before using it
 */
nsTokenAllocator* nsXMLTokenizer::GetTokenAllocator(void) {
  nsTokenAllocator* theAllocator = nsHTMLTokenizer::GetTokenAllocator();
  return theAllocator;
}
|
|
|
|
/*
|
|
* Consume characters as long as they match the string passed in.
|
|
* If they don't match, put them all back.
|
|
* XXX The scanner should be able to do this.
|
|
*
|
|
* @update vidur 11/12/98
|
|
*/
|
|
static
|
|
nsresult ConsumeConditional(nsScanner& aScanner,const nsString& aMatchString,PRBool& aMatch) {
|
|
nsresult result=NS_OK;
|
|
nsAutoString str;
|
|
PRUint32 len = aMatchString.Length();
|
|
|
|
result = aScanner.Peek(str, len);
|
|
if ((NS_OK == result) && str.Equals(aMatchString)) {
|
|
aMatch = PR_TRUE;
|
|
nsReadingIterator<PRUnichar> curPos;
|
|
aScanner.CurrentPosition(curPos);
|
|
curPos.advance(len);
|
|
aScanner.SetPosition(curPos);
|
|
}
|
|
else {
|
|
aMatch = PR_FALSE;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This method is called just after a "<!" has been consumed
|
|
* and we know we're at the start of a comment.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @param anErrorCode: arg that will hold error condition
|
|
* @return new token or null
|
|
*/
|
|
nsresult nsXMLTokenizer::ConsumeComment(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){
|
|
nsresult result=NS_OK;
|
|
nsTokenAllocator* theAllocator=this->GetTokenAllocator();
|
|
|
|
if(theAllocator) {
|
|
nsAutoString theEmpty;
|
|
aToken=theAllocator->CreateTokenOfType(eToken_comment,eHTMLTag_comment,theEmpty);
|
|
if(aToken) {
|
|
result=aToken->Consume(aChar,aScanner,eDTDMode_strict);
|
|
AddToken(aToken,result,&mTokenDeque,theAllocator);
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* This method is called just after a "<!" has been consumed.
|
|
* NOTE: Here we might consume CDATA and "special" comments.
|
|
*
|
|
*
|
|
* @update harishd 09/02/99
|
|
* @param aChar: last char read
|
|
* @param aScanner: see nsScanner.h
|
|
* @param anErrorCode: arg that will hold error condition
|
|
* @return new token or null
|
|
*/
|
|
nsresult nsXMLTokenizer::ConsumeSpecialMarkup(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){
|
|
nsresult result=NS_OK;
|
|
nsTokenAllocator* theAllocator=this->GetTokenAllocator();
|
|
|
|
if(theAllocator) {
|
|
PRUnichar theChar;
|
|
aScanner.Peek(theChar);
|
|
PRBool isComment=PR_TRUE;
|
|
nsAutoString theEmpty;
|
|
if(theChar==kLeftSquareBracket) {
|
|
nsAutoString CDATAString; CDATAString.AssignWithConversion("[CDATA[");
|
|
PRBool isCDATA = PR_FALSE;
|
|
result = ConsumeConditional(aScanner, CDATAString, isCDATA);
|
|
if (NS_OK == result) {
|
|
if (isCDATA) {
|
|
aToken=theAllocator->CreateTokenOfType(eToken_cdatasection,eHTMLTag_unknown,theEmpty);
|
|
isComment=PR_FALSE;
|
|
}
|
|
}
|
|
}
|
|
|
|
if(isComment) aToken = theAllocator->CreateTokenOfType(eToken_comment,eHTMLTag_comment,theEmpty);
|
|
|
|
if(aToken) {
|
|
result=aToken->Consume(aChar,aScanner,eDTDMode_strict);
|
|
AddToken(aToken,result,&mTokenDeque,theAllocator);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
* @update gess12/28/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult nsXMLTokenizer::HandleSkippedContent(nsScanner& aScanner,CToken*& aToken) {
|
|
nsresult result=NS_OK;
|
|
return result;
|
|
}
|