/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL. You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All Rights
 * Reserved.
 */

#include "nsHTMLParser.h"
#include "nsHTMLDelegate.h"
#include "nsHTMLContentSink.h"
#include "nsTokenizer.h"
#include "nsHTMLTokens.h"
#include "nsString.h"
#include "nsIURL.h"
#include "nsDefaultTokenHandler.h"
#include "nsCRT.h"
#include "nsHTMLDTD.h"

static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kClassIID, NS_IHTML_PARSER_IID);
static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTMLCONTENTSINK_IID);

/**-------------------------------------------------------
 * Factory entry point for the COM-like construction of an
 * nsHTMLParser. The new object is handed back through
 * QueryInterface(), which adds the reference the caller owns.
 *
 * The out-pointer is validated before anything is allocated, and
 * a temporary reference is held across the QueryInterface() call
 * so that a failed query destroys the parser instead of leaking it.
 * NOTE(review): this relies on Release() deleting the object when
 * its count returns to zero -- confirm against NS_IMPL_RELEASE.
 *
 * @update  gess 3/25/98
 * @param   aInstancePtrResult receives the newly instantiated parser
 * @return  NS_ERROR_NULL_POINTER if aInstancePtrResult is null,
 *          NS_ERROR_OUT_OF_MEMORY if allocation fails, otherwise
 *          whatever QueryInterface() returns
 *------------------------------------------------------*/
NS_HTMLPARS nsresult NS_NewHTMLParser(nsIParser** aInstancePtrResult)
{
  if (0 == aInstancePtrResult) {
    return NS_ERROR_NULL_POINTER;
  }

  nsHTMLParser* it = new nsHTMLParser();
  if (0 == it) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  // Hold our own reference while querying; dropping it afterwards
  // leaves only the caller's reference on success, and none on failure.
  it->AddRef();
  nsresult result = it->QueryInterface(kClassIID, (void**) aInstancePtrResult);
  it->Release();
  return result;
}

/**-------------------------------------------------------
 * init the set of default token handlers...
* * @update gess 3/25/98 * @param * @return *------------------------------------------------------*/ void nsHTMLParser::InitializeDefaultTokenHandlers() { AddTokenHandler(new CStartTokenHandler()); AddTokenHandler(new CEndTokenHandler()); AddTokenHandler(new CCommentTokenHandler()); AddTokenHandler(new CEntityTokenHandler()); AddTokenHandler(new CWhitespaceTokenHandler()); AddTokenHandler(new CNewlineTokenHandler()); AddTokenHandler(new CTextTokenHandler()); AddTokenHandler(new CAttributeTokenHandler()); AddTokenHandler(new CScriptTokenHandler()); AddTokenHandler(new CStyleTokenHandler()); } /**------------------------------------------------------- * default constructor * * @update gess 3/25/98 * @param * @return *------------------------------------------------------*/ nsHTMLParser::nsHTMLParser() { NS_INIT_REFCNT(); mSink=0; mTokenHandlerCount=0; mTagStackPos=0; mCurrentPos=0; nsCRT::zero(mTagStack,sizeof(mTagStack)); nsCRT::zero(mTokenHandlers,sizeof(mTokenHandlers)); mDTD=new nsHTMLDTD(); InitializeDefaultTokenHandlers(); } /**------------------------------------------------------- * Default destructor * * @update gess 3/25/98 * @param * @return *------------------------------------------------------*/ nsHTMLParser::~nsHTMLParser() { if(mCurrentPos) delete mCurrentPos; mCurrentPos=0; if(mDTD) delete mDTD; mDTD=0; NS_IF_RELEASE(mSink); } NS_IMPL_ADDREF(nsHTMLParser) NS_IMPL_RELEASE(nsHTMLParser) //NS_IMPL_ISUPPORTS(nsHTMLParser,NS_IHTML_PARSER_IID) /**------------------------------------------------------- * This method gets called as part of our COM-like interfaces. * Its purpose is to create an interface to parser object * of some type. 
*
 * The ISupports and IParser ids both yield this object viewed as
 * nsIParser; the class id yields it as nsHTMLParser. On success
 * the returned pointer has had AddRef() called on it.
 *
 * @update  gess 3/25/98
 * @param   aIID          id of the interface being requested
 * @param   aInstancePtr  receives the interface pointer; set to 0
 *                        when the id is not recognised
 * @return  NS_OK on success, NS_ERROR_NULL_POINTER if aInstancePtr
 *          is null, NS_NOINTERFACE for an unknown id
 *------------------------------------------------------*/
nsresult nsHTMLParser::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
  if (NULL == aInstancePtr) {
    return NS_ERROR_NULL_POINTER;
  }
  if(aIID.Equals(kISupportsIID)) {  //do IUnknown...
    *aInstancePtr = (nsIParser*)(this);
  }
  else if(aIID.Equals(kIParserIID)) {  //do IParser base class...
    *aInstancePtr = (nsIParser*)(this);
  }
  else if(aIID.Equals(kClassIID)) {  //do this class...
    *aInstancePtr = (nsHTMLParser*)(this);
  }
  else {
    *aInstancePtr=0;
    return NS_NOINTERFACE;
  }
  // Every successful branch hands out a counted reference.
  ((nsISupports*) *aInstancePtr)->AddRef();
  return NS_OK;
}

/**-------------------------------------------------------
 * Looks up an entry of the tag stack by position.
 *
 * NOTE(review): the code below is damaged. The text between
 * "(aPos" and "=0;i--)" is missing (it looks like an angle-bracket
 * span was stripped), so the end of NodeAt() and the start of the
 * following definition are gone. What remains after the gap is a
 * downward scan of mTagStack that returns the matching index or
 * kNotFound -- presumably the body of GetTopmostIndex(), which
 * IsOpen() calls. Restore this region from version control; it
 * will not compile as it stands.
 *
 * @update  gess 4/2/98
 * @param   aPos  position in the tag stack; asserted to be >= 0
 * @return  not determinable from the damaged text -- TODO confirm
 *------------------------------------------------------*/
eHTMLTags nsHTMLParser::NodeAt(PRInt32 aPos) const {
  NS_PRECONDITION(0 <= aPos, "bad nodeAt");
  if((aPos>-1) && (aPos=0;i--){
    if(mTagStack[i]==aTag)
      return i;
  }
  return kNotFound;
}

/**-------------------------------------------------------
 * Determine whether the given tag is open anywhere
 * in our context stack.
 *
 * @update  gess 4/2/98
 * @param   aTag  tag to be searched for in the stack
 * @return  true when GetTopmostIndex() finds the tag (its result
 *          differs from kNotFound), false otherwise
 *------------------------------------------------------*/
PRBool nsHTMLParser::IsOpen(eHTMLTags aTag) const {
  PRInt32 pos=GetTopmostIndex(aTag);
  return PRBool(kNotFound!=pos);
}

/**-------------------------------------------------------
 * Gets the number of open containers on the stack.
 *
 * @update  gess 4/2/98
 * @param   (none)
 * @return  the current value of mTagStackPos
 *------------------------------------------------------*/
PRInt32 nsHTMLParser::GetStackPos() const {
  return mTagStackPos;
}

/**-------------------------------------------------------
 * Finds a tag handler for the given tag type, given in string.
* * @update gess 4/2/98 * @param aString contains name of tag to be handled * @return valid tag handler (if found) or null *------------------------------------------------------*/ CDefaultTokenHandler* nsHTMLParser::GetTokenHandler(const nsString& aString) const{ eHTMLTokenTypes theType=DetermineTokenType(aString); return GetTokenHandler(theType); } /**------------------------------------------------------- * Finds a tag handler for the given tag type. * * @update gess 4/2/98 * @param aTagType type of tag to be handled * @return valid tag handler (if found) or null *------------------------------------------------------*/ CDefaultTokenHandler* nsHTMLParser::GetTokenHandler(eHTMLTokenTypes aType) const { for(int i=0;iCanHandle(aType)) { return mTokenHandlers[i]; } } return 0; } /**------------------------------------------------------- * Register a handler. * * @update gess 4/2/98 * @param * @return *------------------------------------------------------*/ CDefaultTokenHandler* nsHTMLParser::AddTokenHandler(CDefaultTokenHandler* aHandler) { NS_ASSERTION(0!=aHandler,"Error: Null handler argument"); if(aHandler) { } return 0; } /**------------------------------------------------------- * This method gets called in order to set the content * sink for this parser to dump nodes to. * * @update gess 3/25/98 * @param nsIContentSink interface for node receiver * @return *------------------------------------------------------*/ void nsHTMLParser::SetContentSink(nsIContentSink* aSink) { NS_PRECONDITION(0!=aSink,"sink cannot be null!"); if(aSink) { nsIHTMLContentSink* htmlSink; if (NS_OK == aSink->QueryInterface(kIHTMLContentSinkIID, (void**)&htmlSink)) { if ((nsHTMLContentSink*)(htmlSink) != mSink) { NS_IF_RELEASE(mSink); mSink = (nsHTMLContentSink*)(htmlSink); } else { NS_RELEASE(htmlSink); } } } } /**------------------------------------------------------- * This is the main controlling routine in the parsing process. 
* Note that it may get called multiple times for the same scanner, * since this is a pushed based system, and all the tokens may * not have been consumed by the scanner during a given invocation * of this method. * * @update gess 3/25/98 * @param aFilename -- const char* containing file to be parsed. * @return PR_TRUE if parse succeeded, PR_FALSE otherwise. *------------------------------------------------------*/ PRBool nsHTMLParser::Parse(nsIURL* aURL){ NS_PRECONDITION(0!=aURL,"Error: URL cannot be null!"); PRBool result=PR_FALSE; if(aURL) { result=PR_TRUE; CHTMLTokenizerDelegate delegate; mTokenizer=new CTokenizer(aURL, delegate); mTokenizer->Tokenize(); //#define VERBOSE_DEBUG #ifdef VERBOSE_DEBUG mTokenizer->DebugDumpTokens(cout); #endif CDeque& deque=mTokenizer->GetDeque(); CDequeIterator e=deque.End(); if(mCurrentPos) delete mCurrentPos; //don't leak, now! mCurrentPos=new CDequeIterator(deque.Begin()); CToken* theToken; PRBool done=PR_FALSE; PRInt32 iteration=0; while((!done) && (result)) { theToken=*mCurrentPos; eHTMLTokenTypes type=eHTMLTokenTypes(theToken->GetTokenType()); iteration++; //debug purposes... switch(eHTMLTokenTypes(type)){ case eToken_start: result=HandleStartToken(theToken); break; case eToken_end: result=HandleEndToken(theToken); break; case eToken_entity: result=HandleEntityToken(theToken); break; case eToken_text: result=HandleTextToken(theToken); break; case eToken_newline: result=HandleNewlineToken(theToken); break; case eToken_skippedcontent: //used in cases like