/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The contents of this file are subject to the Netscape Public License * Version 1.0 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS IS" * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See * the License for the specific language governing rights and limitations * under the License. * * The Original Code is Mozilla Communicator client code. * * The Initial Developer of the Original Code is Netscape Communications * Corporation. Portions created by Netscape are Copyright (C) 1998 * Netscape Communications Corporation. All Rights Reserved. */ /** * MODULE NOTES: * @update gess 4/1/98 * * This file declares the concrete HTMLContentSink class. * This class is used during the parsing process as the * primary interface between the parser and the content * model. 
*/ #include "nsHTMLContentSinkStream.h" #include "nsHTMLTokens.h" #include #include "nsString.h" #include "nsParserTypes.h" static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID); static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID); static char* gHeaderComment = ""; static char* gDocTypeHeader = ""; const int gTabSize=2; static char gBuffer[500]; /** PRETTY PRINTING PROTOTYPES **/ PRBool IsInline(eHTMLTags aTag); PRBool IsBlockLevel(eHTMLTags aTag); PRInt32 BreakBeforeOpen(eHTMLTags aTag); PRInt32 BreakAfterOpen(eHTMLTags aTag); PRInt32 BreakBeforeClose(eHTMLTags aTag); PRInt32 BreakAfterClose(eHTMLTags aTag); PRBool IndentChildren(eHTMLTags aTag); PRBool PreformattedChildren(eHTMLTags aTag); PRBool EatOpen(eHTMLTags aTag); PRBool EatClose(eHTMLTags aTag); PRBool PermitWSBeforeOpen(eHTMLTags aTag); PRBool PermitWSAfterOpen(eHTMLTags aTag); PRBool PermitWSBeforeClose(eHTMLTags aTag); PRBool PermitWSAfterClose(eHTMLTags aTag); PRBool IgnoreWS(eHTMLTags aTag); /** * This method gets called as part of our COM-like interfaces. * Its purpose is to create an interface to parser object * of some type. * * @update gess 4/8/98 * @param nsIID id of object to discover * @param aInstancePtr ptr to newly discovered interface * @return NS_xxx result code */ nsresult nsHTMLContentSinkStream::QueryInterface(const nsIID& aIID, void** aInstancePtr) { if (NULL == aInstancePtr) { return NS_ERROR_NULL_POINTER; } if(aIID.Equals(kISupportsIID)) { *aInstancePtr = (nsIContentSink*)(this); } else if(aIID.Equals(kIContentSinkIID)) { *aInstancePtr = (nsIContentSink*)(this); } else if(aIID.Equals(kIHTMLContentSinkIID)) { *aInstancePtr = (nsIHTMLContentSink*)(this); } else { *aInstancePtr=0; return NS_NOINTERFACE; } NS_ADDREF_THIS(); return NS_OK; } NS_IMPL_ADDREF(nsHTMLContentSinkStream) NS_IMPL_RELEASE(nsHTMLContentSinkStream) /** * This method is defined in nsIParser. 
It is used to
 * cause the COM-like construction of an nsHTMLContentSinkStream.
 *
 * The sink is allocated with new and then queried for nsIHTMLContentSink;
 * the result of that QueryInterface call is returned directly.
 * NOTE(review): if QueryInterface fails (e.g. aInstancePtrResult is null)
 * the freshly allocated sink is not released here.
 *
 * @update  gess 4/8/98
 * @param   aInstancePtrResult  receives the newly instantiated sink
 * @param   aDoFormat           stored in mDoFormat by the constructor
 * @param   aDoHeader           stored in mDoHeader by the constructor
 * @return  NS_ERROR_OUT_OF_MEMORY if allocation fails, otherwise the
 *          result of QueryInterface
 */
NS_HTMLPARS nsresult
NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult, PRBool aDoFormat, PRBool aDoHeader) {
  nsHTMLContentSinkStream* it = new nsHTMLContentSinkStream(aDoFormat,aDoHeader);
  if (nsnull == it) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
}

/**
 * Construct a content sink stream that writes to cout.
 * Tags are emitted in lower case, the tag stack is zeroed and the
 * stack position, column position and indent level start at 0.
 *
 * @update  gess7/7/98
 * @param   aDoFormat  stored in mDoFormat
 * @param   aDoHeader  stored in mDoHeader
 */
nsHTMLContentSinkStream::nsHTMLContentSinkStream(PRBool aDoFormat,PRBool aDoHeader) {
  mOutput=&cout;
  mLowerCaseTags = PR_TRUE;
  memset(mHTMLTagStack,0,sizeof(mHTMLTagStack));
  mHTMLStackPos = 0;
  mColPos = 0;
  mIndent = 0;
  mDoFormat = aDoFormat;
  mDoHeader = aDoHeader;
}

/**
 * Construct a content sink stream that writes to the given stream.
 * Apart from the output target, initialization is the same as in the
 * constructor above.
 *
 * @update  gess7/7/98
 * @param   aStream    stream that receives the output; only its address
 *                     is stored
 * @param   aDoFormat  stored in mDoFormat
 * @param   aDoHeader  stored in mDoHeader
 */
nsHTMLContentSinkStream::nsHTMLContentSinkStream(ostream& aStream,PRBool aDoFormat,PRBool aDoHeader) {
  mOutput = &aStream;
  mLowerCaseTags = PR_TRUE;
  memset(mHTMLTagStack,0,sizeof(mHTMLTagStack));
  mHTMLStackPos = 0;
  mColPos = 0;
  mIndent = 0;
  mDoFormat = aDoFormat;
  mDoHeader = aDoHeader;
}


/**
 * Destructor. Only clears the stream pointer; the stream itself is
 * neither flushed nor destroyed here.
 *
 * @update  gess7/7/98
 */
nsHTMLContentSinkStream::~nsHTMLContentSinkStream() {
  mOutput=0;  //we don't own the stream we're given; just forget it.
}


/**
 * Redirect all subsequent output to aStream.
 *
 * @update  gess7/22/98
 * @param   aStream  new output target; only its address is stored
 */
NS_IMETHODIMP_(void)
nsHTMLContentSinkStream::SetOutputStream(ostream& aStream){
  mOutput=&aStream;
}


/**
 * Write the attributes of aNode to aStream.
 *
 * NOTE(review): the text from the middle of the for-loop header below up to
 * "flush();" is damaged in this copy of the file. The loop condition and
 * body are missing, and the trailing "return NS_OK;" (this method is void)
 * suggests that the start of at least one following method was lost as
 * well. The tokens are left exactly as found; restore this region from
 * version control before building.
 *
 * @update  gess7/7/98
 * @param   aNode    node whose attribute count is read
 * @param   aStream  output stream
 */
void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode,ostream& aStream) {
  int theCount=aNode.GetAttributeCount();
  if(theCount) {
    int i=0;
    for(i=0;iflush();
  }
  return NS_OK;
}


/**
 * This method is used to open the only HEAD container.
 * The start tag is written only when an output stream is set and the
 * node's type is eHTMLTag_head.
 *
 * @update 07/12/98 gpk
 * @param  nsIParserNode reference to parser node interface
 * @return NS_OK in all cases.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenHead(const nsIParserNode& aNode)
{
  // Nothing to do without an output stream.
  if (!mOutput) {
    return NS_OK;
  }
  const eHTMLTags nodeTag = (eHTMLTags)aNode.GetNodeType();
  if (eHTMLTag_head == nodeTag) {
    AddStartTag(aNode, *mOutput);
  }
  return NS_OK;
}


/**
 * This method is used to close the only HEAD container.
 * The end tag is written only when an output stream is set and the
 * node's type is eHTMLTag_head.
 *
 * @update 07/12/98 gpk
 * @param  nsIParserNode reference to parser node interface
 * @return NS_OK in all cases.
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::CloseHead(const nsIParserNode& aNode)
{
  if (!mOutput) {
    return NS_OK;
  }
  const eHTMLTags nodeTag = (eHTMLTags)aNode.GetNodeType();
  if (eHTMLTag_head == nodeTag) {
    AddEndTag(aNode, *mOutput);
  }
  return NS_OK;
}


/**
 * This method is used to open the main BODY container.
 * The start tag is written only when an output stream is set and the
 * node's type is eHTMLTag_body.
 *
 * @update 07/12/98 gpk
 * @param  nsIParserNode reference to parser node interface
 * @return NS_OK in all cases.
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenBody(const nsIParserNode& aNode)
{
  if (!mOutput) {
    return NS_OK;
  }
  const eHTMLTags nodeTag = (eHTMLTags)aNode.GetNodeType();
  if (eHTMLTag_body == nodeTag) {
    AddStartTag(aNode, *mOutput);
  }
  return NS_OK;
}


/**
 * This method is used to close the main BODY container.
 * The end tag is written only when an output stream is set and the
 * node's type is eHTMLTag_body.
 *
 * @update 07/12/98 gpk
 * @param  nsIParserNode reference to parser node interface
 * @return NS_OK in all cases.
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::CloseBody(const nsIParserNode& aNode)
{
  if (!mOutput) {
    return NS_OK;
  }
  const eHTMLTags nodeTag = (eHTMLTags)aNode.GetNodeType();
  if (eHTMLTag_body == nodeTag) {
    AddEndTag(aNode, *mOutput);
  }
  return NS_OK;
}


/**
 * This method is used to open a new FORM container.
 * The start tag is written only when an output stream is set and the
 * node's type is eHTMLTag_form.
 *
 * @update 07/12/98 gpk
 * @param  nsIParserNode reference to parser node interface
 * @return NS_OK in all cases.
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenForm(const nsIParserNode& aNode)
{
  if (!mOutput) {
    return NS_OK;
  }
  const eHTMLTags nodeTag = (eHTMLTags)aNode.GetNodeType();
  if (eHTMLTag_form == nodeTag) {
    AddStartTag(aNode, *mOutput);
  }
  return NS_OK;
}


/**
 * This method is used to close the outer FORM container.
 * The end tag is written only when an output stream is set and the
 * node's type is eHTMLTag_form.
 *
 * @update 07/12/98 gpk
 * @param  nsIParserNode reference to parser node interface
 * @return NS_OK in all cases.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::CloseForm(const nsIParserNode& aNode)
{
  // Nothing to do without an output stream.
  if (!mOutput) {
    return NS_OK;
  }
  const eHTMLTags nodeTag = (eHTMLTags)aNode.GetNodeType();
  if (eHTMLTag_form == nodeTag) {
    AddEndTag(aNode, *mOutput);
  }
  return NS_OK;
}


/**
 * This method is used to open a new MAP container.
 * The start tag is written only when an output stream is set and the
 * node's type is eHTMLTag_map.
 *
 * @update 07/12/98 gpk
 * @param  nsIParserNode reference to parser node interface
 * @return NS_OK in all cases.
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenMap(const nsIParserNode& aNode)
{
  if (!mOutput) {
    return NS_OK;
  }
  const eHTMLTags nodeTag = (eHTMLTags)aNode.GetNodeType();
  if (eHTMLTag_map == nodeTag) {
    AddStartTag(aNode, *mOutput);
  }
  return NS_OK;
}


/**
 * This method is used to close the MAP container.
 * The end tag is written only when an output stream is set and the
 * node's type is eHTMLTag_map.
 *
 * @update 07/12/98 gpk
 * @param  nsIParserNode reference to parser node interface
 * @return NS_OK in all cases.
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::CloseMap(const nsIParserNode& aNode)
{
  if (!mOutput) {
    return NS_OK;
  }
  const eHTMLTags nodeTag = (eHTMLTags)aNode.GetNodeType();
  if (eHTMLTag_map == nodeTag) {
    AddEndTag(aNode, *mOutput);
  }
  return NS_OK;
}


/**
 * This method is used to open the FRAMESET container.
 * The start tag is written only when an output stream is set and the
 * node's type is eHTMLTag_frameset.
 *
 * @update 07/12/98 gpk
 * @param  nsIParserNode reference to parser node interface
 * @return NS_OK in all cases.
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenFrameset(const nsIParserNode& aNode)
{
  if (!mOutput) {
    return NS_OK;
  }
  const eHTMLTags nodeTag = (eHTMLTags)aNode.GetNodeType();
  if (eHTMLTag_frameset == nodeTag) {
    AddStartTag(aNode, *mOutput);
  }
  return NS_OK;
}


/**
 * This method is used to close the FRAMESET container.
 * The end tag is written only when an output stream is set and the
 * node's type is eHTMLTag_frameset.
 *
 * @update 07/12/98 gpk
 * @param  nsIParserNode reference to parser node interface
 * @return NS_OK in all cases.
*/ NS_IMETHODIMP nsHTMLContentSinkStream::CloseFrameset(const nsIParserNode& aNode){ if(mOutput) { eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); if (tag == eHTMLTag_frameset) AddEndTag(aNode,*mOutput); } return NS_OK; } void nsHTMLContentSinkStream::AddIndent(ostream& aStream) { for (PRInt32 i = mIndent; --i >= 0; ) { aStream << " "; mColPos += 2; } } void nsHTMLContentSinkStream::AddStartTag(const nsIParserNode& aNode, ostream& aStream) { eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); const nsString& name = aNode.GetText(); nsString tagName; mHTMLTagStack[mHTMLStackPos++] = tag; tagName = name; if (mLowerCaseTags == PR_TRUE) tagName.ToLowerCase(); else tagName.ToUpperCase(); if (mColPos != 0 && BreakBeforeOpen(tag)) { aStream << endl; mColPos = 0; } if (PermitWSBeforeOpen(tag)) AddIndent(aStream); tagName.ToCString(gBuffer,sizeof(gBuffer)-1); aStream << (char)kLessThan << gBuffer; mColPos += 1 + tagName.Length(); if (tag == eHTMLTag_style) { aStream << (char)kGreaterThan << endl; const nsString& data = aNode.GetSkippedContent(); PRInt32 size = data.Length(); char* buffer = new char[size+1]; data.ToCString(buffer,size+1); aStream << buffer; delete buffer; } else { WriteAttributes(aNode,aStream); aStream << (char)kGreaterThan; mColPos += 1; } if (BreakAfterOpen(tag)) { aStream << endl; mColPos = 0; } if (IndentChildren(tag)) mIndent++; } void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode, ostream& aStream) { eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); // const nsString& name = aNode.GetText(); nsString tagName; if (tag == eHTMLTag_unknown) { tagName = aNode.GetText(); } else { const char* name = NS_EnumToTag(tag); tagName = name; } if (mLowerCaseTags == PR_TRUE) tagName.ToLowerCase(); else tagName.ToUpperCase(); if (IndentChildren(tag)) mIndent--; if (BreakBeforeClose(tag)) { if (mColPos != 0) { aStream << endl; mColPos = 0; } AddIndent(aStream); } tagName.ToCString(gBuffer,sizeof(gBuffer)-1); aStream << (char)kLessThan << (char)kForwardSlash 
<< gBuffer << (char)kGreaterThan; mColPos += 1 + 1 + strlen(gBuffer) + 1; if (BreakAfterClose(tag)) { aStream << endl; mColPos = 0; } mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown; } /** * This gets called by the parser when you want to add * a leaf node to the current container in the content * model. * * @updated gpk 06/18/98 * @param * @return */ nsresult nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); eHTMLTags tag = eHTMLTag_unknown; if (mHTMLStackPos > 0) tag = mHTMLTagStack[mHTMLStackPos-1]; PRBool preformatted = PR_FALSE; for (PRInt32 i = mHTMLStackPos-1; i >= 0; i--) { preformatted |= PreformattedChildren(mHTMLTagStack[i]); if (preformatted) break; } if (type == eHTMLTag_br || type == eHTMLTag_hr || type == eHTMLTag_meta || type == eHTMLTag_style) { AddStartTag(aNode,aStream); mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown; } if (type == eHTMLTag_text) { const nsString& text = aNode.GetText(); if ((mDoFormat == PR_FALSE) || preformatted == PR_TRUE) { text.ToCString(gBuffer,sizeof(gBuffer)-1); aStream << gBuffer; mColPos += text.Length(); } else { PRInt32 mMaxColumn = 72; // 1. Determine the length of the input string PRInt32 length = text.Length(); // 2. 
If the offset plus the length of the text is smaller // than the max then just add it if (mColPos + length < mMaxColumn) { text.ToCString(gBuffer,sizeof(gBuffer)-1); aStream << gBuffer; mColPos += text.Length(); } else { nsString str = text; PRBool done = PR_FALSE; PRInt32 index = 0; PRInt32 offset = mColPos; while (!done) { // find the next break PRInt32 start = mMaxColumn-offset; if (start < 0) start = 0; index = str.Find(' ',start); // if there is no break than just add it if (index == kNotFound) { str.ToCString(gBuffer,sizeof(gBuffer)-1); aStream << gBuffer; mColPos += str.Length(); done = PR_TRUE; } else { // make first equal to the str from the // beginning to the index nsString first = str; first.Truncate(index); first.ToCString(gBuffer,sizeof(gBuffer)-1); aStream << gBuffer << endl; mColPos = 0; // cut the string from the beginning to the index str.Cut(0,index); offset = 0; } } } } } else if (type == eHTMLTag_whitespace) { if ((mDoFormat == PR_FALSE) || preformatted || IgnoreWS(tag) == PR_FALSE) { const nsString& text = aNode.GetText(); text.ToCString(gBuffer,sizeof(gBuffer)-1); aStream << gBuffer; mColPos += text.Length(); } } else if (type == eHTMLTag_newline) { if ((mDoFormat == PR_FALSE) || preformatted) { const nsString& text = aNode.GetText(); text.ToCString(gBuffer,sizeof(gBuffer)-1); aStream << gBuffer; mColPos = 0; } } return NS_OK; } /** * This gets called by the parser when you want to add * a PI node to the current container in the content * model. * * @updated gess 3/25/98 * @param * @return */ NS_IMETHODIMP nsHTMLContentSinkStream::AddProcessingInstruction(const nsIParserNode& aNode){ #ifdef VERBOSE_DEBUG DebugDump("<",aNode.GetText(),(mNodeStackPos)*2); #endif return NS_OK; } /** * This gets called by the parser when you want to add * a comment node to the current container in the content * model. 
* * @updated gess 3/25/98 * @param * @return */ NS_IMETHODIMP nsHTMLContentSinkStream::AddComment(const nsIParserNode& aNode){ #ifdef VERBOSE_DEBUG DebugDump("<",aNode.GetText(),(mNodeStackPos)*2); #endif return NS_OK; } /** * This method is used to a general container. * This includes: OL,UL,DIR,SPAN,TABLE,H[1..6],etc. * * @update 07/12/98 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenContainer(const nsIParserNode& aNode){ if(mOutput) { AddStartTag(aNode,*mOutput); // eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); } return NS_OK; } /** * This method is used to close a generic container. * * @update 07/12/98 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLContentSinkStream::CloseContainer(const nsIParserNode& aNode){ if(mOutput) { AddEndTag(aNode,*mOutput); } return NS_OK; } /** * This method is used to add a leaf to the currently * open container. * * @update 07/12/98 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode){ nsresult result = NS_OK; if(mOutput) { result = AddLeaf(aNode,*mOutput); } return result; } /** * This method gets called when the parser begins the process * of building the content model via the content sink. * * @update 5/7/98 gess */ NS_IMETHODIMP nsHTMLContentSinkStream::WillBuildModel(void){ mTabLevel=-1; if(mDoHeader && mOutput) { (*mOutput) << gHeaderComment << endl; (*mOutput) << gDocTypeHeader << endl; } return NS_OK; } /** * This method gets called when the parser concludes the process * of building the content model via the content sink. * * @param aQualityLevel describes how well formed the doc was. 
* 0=GOOD; 1=FAIR; 2=POOR; * @update 5/7/98 gess */ NS_IMETHODIMP nsHTMLContentSinkStream::DidBuildModel(PRInt32 aQualityLevel) { return NS_OK; } /** * This method gets called when the parser gets i/o blocked, * and wants to notify the sink that it may be a while before * more data is available. * * @update 5/7/98 gess */ NS_IMETHODIMP nsHTMLContentSinkStream::WillInterrupt(void) { return NS_OK; } /** * This method gets called when the parser i/o gets unblocked, * and we're about to start dumping content again to the sink. * * @update 5/7/98 gess */ NS_IMETHODIMP nsHTMLContentSinkStream::WillResume(void) { return NS_OK; } NS_IMETHODIMP nsHTMLContentSinkStream::NotifyError(nsresult aErrorResult) { return NS_OK; } /** * **** Pretty Printing Methods ****** * */ PRBool IsInline(eHTMLTags aTag) { PRBool result = PR_FALSE; switch (aTag) { case eHTMLTag_a: case eHTMLTag_address: case eHTMLTag_big: case eHTMLTag_blink: case eHTMLTag_b: case eHTMLTag_br: case eHTMLTag_cite: case eHTMLTag_code: case eHTMLTag_dfn: case eHTMLTag_em: case eHTMLTag_font: case eHTMLTag_img: case eHTMLTag_i: case eHTMLTag_kbd: case eHTMLTag_keygen: case eHTMLTag_nobr: case eHTMLTag_samp: case eHTMLTag_small: case eHTMLTag_spacer: case eHTMLTag_span: case eHTMLTag_strike: case eHTMLTag_strong: case eHTMLTag_sub: case eHTMLTag_sup: case eHTMLTag_td: case eHTMLTag_textarea: case eHTMLTag_tt: case eHTMLTag_var: case eHTMLTag_wbr: result = PR_TRUE; break; default: break; } return result; } PRBool IsBlockLevel(eHTMLTags aTag) { return !IsInline(aTag); } /** * Desired line break state before the open tag. */ PRBool BreakBeforeOpen(eHTMLTags aTag) { PRBool result = PR_FALSE; switch (aTag) { case eHTMLTag_html: result = PR_FALSE; break; default: result = IsBlockLevel(aTag); } return result; } /** * Desired line break state after the open tag. 
*/
PRBool BreakAfterOpen(eHTMLTags aTag)
{
  switch (aTag) {
    case eHTMLTag_html:
    case eHTMLTag_body:
    case eHTMLTag_ul:
    case eHTMLTag_ol:
    case eHTMLTag_table:
    case eHTMLTag_tbody:
    case eHTMLTag_style:
      return PR_TRUE;

    default:
      return PR_FALSE;
  }
}


/**
 * Desired line break state before the close tag.
 *
 * @param  aTag  tag about to be closed
 * @return PR_TRUE for the tags listed below, PR_FALSE otherwise
 */
PRBool BreakBeforeClose(eHTMLTags aTag)
{
  switch (aTag) {
    case eHTMLTag_html:
    case eHTMLTag_head:
    case eHTMLTag_body:
    case eHTMLTag_ul:
    case eHTMLTag_ol:
    case eHTMLTag_table:
    case eHTMLTag_tbody:
    case eHTMLTag_style:
      return PR_TRUE;

    default:
      return PR_FALSE;
  }
}


/**
 * Desired line break state after the close tag.
 *
 * @param  aTag  tag that was just closed
 * @return PR_TRUE for HTML; for every other tag, whether it is block level
 */
PRBool BreakAfterClose(eHTMLTags aTag)
{
  if (eHTMLTag_html == aTag) {
    return PR_TRUE;
  }
  return IsBlockLevel(aTag);
}


/**
 * Indent/outdent when the open/close tags are encountered.
 *
 * @param  aTag  tag being opened or closed
 * @return PR_TRUE for TABLE, UL, OL, TBODY, FORM and FRAMESET,
 *         PR_FALSE otherwise
 */
PRBool IndentChildren(eHTMLTags aTag)
{
  switch (aTag) {
    case eHTMLTag_table:
    case eHTMLTag_ul:
    case eHTMLTag_ol:
    case eHTMLTag_tbody:
    case eHTMLTag_form:
    case eHTMLTag_frameset:
      return PR_TRUE;

    default:
      return PR_FALSE;
  }
}


/**
 * All tags after this tag and before the closing tag will be output with no
 * formatting.
 *
 * @param  aTag  open container to test
 * @return PR_TRUE only for PRE
 */
PRBool PreformattedChildren(eHTMLTags aTag)
{
  return (eHTMLTag_pre == aTag) ? PR_TRUE : PR_FALSE;
}


/**
 * Eat the open tag. Currently never the case.
 * NOTE(review): the original comment named a single tag here, but the
 * name is missing from this copy of the file -- confirm.
 *
 * @param  aTag  tag to test (unused)
 * @return PR_FALSE in all cases
 */
PRBool EatOpen(eHTMLTags aTag)
{
  return PR_FALSE;
}


/**
 * Eat the close tag. Currently never the case.
 * NOTE(review): the original comment named a single tag here, but the
 * name is missing from this copy of the file -- confirm.
 *
 * @param  aTag  tag to test (unused)
 * @return PR_FALSE in all cases
 */
PRBool EatClose(eHTMLTags aTag)
{
  return PR_FALSE;
}


/**
 * Are we allowed to insert new white space before the open tag.
 *
 * Returning false does not prevent inserting WS
 * before the tag if WS insertion is allowed for another reason,
 * e.g. there is already WS there or we are after a tag that
 * has PermitWSAfter*().
 *
 * @param  aTag  tag about to be opened
 * @return PR_TRUE exactly when aTag is not inline
 */
PRBool PermitWSBeforeOpen(eHTMLTags aTag)
{
  return IsInline(aTag) ? PR_FALSE : PR_TRUE;
}


/**
 * @see PermitWSBeforeOpen
 * @return PR_FALSE for PRE, PR_TRUE for every other tag
 */
PRBool PermitWSAfterOpen(eHTMLTags aTag)
{
  return (eHTMLTag_pre == aTag) ? PR_FALSE : PR_TRUE;
}


/**
 * @see PermitWSBeforeOpen
 * @return PR_FALSE for PRE, PR_TRUE for every other tag
 */
PRBool PermitWSBeforeClose(eHTMLTags aTag)
{
  return (eHTMLTag_pre == aTag) ? PR_FALSE : PR_TRUE;
}


/**
 * @see PermitWSBeforeOpen
 * @return PR_TRUE in all cases
 */
PRBool PermitWSAfterClose(eHTMLTags aTag)
{
  return PR_TRUE;
}


/**
 * Reports whether whitespace directly inside aTag may be dropped when
 * formatting.
 *
 * @see PermitWSBeforeOpen
 * @param  aTag  innermost open container
 * @return PR_TRUE for the tags listed below, PR_FALSE otherwise
 */
PRBool IgnoreWS(eHTMLTags aTag)
{
  switch (aTag) {
    case eHTMLTag_html:
    case eHTMLTag_head:
    case eHTMLTag_body:
    case eHTMLTag_ul:
    case eHTMLTag_ol:
    case eHTMLTag_li:
    case eHTMLTag_table:
    case eHTMLTag_tbody:
    case eHTMLTag_style:
      return PR_TRUE;

    default:
      return PR_FALSE;
  }
}