/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The contents of this file are subject to the Netscape Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code is Mozilla Communicator client code. * * The Initial Developer of the Original Code is Netscape Communications * Corporation. Portions created by Netscape are * Copyright (C) 1998 Netscape Communications Corporation. All * Rights Reserved. * * Contributor(s): * Greg Kostello (original structure) * Akkana Peck * Daniel Bratell * Ben Bucksch * Pierre Phaneuf * Markus Kuhn * */ /** * MODULE NOTES: * * This file declares the concrete TXT ContentSink class. * This class is used during the parsing process as the * primary interface between the parser and the content * model. */ #include "nsHTMLToTXTSinkStream.h" #include "nsHTMLTokens.h" #include "nsString.h" #include "nsIParser.h" #include "nsHTMLEntities.h" #include "nsXIFDTD.h" #include "prprf.h" // For PR_snprintf() #include "nsIDocumentEncoder.h" // for output flags #include "nsIUnicodeEncoder.h" #include "nsICharsetAlias.h" #include "nsIServiceManager.h" #include "nsICharsetConverterManager.h" #include "nsILineBreakerFactory.h" #include "nsLWBrkCIID.h" #include "nsIOutputStream.h" #include "nsFileStream.h" #include "nsIPref.h" static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); static NS_DEFINE_CID(kLWBrkCID, NS_LWBRK_CID); static NS_DEFINE_CID(kPrefServiceCID, NS_PREF_CID); #define PREF_STRUCTS "converter.html2txt.structs" #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy" const PRInt32 gTabSize=4; const PRInt32 gOLNumberWidth = 3; const PRInt32 gIndentSizeHeaders = 2; /* Indention of h1, if mHeaderStrategy = 1 or = 2. Indention of other headers is derived from that. XXX center h1? */ const PRInt32 gIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1, indent h(x+1) this many columns more than h(x) */ const PRInt32 gIndentSizeList = (gTabSize > gOLNumberWidth+3) ? gTabSize: gOLNumberWidth+3; // Indention of non-first lines of ul and ol const PRInt32 gIndentSizeDD = gTabSize; // Indention of
static PRBool IsInline(eHTMLTags aTag); static PRBool IsBlockLevel(eHTMLTags aTag); static PRInt32 HeaderLevel(eHTMLTags aTag); static PRInt32 unicharwidth(PRUnichar ucs); static PRInt32 unicharwidth(const PRUnichar* pwcs, PRInt32 n); /** * Inits the encoder instance variable for the sink based on the charset * * @update gpk 4/21/99 * @param aCharset * @return NS_xxx error result */ nsresult nsHTMLToTXTSinkStream::InitEncoder(const nsString& aCharset) { nsresult res = NS_OK; // If the converter is ucs2, then do not use a converter if (aCharset.EqualsWithConversion("ucs2")) { NS_IF_RELEASE(mUnicodeEncoder); return res; } nsICharsetAlias* calias = nsnull; res = nsServiceManager::GetService(kCharsetAliasCID, kICharsetAliasIID, (nsISupports**)&calias); NS_ASSERTION( nsnull != calias, "cannot find charset alias"); nsAutoString charsetName;charsetName.Assign(aCharset); if( NS_SUCCEEDED(res) && (nsnull != calias)) { res = calias->GetPreferred(aCharset, charsetName); nsServiceManager::ReleaseService(kCharsetAliasCID, calias); if(NS_FAILED(res)) { // failed - unknown alias , fallback to ISO-8859-1 charsetName.AssignWithConversion("ISO-8859-1"); } nsICharsetConverterManager * ccm = nsnull; res = nsServiceManager::GetService(kCharsetConverterManagerCID, NS_GET_IID(nsICharsetConverterManager), (nsISupports**)&ccm); if(NS_SUCCEEDED(res) && (nsnull != ccm)) { nsIUnicodeEncoder * encoder = nsnull; res = ccm->GetUnicodeEncoder(&charsetName, &encoder); if(NS_SUCCEEDED(res) && (nsnull != encoder)) { NS_IF_RELEASE(mUnicodeEncoder); mUnicodeEncoder = encoder; } nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm); } } return res; } /** * This method gets called as part of our COM-like interfaces. * Its purpose is to create an interface to parser object * of some type. * * @update gpk02/03/99 * @param nsIID id of object to discover * @param aInstancePtr ptr to newly discovered interface * @return NS_xxx result code */ nsresult nsHTMLToTXTSinkStream::QueryInterface(const nsIID& aIID, void** aInstancePtr) { if (NULL == aInstancePtr) { return NS_ERROR_NULL_POINTER; } if(aIID.Equals(NS_GET_IID(nsISupports))) { *aInstancePtr = (nsIContentSink*)(this); } else if(aIID.Equals(NS_GET_IID(nsIContentSink))) { *aInstancePtr = (nsIContentSink*)(this); } else if(aIID.Equals(NS_GET_IID(nsIHTMLContentSink))) { *aInstancePtr = (nsIHTMLContentSink*)(this); } else if(aIID.Equals(NS_GET_IID(nsIHTMLToTXTSinkStream))) { *aInstancePtr = (nsIHTMLToTXTSinkStream*)(this); } else { *aInstancePtr=0; return NS_NOINTERFACE; } NS_ADDREF_THIS(); return NS_OK; } NS_IMPL_ADDREF(nsHTMLToTXTSinkStream) NS_IMPL_RELEASE(nsHTMLToTXTSinkStream) // Someday may want to make this non-const: static const PRUint32 TagStackSize = 500; static const PRUint32 OLStackSize = 100; /** * Construct a content sink stream. * @update gpk02/03/99 * @param * @return */ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream() { NS_INIT_REFCNT(); mColPos = 0; mIndent = 0; mCiteQuoteLevel = 0; mDoFragment = PR_FALSE; mBufferSize = 0; mBufferLength = 0; mBuffer = nsnull; mUnicodeEncoder = nsnull; mStructs = PR_TRUE; // will be read from prefs later mHeaderStrategy = 1 /*indent increasingly*/; // ditto for (PRInt32 i = 0; i <= 6; i++) mHeaderCounter[i] = 0; // Line breaker mLineBreaker = nsnull; mWrapColumn = 72; // XXX magic number, we expect someone to reset this mCurrentLineWidth = 0; // Flow mEmptyLines=1; // The start of the document is an "empty line" in itself, mInWhitespace = PR_TRUE; mPreFormatted = PR_FALSE; mCacheLine = PR_FALSE; mStartedOutput = PR_FALSE; // initialize the tag stack to zero: mTagStack = new nsHTMLTag[TagStackSize]; mTagStackIndex = 0; // initialize the OL stack, where numbers for ordered lists are kept: mOLStack = new PRInt32[OLStackSize]; mOLStackIndex = 0; } /** * * @update gpk02/03/99 * @param * @return */ nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() { if (mCurrentLine.Length() > 0) FlushLine(); // We have some left over text in current line. flush it out. // This means we didn't have a body or html node -- probably a text control. if(mBuffer) delete[] mBuffer; delete[] mTagStack; delete[] mOLStack; NS_IF_RELEASE(mUnicodeEncoder); NS_IF_RELEASE(mLineBreaker); } /** * * @update gpk04/30/99 * @param * @return */ NS_IMETHODIMP nsHTMLToTXTSinkStream::Initialize(nsIOutputStream* aOutStream, nsString* aOutString, PRUint32 aFlags) { mStream = aOutStream; mString = aOutString; mFlags = aFlags; nsILineBreakerFactory *lf; nsresult result = NS_OK; result = nsServiceManager::GetService(kLWBrkCID, NS_GET_IID(nsILineBreakerFactory), (nsISupports **)&lf); if (NS_SUCCEEDED(result)) { nsAutoString lbarg; result = lf->GetBreaker(lbarg, &mLineBreaker); if(NS_FAILED(result)) { mLineBreaker = nsnull; } result = nsServiceManager::ReleaseService(kLWBrkCID, lf); } // Turn on caching if we are wrapping or we want formatting. // We need this even when flags indicate preformatted, // in order to wrap textareas with wrap=hard. if((mFlags & nsIDocumentEncoder::OutputFormatted) || (mFlags & nsIDocumentEncoder::OutputWrap)) { mCacheLine = PR_TRUE; } // Set the line break character: if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) // Windows/mail mLineBreak.AssignWithConversion("\r\n"); else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) // Mac mLineBreak.AssignWithConversion("\r"); else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) // Unix/DOM mLineBreak.AssignWithConversion("\n"); else mLineBreak.AssignWithConversion(NS_LINEBREAK); // Platform/default // Get some prefs nsresult rv; NS_WITH_SERVICE(nsIPref, prefs, NS_PREF_PROGID, &rv); if (NS_SUCCEEDED(rv) && prefs) { rv = prefs->GetBoolPref(PREF_STRUCTS, &mStructs); rv = prefs->GetIntPref(PREF_HEADER_STRATEGY, &mHeaderStrategy); } return result; } NS_IMETHODIMP nsHTMLToTXTSinkStream::SetCharsetOverride(const nsString* aCharset) { if (aCharset) { mCharsetOverride = *aCharset; InitEncoder(mCharsetOverride); } return NS_OK; } /** * This method gets called by the parser when it encounters * a title tag and wants to set the document title in the sink. * * @update gpk02/03/99 * @param nsString reference to new title value * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLToTXTSinkStream::SetTitle(const nsString& aValue) { return NS_OK; } /** * All these HTML-specific methods may be called, or may not, * depending on whether the parser is parsing XIF or HTML. * So we can't depend on them; instead, we have Open/CloseContainer * do all the specialized work, and the html-specific Open/Close * methods must call the more general methods. * Since there are so many of them, make a macro: */ #define USE_GENERAL_OPEN_METHOD(opentag) \ NS_IMETHODIMP \ nsHTMLToTXTSinkStream::opentag(const nsIParserNode& aNode) \ { return OpenContainer(aNode); } #define USE_GENERAL_CLOSE_METHOD(closetag) \ NS_IMETHODIMP \ nsHTMLToTXTSinkStream::closetag(const nsIParserNode& aNode) \ { return CloseContainer(aNode); } USE_GENERAL_OPEN_METHOD(OpenHTML) USE_GENERAL_CLOSE_METHOD(CloseHTML) USE_GENERAL_OPEN_METHOD(OpenHead) USE_GENERAL_CLOSE_METHOD(CloseHead) USE_GENERAL_OPEN_METHOD(OpenBody) USE_GENERAL_CLOSE_METHOD(CloseBody) USE_GENERAL_OPEN_METHOD(OpenForm) USE_GENERAL_CLOSE_METHOD(CloseForm) USE_GENERAL_OPEN_METHOD(OpenMap) USE_GENERAL_CLOSE_METHOD(CloseMap) USE_GENERAL_OPEN_METHOD(OpenFrameset) USE_GENERAL_CLOSE_METHOD(CloseFrameset) NS_IMETHODIMP nsHTMLToTXTSinkStream::DoFragment(PRBool aFlag) { mDoFragment = aFlag; return NS_OK; } /** * This gets called when handling illegal contents, especially * in dealing with tables. This method creates a new context. * * @update 04/04/99 harishd * @param aPosition - The position from where the new context begins. */ NS_IMETHODIMP nsHTMLToTXTSinkStream::BeginContext(PRInt32 aPosition) { return NS_OK; } /** * This method terminates any new context that got created by * BeginContext and switches back to the main context. * * @update 04/04/99 harishd * @param aPosition - Validates the end of a context. */ NS_IMETHODIMP nsHTMLToTXTSinkStream::EndContext(PRInt32 aPosition) { return NS_OK; } /** * This gets called by the parser when you want to add * a PI node to the current container in the content * model. * * @updated gpk02/03/99 * @param * @return */ NS_IMETHODIMP nsHTMLToTXTSinkStream::AddProcessingInstruction(const nsIParserNode& aNode){ return NS_OK; } /** * This gets called by the parser when it encounters * a DOCTYPE declaration in the HTML document. */ NS_IMETHODIMP nsHTMLToTXTSinkStream::AddDocTypeDecl(const nsIParserNode& aNode, PRInt32 aMode) { return NS_OK; } /** * This gets called by the parser when you want to add * a comment node to the current container in the content * model. * * @updated gpk02/03/99 * @param * @return */ NS_IMETHODIMP nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode) { // Skip comments in plaintext output return NS_OK; } NS_IMETHODIMP nsHTMLToTXTSinkStream::GetValueOfAttribute(const nsIParserNode& aNode, char* aMatchKey, nsString& aValueRet) { nsAutoString matchKey; matchKey.AssignWithConversion(aMatchKey); PRInt32 count=aNode.GetAttributeCount(); for (PRInt32 i=0;iHTML converter. * In this case, we should ignore it. */ PRBool nsHTMLToTXTSinkStream::IsConverted(const nsIParserNode& aNode) { nsAutoString value; nsresult rv = GetValueOfAttribute(aNode, "class", value); return ( NS_SUCCEEDED(rv) && ( value.EqualsWithConversion("txt", PR_TRUE, 3) || value.EqualsWithConversion("\"txt", PR_TRUE, 4) ) ); } PRBool nsHTMLToTXTSinkStream::DoOutput() { PRBool inBody = PR_FALSE; // Loop over the tag stack and see if we're inside a body, // and not inside a markup_declaration for (PRUint32 i = 0; i < mTagStackIndex; ++i) { if (mTagStack[i] == eHTMLTag_markupDecl || mTagStack[i] == eHTMLTag_comment) return PR_FALSE; if (mTagStack[i] == eHTMLTag_body) inBody = PR_TRUE; } return mDoFragment || inBody; } /** * This method is used to a general container. * This includes: OL,UL,DIR,SPAN,TABLE,H[1..6],etc. * * @update 07/12/98 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode) { eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); const nsString& name = aNode.GetText(); if (name.EqualsWithConversion("document_info")) { nsString value; if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "charset", value))) { if (mCharsetOverride.Length() == 0) InitEncoder(value); else InitEncoder(mCharsetOverride); } return NS_OK; } if (mTagStackIndex < TagStackSize) mTagStack[mTagStackIndex++] = type; if (type == eHTMLTag_body) { // body -> can turn on cacheing unless it's already preformatted if(!(mFlags & nsIDocumentEncoder::OutputPreformatted) && ((mFlags & nsIDocumentEncoder::OutputFormatted) || (mFlags & nsIDocumentEncoder::OutputWrap))) { mCacheLine = PR_TRUE; } // Try to figure out here whether we have a // preformatted style attribute. // // Trigger on the presence of a "-moz-pre-wrap" in the // style attribute. That's a very simplistic way to do // it, but better than nothing. // Also set mWrapColumn to the value given there // (which arguably we should only do if told to do so). nsString style; PRInt32 whitespace; if(NS_SUCCEEDED(GetValueOfAttribute(aNode, "style", style)) && (-1 != (whitespace = style.Find("white-space:")))) { if (-1 != style.Find("-moz-pre-wrap", PR_TRUE, whitespace)) { #ifdef DEBUG_preformatted printf("Set mPreFormatted based on style moz-pre-wrap\n"); #endif mPreFormatted = PR_TRUE; mCacheLine = PR_TRUE; PRInt32 widthOffset = style.Find("width:"); if (widthOffset >= 0) { // We have to search for the ch before the semicolon, // not for the semicolon itself, because nsString::ToInteger() // considers 'c' to be a valid numeric char (even if radix=10) // but then gets confused if it sees it next to the number // when the radix specified was 10, and returns an error code. PRInt32 semiOffset = style.Find("ch", widthOffset+6); PRInt32 length = (semiOffset > 0 ? semiOffset - widthOffset - 6 : style.Length() - widthOffset); nsString widthstr; style.Mid(widthstr, widthOffset+6, length); PRInt32 err; PRInt32 col = widthstr.ToInteger(&err); if (NS_SUCCEEDED(err)) { SetWrapColumn((PRUint32)col); #ifdef DEBUG_preformatted printf("Set wrap column to %d based on style\n", mWrapColumn); #endif } } } else if (-1 != style.Find("pre", PR_TRUE, whitespace)) { #ifdef DEBUG_preformatted printf("Set mPreFormatted based on style pre\n"); #endif mPreFormatted = PR_TRUE; mCacheLine = PR_TRUE; SetWrapColumn(0); } } else { mPreFormatted = PR_FALSE; mCacheLine = PR_TRUE; // Cache lines unless something else tells us not to } return NS_OK; } if (!DoOutput()) return NS_OK; if (type == eHTMLTag_p || type == eHTMLTag_pre) EnsureVerticalSpace(1); // Should this be 0 in unformatted case? // Else make sure we'll separate block level tags, // even if we're about to leave, before doing any other formatting. else if (IsBlockLevel(type)) EnsureVerticalSpace(0); // The rest of this routine is formatted output stuff, // which we should skip if we're not formatted: if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) return NS_OK; if (type == eHTMLTag_h1 || type == eHTMLTag_h2 || type == eHTMLTag_h3 || type == eHTMLTag_h4 || type == eHTMLTag_h5 || type == eHTMLTag_h6) { EnsureVerticalSpace(2); if (mHeaderStrategy == 2) // numbered { mIndent += gIndentSizeHeaders; // Caching nsCAutoString leadup; PRInt32 level = HeaderLevel(type); // Increase counter for current level mHeaderCounter[level]++; // Reset all lower levels PRInt32 i; for (i = level + 1; i <= 6; i++) mHeaderCounter[i] = 0; // Construct numbers for (i = 1; i <= level; i++) { leadup.AppendInt(mHeaderCounter[i]); leadup += "."; } leadup += " "; Write(NS_ConvertASCIItoUCS2(leadup.GetBuffer())); } else if (mHeaderStrategy == 1) // indent increasingly { mIndent += gIndentSizeHeaders; for (PRInt32 i = HeaderLevel(type); i > 1; i--) // for h(x), run x-1 times mIndent += gIndentIncrementHeaders; } } else if (type == eHTMLTag_ul) { // Indent here to support nested list, which aren't included in li :-( EnsureVerticalSpace(1); // Must end the current line before we change indent. mIndent += gIndentSizeList; } else if (type == eHTMLTag_ol) { EnsureVerticalSpace(1); // Must end the current line before we change indent. if (mOLStackIndex < OLStackSize) mOLStack[mOLStackIndex++] = 1; // XXX should get it from the node! mIndent += gIndentSizeList; // see ul } else if (type == eHTMLTag_li) { if (mTagStackIndex > 1 && mTagStack[mTagStackIndex-2] == eHTMLTag_ol) { if (mOLStackIndex > 0) // This is what nsBulletFrame does for OLs: mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10); else mInIndentString.AppendWithConversion("#"); mInIndentString.AppendWithConversion('.'); } else mInIndentString.AppendWithConversion('*'); mInIndentString.AppendWithConversion(' '); } else if (type == eHTMLTag_dl) EnsureVerticalSpace(1); else if (type == eHTMLTag_dd) mIndent += gIndentSizeDD; else if (type == eHTMLTag_td || type == eHTMLTag_th) { // We must make sure that the content of two table cells get a // space between them. // Fow now, I will only add a SPACE. Could be a TAB or something // else but I'm not sure everything can handle the TAB so SPACE // seems like a better solution. if(!mInWhitespace) { // Maybe add something else? Several spaces? A TAB? SPACE+TAB? if(mCacheLine) { AddToLine(NS_ConvertToString(" ").GetUnicode(), 1); } else { nsAutoString space(NS_ConvertToString(" ")); WriteSimple(space); } mInWhitespace = PR_TRUE; } } else if (type == eHTMLTag_blockquote) { EnsureVerticalSpace(0); // Find out whether it's a type=cite, and insert "> " instead. // Eventually we should get the value of the pref controlling citations, // and handle AOL-style citations as well. // If we want to support RFC 2646 (and we do!) we have to have: // >>>> text // >>> fdfd // when a mail is sent. nsString value; nsresult rv = GetValueOfAttribute(aNode, "type", value); if ( NS_SUCCEEDED(rv) ) value.StripChars("\""); if (NS_SUCCEEDED(rv) && value.EqualsWithConversion("cite", PR_TRUE)) mCiteQuoteLevel++; else mIndent += gTabSize; // Check for some maximum value? } else if (type == eHTMLTag_img) { /* Output (in decreasing order of preference) alt, title or src (URI) attribute */ // See nsAutoString desc, temp; if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "alt", desc))) { if (!desc.IsEmpty()) { temp.AppendWithConversion(" ["); // Should we output chars at all here? desc.StripChars("\""); temp += desc; temp.AppendWithConversion(" ]"); } // If the alt attribute has an empty value (|alt=""|), output nothing } else if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "title", desc)) && !desc.IsEmpty()) { temp.AppendWithConversion(" ["); desc.StripChars("\""); temp += desc; temp.AppendWithConversion("] "); } else if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "src", desc)) && !desc.IsEmpty()) { temp.AppendWithConversion(" <"); desc.StripChars("\""); temp += desc; temp.AppendWithConversion("> "); } if (!temp.IsEmpty()) Write(temp); } else if (type == eHTMLTag_a && !IsConverted(aNode)) { nsAutoString url; if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "href", url)) && !url.IsEmpty()) { url.StripChars("\""); mURL = url; } } else if (type == eHTMLTag_q) Write(NS_ConvertASCIItoUCS2("\"")); else if (type == eHTMLTag_sup && mStructs && !IsConverted(aNode)) Write(NS_ConvertASCIItoUCS2("^")); else if (type == eHTMLTag_sub && mStructs && !IsConverted(aNode)) Write(NS_ConvertASCIItoUCS2("_")); else if (type == eHTMLTag_code && mStructs && !IsConverted(aNode)) Write(NS_ConvertASCIItoUCS2("|")); else if ((type == eHTMLTag_strong || type == eHTMLTag_b) && mStructs && !IsConverted(aNode)) Write(NS_ConvertASCIItoUCS2("*")); else if ((type == eHTMLTag_em || type == eHTMLTag_i) && mStructs && !IsConverted(aNode)) Write(NS_ConvertASCIItoUCS2("/")); else if (type == eHTMLTag_u && mStructs && !IsConverted(aNode)) Write(NS_ConvertASCIItoUCS2("_")); return NS_OK; } /** * This method is used to close a generic container. * * @update 07/12/98 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode) { eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); if (mTagStackIndex > 0) --mTagStackIndex; // End current line if we're ending a block level tag if (IsBlockLevel(type)) { if((type == eHTMLTag_body) || (type == eHTMLTag_html)) { // We want the output to end with a new line, // but in preformatted areas like text fields, // we can't emit newlines that weren't there. // So add the newline only in the case of formatted output. if (mFlags & nsIDocumentEncoder::OutputFormatted) EnsureVerticalSpace(0); else FlushLine(); } else if ((type == eHTMLTag_tr) || (type == eHTMLTag_li) || (type == eHTMLTag_dt) || (type == eHTMLTag_dd) || (type == eHTMLTag_pre) || (type == eHTMLTag_blockquote)) { EnsureVerticalSpace(0); } else if (type != eHTMLTag_script) { // All other blocks get 1 vertical space after them // in formatted mode, otherwise 0. // This is hard. Sometimes 0 is a better number, but // how to know? EnsureVerticalSpace((mFlags & nsIDocumentEncoder::OutputFormatted) ? 1 : 0); } } // The rest of this routine is formatted output stuff, // which we should skip if we're not formatted: if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) return NS_OK; if (type == eHTMLTag_h1 || type == eHTMLTag_h2 || type == eHTMLTag_h3 || type == eHTMLTag_h4 || type == eHTMLTag_h5 || type == eHTMLTag_h6) { if (mHeaderStrategy /*numbered or indent increasingly*/ ) mIndent -= gIndentSizeHeaders; if (mHeaderStrategy == 1 /*indent increasingly*/ ) { for (PRInt32 i = HeaderLevel(type); i > 1; i--) // for h(x), run x-1 times mIndent -= gIndentIncrementHeaders; } EnsureVerticalSpace(1); } else if (type == eHTMLTag_ul) { mIndent -= gIndentSizeList; } else if (type == eHTMLTag_ol) { FlushLine(); // Doing this after decreasing OLStackIndex would be wrong. --mOLStackIndex; mIndent -= gIndentSizeList; } else if (type == eHTMLTag_dd) { mIndent -= gIndentSizeDD; } else if (type == eHTMLTag_blockquote) { FlushLine(); nsString value; nsresult rv = GetValueOfAttribute(aNode, "type", value); if ( NS_SUCCEEDED(rv) ) value.StripChars("\""); if (NS_SUCCEEDED(rv) && value.EqualsWithConversion("cite", PR_TRUE)) mCiteQuoteLevel--; else mIndent -= gTabSize; } else if (type == eHTMLTag_a && !IsConverted(aNode) && !mURL.IsEmpty()) { nsAutoString temp; temp.AssignWithConversion(" <"); temp += mURL; temp.AppendWithConversion(">"); Write(temp); mURL.Truncate(); } else if (type == eHTMLTag_q) Write(NS_ConvertASCIItoUCS2("\"")); else if ((type == eHTMLTag_sup || type == eHTMLTag_sub) && mStructs && !IsConverted(aNode)) Write(NS_ConvertASCIItoUCS2(" ")); else if (type == eHTMLTag_code && mStructs && !IsConverted(aNode)) Write(NS_ConvertASCIItoUCS2("|")); else if ((type == eHTMLTag_strong || type == eHTMLTag_b) && mStructs && !IsConverted(aNode)) Write(NS_ConvertASCIItoUCS2("*")); else if ((type == eHTMLTag_em || type == eHTMLTag_i) && mStructs && !IsConverted(aNode)) Write(NS_ConvertASCIItoUCS2("/")); else if (type == eHTMLTag_u && mStructs && !IsConverted(aNode)) Write(NS_ConvertASCIItoUCS2("_")); return NS_OK; } /** * This method is used to add a leaf to the currently * open container. * * @update 07/12/98 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode) { // If we don't want any output, just return if (!DoOutput()) return NS_OK; eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); nsString text = aNode.GetText(); if (mTagStackIndex > 1 && mTagStack[mTagStackIndex-2] == eHTMLTag_select) { // Don't output the contents of SELECT elements; // Might be nice, eventually, to output just the selected element. return NS_OK; } else if (mTagStackIndex > 0 && mTagStack[mTagStackIndex-1] == eHTMLTag_script) { // Don't output the contents of