diff --git a/mozilla/htmlparser/src/nsHTMLToTXTSinkStream.cpp b/mozilla/htmlparser/src/nsHTMLToTXTSinkStream.cpp deleted file mode 100644 index a99f2c7e3c7..00000000000 --- a/mozilla/htmlparser/src/nsHTMLToTXTSinkStream.cpp +++ /dev/null @@ -1,1798 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- - * - * The contents of this file are subject to the Netscape Public - * License Version 1.1 (the "License"); you may not use this file - * except in compliance with the License. You may obtain a copy of - * the License at http://www.mozilla.org/NPL/ - * - * Software distributed under the License is distributed on an "AS - * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or - * implied. See the License for the specific language governing - * rights and limitations under the License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is Netscape Communications - * Corporation. Portions created by Netscape are - * Copyright (C) 1998 Netscape Communications Corporation. All - * Rights Reserved. - * - * Contributor(s): - * Greg Kostello (original structure) - * Akkana Peck - * Daniel Bratell - * Ben Bucksch - * Pierre Phaneuf - * Markus Kuhn - */ - -/** - * MODULE NOTES: - * - * This file declares the concrete TXT ContentSink class. - * This class is used during the parsing process as the - * primary interface between the parser and the content - * model. - */ - -#include "nsHTMLToTXTSinkStream.h" -#include "nsHTMLTokens.h" -#include "nsString.h" -#include "nsIParser.h" -#include "nsHTMLEntities.h" -#include "nsXIFDTD.h" -#include "prprf.h" // For PR_snprintf() -#include "nsIDocumentEncoder.h" // for output flags -#include "nsIUnicodeEncoder.h" -#include "nsICharsetAlias.h" -#include "nsIServiceManager.h" -#include "nsICharsetConverterManager.h" -#include "nsILineBreakerFactory.h" -#include "nsLWBrkCIID.h" -#include "nsIOutputStream.h" -#include "nsFileStream.h" -#include "nsIPref.h" - -static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); -static NS_DEFINE_CID(kLWBrkCID, NS_LWBRK_CID); -static NS_DEFINE_CID(kPrefServiceCID, NS_PREF_CID); -static NS_DEFINE_IID(kCParserIID, NS_IPARSER_IID); -static NS_DEFINE_IID(kCParserCID, NS_PARSER_IID); - -#define PREF_STRUCTS "converter.html2txt.structs" -#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy" -const PRInt32 gTabSize=4; -const PRInt32 gOLNumberWidth = 3; -const PRInt32 gIndentSizeHeaders = 2; /* Indention of h1, if - mHeaderStrategy = 1 or = 2. - Indention of other headers - is derived from that. - XXX center h1? */ -const PRInt32 gIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1, - indent h(x+1) this many - columns more than h(x) */ -const PRInt32 gIndentSizeList = (gTabSize > gOLNumberWidth+3) ? gTabSize: gOLNumberWidth+3; - // Indention of non-first lines of ul and ol -const PRInt32 gIndentSizeDD = gTabSize; // Indention of
- -static PRInt32 HeaderLevel(eHTMLTags aTag); -static PRInt32 unicharwidth(PRUnichar ucs); -static PRInt32 unicharwidth(const PRUnichar* pwcs, PRInt32 n); - -/** - * Inits the encoder instance variable for the sink based on the charset - * - * @update gpk 4/21/99 - * @param aCharset - * @return NS_xxx error result - */ -nsresult nsHTMLToTXTSinkStream::InitEncoder(const nsString& aCharset) -{ - nsresult res = NS_OK; - - // If the converter is ucs2, then do not use a converter - if (aCharset.EqualsWithConversion("ucs2")) - { - NS_IF_RELEASE(mUnicodeEncoder); - return res; - } - - nsICharsetAlias* calias = nsnull; - res = nsServiceManager::GetService(kCharsetAliasCID, - kICharsetAliasIID, - (nsISupports**)&calias); - - NS_ASSERTION( nsnull != calias, "cannot find charset alias"); - nsAutoString charsetName;charsetName.Assign(aCharset); - if( NS_SUCCEEDED(res) && (nsnull != calias)) - { - res = calias->GetPreferred(aCharset, charsetName); - nsServiceManager::ReleaseService(kCharsetAliasCID, calias); - - if(NS_FAILED(res)) - { - // failed - unknown alias , fallback to ISO-8859-1 - charsetName.AssignWithConversion("ISO-8859-1"); - } - - nsICharsetConverterManager * ccm = nsnull; - res = nsServiceManager::GetService(kCharsetConverterManagerCID, - NS_GET_IID(nsICharsetConverterManager), - (nsISupports**)&ccm); - if(NS_SUCCEEDED(res) && (nsnull != ccm)) - { - nsIUnicodeEncoder * encoder = nsnull; - res = ccm->GetUnicodeEncoder(&charsetName, &encoder); - if(NS_SUCCEEDED(res) && (nsnull != encoder)) - { - NS_IF_RELEASE(mUnicodeEncoder); - mUnicodeEncoder = encoder; - } - nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm); - } - } - return res; -} - -/** - * This method gets called as part of our COM-like interfaces. - * Its purpose is to create an interface to parser object - * of some type. - * - * @update gpk02/03/99 - * @param nsIID id of object to discover - * @param aInstancePtr ptr to newly discovered interface - * @return NS_xxx result code - */ -nsresult -nsHTMLToTXTSinkStream::QueryInterface(const nsIID& aIID, void** aInstancePtr) -{ - if (NULL == aInstancePtr) { - return NS_ERROR_NULL_POINTER; - } - if(aIID.Equals(NS_GET_IID(nsISupports))) { - *aInstancePtr = (nsIContentSink*)(this); - } - else if(aIID.Equals(NS_GET_IID(nsIContentSink))) { - *aInstancePtr = (nsIContentSink*)(this); - } - else if(aIID.Equals(NS_GET_IID(nsIHTMLContentSink))) { - *aInstancePtr = (nsIHTMLContentSink*)(this); - } - else if(aIID.Equals(NS_GET_IID(nsIHTMLToTXTSinkStream))) { - *aInstancePtr = (nsIHTMLToTXTSinkStream*)(this); - } - else { - *aInstancePtr=0; - return NS_NOINTERFACE; - } - NS_ADDREF_THIS(); - return NS_OK; -} - -NS_IMPL_ADDREF(nsHTMLToTXTSinkStream) -NS_IMPL_RELEASE(nsHTMLToTXTSinkStream) - -// Someday may want to make this non-const: -static const PRUint32 TagStackSize = 500; -static const PRUint32 OLStackSize = 100; - -/** - * Construct a content sink stream. - * @update gpk02/03/99 - * @param - * @return - */ -nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream() -{ - NS_INIT_REFCNT(); - mDTD = 0; - mColPos = 0; - mIndent = 0; - mCiteQuoteLevel = 0; - mDoFragment = PR_FALSE; - mBufferSize = 0; - mBufferLength = 0; - mBuffer = nsnull; - mUnicodeEncoder = nsnull; - mStructs = PR_TRUE; // will be read from prefs later - mHeaderStrategy = 1 /*indent increasingly*/; // ditto - for (PRInt32 i = 0; i <= 6; i++) - mHeaderCounter[i] = 0; - - // Line breaker - mLineBreaker = nsnull; - mWrapColumn = 72; // XXX magic number, we expect someone to reset this - mCurrentLineWidth = 0; - - // Flow - mEmptyLines=1; // The start of the document is an "empty line" in itself, - mInWhitespace = PR_TRUE; - mPreFormatted = PR_FALSE; - mCacheLine = PR_FALSE; - mStartedOutput = PR_FALSE; - - // initialize the tag stack to zero: - mTagStack = new nsHTMLTag[TagStackSize]; - mTagStackIndex = 0; - - // initialize the OL stack, where numbers for ordered lists are kept: - mOLStack = new PRInt32[OLStackSize]; - mOLStackIndex = 0; -} - -/** - * - * @update gpk02/03/99 - * @param - * @return - */ -nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() -{ - if (mCurrentLine.Length() > 0) - FlushLine(); // We have some left over text in current line. flush it out. - // This means we didn't have a body or html node -- probably a text control. - - if(mBuffer) - delete[] mBuffer; - delete[] mTagStack; - delete[] mOLStack; - NS_IF_RELEASE(mDTD); - NS_IF_RELEASE(mUnicodeEncoder); - NS_IF_RELEASE(mLineBreaker); -} - -/** - * - * @update gpk04/30/99 - * @param - * @return - */ -NS_IMETHODIMP -nsHTMLToTXTSinkStream::Initialize(nsIOutputStream* aOutStream, - nsAWritableString* aOutString, - PRUint32 aFlags) -{ - mStream = aOutStream; - // XXX This is wrong. It violates XPCOM string ownership rules. - // We're only getting away with this because instances of this - // class are restricted to single function scope. - mString = aOutString; - mFlags = aFlags; - - nsILineBreakerFactory *lf; - nsresult result = NS_OK; - - result = nsServiceManager::GetService(kLWBrkCID, - NS_GET_IID(nsILineBreakerFactory), - (nsISupports **)&lf); - if (NS_SUCCEEDED(result)) { - nsAutoString lbarg; - result = lf->GetBreaker(lbarg, &mLineBreaker); - if(NS_FAILED(result)) { - mLineBreaker = nsnull; - } - result = nsServiceManager::ReleaseService(kLWBrkCID, lf); - } - - // Turn on caching if we are wrapping or we want formatting. - // We need this even when flags indicate preformatted, - // in order to wrap textareas with wrap=hard. - if((mFlags & nsIDocumentEncoder::OutputFormatted) || - (mFlags & nsIDocumentEncoder::OutputWrap)) - { - mCacheLine = PR_TRUE; - } - - // Set the line break character: - if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) - && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) // Windows/mail - mLineBreak.AssignWithConversion("\r\n"); - else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) // Mac - mLineBreak.AssignWithConversion("\r"); - else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) // Unix/DOM - mLineBreak.AssignWithConversion("\n"); - else - mLineBreak.AssignWithConversion(NS_LINEBREAK); // Platform/default - - // Get some prefs - nsresult rv; - NS_WITH_SERVICE(nsIPref, prefs, NS_PREF_CONTRACTID, &rv); - if (NS_SUCCEEDED(rv) && prefs) - { - rv = prefs->GetBoolPref(PREF_STRUCTS, &mStructs); - rv = prefs->GetIntPref(PREF_HEADER_STRATEGY, &mHeaderStrategy); - } - - return result; -} - -NS_IMETHODIMP -nsHTMLToTXTSinkStream::SetCharsetOverride(const nsAReadableString* aCharset) -{ - if (aCharset) - { - mCharsetOverride.Assign(*aCharset); - InitEncoder(mCharsetOverride); - } - return NS_OK; -} - -/** - * This method gets called by the parser when it encounters - * a title tag and wants to set the document title in the sink. - * - * @update gpk02/03/99 - * @param nsString reference to new title value - * @return PR_TRUE if successful. - */ -NS_IMETHODIMP -nsHTMLToTXTSinkStream::SetTitle(const nsString& aValue) -{ - return NS_OK; -} - -/** - * All these HTML-specific methods may be called, or may not, - * depending on whether the parser is parsing XIF or HTML. - * So we can't depend on them; instead, we have Open/CloseContainer - * do all the specialized work, and the html-specific Open/Close - * methods must call the more general methods. - * Since there are so many of them, make a macro: - */ - -#define USE_GENERAL_OPEN_METHOD(opentag) \ -NS_IMETHODIMP \ -nsHTMLToTXTSinkStream::opentag(const nsIParserNode& aNode) \ -{ return OpenContainer(aNode); } - -#define USE_GENERAL_CLOSE_METHOD(closetag) \ -NS_IMETHODIMP \ -nsHTMLToTXTSinkStream::closetag(const nsIParserNode& aNode) \ -{ return CloseContainer(aNode); } - -USE_GENERAL_OPEN_METHOD(OpenHTML) -USE_GENERAL_CLOSE_METHOD(CloseHTML) -USE_GENERAL_OPEN_METHOD(OpenHead) -USE_GENERAL_CLOSE_METHOD(CloseHead) -USE_GENERAL_OPEN_METHOD(OpenBody) -USE_GENERAL_CLOSE_METHOD(CloseBody) -USE_GENERAL_OPEN_METHOD(OpenForm) -USE_GENERAL_CLOSE_METHOD(CloseForm) -USE_GENERAL_OPEN_METHOD(OpenMap) -USE_GENERAL_CLOSE_METHOD(CloseMap) -USE_GENERAL_OPEN_METHOD(OpenFrameset) -USE_GENERAL_CLOSE_METHOD(CloseFrameset) -USE_GENERAL_OPEN_METHOD(OpenNoscript) -USE_GENERAL_CLOSE_METHOD(CloseNoscript) - -NS_IMETHODIMP -nsHTMLToTXTSinkStream::DoFragment(PRBool aFlag) -{ - mDoFragment = aFlag; - return NS_OK; -} - -/** - * This gets called when handling illegal contents, especially - * in dealing with tables. This method creates a new context. - * - * @update 04/04/99 harishd - * @param aPosition - The position from where the new context begins. - */ -NS_IMETHODIMP -nsHTMLToTXTSinkStream::BeginContext(PRInt32 aPosition) -{ - return NS_OK; -} - -/** - * This method terminates any new context that got created by - * BeginContext and switches back to the main context. - * - * @update 04/04/99 harishd - * @param aPosition - Validates the end of a context. - */ -NS_IMETHODIMP -nsHTMLToTXTSinkStream::EndContext(PRInt32 aPosition) -{ - return NS_OK; -} - -/** - * This gets called by the parser when you want to add - * a PI node to the current container in the content - * model. - * - * @updated gpk02/03/99 - * @param - * @return - */ -NS_IMETHODIMP -nsHTMLToTXTSinkStream::AddProcessingInstruction(const nsIParserNode& aNode){ - return NS_OK; -} - -/** - * This gets called by the parser when it encounters - * a DOCTYPE declaration in the HTML document. - */ -NS_IMETHODIMP -nsHTMLToTXTSinkStream::AddDocTypeDecl(const nsIParserNode& aNode, PRInt32 aMode) -{ - // Should probably set DTD - return NS_OK; -} - -/** - * This gets called by the parser when you want to add - * a comment node to the current container in the content - * model. - * - * @updated gpk02/03/99 - * @param - * @return - */ -NS_IMETHODIMP -nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode) -{ - // Skip comments in plaintext output - return NS_OK; -} - -NS_IMETHODIMP -nsHTMLToTXTSinkStream::GetValueOfAttribute(const nsIParserNode& aNode, - char* aMatchKey, - nsString& aValueRet) -{ - nsAutoString matchKey; matchKey.AssignWithConversion(aMatchKey); - PRInt32 count=aNode.GetAttributeCount(); - for (PRInt32 i=0;iHTML converter. - * In this case, we should ignore it. - */ -PRBool nsHTMLToTXTSinkStream::IsConverted(const nsIParserNode& aNode) -{ - nsAutoString value; - nsresult rv = GetValueOfAttribute(aNode, "class", value); - return - ( - NS_SUCCEEDED(rv) - && - ( - value.EqualsWithConversion("moz-txt", PR_TRUE, 7) || - value.EqualsWithConversion("\"moz-txt", PR_TRUE, 8) - ) - ); -} - -PRBool nsHTMLToTXTSinkStream::DoOutput() -{ - PRBool inBody = PR_FALSE; - - // Loop over the tag stack and see if we're inside a body, - // and not inside a markup_declaration - for (PRUint32 i = 0; i < mTagStackIndex; ++i) - { - if (mTagStack[i] == eHTMLTag_doctypeDecl - || mTagStack[i] == eHTMLTag_comment - || mTagStack[i] == eHTMLTag_markupDecl) - return PR_FALSE; - - if (mTagStack[i] == eHTMLTag_body) - inBody = PR_TRUE; - } - - return mDoFragment || inBody; -} - - -/** - * This method is used to open a general container. - * This includes: OL,UL,DIR,SPAN,TABLE,H[1..6],etc. - * - * @param nsIParserNode reference to parser node interface - * @return PR_TRUE if successful. - */ -NS_IMETHODIMP -nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode) -{ - eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); - const nsAReadableString& name = aNode.GetText(); - if (name.Equals(NS_LITERAL_STRING("document_info"))) - { - nsString value; - if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "charset", value))) - { - if (mCharsetOverride.Length() == 0) - InitEncoder(value); - else - InitEncoder(mCharsetOverride); - } - return NS_OK; - } - - if (mTagStackIndex < TagStackSize) - mTagStack[mTagStackIndex++] = type; - - if (type == eHTMLTag_body) - { - // body -> can turn on cacheing unless it's already preformatted - if(!(mFlags & nsIDocumentEncoder::OutputPreformatted) && - ((mFlags & nsIDocumentEncoder::OutputFormatted) || - (mFlags & nsIDocumentEncoder::OutputWrap))) { - mCacheLine = PR_TRUE; - } - - // Try to figure out here whether we have a - // preformatted style attribute. - // - // Trigger on the presence of a "-moz-pre-wrap" in the - // style attribute. That's a very simplistic way to do - // it, but better than nothing. - // Also set mWrapColumn to the value given there - // (which arguably we should only do if told to do so). - nsString style; - PRInt32 whitespace; - if(NS_SUCCEEDED(GetValueOfAttribute(aNode, "style", style)) && - (-1 != (whitespace = style.Find("white-space:")))) - { - if (-1 != style.Find("-moz-pre-wrap", PR_TRUE, whitespace)) - { -#ifdef DEBUG_preformatted - printf("Set mPreFormatted based on style moz-pre-wrap\n"); -#endif - mPreFormatted = PR_TRUE; - mCacheLine = PR_TRUE; - PRInt32 widthOffset = style.Find("width:"); - if (widthOffset >= 0) - { - // We have to search for the ch before the semicolon, - // not for the semicolon itself, because nsString::ToInteger() - // considers 'c' to be a valid numeric char (even if radix=10) - // but then gets confused if it sees it next to the number - // when the radix specified was 10, and returns an error code. - PRInt32 semiOffset = style.Find("ch", widthOffset+6); - PRInt32 length = (semiOffset > 0 ? semiOffset - widthOffset - 6 - : style.Length() - widthOffset); - nsString widthstr; - style.Mid(widthstr, widthOffset+6, length); - PRInt32 err; - PRInt32 col = widthstr.ToInteger(&err); - if (NS_SUCCEEDED(err)) - { - SetWrapColumn((PRUint32)col); -#ifdef DEBUG_preformatted - printf("Set wrap column to %d based on style\n", mWrapColumn); -#endif - } - } - } - else if (-1 != style.Find("pre", PR_TRUE, whitespace)) - { -#ifdef DEBUG_preformatted - printf("Set mPreFormatted based on style pre\n"); -#endif - mPreFormatted = PR_TRUE; - mCacheLine = PR_TRUE; - SetWrapColumn(0); - } - } else { - mPreFormatted = PR_FALSE; - mCacheLine = PR_TRUE; // Cache lines unless something else tells us not to - } - - return NS_OK; - } - - if (!DoOutput()) - return NS_OK; - - if (type == eHTMLTag_p || type == eHTMLTag_pre) - EnsureVerticalSpace(1); // Should this be 0 in unformatted case? - - else if (type == eHTMLTag_td || type == eHTMLTag_th) - { - // We must make sure that the content of two table cells get a - // space between them. - - // Fow now, I will only add a SPACE. Could be a TAB or something - // else but I'm not sure everything can handle the TAB so SPACE - // seems like a better solution. - if(!mInWhitespace) { - // Maybe add something else? Several spaces? A TAB? SPACE+TAB? - if(mCacheLine) { - AddToLine(NS_ConvertToString(" ").GetUnicode(), 1); - } else { - nsAutoString space(NS_ConvertToString(" ")); - WriteSimple(space); - } - mInWhitespace = PR_TRUE; - } - } - - // Else make sure we'll separate block level tags, - // even if we're about to leave, before doing any other formatting. - else if (IsBlockLevel(type)) - EnsureVerticalSpace(0); - - // The rest of this routine is formatted output stuff, - // which we should skip if we're not formatted: - if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) - return NS_OK; - - if (type == eHTMLTag_h1 || type == eHTMLTag_h2 || - type == eHTMLTag_h3 || type == eHTMLTag_h4 || - type == eHTMLTag_h5 || type == eHTMLTag_h6) - { - EnsureVerticalSpace(2); - if (mHeaderStrategy == 2) // numbered - { - mIndent += gIndentSizeHeaders; - // Caching - nsCAutoString leadup; - PRInt32 level = HeaderLevel(type); - // Increase counter for current level - mHeaderCounter[level]++; - // Reset all lower levels - PRInt32 i; - for (i = level + 1; i <= 6; i++) - mHeaderCounter[i] = 0; - // Construct numbers - for (i = 1; i <= level; i++) - { - leadup.AppendInt(mHeaderCounter[i]); - leadup += "."; - } - leadup += " "; - Write(NS_ConvertASCIItoUCS2(leadup.GetBuffer())); - } - else if (mHeaderStrategy == 1) // indent increasingly - { - mIndent += gIndentSizeHeaders; - for (PRInt32 i = HeaderLevel(type); i > 1; i--) - // for h(x), run x-1 times - mIndent += gIndentIncrementHeaders; - } - } - else if (type == eHTMLTag_ul) - { - // Indent here to support nested list, which aren't included in li :-( - EnsureVerticalSpace(1); // Must end the current line before we change indent. - mIndent += gIndentSizeList; - } - else if (type == eHTMLTag_ol) - { - EnsureVerticalSpace(1); // Must end the current line before we change indent. - if (mOLStackIndex < OLStackSize) - mOLStack[mOLStackIndex++] = 1; // XXX should get it from the node! - mIndent += gIndentSizeList; // see ul - } - else if (type == eHTMLTag_li) - { - if (mTagStackIndex > 1 && mTagStack[mTagStackIndex-2] == eHTMLTag_ol) - { - if (mOLStackIndex > 0) - // This is what nsBulletFrame does for OLs: - mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10); - else - mInIndentString.AppendWithConversion("#"); - - mInIndentString.AppendWithConversion('.'); - - } - else - mInIndentString.AppendWithConversion('*'); - - mInIndentString.AppendWithConversion(' '); - } - else if (type == eHTMLTag_dl) - EnsureVerticalSpace(1); - else if (type == eHTMLTag_dd) - mIndent += gIndentSizeDD; - else if (type == eHTMLTag_blockquote) - { - EnsureVerticalSpace(1); - - // Find out whether it's a type=cite, and insert "> " instead. - // Eventually we should get the value of the pref controlling citations, - // and handle AOL-style citations as well. - // If we want to support RFC 2646 (and we do!) we have to have: - // >>>> text - // >>> fdfd - // when a mail is sent. - nsString value; - nsresult rv = GetValueOfAttribute(aNode, "type", value); - if ( NS_SUCCEEDED(rv) ) - value.StripChars("\""); - - if (NS_SUCCEEDED(rv) && value.EqualsWithConversion("cite", PR_TRUE)) - mCiteQuoteLevel++; - else - mIndent += gTabSize; // Check for some maximum value? - } - - else if (type == eHTMLTag_a && !IsConverted(aNode)) - { - nsAutoString url; - if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "href", url)) - && !url.IsEmpty()) - { - url.StripChars("\""); - mURL = url; - } - } - else if (type == eHTMLTag_q) - Write(NS_ConvertASCIItoUCS2("\"")); - else if (type == eHTMLTag_sup && mStructs && !IsConverted(aNode)) - Write(NS_ConvertASCIItoUCS2("^")); - else if (type == eHTMLTag_sub && mStructs && !IsConverted(aNode)) - Write(NS_ConvertASCIItoUCS2("_")); - else if (type == eHTMLTag_code && mStructs && !IsConverted(aNode)) - Write(NS_ConvertASCIItoUCS2("|")); - else if ((type == eHTMLTag_strong || type == eHTMLTag_b) - && mStructs && !IsConverted(aNode)) - Write(NS_ConvertASCIItoUCS2("*")); - else if ((type == eHTMLTag_em || type == eHTMLTag_i) - && mStructs && !IsConverted(aNode)) - Write(NS_ConvertASCIItoUCS2("/")); - else if (type == eHTMLTag_u && mStructs && !IsConverted(aNode)) - Write(NS_ConvertASCIItoUCS2("_")); - - return NS_OK; -} - -/** - * This method is used to close a generic container. - * - * @update 07/12/98 gpk - * @param nsIParserNode reference to parser node interface - * @return PR_TRUE if successful. - */ -NS_IMETHODIMP -nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode) -{ - eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); - if (mTagStackIndex > 0) - --mTagStackIndex; - - // End current line if we're ending a block level tag - if((type == eHTMLTag_body) || (type == eHTMLTag_html)) { - // We want the output to end with a new line, - // but in preformatted areas like text fields, - // we can't emit newlines that weren't there. - // So add the newline only in the case of formatted output. - if (mFlags & nsIDocumentEncoder::OutputFormatted) - EnsureVerticalSpace(0); - else - FlushLine(); - // We won't want to do anything with these in formatted mode either, - // so just return now: - return NS_OK; - } else if ((type == eHTMLTag_tr) || - (type == eHTMLTag_li) || - (type == eHTMLTag_pre) || - (type == eHTMLTag_dd) || - (type == eHTMLTag_dt)) { - // Items that should always end a line, but get no more whitespace - EnsureVerticalSpace(0); - } else if (IsBlockLevel(type) - && type != eHTMLTag_blockquote - && type != eHTMLTag_script - && type != eHTMLTag_doctypeDecl - && type != eHTMLTag_markupDecl) - { - // All other blocks get 1 vertical space after them - // in formatted mode, otherwise 0. - // This is hard. Sometimes 0 is a better number, but - // how to know? - EnsureVerticalSpace((mFlags & nsIDocumentEncoder::OutputFormatted) - ? 1 : 0); - } - - // The rest of this routine is formatted output stuff, - // which we should skip if we're not formatted: - if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) - return NS_OK; - - if (type == eHTMLTag_h1 || type == eHTMLTag_h2 || - type == eHTMLTag_h3 || type == eHTMLTag_h4 || - type == eHTMLTag_h5 || type == eHTMLTag_h6) - { - if (mHeaderStrategy /*numbered or indent increasingly*/ ) - mIndent -= gIndentSizeHeaders; - if (mHeaderStrategy == 1 /*indent increasingly*/ ) - { - for (PRInt32 i = HeaderLevel(type); i > 1; i--) - // for h(x), run x-1 times - mIndent -= gIndentIncrementHeaders; - } - EnsureVerticalSpace(1); - } - else if (type == eHTMLTag_ul) - { - mIndent -= gIndentSizeList; - } - else if (type == eHTMLTag_ol) - { - FlushLine(); // Doing this after decreasing OLStackIndex would be wrong. - --mOLStackIndex; - mIndent -= gIndentSizeList; - } - else if (type == eHTMLTag_dd) - { - mIndent -= gIndentSizeDD; - } - else if (type == eHTMLTag_blockquote) - { - FlushLine(); // Is this needed? - - nsString value; - nsresult rv = GetValueOfAttribute(aNode, "type", value); - if ( NS_SUCCEEDED(rv) ) - value.StripChars("\""); - - if (NS_SUCCEEDED(rv) && value.EqualsWithConversion("cite", PR_TRUE)) - mCiteQuoteLevel--; - else - mIndent -= gTabSize; - - EnsureVerticalSpace(1); - } - else if (type == eHTMLTag_a && !IsConverted(aNode) && !mURL.IsEmpty()) - { - nsAutoString temp; temp.AssignWithConversion(" <"); - temp += mURL; - temp.AppendWithConversion(">"); - Write(temp); - mURL.Truncate(); - } - else if (type == eHTMLTag_q) - Write(NS_ConvertASCIItoUCS2("\"")); - else if ((type == eHTMLTag_sup || type == eHTMLTag_sub) - && mStructs && !IsConverted(aNode)) - Write(NS_ConvertASCIItoUCS2(" ")); - else if (type == eHTMLTag_code && mStructs && !IsConverted(aNode)) - Write(NS_ConvertASCIItoUCS2("|")); - else if ((type == eHTMLTag_strong || type == eHTMLTag_b) - && mStructs && !IsConverted(aNode)) - Write(NS_ConvertASCIItoUCS2("*")); - else if ((type == eHTMLTag_em || type == eHTMLTag_i) - && mStructs && !IsConverted(aNode)) - Write(NS_ConvertASCIItoUCS2("/")); - else if (type == eHTMLTag_u && mStructs && !IsConverted(aNode)) - Write(NS_ConvertASCIItoUCS2("_")); - - return NS_OK; -} - -/** - * This method is used to add a leaf to the currently - * open container. - * - * @update 07/12/98 gpk - * @param nsIParserNode reference to parser node interface - * @return PR_TRUE if successful. - */ -NS_IMETHODIMP -nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode) -{ - // If we don't want any output, just return - if (!DoOutput()) - return NS_OK; - - eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); - - nsAutoString text(aNode.GetText()); - - if (mTagStackIndex > 1 && mTagStack[mTagStackIndex-2] == eHTMLTag_select) - { - // Don't output the contents of SELECT elements; - // Might be nice, eventually, to output just the selected element. - return NS_OK; - } - else if (mTagStackIndex > 0 && mTagStack[mTagStackIndex-1] == eHTMLTag_script) - { - // Don't output the contents of