diff --git a/mozilla/htmlparser/src/nsHTMLToTXTSinkStream.cpp b/mozilla/htmlparser/src/nsHTMLToTXTSinkStream.cpp
deleted file mode 100644
index a99f2c7e3c7..00000000000
--- a/mozilla/htmlparser/src/nsHTMLToTXTSinkStream.cpp
+++ /dev/null
@@ -1,1798 +0,0 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
- *
- * The contents of this file are subject to the Netscape Public
- * License Version 1.1 (the "License"); you may not use this file
- * except in compliance with the License. You may obtain a copy of
- * the License at http://www.mozilla.org/NPL/
- *
- * Software distributed under the License is distributed on an "AS
- * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * rights and limitations under the License.
- *
- * The Original Code is Mozilla Communicator client code.
- *
- * The Initial Developer of the Original Code is Netscape Communications
- * Corporation. Portions created by Netscape are
- * Copyright (C) 1998 Netscape Communications Corporation. All
- * Rights Reserved.
- *
- * Contributor(s):
- * Greg Kostello (original structure)
- * Akkana Peck
- * Daniel Bratell
- * Ben Bucksch
- * Pierre Phaneuf
- * Markus Kuhn
- */
-
-/**
- * MODULE NOTES:
- *
- * This file declares the concrete TXT ContentSink class.
- * This class is used during the parsing process as the
- * primary interface between the parser and the content
- * model.
- */
-
-#include "nsHTMLToTXTSinkStream.h"
-#include "nsHTMLTokens.h"
-#include "nsString.h"
-#include "nsIParser.h"
-#include "nsHTMLEntities.h"
-#include "nsXIFDTD.h"
-#include "prprf.h" // For PR_snprintf()
-#include "nsIDocumentEncoder.h" // for output flags
-#include "nsIUnicodeEncoder.h"
-#include "nsICharsetAlias.h"
-#include "nsIServiceManager.h"
-#include "nsICharsetConverterManager.h"
-#include "nsILineBreakerFactory.h"
-#include "nsLWBrkCIID.h"
-#include "nsIOutputStream.h"
-#include "nsFileStream.h"
-#include "nsIPref.h"
-
-static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
-static NS_DEFINE_CID(kLWBrkCID, NS_LWBRK_CID);
-static NS_DEFINE_CID(kPrefServiceCID, NS_PREF_CID);
-static NS_DEFINE_IID(kCParserIID, NS_IPARSER_IID);
-static NS_DEFINE_IID(kCParserCID, NS_PARSER_IID);
-
-#define PREF_STRUCTS "converter.html2txt.structs"
-#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
-const PRInt32 gTabSize=4;
-const PRInt32 gOLNumberWidth = 3;
-const PRInt32 gIndentSizeHeaders = 2; /* Indention of h1, if
- mHeaderStrategy = 1 or = 2.
- Indention of other headers
- is derived from that.
- XXX center h1? */
-const PRInt32 gIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1,
- indent h(x+1) this many
- columns more than h(x) */
-const PRInt32 gIndentSizeList = (gTabSize > gOLNumberWidth+3) ? gTabSize: gOLNumberWidth+3;
- // Indention of non-first lines of ul and ol
-const PRInt32 gIndentSizeDD = gTabSize; // Indention of
-
-static PRInt32 HeaderLevel(eHTMLTags aTag);
-static PRInt32 unicharwidth(PRUnichar ucs);
-static PRInt32 unicharwidth(const PRUnichar* pwcs, PRInt32 n);
-
-/**
- * Inits the encoder instance variable for the sink based on the charset
- *
- * @update gpk 4/21/99
- * @param aCharset
- * @return NS_xxx error result
- */
-nsresult nsHTMLToTXTSinkStream::InitEncoder(const nsString& aCharset)
-{
- nsresult res = NS_OK;
-
- // If the converter is ucs2, then do not use a converter
- if (aCharset.EqualsWithConversion("ucs2"))
- {
- NS_IF_RELEASE(mUnicodeEncoder);
- return res;
- }
-
- nsICharsetAlias* calias = nsnull;
- res = nsServiceManager::GetService(kCharsetAliasCID,
- kICharsetAliasIID,
- (nsISupports**)&calias);
-
- NS_ASSERTION( nsnull != calias, "cannot find charset alias");
- nsAutoString charsetName;charsetName.Assign(aCharset);
- if( NS_SUCCEEDED(res) && (nsnull != calias))
- {
- res = calias->GetPreferred(aCharset, charsetName);
- nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
-
- if(NS_FAILED(res))
- {
- // failed - unknown alias , fallback to ISO-8859-1
- charsetName.AssignWithConversion("ISO-8859-1");
- }
-
- nsICharsetConverterManager * ccm = nsnull;
- res = nsServiceManager::GetService(kCharsetConverterManagerCID,
- NS_GET_IID(nsICharsetConverterManager),
- (nsISupports**)&ccm);
- if(NS_SUCCEEDED(res) && (nsnull != ccm))
- {
- nsIUnicodeEncoder * encoder = nsnull;
- res = ccm->GetUnicodeEncoder(&charsetName, &encoder);
- if(NS_SUCCEEDED(res) && (nsnull != encoder))
- {
- NS_IF_RELEASE(mUnicodeEncoder);
- mUnicodeEncoder = encoder;
- }
- nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
- }
- }
- return res;
-}
-
-/**
- * This method gets called as part of our COM-like interfaces.
- * Its purpose is to create an interface to parser object
- * of some type.
- *
- * @update gpk02/03/99
- * @param nsIID id of object to discover
- * @param aInstancePtr ptr to newly discovered interface
- * @return NS_xxx result code
- */
-nsresult
-nsHTMLToTXTSinkStream::QueryInterface(const nsIID& aIID, void** aInstancePtr)
-{
- if (NULL == aInstancePtr) {
- return NS_ERROR_NULL_POINTER;
- }
- if(aIID.Equals(NS_GET_IID(nsISupports))) {
- *aInstancePtr = (nsIContentSink*)(this);
- }
- else if(aIID.Equals(NS_GET_IID(nsIContentSink))) {
- *aInstancePtr = (nsIContentSink*)(this);
- }
- else if(aIID.Equals(NS_GET_IID(nsIHTMLContentSink))) {
- *aInstancePtr = (nsIHTMLContentSink*)(this);
- }
- else if(aIID.Equals(NS_GET_IID(nsIHTMLToTXTSinkStream))) {
- *aInstancePtr = (nsIHTMLToTXTSinkStream*)(this);
- }
- else {
- *aInstancePtr=0;
- return NS_NOINTERFACE;
- }
- NS_ADDREF_THIS();
- return NS_OK;
-}
-
-NS_IMPL_ADDREF(nsHTMLToTXTSinkStream)
-NS_IMPL_RELEASE(nsHTMLToTXTSinkStream)
-
-// Someday may want to make this non-const:
-static const PRUint32 TagStackSize = 500;
-static const PRUint32 OLStackSize = 100;
-
-/**
- * Construct a content sink stream.
- * @update gpk02/03/99
- * @param
- * @return
- */
-nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream()
-{
- NS_INIT_REFCNT();
- mDTD = 0;
- mColPos = 0;
- mIndent = 0;
- mCiteQuoteLevel = 0;
- mDoFragment = PR_FALSE;
- mBufferSize = 0;
- mBufferLength = 0;
- mBuffer = nsnull;
- mUnicodeEncoder = nsnull;
- mStructs = PR_TRUE; // will be read from prefs later
- mHeaderStrategy = 1 /*indent increasingly*/; // ditto
- for (PRInt32 i = 0; i <= 6; i++)
- mHeaderCounter[i] = 0;
-
- // Line breaker
- mLineBreaker = nsnull;
- mWrapColumn = 72; // XXX magic number, we expect someone to reset this
- mCurrentLineWidth = 0;
-
- // Flow
- mEmptyLines=1; // The start of the document is an "empty line" in itself,
- mInWhitespace = PR_TRUE;
- mPreFormatted = PR_FALSE;
- mCacheLine = PR_FALSE;
- mStartedOutput = PR_FALSE;
-
- // initialize the tag stack to zero:
- mTagStack = new nsHTMLTag[TagStackSize];
- mTagStackIndex = 0;
-
- // initialize the OL stack, where numbers for ordered lists are kept:
- mOLStack = new PRInt32[OLStackSize];
- mOLStackIndex = 0;
-}
-
-/**
- *
- * @update gpk02/03/99
- * @param
- * @return
- */
-nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream()
-{
- if (mCurrentLine.Length() > 0)
- FlushLine(); // We have some left over text in current line. flush it out.
- // This means we didn't have a body or html node -- probably a text control.
-
- if(mBuffer)
- delete[] mBuffer;
- delete[] mTagStack;
- delete[] mOLStack;
- NS_IF_RELEASE(mDTD);
- NS_IF_RELEASE(mUnicodeEncoder);
- NS_IF_RELEASE(mLineBreaker);
-}
-
-/**
- *
- * @update gpk04/30/99
- * @param
- * @return
- */
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::Initialize(nsIOutputStream* aOutStream,
- nsAWritableString* aOutString,
- PRUint32 aFlags)
-{
- mStream = aOutStream;
- // XXX This is wrong. It violates XPCOM string ownership rules.
- // We're only getting away with this because instances of this
- // class are restricted to single function scope.
- mString = aOutString;
- mFlags = aFlags;
-
- nsILineBreakerFactory *lf;
- nsresult result = NS_OK;
-
- result = nsServiceManager::GetService(kLWBrkCID,
- NS_GET_IID(nsILineBreakerFactory),
- (nsISupports **)&lf);
- if (NS_SUCCEEDED(result)) {
- nsAutoString lbarg;
- result = lf->GetBreaker(lbarg, &mLineBreaker);
- if(NS_FAILED(result)) {
- mLineBreaker = nsnull;
- }
- result = nsServiceManager::ReleaseService(kLWBrkCID, lf);
- }
-
- // Turn on caching if we are wrapping or we want formatting.
- // We need this even when flags indicate preformatted,
- // in order to wrap textareas with wrap=hard.
- if((mFlags & nsIDocumentEncoder::OutputFormatted) ||
- (mFlags & nsIDocumentEncoder::OutputWrap))
- {
- mCacheLine = PR_TRUE;
- }
-
- // Set the line break character:
- if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
- && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) // Windows/mail
- mLineBreak.AssignWithConversion("\r\n");
- else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) // Mac
- mLineBreak.AssignWithConversion("\r");
- else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) // Unix/DOM
- mLineBreak.AssignWithConversion("\n");
- else
- mLineBreak.AssignWithConversion(NS_LINEBREAK); // Platform/default
-
- // Get some prefs
- nsresult rv;
- NS_WITH_SERVICE(nsIPref, prefs, NS_PREF_CONTRACTID, &rv);
- if (NS_SUCCEEDED(rv) && prefs)
- {
- rv = prefs->GetBoolPref(PREF_STRUCTS, &mStructs);
- rv = prefs->GetIntPref(PREF_HEADER_STRATEGY, &mHeaderStrategy);
- }
-
- return result;
-}
-
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::SetCharsetOverride(const nsAReadableString* aCharset)
-{
- if (aCharset)
- {
- mCharsetOverride.Assign(*aCharset);
- InitEncoder(mCharsetOverride);
- }
- return NS_OK;
-}
-
-/**
- * This method gets called by the parser when it encounters
- * a title tag and wants to set the document title in the sink.
- *
- * @update gpk02/03/99
- * @param nsString reference to new title value
- * @return PR_TRUE if successful.
- */
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::SetTitle(const nsString& aValue)
-{
- return NS_OK;
-}
-
-/**
- * All these HTML-specific methods may be called, or may not,
- * depending on whether the parser is parsing XIF or HTML.
- * So we can't depend on them; instead, we have Open/CloseContainer
- * do all the specialized work, and the html-specific Open/Close
- * methods must call the more general methods.
- * Since there are so many of them, make a macro:
- */
-
-#define USE_GENERAL_OPEN_METHOD(opentag) \
-NS_IMETHODIMP \
-nsHTMLToTXTSinkStream::opentag(const nsIParserNode& aNode) \
-{ return OpenContainer(aNode); }
-
-#define USE_GENERAL_CLOSE_METHOD(closetag) \
-NS_IMETHODIMP \
-nsHTMLToTXTSinkStream::closetag(const nsIParserNode& aNode) \
-{ return CloseContainer(aNode); }
-
-USE_GENERAL_OPEN_METHOD(OpenHTML)
-USE_GENERAL_CLOSE_METHOD(CloseHTML)
-USE_GENERAL_OPEN_METHOD(OpenHead)
-USE_GENERAL_CLOSE_METHOD(CloseHead)
-USE_GENERAL_OPEN_METHOD(OpenBody)
-USE_GENERAL_CLOSE_METHOD(CloseBody)
-USE_GENERAL_OPEN_METHOD(OpenForm)
-USE_GENERAL_CLOSE_METHOD(CloseForm)
-USE_GENERAL_OPEN_METHOD(OpenMap)
-USE_GENERAL_CLOSE_METHOD(CloseMap)
-USE_GENERAL_OPEN_METHOD(OpenFrameset)
-USE_GENERAL_CLOSE_METHOD(CloseFrameset)
-USE_GENERAL_OPEN_METHOD(OpenNoscript)
-USE_GENERAL_CLOSE_METHOD(CloseNoscript)
-
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::DoFragment(PRBool aFlag)
-{
- mDoFragment = aFlag;
- return NS_OK;
-}
-
-/**
- * This gets called when handling illegal contents, especially
- * in dealing with tables. This method creates a new context.
- *
- * @update 04/04/99 harishd
- * @param aPosition - The position from where the new context begins.
- */
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::BeginContext(PRInt32 aPosition)
-{
- return NS_OK;
-}
-
-/**
- * This method terminates any new context that got created by
- * BeginContext and switches back to the main context.
- *
- * @update 04/04/99 harishd
- * @param aPosition - Validates the end of a context.
- */
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::EndContext(PRInt32 aPosition)
-{
- return NS_OK;
-}
-
-/**
- * This gets called by the parser when you want to add
- * a PI node to the current container in the content
- * model.
- *
- * @updated gpk02/03/99
- * @param
- * @return
- */
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::AddProcessingInstruction(const nsIParserNode& aNode){
- return NS_OK;
-}
-
-/**
- * This gets called by the parser when it encounters
- * a DOCTYPE declaration in the HTML document.
- */
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::AddDocTypeDecl(const nsIParserNode& aNode, PRInt32 aMode)
-{
- // Should probably set DTD
- return NS_OK;
-}
-
-/**
- * This gets called by the parser when you want to add
- * a comment node to the current container in the content
- * model.
- *
- * @updated gpk02/03/99
- * @param
- * @return
- */
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode)
-{
- // Skip comments in plaintext output
- return NS_OK;
-}
-
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::GetValueOfAttribute(const nsIParserNode& aNode,
- char* aMatchKey,
- nsString& aValueRet)
-{
- nsAutoString matchKey; matchKey.AssignWithConversion(aMatchKey);
- PRInt32 count=aNode.GetAttributeCount();
- for (PRInt32 i=0;iHTML converter.
- * In this case, we should ignore it.
- */
-PRBool nsHTMLToTXTSinkStream::IsConverted(const nsIParserNode& aNode)
-{
- nsAutoString value;
- nsresult rv = GetValueOfAttribute(aNode, "class", value);
- return
- (
- NS_SUCCEEDED(rv)
- &&
- (
- value.EqualsWithConversion("moz-txt", PR_TRUE, 7) ||
- value.EqualsWithConversion("\"moz-txt", PR_TRUE, 8)
- )
- );
-}
-
-PRBool nsHTMLToTXTSinkStream::DoOutput()
-{
- PRBool inBody = PR_FALSE;
-
- // Loop over the tag stack and see if we're inside a body,
- // and not inside a markup_declaration
- for (PRUint32 i = 0; i < mTagStackIndex; ++i)
- {
- if (mTagStack[i] == eHTMLTag_doctypeDecl
- || mTagStack[i] == eHTMLTag_comment
- || mTagStack[i] == eHTMLTag_markupDecl)
- return PR_FALSE;
-
- if (mTagStack[i] == eHTMLTag_body)
- inBody = PR_TRUE;
- }
-
- return mDoFragment || inBody;
-}
-
-
-/**
- * This method is used to open a general container.
- * This includes: OL,UL,DIR,SPAN,TABLE,H[1..6],etc.
- *
- * @param nsIParserNode reference to parser node interface
- * @return PR_TRUE if successful.
- */
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode)
-{
- eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
- const nsAReadableString& name = aNode.GetText();
- if (name.Equals(NS_LITERAL_STRING("document_info")))
- {
- nsString value;
- if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "charset", value)))
- {
- if (mCharsetOverride.Length() == 0)
- InitEncoder(value);
- else
- InitEncoder(mCharsetOverride);
- }
- return NS_OK;
- }
-
- if (mTagStackIndex < TagStackSize)
- mTagStack[mTagStackIndex++] = type;
-
- if (type == eHTMLTag_body)
- {
- // body -> can turn on cacheing unless it's already preformatted
- if(!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
- ((mFlags & nsIDocumentEncoder::OutputFormatted) ||
- (mFlags & nsIDocumentEncoder::OutputWrap))) {
- mCacheLine = PR_TRUE;
- }
-
- // Try to figure out here whether we have a
- // preformatted style attribute.
- //
- // Trigger on the presence of a "-moz-pre-wrap" in the
- // style attribute. That's a very simplistic way to do
- // it, but better than nothing.
- // Also set mWrapColumn to the value given there
- // (which arguably we should only do if told to do so).
- nsString style;
- PRInt32 whitespace;
- if(NS_SUCCEEDED(GetValueOfAttribute(aNode, "style", style)) &&
- (-1 != (whitespace = style.Find("white-space:"))))
- {
- if (-1 != style.Find("-moz-pre-wrap", PR_TRUE, whitespace))
- {
-#ifdef DEBUG_preformatted
- printf("Set mPreFormatted based on style moz-pre-wrap\n");
-#endif
- mPreFormatted = PR_TRUE;
- mCacheLine = PR_TRUE;
- PRInt32 widthOffset = style.Find("width:");
- if (widthOffset >= 0)
- {
- // We have to search for the ch before the semicolon,
- // not for the semicolon itself, because nsString::ToInteger()
- // considers 'c' to be a valid numeric char (even if radix=10)
- // but then gets confused if it sees it next to the number
- // when the radix specified was 10, and returns an error code.
- PRInt32 semiOffset = style.Find("ch", widthOffset+6);
- PRInt32 length = (semiOffset > 0 ? semiOffset - widthOffset - 6
- : style.Length() - widthOffset);
- nsString widthstr;
- style.Mid(widthstr, widthOffset+6, length);
- PRInt32 err;
- PRInt32 col = widthstr.ToInteger(&err);
- if (NS_SUCCEEDED(err))
- {
- SetWrapColumn((PRUint32)col);
-#ifdef DEBUG_preformatted
- printf("Set wrap column to %d based on style\n", mWrapColumn);
-#endif
- }
- }
- }
- else if (-1 != style.Find("pre", PR_TRUE, whitespace))
- {
-#ifdef DEBUG_preformatted
- printf("Set mPreFormatted based on style pre\n");
-#endif
- mPreFormatted = PR_TRUE;
- mCacheLine = PR_TRUE;
- SetWrapColumn(0);
- }
- } else {
- mPreFormatted = PR_FALSE;
- mCacheLine = PR_TRUE; // Cache lines unless something else tells us not to
- }
-
- return NS_OK;
- }
-
- if (!DoOutput())
- return NS_OK;
-
- if (type == eHTMLTag_p || type == eHTMLTag_pre)
- EnsureVerticalSpace(1); // Should this be 0 in unformatted case?
-
- else if (type == eHTMLTag_td || type == eHTMLTag_th)
- {
- // We must make sure that the content of two table cells get a
- // space between them.
-
- // Fow now, I will only add a SPACE. Could be a TAB or something
- // else but I'm not sure everything can handle the TAB so SPACE
- // seems like a better solution.
- if(!mInWhitespace) {
- // Maybe add something else? Several spaces? A TAB? SPACE+TAB?
- if(mCacheLine) {
- AddToLine(NS_ConvertToString(" ").GetUnicode(), 1);
- } else {
- nsAutoString space(NS_ConvertToString(" "));
- WriteSimple(space);
- }
- mInWhitespace = PR_TRUE;
- }
- }
-
- // Else make sure we'll separate block level tags,
- // even if we're about to leave, before doing any other formatting.
- else if (IsBlockLevel(type))
- EnsureVerticalSpace(0);
-
- // The rest of this routine is formatted output stuff,
- // which we should skip if we're not formatted:
- if (!(mFlags & nsIDocumentEncoder::OutputFormatted))
- return NS_OK;
-
- if (type == eHTMLTag_h1 || type == eHTMLTag_h2 ||
- type == eHTMLTag_h3 || type == eHTMLTag_h4 ||
- type == eHTMLTag_h5 || type == eHTMLTag_h6)
- {
- EnsureVerticalSpace(2);
- if (mHeaderStrategy == 2) // numbered
- {
- mIndent += gIndentSizeHeaders;
- // Caching
- nsCAutoString leadup;
- PRInt32 level = HeaderLevel(type);
- // Increase counter for current level
- mHeaderCounter[level]++;
- // Reset all lower levels
- PRInt32 i;
- for (i = level + 1; i <= 6; i++)
- mHeaderCounter[i] = 0;
- // Construct numbers
- for (i = 1; i <= level; i++)
- {
- leadup.AppendInt(mHeaderCounter[i]);
- leadup += ".";
- }
- leadup += " ";
- Write(NS_ConvertASCIItoUCS2(leadup.GetBuffer()));
- }
- else if (mHeaderStrategy == 1) // indent increasingly
- {
- mIndent += gIndentSizeHeaders;
- for (PRInt32 i = HeaderLevel(type); i > 1; i--)
- // for h(x), run x-1 times
- mIndent += gIndentIncrementHeaders;
- }
- }
- else if (type == eHTMLTag_ul)
- {
- // Indent here to support nested list, which aren't included in li :-(
- EnsureVerticalSpace(1); // Must end the current line before we change indent.
- mIndent += gIndentSizeList;
- }
- else if (type == eHTMLTag_ol)
- {
- EnsureVerticalSpace(1); // Must end the current line before we change indent.
- if (mOLStackIndex < OLStackSize)
- mOLStack[mOLStackIndex++] = 1; // XXX should get it from the node!
- mIndent += gIndentSizeList; // see ul
- }
- else if (type == eHTMLTag_li)
- {
- if (mTagStackIndex > 1 && mTagStack[mTagStackIndex-2] == eHTMLTag_ol)
- {
- if (mOLStackIndex > 0)
- // This is what nsBulletFrame does for OLs:
- mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
- else
- mInIndentString.AppendWithConversion("#");
-
- mInIndentString.AppendWithConversion('.');
-
- }
- else
- mInIndentString.AppendWithConversion('*');
-
- mInIndentString.AppendWithConversion(' ');
- }
- else if (type == eHTMLTag_dl)
- EnsureVerticalSpace(1);
- else if (type == eHTMLTag_dd)
- mIndent += gIndentSizeDD;
- else if (type == eHTMLTag_blockquote)
- {
- EnsureVerticalSpace(1);
-
- // Find out whether it's a type=cite, and insert "> " instead.
- // Eventually we should get the value of the pref controlling citations,
- // and handle AOL-style citations as well.
- // If we want to support RFC 2646 (and we do!) we have to have:
- // >>>> text
- // >>> fdfd
- // when a mail is sent.
- nsString value;
- nsresult rv = GetValueOfAttribute(aNode, "type", value);
- if ( NS_SUCCEEDED(rv) )
- value.StripChars("\"");
-
- if (NS_SUCCEEDED(rv) && value.EqualsWithConversion("cite", PR_TRUE))
- mCiteQuoteLevel++;
- else
- mIndent += gTabSize; // Check for some maximum value?
- }
-
- else if (type == eHTMLTag_a && !IsConverted(aNode))
- {
- nsAutoString url;
- if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "href", url))
- && !url.IsEmpty())
- {
- url.StripChars("\"");
- mURL = url;
- }
- }
- else if (type == eHTMLTag_q)
- Write(NS_ConvertASCIItoUCS2("\""));
- else if (type == eHTMLTag_sup && mStructs && !IsConverted(aNode))
- Write(NS_ConvertASCIItoUCS2("^"));
- else if (type == eHTMLTag_sub && mStructs && !IsConverted(aNode))
- Write(NS_ConvertASCIItoUCS2("_"));
- else if (type == eHTMLTag_code && mStructs && !IsConverted(aNode))
- Write(NS_ConvertASCIItoUCS2("|"));
- else if ((type == eHTMLTag_strong || type == eHTMLTag_b)
- && mStructs && !IsConverted(aNode))
- Write(NS_ConvertASCIItoUCS2("*"));
- else if ((type == eHTMLTag_em || type == eHTMLTag_i)
- && mStructs && !IsConverted(aNode))
- Write(NS_ConvertASCIItoUCS2("/"));
- else if (type == eHTMLTag_u && mStructs && !IsConverted(aNode))
- Write(NS_ConvertASCIItoUCS2("_"));
-
- return NS_OK;
-}
-
-/**
- * This method is used to close a generic container.
- *
- * @update 07/12/98 gpk
- * @param nsIParserNode reference to parser node interface
- * @return PR_TRUE if successful.
- */
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode)
-{
- eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
- if (mTagStackIndex > 0)
- --mTagStackIndex;
-
- // End current line if we're ending a block level tag
- if((type == eHTMLTag_body) || (type == eHTMLTag_html)) {
- // We want the output to end with a new line,
- // but in preformatted areas like text fields,
- // we can't emit newlines that weren't there.
- // So add the newline only in the case of formatted output.
- if (mFlags & nsIDocumentEncoder::OutputFormatted)
- EnsureVerticalSpace(0);
- else
- FlushLine();
- // We won't want to do anything with these in formatted mode either,
- // so just return now:
- return NS_OK;
- } else if ((type == eHTMLTag_tr) ||
- (type == eHTMLTag_li) ||
- (type == eHTMLTag_pre) ||
- (type == eHTMLTag_dd) ||
- (type == eHTMLTag_dt)) {
- // Items that should always end a line, but get no more whitespace
- EnsureVerticalSpace(0);
- } else if (IsBlockLevel(type)
- && type != eHTMLTag_blockquote
- && type != eHTMLTag_script
- && type != eHTMLTag_doctypeDecl
- && type != eHTMLTag_markupDecl)
- {
- // All other blocks get 1 vertical space after them
- // in formatted mode, otherwise 0.
- // This is hard. Sometimes 0 is a better number, but
- // how to know?
- EnsureVerticalSpace((mFlags & nsIDocumentEncoder::OutputFormatted)
- ? 1 : 0);
- }
-
- // The rest of this routine is formatted output stuff,
- // which we should skip if we're not formatted:
- if (!(mFlags & nsIDocumentEncoder::OutputFormatted))
- return NS_OK;
-
- if (type == eHTMLTag_h1 || type == eHTMLTag_h2 ||
- type == eHTMLTag_h3 || type == eHTMLTag_h4 ||
- type == eHTMLTag_h5 || type == eHTMLTag_h6)
- {
- if (mHeaderStrategy /*numbered or indent increasingly*/ )
- mIndent -= gIndentSizeHeaders;
- if (mHeaderStrategy == 1 /*indent increasingly*/ )
- {
- for (PRInt32 i = HeaderLevel(type); i > 1; i--)
- // for h(x), run x-1 times
- mIndent -= gIndentIncrementHeaders;
- }
- EnsureVerticalSpace(1);
- }
- else if (type == eHTMLTag_ul)
- {
- mIndent -= gIndentSizeList;
- }
- else if (type == eHTMLTag_ol)
- {
- FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
- --mOLStackIndex;
- mIndent -= gIndentSizeList;
- }
- else if (type == eHTMLTag_dd)
- {
- mIndent -= gIndentSizeDD;
- }
- else if (type == eHTMLTag_blockquote)
- {
- FlushLine(); // Is this needed?
-
- nsString value;
- nsresult rv = GetValueOfAttribute(aNode, "type", value);
- if ( NS_SUCCEEDED(rv) )
- value.StripChars("\"");
-
- if (NS_SUCCEEDED(rv) && value.EqualsWithConversion("cite", PR_TRUE))
- mCiteQuoteLevel--;
- else
- mIndent -= gTabSize;
-
- EnsureVerticalSpace(1);
- }
- else if (type == eHTMLTag_a && !IsConverted(aNode) && !mURL.IsEmpty())
- {
- nsAutoString temp; temp.AssignWithConversion(" <");
- temp += mURL;
- temp.AppendWithConversion(">");
- Write(temp);
- mURL.Truncate();
- }
- else if (type == eHTMLTag_q)
- Write(NS_ConvertASCIItoUCS2("\""));
- else if ((type == eHTMLTag_sup || type == eHTMLTag_sub)
- && mStructs && !IsConverted(aNode))
- Write(NS_ConvertASCIItoUCS2(" "));
- else if (type == eHTMLTag_code && mStructs && !IsConverted(aNode))
- Write(NS_ConvertASCIItoUCS2("|"));
- else if ((type == eHTMLTag_strong || type == eHTMLTag_b)
- && mStructs && !IsConverted(aNode))
- Write(NS_ConvertASCIItoUCS2("*"));
- else if ((type == eHTMLTag_em || type == eHTMLTag_i)
- && mStructs && !IsConverted(aNode))
- Write(NS_ConvertASCIItoUCS2("/"));
- else if (type == eHTMLTag_u && mStructs && !IsConverted(aNode))
- Write(NS_ConvertASCIItoUCS2("_"));
-
- return NS_OK;
-}
-
-/**
- * This method is used to add a leaf to the currently
- * open container.
- *
- * @update 07/12/98 gpk
- * @param nsIParserNode reference to parser node interface
- * @return PR_TRUE if successful.
- */
-NS_IMETHODIMP
-nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode)
-{
- // If we don't want any output, just return
- if (!DoOutput())
- return NS_OK;
-
- eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
-
- nsAutoString text(aNode.GetText());
-
- if (mTagStackIndex > 1 && mTagStack[mTagStackIndex-2] == eHTMLTag_select)
- {
- // Don't output the contents of SELECT elements;
- // Might be nice, eventually, to output just the selected element.
- return NS_OK;
- }
- else if (mTagStackIndex > 0 && mTagStack[mTagStackIndex-1] == eHTMLTag_script)
- {
- // Don't output the contents of