/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* ***** BEGIN LICENSE BLOCK ***** * Version: NPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Netscape Public License * Version 1.1 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is * Netscape Communications Corporation. * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Daniel Bratell * Ben Bucksch * * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the NPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the NPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ #include "nsPlainTextSerializer.h" #include "nsILineBreakerFactory.h" #include "nsLWBrkCIID.h" #include "nsIPref.h" #include "nsIServiceManager.h" #include "nsHTMLAtoms.h" #include "nsIDOMText.h" #include "nsIDOMElement.h" #include "nsINameSpaceManager.h" #include "nsIHTMLContent.h" #include "nsITextContent.h" #include "nsTextFragment.h" #include "nsParserCIID.h" #include "nsContentUtils.h" #include "nsReadableUtils.h" #include "nsUnicharUtils.h" #include "nsCRT.h" static NS_DEFINE_CID(kLWBrkCID, NS_LWBRK_CID); static NS_DEFINE_CID(kParserServiceCID, NS_PARSERSERVICE_CID); #define PREF_STRUCTS "converter.html2txt.structs" #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy" static const PRInt32 kTabSize=4; static const PRInt32 kOLNumberWidth = 3; static const PRInt32 kIndentSizeHeaders = 2; /* Indention of h1, if mHeaderStrategy = 1 or = 2. Indention of other headers is derived from that. XXX center h1? */ static const PRInt32 kIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1, indent h(x+1) this many columns more than h(x) */ static const PRInt32 kIndentSizeList = (kTabSize > kOLNumberWidth+3) ? kTabSize: kOLNumberWidth+3; // Indention of non-first lines of ul and ol static const PRInt32 kIndentSizeDD = kTabSize; // Indention of

static PRInt32 HeaderLevel(eHTMLTags aTag); static PRInt32 GetUnicharWidth(PRUnichar ucs); static PRInt32 GetUnicharStringWidth(const PRUnichar* pwcs, PRInt32 n); // Someday may want to make this non-const: static const PRUint32 TagStackSize = 500; static const PRUint32 OLStackSize = 100; nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) { nsPlainTextSerializer* it = new nsPlainTextSerializer(); if (!it) { return NS_ERROR_OUT_OF_MEMORY; } return it->QueryInterface(NS_GET_IID(nsIContentSerializer), (void**)aSerializer); } nsPlainTextSerializer::nsPlainTextSerializer() : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant" { NS_INIT_ISUPPORTS(); mOutputString = nsnull; mInHead = PR_FALSE; mAtFirstColumn = PR_TRUE; mIndent = 0; mCiteQuoteLevel = 0; mStructs = PR_TRUE; // will be read from prefs later mHeaderStrategy = 1 /*indent increasingly*/; // ditto mQuotesPreformatted = PR_FALSE; // ditto mDontWrapAnyQuotes = PR_FALSE; // ditto mSpanLevel = 0; for (PRInt32 i = 0; i <= 6; i++) { mHeaderCounter[i] = 0; } // Line breaker mWrapColumn = 72; // XXX magic number, we expect someone to reset this mCurrentLineWidth = 0; // Flow mEmptyLines = 1; // The start of the document is an "empty line" in itself, mInWhitespace = PR_TRUE; mPreFormatted = PR_FALSE; mStartedOutput = PR_FALSE; // initialize the tag stack to zero: mTagStack = new nsHTMLTag[TagStackSize]; mTagStackIndex = 0; mIgnoreAboveIndex = (PRUint32)kNotFound; // initialize the OL stack, where numbers for ordered lists are kept: mOLStack = new PRInt32[OLStackSize]; mOLStackIndex = 0; mULCount = 0; } nsPlainTextSerializer::~nsPlainTextSerializer() { delete[] mTagStack; delete[] mOLStack; } NS_IMPL_ISUPPORTS4(nsPlainTextSerializer, nsIContentSerializer, nsIContentSink, nsIHTMLContentSink, nsIHTMLToTextSink) NS_IMETHODIMP nsPlainTextSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn, nsIAtom* aCharSet, PRBool aIsCopying) { #ifdef DEBUG // Check if the major control flags are set correctly. if(aFlags & nsIDocumentEncoder::OutputFormatFlowed) { NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted, "If you want format=flowed, you must combine it with " "nsIDocumentEncoder::OutputFormatted"); } if(aFlags & nsIDocumentEncoder::OutputFormatted) { NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted), "Can't do formatted and preformatted output at the same time!"); } #endif nsresult rv; mFlags = aFlags; mWrapColumn = aWrapColumn; // Only create a linebreaker if we will handle wrapping. if(MayWrap()) { nsCOMPtr lf(do_GetService(kLWBrkCID, &rv)); if (NS_SUCCEEDED(rv)) { nsAutoString lbarg; rv = lf->GetBreaker(lbarg, getter_AddRefs(mLineBreaker)); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; } } // Set the line break character: if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) // Windows mLineBreak.Assign(NS_LITERAL_STRING("\r\n")); else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) // Mac mLineBreak.Assign(PRUnichar('\r')); else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) // Unix/DOM mLineBreak.Assign(PRUnichar('\n')); else mLineBreak.AssignWithConversion(NS_LINEBREAK); // Platform/default nsCOMPtr prefs(do_GetService(NS_PREF_CONTRACTID, &rv)); if (NS_SUCCEEDED(rv) && prefs) { if(mFlags & nsIDocumentEncoder::OutputFormatted) { PRBool tempBool; // Get some prefs that controls how we do formatted output prefs->GetBoolPref(PREF_STRUCTS, &tempBool); mStructs = tempBool; prefs->GetIntPref(PREF_HEADER_STRATEGY, &mHeaderStrategy); // The quotesPreformatted pref is a temporary measure. See bug 69638. prefs->GetBoolPref("editor.quotesPreformatted", &tempBool); mQuotesPreformatted = tempBool; // DontWrapAnyQuotes is set according to whether plaintext mail // is wrapping to window width -- see bug 134439. // We'll only want this if we're wrapping and formatted. if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) { prefs->GetBoolPref("mail.compose.wrap_to_window_width", &tempBool); mDontWrapAnyQuotes = tempBool; } } // XXX We should let the caller pass this in. PRBool allowFrames; prefs->GetBoolPref("browser.frames.enabled", &allowFrames); if (allowFrames) mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent; else mFlags |= nsIDocumentEncoder::OutputNoFramesContent; } mLineBreakDue = PR_FALSE; mFloatingLines = -1; return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::Initialize(nsAString* aOutString, PRUint32 aFlags, PRUint32 aWrapCol) { nsresult rv = Init(aFlags, aWrapCol, nsnull, PR_FALSE); NS_ENSURE_SUCCESS(rv, rv); // XXX This is wrong. It violates XPCOM string ownership rules. // We're only getting away with this because instances of this // class are restricted to single function scope. mOutputString = aOutString; return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::AppendText(nsIDOMText* aText, PRInt32 aStartOffset, PRInt32 aEndOffset, nsAString& aStr) { if (mIgnoreAboveIndex != (PRUint32)kNotFound) { return NS_OK; } NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!"); if ( aStartOffset < 0 ) return NS_ERROR_INVALID_ARG; NS_ENSURE_ARG(aText); nsresult rv = NS_OK; PRInt32 length = 0; nsAutoString textstr; nsCOMPtr content = do_QueryInterface(aText); if (!content) return NS_ERROR_FAILURE; const nsTextFragment* frag; content->GetText(&frag); if (frag) { PRInt32 endoffset = (aEndOffset == -1) ? frag->GetLength() : aEndOffset; NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!"); length = endoffset - aStartOffset; if (length <= 0) { return NS_OK; } if (frag->Is2b()) { textstr.Assign(frag->Get2b() + aStartOffset, length); } else { textstr.AssignWithConversion(frag->Get1b()+aStartOffset, length); } } mOutputString = &aStr; // We have to split the string across newlines // to match parser behavior PRInt32 start = 0; PRInt32 offset = textstr.FindCharInSet("\n\r"); while (offset != kNotFound) { if(offset>start) { // Pass in the line rv = DoAddLeaf(eHTMLTag_text, Substring(textstr, start, offset-start)); if (NS_FAILED(rv)) break; } // Pass in a newline rv = DoAddLeaf(eHTMLTag_newline, mLineBreak); if (NS_FAILED(rv)) break; start = offset+1; offset = textstr.FindCharInSet("\n\r", start); } // Consume the last bit of the string if there's any left if (NS_SUCCEEDED(rv) & (start < length)) { if (start) { rv = DoAddLeaf(eHTMLTag_text, Substring(textstr, start, length-start)); } else { rv = DoAddLeaf(eHTMLTag_text, textstr); } } mOutputString = nsnull; return rv; } NS_IMETHODIMP nsPlainTextSerializer::AppendElementStart(nsIDOMElement *aElement, PRBool aHasChildren, nsAString& aStr) { NS_ENSURE_ARG(aElement); mContent = do_QueryInterface(aElement); if (!mContent) return NS_ERROR_FAILURE; nsresult rv; PRInt32 id; rv = GetIdForContent(mContent, &id); if (NS_FAILED(rv)) return rv; PRBool isContainer = IsContainer(id); mOutputString = &aStr; if (isContainer) { rv = DoOpenContainer(id); } else { nsAutoString empty; rv = DoAddLeaf(id, empty); } mContent = 0; mOutputString = nsnull; if (!mInHead && id == eHTMLTag_head) mInHead = PR_TRUE; return rv; } NS_IMETHODIMP nsPlainTextSerializer::AppendElementEnd(nsIDOMElement *aElement, nsAString& aStr) { NS_ENSURE_ARG(aElement); mContent = do_QueryInterface(aElement); if (!mContent) return NS_ERROR_FAILURE; nsresult rv; PRInt32 id; rv = GetIdForContent(mContent, &id); if (NS_FAILED(rv)) return rv; PRBool isContainer = IsContainer(id); mOutputString = &aStr; rv = NS_OK; if (isContainer) { rv = DoCloseContainer(id); } mContent = 0; mOutputString = nsnull; if (mInHead && id == eHTMLTag_head) mInHead = PR_FALSE; return rv; } NS_IMETHODIMP nsPlainTextSerializer::Flush(nsAString& aStr) { mOutputString = &aStr; FlushLine(); mOutputString = nsnull; return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::AppendDocumentStart(nsIDOMDocument *aDocument, nsAString& aStr) { return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::OpenContainer(const nsIParserNode& aNode) { PRInt32 type = aNode.GetNodeType(); mParserNode = NS_CONST_CAST(nsIParserNode *, &aNode); return DoOpenContainer(type); } NS_IMETHODIMP nsPlainTextSerializer::CloseContainer(const nsIParserNode& aNode) { PRInt32 type = aNode.GetNodeType(); const nsAString& namestr = aNode.GetText(); nsCOMPtr name = dont_AddRef(NS_NewAtom(namestr)); mParserNode = NS_CONST_CAST(nsIParserNode *, &aNode); return DoCloseContainer(type); } NS_IMETHODIMP nsPlainTextSerializer::AddLeaf(const nsIParserNode& aNode) { if (mIgnoreAboveIndex != (PRUint32)kNotFound) { return NS_OK; } eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); const nsAString& text = aNode.GetText(); mParserNode = NS_CONST_CAST(nsIParserNode *, &aNode); if ((type == eHTMLTag_text) || (type == eHTMLTag_whitespace) || (type == eHTMLTag_newline)) { // Copy the text out, stripping out CRs nsAutoString str; PRUint32 length; str.SetCapacity(text.Length()); nsReadingIterator srcStart, srcEnd; length = nsContentUtils::CopyNewlineNormalizedUnicodeTo(text.BeginReading(srcStart), text.EndReading(srcEnd), str); str.SetLength(length); return DoAddLeaf(type, str); } else { return DoAddLeaf(type, text); } } NS_IMETHODIMP nsPlainTextSerializer::OpenHTML(const nsIParserNode& aNode) { return OpenContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::CloseHTML(const nsIParserNode& aNode) { return CloseContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::OpenHead(const nsIParserNode& aNode) { mInHead = PR_TRUE; return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::CloseHead(const nsIParserNode& aNode) { mInHead = PR_FALSE; return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::OpenBody(const nsIParserNode& aNode) { return OpenContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::CloseBody(const nsIParserNode& aNode) { return CloseContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::OpenForm(const nsIParserNode& aNode) { return OpenContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::CloseForm(const nsIParserNode& aNode) { return CloseContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::OpenMap(const nsIParserNode& aNode) { return OpenContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::CloseMap(const nsIParserNode& aNode) { return CloseContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::OpenFrameset(const nsIParserNode& aNode) { return OpenContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::CloseFrameset(const nsIParserNode& aNode) { return CloseContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::GetPref(PRInt32 aTag, PRBool& aPref) { nsHTMLTag theHTMLTag = nsHTMLTag(aTag); if (theHTMLTag == eHTMLTag_script) { aPref = mFlags & nsIDocumentEncoder::OutputNoScriptContent; } else if (theHTMLTag == eHTMLTag_frameset) { aPref = !(mFlags & nsIDocumentEncoder::OutputNoFramesContent); } else { aPref = PR_FALSE; } return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::DoFragment(PRBool aFlag) { return NS_OK; } nsresult nsPlainTextSerializer::DoOpenContainer(PRInt32 aTag) { eHTMLTags type = (eHTMLTags)aTag; if (mTagStackIndex < TagStackSize) { mTagStack[mTagStackIndex++] = type; } if (mIgnoreAboveIndex != (PRUint32)kNotFound) { return NS_OK; } if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines); // Check if this tag's content that should not be output if ((type == eHTMLTag_noscript && !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) || ((type == eHTMLTag_iframe || type == eHTMLTag_noframes) && !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) { mIgnoreAboveIndex = mTagStackIndex; return NS_OK; } if (type == eHTMLTag_body) { // Try to figure out here whether we have a // preformatted style attribute. // // Trigger on the presence of a "-moz-pre-wrap" in the // style attribute. That's a very simplistic way to do // it, but better than nothing. // Also set mWrapColumn to the value given there // (which arguably we should only do if told to do so). nsAutoString style; PRInt32 whitespace; if(NS_SUCCEEDED(GetAttributeValue(nsHTMLAtoms::style, style)) && (kNotFound != (whitespace = style.Find("white-space:")))) { if (kNotFound != style.Find("-moz-pre-wrap", PR_TRUE, whitespace)) { #ifdef DEBUG_preformatted printf("Set mPreFormatted based on style moz-pre-wrap\n"); #endif mPreFormatted = PR_TRUE; PRInt32 widthOffset = style.Find("width:"); if (widthOffset >= 0) { // We have to search for the ch before the semicolon, // not for the semicolon itself, because nsString::ToInteger() // considers 'c' to be a valid numeric char (even if radix=10) // but then gets confused if it sees it next to the number // when the radix specified was 10, and returns an error code. PRInt32 semiOffset = style.Find("ch", widthOffset+6); PRInt32 length = (semiOffset > 0 ? semiOffset - widthOffset - 6 : style.Length() - widthOffset); nsAutoString widthstr; style.Mid(widthstr, widthOffset+6, length); PRInt32 err; PRInt32 col = widthstr.ToInteger(&err); if (NS_SUCCEEDED(err)) { mWrapColumn = (PRUint32)col; #ifdef DEBUG_preformatted printf("Set wrap column to %d based on style\n", mWrapColumn); #endif } } } else if (kNotFound != style.Find("pre", PR_TRUE, whitespace)) { #ifdef DEBUG_preformatted printf("Set mPreFormatted based on style pre\n"); #endif mPreFormatted = PR_TRUE; mWrapColumn = 0; } } else { mPreFormatted = PR_FALSE; } return NS_OK; } if (!DoOutput()) { return NS_OK; } if (type == eHTMLTag_p || type == eHTMLTag_pre) { EnsureVerticalSpace(1); // Should this be 0 in unformatted case? } else if (type == eHTMLTag_td || type == eHTMLTag_th) { // We must make sure that the content of two table cells get a // space between them. // Fow now, I will only add a SPACE. Could be a TAB or something // else but I'm not sure everything can handle the TAB so SPACE // seems like a better solution. if(!mInWhitespace) { // Maybe add something else? Several spaces? A TAB? SPACE+TAB? AddToLine(kSpace.get(), 1); mInWhitespace = PR_TRUE; } } else if (type == eHTMLTag_ul) { // Indent here to support nested lists, which aren't included in li :-( EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); // Must end the current line before we change indention mIndent += kIndentSizeList; mULCount++; } else if (type == eHTMLTag_ol) { EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); // Must end the current line before we change indention if (mOLStackIndex < OLStackSize) { nsAutoString startAttr; PRInt32 startVal = 1; if(NS_SUCCEEDED(GetAttributeValue(nsHTMLAtoms::start, startAttr))){ PRInt32 rv = 0; startVal = startAttr.ToInteger(&rv); if (NS_FAILED(rv)) startVal = 1; } mOLStack[mOLStackIndex++] = startVal; } mIndent += kIndentSizeList; // see ul } else if (type == eHTMLTag_li) { if (mTagStackIndex > 1 && IsInOL()) { if (mOLStackIndex > 0) { nsAutoString valueAttr; if(NS_SUCCEEDED(GetAttributeValue(nsHTMLAtoms::value, valueAttr))){ PRInt32 rv = 0; PRInt32 valueAttrVal = valueAttr.ToInteger(&rv); if (NS_SUCCEEDED(rv)) mOLStack[mOLStackIndex-1] = valueAttrVal; } // This is what nsBulletFrame does for OLs: mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10); } else { mInIndentString.Append(PRUnichar('#')); } mInIndentString.Append(PRUnichar('.')); } else { static char bulletCharArray[] = "*o+#"; NS_ASSERTION(mULCount > 0, "mULCount should be greater than 0 here"); char bulletChar = bulletCharArray[(mULCount - 1) % 4]; mInIndentString.Append(PRUnichar(bulletChar)); } mInIndentString.Append(PRUnichar(' ')); } else if (type == eHTMLTag_dl) { EnsureVerticalSpace(1); } else if (type == eHTMLTag_dd) { mIndent += kIndentSizeDD; } else if (type == eHTMLTag_span) { ++mSpanLevel; } else if (type == eHTMLTag_blockquote) { EnsureVerticalSpace(1); nsAutoString value; nsresult rv = GetAttributeValue(nsHTMLAtoms::type, value); if (NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite")) { mCiteQuoteLevel++; } else { mIndent += kTabSize; // Check for some maximum value? } } // Else make sure we'll separate block level tags, // even if we're about to leave, before doing any other formatting. else if (IsBlockLevel(aTag)) { EnsureVerticalSpace(0); } ////////////////////////////////////////////////////////////// if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { return NS_OK; } ////////////////////////////////////////////////////////////// // The rest of this routine is formatted output stuff, // which we should skip if we're not formatted: ////////////////////////////////////////////////////////////// if (type == eHTMLTag_h1 || type == eHTMLTag_h2 || type == eHTMLTag_h3 || type == eHTMLTag_h4 || type == eHTMLTag_h5 || type == eHTMLTag_h6) { EnsureVerticalSpace(2); if (mHeaderStrategy == 2) { // numbered mIndent += kIndentSizeHeaders; // Caching nsCAutoString leadup; PRInt32 level = HeaderLevel(type); // Increase counter for current level mHeaderCounter[level]++; // Reset all lower levels PRInt32 i; for (i = level + 1; i <= 6; i++) { mHeaderCounter[i] = 0; } // Construct numbers for (i = 1; i <= level; i++) { leadup.AppendInt(mHeaderCounter[i]); leadup += "."; } leadup += " "; Write(NS_ConvertASCIItoUCS2(leadup.get())); } else if (mHeaderStrategy == 1) { // indent increasingly mIndent += kIndentSizeHeaders; for (PRInt32 i = HeaderLevel(type); i > 1; i--) { // for h(x), run x-1 times mIndent += kIndentIncrementHeaders; } } } else if (type == eHTMLTag_a && !IsCurrentNodeConverted()) { nsAutoString url; if (NS_SUCCEEDED(GetAttributeValue(nsHTMLAtoms::href, url)) && !url.IsEmpty()) { mURL = url; } } else if (type == eHTMLTag_q) { Write(NS_LITERAL_STRING("\"")); } else if (type == eHTMLTag_sup && mStructs && !IsCurrentNodeConverted()) { Write(NS_LITERAL_STRING("^")); } else if (type == eHTMLTag_sub && mStructs && !IsCurrentNodeConverted()) { Write(NS_LITERAL_STRING("_")); } else if (type == eHTMLTag_code && mStructs && !IsCurrentNodeConverted()) { Write(NS_LITERAL_STRING("|")); } else if ((type == eHTMLTag_strong || type == eHTMLTag_b) && mStructs && !IsCurrentNodeConverted()) { Write(NS_LITERAL_STRING("*")); } else if ((type == eHTMLTag_em || type == eHTMLTag_i) && mStructs && !IsCurrentNodeConverted()) { Write(NS_LITERAL_STRING("/")); } else if (type == eHTMLTag_u && mStructs && !IsCurrentNodeConverted()) { Write(NS_LITERAL_STRING("_")); } return NS_OK; } nsresult nsPlainTextSerializer::DoCloseContainer(PRInt32 aTag) { eHTMLTags type = (eHTMLTags)aTag; if (mTagStackIndex > 0) { --mTagStackIndex; } if (mTagStackIndex >= mIgnoreAboveIndex) { return NS_OK; } mIgnoreAboveIndex = (PRUint32)kNotFound; // End current line if we're ending a block level tag if((type == eHTMLTag_body) || (type == eHTMLTag_html)) { // We want the output to end with a new line, // but in preformatted areas like text fields, // we can't emit newlines that weren't there. // So add the newline only in the case of formatted output. if (mFlags & nsIDocumentEncoder::OutputFormatted) { EnsureVerticalSpace(0); } else { FlushLine(); } // We won't want to do anything with these in formatted mode either, // so just return now: return NS_OK; } else if ((type == eHTMLTag_tr) || (type == eHTMLTag_li) || (type == eHTMLTag_dt)) { // Items that should always end a line, but get no more whitespace if (mFloatingLines < 0) mFloatingLines = 0; mLineBreakDue = PR_TRUE; } else if (type == eHTMLTag_pre) { mFloatingLines = 1; mLineBreakDue = PR_TRUE; } else if (type == eHTMLTag_ul) { mIndent -= kIndentSizeList; if (--mULCount + mOLStackIndex == 0) { mFloatingLines = 1; mLineBreakDue = PR_TRUE; } } else if (type == eHTMLTag_ol) { FlushLine(); // Doing this after decreasing OLStackIndex would be wrong. mIndent -= kIndentSizeList; mOLStackIndex--; if (mULCount + mOLStackIndex == 0) { mFloatingLines = 1; mLineBreakDue = PR_TRUE; } } else if (type == eHTMLTag_dd) { mIndent -= kIndentSizeDD; } else if (type == eHTMLTag_span) { --mSpanLevel; } else if (type == eHTMLTag_div) { if (mFloatingLines < 0) mFloatingLines = 0; mLineBreakDue = PR_TRUE; } else if (type == eHTMLTag_blockquote) { FlushLine(); // Is this needed? nsAutoString value; nsresult rv = GetAttributeValue(nsHTMLAtoms::type, value); if (NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite")) { mCiteQuoteLevel--; } else { mIndent -= kTabSize; } mFloatingLines = 1; mLineBreakDue = PR_TRUE; } else if (IsBlockLevel(aTag) && type != eHTMLTag_script && type != eHTMLTag_doctypeDecl && type != eHTMLTag_markupDecl) { // All other blocks get 1 vertical space after them // in formatted mode, otherwise 0. // This is hard. Sometimes 0 is a better number, but // how to know? if (mFlags & nsIDocumentEncoder::OutputFormatted) EnsureVerticalSpace(1); else { if (mFloatingLines < 0) mFloatingLines = 0; mLineBreakDue = PR_TRUE; } } ////////////////////////////////////////////////////////////// if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { return NS_OK; } ////////////////////////////////////////////////////////////// // The rest of this routine is formatted output stuff, // which we should skip if we're not formatted: ////////////////////////////////////////////////////////////// if (type == eHTMLTag_h1 || type == eHTMLTag_h2 || type == eHTMLTag_h3 || type == eHTMLTag_h4 || type == eHTMLTag_h5 || type == eHTMLTag_h6) { if (mHeaderStrategy) { /*numbered or indent increasingly*/ mIndent -= kIndentSizeHeaders; } if (mHeaderStrategy == 1 /*indent increasingly*/ ) { for (PRInt32 i = HeaderLevel(type); i > 1; i--) { // for h(x), run x-1 times mIndent -= kIndentIncrementHeaders; } } EnsureVerticalSpace(1); } else if (type == eHTMLTag_a && !IsCurrentNodeConverted() && !mURL.IsEmpty()) { nsAutoString temp; temp.Assign(NS_LITERAL_STRING(" <")); temp += mURL; temp.Append(PRUnichar('>')); Write(temp); mURL.Truncate(); } else if (type == eHTMLTag_q) { Write(NS_LITERAL_STRING("\"")); } else if ((type == eHTMLTag_sup || type == eHTMLTag_sub) && mStructs && !IsCurrentNodeConverted()) { Write(kSpace); } else if (type == eHTMLTag_code && mStructs && !IsCurrentNodeConverted()) { Write(NS_LITERAL_STRING("|")); } else if ((type == eHTMLTag_strong || type == eHTMLTag_b) && mStructs && !IsCurrentNodeConverted()) { Write(NS_LITERAL_STRING("*")); } else if ((type == eHTMLTag_em || type == eHTMLTag_i) && mStructs && !IsCurrentNodeConverted()) { Write(NS_LITERAL_STRING("/")); } else if (type == eHTMLTag_u && mStructs && !IsCurrentNodeConverted()) { Write(NS_LITERAL_STRING("_")); } return NS_OK; } nsresult nsPlainTextSerializer::DoAddLeaf(PRInt32 aTag, const nsAString& aText) { // If we don't want any output, just return if (!DoOutput()) { return NS_OK; } if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines); eHTMLTags type = (eHTMLTags)aTag; if ((mTagStackIndex > 1 && mTagStack[mTagStackIndex-2] == eHTMLTag_select) || (mTagStackIndex > 0 && mTagStack[mTagStackIndex-1] == eHTMLTag_select)) { // Don't output the contents of SELECT elements; // Might be nice, eventually, to output just the selected element. // Read more in bug 31994. return NS_OK; } else if (mTagStackIndex > 0 && mTagStack[mTagStackIndex-1] == eHTMLTag_script) { // Don't output the contents of