/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The contents of this file are subject to the Netscape Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1998 Netscape Communications Corporation. All * Rights Reserved. * * Contributor(s): */ #include "nsHTMLContentSerializer.h" #include "nsIDOMElement.h" #include "nsIContent.h" #include "nsIDocument.h" #include "nsINameSpaceManager.h" #include "nsString.h" #include "nsXPIDLString.h" #include "nsParserCIID.h" #include "nsIServiceManager.h" #include "nsIDocumentEncoder.h" #include "nsLayoutAtoms.h" #include "nsHTMLAtoms.h" #include "nsIURI.h" #include "nsNetUtil.h" #include "nsEscape.h" static NS_DEFINE_CID(kParserServiceCID, NS_PARSERSERVICE_CID); #define kIndentStr NS_LITERAL_STRING(" ") #define kMozStr "_moz" #define kLessThan NS_LITERAL_STRING("<") #define kGreaterThan NS_LITERAL_STRING(">") #define kEndTag NS_LITERAL_STRING("QueryInterface(NS_GET_IID(nsIContentSerializer), (void**)aSerializer); } nsHTMLContentSerializer::nsHTMLContentSerializer() { mColPos = 0; mIndent = 0; mInBody = PR_FALSE; mInCDATA = PR_FALSE; } nsHTMLContentSerializer::~nsHTMLContentSerializer() { } nsresult nsHTMLContentSerializer::GetParserService(nsIParserService** aParserService) { if (!mParserService) { nsresult rv; mParserService = do_GetService(kParserServiceCID, &rv); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; } CallQueryInterface(mParserService.get(), aParserService); return NS_OK; } NS_IMETHODIMP nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn) { mFlags = aFlags; if (!aWrapColumn) { mMaxColumn = 72; } else { mMaxColumn = aWrapColumn; } mDoFormat = (mFlags & nsIDocumentEncoder::OutputFormatted) ? PR_TRUE : PR_FALSE; mBodyOnly = (mFlags & nsIDocumentEncoder::OutputBodyOnly) ? PR_TRUE : PR_FALSE; // Set the line break character: if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { // Windows/mail mLineBreak.AssignWithConversion("\r\n"); } else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { // Mac mLineBreak.AssignWithConversion("\r"); } else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { // Unix/DOM mLineBreak.AssignWithConversion("\n"); } else { mLineBreak.AssignWithConversion(NS_LINEBREAK); // Platform/default } mPreLevel = 0; return NS_OK; } NS_IMETHODIMP nsHTMLContentSerializer::AppendText(nsIDOMText* aText, PRInt32 aStartOffset, PRInt32 aEndOffset, nsAWritableString& aStr) { NS_ENSURE_ARG(aText); nsAutoString data; nsresult rv; rv = AppendTextData((nsIDOMNode*)aText, aStartOffset, aEndOffset, data, PR_TRUE, PR_FALSE); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; PRInt32 lastNewlineOffset = kNotFound; PRBool hasLongLines = HasLongLines(data, lastNewlineOffset); if (mPreLevel || (!mDoFormat && !hasLongLines) || (mFlags & nsIDocumentEncoder::OutputRaw)) { AppendToString(data, aStr); if (lastNewlineOffset != kNotFound) { mColPos = data.Length() - lastNewlineOffset; } } else { AppendToStringWrapped(data, aStr, PR_FALSE); } return NS_OK; } void nsHTMLContentSerializer::SerializeAttributes(nsIContent* aContent, nsIAtom* aTagName, nsAWritableString& aStr) { nsresult rv; PRInt32 index, count; nsAutoString nameStr, valueStr; PRInt32 namespaceID; nsCOMPtr attrName, attrPrefix; aContent->GetAttributeCount(count); for (index = 0; index < count; index++) { aContent->GetAttributeNameAt(index, namespaceID, *getter_AddRefs(attrName), *getter_AddRefs(attrPrefix)); // Filter out any attribute starting with _moz nsXPIDLString sharedName; attrName->GetUnicode(getter_Shares(sharedName)); if (nsCRT::strncmp(sharedName, NS_ConvertASCIItoUCS2(kMozStr), sizeof(kMozStr)-1) == 0) { continue; } aContent->GetAttribute(namespaceID, attrName, valueStr); // // Filter out special case of
or
, // used by the editor. Bug 16988. Yuck. // if ((aTagName == nsHTMLAtoms::br) && (attrName.get() == nsHTMLAtoms::type) && (valueStr.EqualsWithConversion(kMozStr, PR_FALSE, sizeof(kMozStr)-1))) { continue; } // Make all links absolute when converting only the selection: if ((mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) && ((attrName.get() == nsHTMLAtoms::href) || (attrName.get() == nsHTMLAtoms::src))) { // Would be nice to handle OBJECT and APPLET tags, // but that gets more complicated since we have to // search the tag list for CODEBASE as well. // For now, just leave them relative. nsCOMPtr document; aContent->GetDocument(*getter_AddRefs(document)); if (document) { nsCOMPtr uri = dont_AddRef(document->GetDocumentURL()); if (uri) { nsAutoString absURI; rv = NS_MakeAbsoluteURI(absURI, valueStr, uri); if (NS_SUCCEEDED(rv)) { valueStr = absURI; } } } } attrName->ToString(nameStr); SerializeAttr(nsAutoString(), nameStr, valueStr, aStr); } } NS_IMETHODIMP nsHTMLContentSerializer::AppendElementStart(nsIDOMElement *aElement, nsAWritableString& aStr) { NS_ENSURE_ARG(aElement); nsCOMPtr content = do_QueryInterface(aElement); if (!content) return NS_ERROR_FAILURE; // The _moz_dirty attribute is emitted by the editor to // indicate that this element should be pretty printed // even if we're not in pretty printing mode PRBool hasDirtyAttr = HasDirtyAttr(content); nsCOMPtr name; content->GetTag(*getter_AddRefs(name)); if (name.get() == nsHTMLAtoms::body) { mInBody = PR_TRUE; } if (LineBreakBeforeOpen(name, hasDirtyAttr)) { AppendToString(mLineBreak, aStr); mColPos = 0; } StartIndentation(name, hasDirtyAttr, aStr); if ((name.get() == nsHTMLAtoms::pre) || (name.get() == nsHTMLAtoms::script) || (name.get() == nsHTMLAtoms::style)) { mPreLevel++; } AppendToString(kLessThan, aStr); nsXPIDLString sharedName; name->GetUnicode(getter_Shares(sharedName)); AppendToString(sharedName, -1, aStr); SerializeAttributes(content, name, aStr); AppendToString(kGreaterThan, aStr); if (LineBreakAfterOpen(name, hasDirtyAttr)) { AppendToString(mLineBreak, aStr); mColPos = 0; } if ((name.get() == nsHTMLAtoms::script) || (name.get() == nsHTMLAtoms::style) || (name.get() == nsHTMLAtoms::noscript)) { mInCDATA = PR_TRUE; } return NS_OK; } NS_IMETHODIMP nsHTMLContentSerializer::AppendElementEnd(nsIDOMElement *aElement, nsAWritableString& aStr) { NS_ENSURE_ARG(aElement); nsCOMPtr content = do_QueryInterface(aElement); if (!content) return NS_ERROR_FAILURE; PRBool hasDirtyAttr = HasDirtyAttr(content); nsCOMPtr name; content->GetTag(*getter_AddRefs(name)); if ((name.get() == nsHTMLAtoms::pre) || (name.get() == nsHTMLAtoms::script) || (name.get() == nsHTMLAtoms::style)) { mPreLevel--; } nsXPIDLString sharedName; name->GetUnicode(getter_Shares(sharedName)); nsCOMPtr parserService; GetParserService(getter_AddRefs(parserService)); if (parserService && (name.get() != nsHTMLAtoms::style)) { nsAutoString nameStr(sharedName); PRBool isContainer; PRInt32 id; parserService->HTMLStringTagToId(nameStr, &id); parserService->IsContainer(id, isContainer); if (!isContainer) return NS_OK; } if (LineBreakBeforeClose(name, hasDirtyAttr)) { AppendToString(mLineBreak, aStr); mColPos = 0; } EndIndentation(name, hasDirtyAttr, aStr); AppendToString(kEndTag, aStr); AppendToString(sharedName, -1, aStr); AppendToString(kGreaterThan, aStr); if (LineBreakAfterClose(name, hasDirtyAttr)) { AppendToString(mLineBreak, aStr); mColPos = 0; } mInCDATA = PR_FALSE; return NS_OK; } void nsHTMLContentSerializer::AppendToString(const PRUnichar* aStr, PRInt32 aLength, nsAWritableString& aOutputStr) { if (mBodyOnly && !mInBody) { return; } PRInt32 length = (aLength == -1) ? nsCRT::strlen(aStr) : aLength; mColPos += length; aOutputStr.Append(aStr, length); } void nsHTMLContentSerializer::AppendToString(const PRUnichar aChar, nsAWritableString& aOutputStr) { if (mBodyOnly && !mInBody) { return; } mColPos += 1; aOutputStr.Append(aChar); } void nsHTMLContentSerializer::AppendToStringWrapped(const nsAReadableString& aStr, nsAWritableString& aOutputStr, PRBool aTranslateEntities) { PRInt32 length = aStr.Length(); nsAutoString line; PRBool done = PR_FALSE; PRInt32 indx = 0; PRInt32 strOffset = 0; PRInt32 lineLength, oldLineEnd; // Make sure we haven't gone too far already if (mColPos > mMaxColumn) { AppendToString(mLineBreak, aOutputStr); mColPos = 0; } // Find the end of the first old line oldLineEnd = aStr.FindChar(PRUnichar('\n'), 0); while ((!done) && (strOffset < length)) { // This is how much is needed to fill up the new line PRInt32 leftInLine = mMaxColumn - mColPos; // This is the last position in the current old line PRInt32 oldLineLimit; if (oldLineEnd == kNotFound) { oldLineLimit = length; } else { oldLineLimit = oldLineEnd; } PRBool addLineBreak = PR_FALSE; // if we can fill up the new line with less than what's // in the current old line... if ((strOffset + leftInLine) < oldLineLimit) { addLineBreak = PR_TRUE; // Look for the next word end to break indx = aStr.FindChar(PRUnichar(' '), strOffset + leftInLine); // If it's after the end of the current line, then break at // the current line if ((indx == kNotFound) || ((oldLineEnd != kNotFound) && (oldLineEnd < indx))) { indx = oldLineEnd; } } else { indx = oldLineEnd; } // if there was no place to break, then just add the entire string if (indx == kNotFound) { if (strOffset == 0) { AppendToString(aStr, aOutputStr, aTranslateEntities); } else { lineLength = length - strOffset; aStr.Right(line, lineLength); AppendToString(line, aOutputStr, aTranslateEntities); } done = PR_TRUE; } else { // Add the part of the current old line that's part of the // new line lineLength = indx - strOffset; aStr.Mid(line, strOffset, lineLength); AppendToString(line, aOutputStr, aTranslateEntities); // if we've reached the end of an old line, don't add the // old line break and find the end of the next old line. if (indx == oldLineEnd) { oldLineEnd = aStr.FindChar(PRUnichar('\n'), indx+1); AppendToString(NS_LITERAL_STRING(" "), aOutputStr); } if (addLineBreak) { AppendToString(mLineBreak, aOutputStr); mColPos = 0; } strOffset = indx+1; } } } static PRUint16 kGTVal = 62; static const char* kEntities[] = { "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "amp", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "lt", "", "gt" }; static const char* kAttrEntities[] = { "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "quot", "", "", "", "amp", "apos", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "lt", "", "gt" }; void nsHTMLContentSerializer::AppendToString(const nsAReadableString& aStr, nsAWritableString& aOutputStr, PRBool aTranslateEntities, PRBool aIncrColumn) { if (mBodyOnly && !mInBody) { return; } if (aIncrColumn) { mColPos += aStr.Length(); } if (aTranslateEntities && !mInCDATA) { if (mFlags & nsIDocumentEncoder::OutputEncodeEntities) { nsCOMPtr parserService; GetParserService(getter_AddRefs(parserService)); if (!parserService) { NS_ERROR("Can't get parser service"); return; } nsReadingIterator done_reading; aStr.EndReading(done_reading); // for each chunk of |aString|... PRUint32 advanceLength = 0; nsReadingIterator iter; const char **entityTable = mInAttribute ? kAttrEntities : kEntities; for (aStr.BeginReading(iter); iter != done_reading; iter.advance(PRInt32(advanceLength))) { PRUint32 fragmentLength = iter.size_forward(); const PRUnichar* c = iter.get(); const PRUnichar* fragmentStart = c; const PRUnichar* fragmentEnd = c + fragmentLength; const char* entityText = nsnull; nsCAutoString entityReplacement; advanceLength = 0; // for each character in this chunk, check if it // needs to be replaced for (; c < fragmentEnd; c++, advanceLength++) { PRUnichar val = *c; if ((val <= kGTVal) && (entityTable[val][0] != 0)) { entityText = entityTable[val]; break; } else if (val > 127) { parserService->HTMLConvertUnicodeToEntity(val, entityReplacement); if (entityReplacement.Length() > 0) { entityText = entityReplacement.GetBuffer(); break; } } } aOutputStr.Append(fragmentStart, advanceLength); if (entityText) { aOutputStr.Append(PRUnichar('&')); aOutputStr.Append(NS_ConvertASCIItoUCS2(entityText)); aOutputStr.Append(PRUnichar(';')); advanceLength++; } } } else { nsXMLContentSerializer::AppendToString(aStr, aOutputStr, aTranslateEntities, aIncrColumn); } return; } aOutputStr.Append(aStr); } PRBool nsHTMLContentSerializer::HasDirtyAttr(nsIContent* aContent) { nsAutoString val; if (NS_CONTENT_ATTR_NOT_THERE != aContent->GetAttribute(kNameSpaceID_None, nsLayoutAtoms::mozdirty, val)) { return PR_TRUE; } else { return PR_FALSE; } } PRBool nsHTMLContentSerializer::LineBreakBeforeOpen(nsIAtom* aName, PRBool aHasDirtyAttr) { if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel || !mColPos || (mFlags & nsIDocumentEncoder::OutputRaw)) { return PR_FALSE; } if (aName == nsHTMLAtoms::title || aName == nsHTMLAtoms::meta || aName == nsHTMLAtoms::link || aName == nsHTMLAtoms::style || aName == nsHTMLAtoms::script || aName == nsHTMLAtoms::html) { return PR_TRUE; } else { nsCOMPtr parserService; GetParserService(getter_AddRefs(parserService)); if (parserService) { nsAutoString str; aName->ToString(str); PRBool res; PRInt32 id; parserService->HTMLStringTagToId(str, &id); parserService->IsBlock(id, res); return res; } } return PR_FALSE; } PRBool nsHTMLContentSerializer::LineBreakAfterOpen(nsIAtom* aName, PRBool aHasDirtyAttr) { if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel || (mFlags & nsIDocumentEncoder::OutputRaw)) { return PR_FALSE; } if ((aName == nsHTMLAtoms::html) || (aName == nsHTMLAtoms::head) || (aName == nsHTMLAtoms::body) || (aName == nsHTMLAtoms::ul) || (aName == nsHTMLAtoms::ol) || (aName == nsHTMLAtoms::dl) || (aName == nsHTMLAtoms::table) || (aName == nsHTMLAtoms::tbody) || (aName == nsHTMLAtoms::tr) || (aName == nsHTMLAtoms::br) || (aName == nsHTMLAtoms::meta) || (aName == nsHTMLAtoms::link) || (aName == nsHTMLAtoms::script) || (aName == nsHTMLAtoms::style)) { return PR_TRUE; } return PR_FALSE; } PRBool nsHTMLContentSerializer::LineBreakBeforeClose(nsIAtom* aName, PRBool aHasDirtyAttr) { if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel || !mColPos || (mFlags & nsIDocumentEncoder::OutputRaw)) { return PR_FALSE; } if ((aName == nsHTMLAtoms::html) || (aName == nsHTMLAtoms::head) || (aName == nsHTMLAtoms::body) || (aName == nsHTMLAtoms::ul) || (aName == nsHTMLAtoms::ol) || (aName == nsHTMLAtoms::dl) || (aName == nsHTMLAtoms::table) || (aName == nsHTMLAtoms::tbody)) { return PR_TRUE; } return PR_FALSE; } PRBool nsHTMLContentSerializer::LineBreakAfterClose(nsIAtom* aName, PRBool aHasDirtyAttr) { if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel || (mFlags & nsIDocumentEncoder::OutputRaw)) { return PR_FALSE; } if ((aName == nsHTMLAtoms::html) || (aName == nsHTMLAtoms::head) || (aName == nsHTMLAtoms::body) || (aName == nsHTMLAtoms::tr) || (aName == nsHTMLAtoms::th) || (aName == nsHTMLAtoms::td) || (aName == nsHTMLAtoms::pre) || (aName == nsHTMLAtoms::title) || (aName == nsHTMLAtoms::li) || (aName == nsHTMLAtoms::dt) || (aName == nsHTMLAtoms::dd) || (aName == nsHTMLAtoms::blockquote) || (aName == nsHTMLAtoms::p) || (aName == nsHTMLAtoms::div)) { return PR_TRUE; } else { nsCOMPtr parserService; GetParserService(getter_AddRefs(parserService)); if (parserService) { nsAutoString str; aName->ToString(str); PRBool res; PRInt32 id; parserService->HTMLStringTagToId(str, &id); parserService->IsBlock(id, res); return res; } } return PR_FALSE; } void nsHTMLContentSerializer::StartIndentation(nsIAtom* aName, PRBool aHasDirtyAttr, nsAWritableString& aStr) { if ((mDoFormat || aHasDirtyAttr) && !mPreLevel && !mColPos) { for (PRInt32 i = mIndent; --i >= 0; ) { AppendToString(kIndentStr, -1, aStr); } } if ((aName == nsHTMLAtoms::head) || (aName == nsHTMLAtoms::table) || (aName == nsHTMLAtoms::tr) || (aName == nsHTMLAtoms::ul) || (aName == nsHTMLAtoms::ol) || (aName == nsHTMLAtoms::tbody) || (aName == nsHTMLAtoms::form) || (aName == nsHTMLAtoms::frameset) || (aName == nsHTMLAtoms::blockquote) || (aName == nsHTMLAtoms::li) || (aName == nsHTMLAtoms::dt) || (aName == nsHTMLAtoms::dd)) { mIndent++; } } void nsHTMLContentSerializer::EndIndentation(nsIAtom* aName, PRBool aHasDirtyAttr, nsAWritableString& aStr) { if ((aName == nsHTMLAtoms::head) || (aName == nsHTMLAtoms::table) || (aName == nsHTMLAtoms::tr) || (aName == nsHTMLAtoms::ul) || (aName == nsHTMLAtoms::ol) || (aName == nsHTMLAtoms::li) || (aName == nsHTMLAtoms::tbody) || (aName == nsHTMLAtoms::form) || (aName == nsHTMLAtoms::frameset)) { mIndent--; } if ((mDoFormat || aHasDirtyAttr) && !mPreLevel && !mColPos) { for (PRInt32 i = mIndent; --i >= 0; ) { AppendToString(kIndentStr, -1, aStr); } } } // See if the string has any lines longer than longLineLen: // if so, we presume formatting is wonky (e.g. the node has been edited) // and we'd better rewrap the whole text node. PRBool nsHTMLContentSerializer::HasLongLines(const nsString& text, PRInt32& aLastNewlineOffset) { PRUint32 start=0; PRUint32 theLen=text.Length(); PRBool rv = PR_FALSE; aLastNewlineOffset = kNotFound; for (start = 0; start < theLen; ) { PRInt32 eol = text.FindChar('\n', PR_FALSE, start); if (eol < 0) { eol = text.Length(); } else { aLastNewlineOffset = eol; } if (PRInt32(eol - start) > kLongLineLen) rv = PR_TRUE; start = eol+1; } return rv; }