1338 lines
33 KiB
C++
1338 lines
33 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
|
*
|
|
* The contents of this file are subject to the Netscape Public
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code is Mozilla Communicator client code.
|
|
*
|
|
* The Initial Developer of the Original Code is Netscape Communications
|
|
* Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All
|
|
* Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Pierre Phaneuf <pp@ludusdesign.com>
|
|
*/
|
|
|
|
/**
 * MODULE NOTES:
 *
 * This file implements the concrete nsHTMLContentSinkStream class.
 * This class is used during the parsing process as the
 * primary interface between the parser and the content
 * model.
 */
|
|
|
|
#include "nsHTMLContentSinkStream.h"
|
|
#include "nsIParserNode.h"
|
|
#include <ctype.h>
|
|
#include "nsString.h"
|
|
#include "nsIParser.h"
|
|
#include "nsICharsetAlias.h"
|
|
#include "nsIServiceManager.h"
|
|
#include "nsIEntityConverter.h"
|
|
#include "nsCRT.h"
|
|
#include "nsIDocumentEncoder.h" // for output flags
|
|
#include "nshtmlpars.h"
|
|
|
|
#include "nsIOutputStream.h"
|
|
#include "nsFileStream.h"
|
|
|
|
#include "nsNetUtil.h" // for NS_MakeAbsoluteURI
|
|
|
|
static NS_DEFINE_CID(kSaveAsCharsetCID, NS_SAVEASCHARSET_CID);
|
|
static NS_DEFINE_CID(kEntityConverterCID, NS_ENTITYCONVERTER_CID);
|
|
|
|
static char* gHeaderComment = "<!-- This page was created by the Gecko output system. -->";
|
|
static char* gDocTypeHeader = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">";
|
|
const int gTabSize=2;
|
|
|
|
static const nsString gMozDirty = NS_ConvertToString("_moz_dirty");
|
|
|
|
// Forward declarations of the file-local formatting predicates defined at
// the end of this file. The return types must match the definitions
// (all PRBool); declaring them as PRInt32 only compiles where the two
// typedefs happen to name the same underlying type.
static PRBool IsInline(eHTMLTags aTag);
static PRBool IsBlockLevel(eHTMLTags aTag);
static PRBool BreakBeforeOpen(eHTMLTags aTag);
static PRBool BreakAfterOpen(eHTMLTags aTag);
static PRBool BreakBeforeClose(eHTMLTags aTag);
static PRBool BreakAfterClose(eHTMLTags aTag);
static PRBool IndentChildren(eHTMLTags aTag);
|
|
|
|
|
|
/**
|
|
* This method gets called as part of our COM-like interfaces.
|
|
* Its purpose is to create an interface to parser object
|
|
* of some type.
|
|
*
|
|
* @update gess 4/8/98
|
|
* @param nsIID id of object to discover
|
|
* @param aInstancePtr ptr to newly discovered interface
|
|
* @return NS_xxx result code
|
|
*/
|
|
nsresult
|
|
nsHTMLContentSinkStream::QueryInterface(const nsIID& aIID, void** aInstancePtr)
|
|
{
|
|
if (NULL == aInstancePtr) {
|
|
return NS_ERROR_NULL_POINTER;
|
|
}
|
|
if (aIID.Equals(NS_GET_IID(nsISupports))) {
|
|
*aInstancePtr = (nsIContentSink*)(this);
|
|
}
|
|
else if (aIID.Equals(NS_GET_IID(nsIContentSink))) {
|
|
*aInstancePtr = (nsIContentSink*)(this);
|
|
}
|
|
else if (aIID.Equals(NS_GET_IID(nsIHTMLContentSink))) {
|
|
*aInstancePtr = (nsIHTMLContentSink*)(this);
|
|
}
|
|
else if (aIID.Equals(NS_GET_IID(nsIHTMLContentSinkStream))) {
|
|
*aInstancePtr = (nsIHTMLContentSinkStream*)(this);
|
|
}
|
|
else {
|
|
*aInstancePtr=0;
|
|
return NS_NOINTERFACE;
|
|
}
|
|
NS_ADDREF_THIS();
|
|
return NS_OK;
|
|
}
|
|
|
|
// Reference counting (AddRef/Release) is generated by the standard macros.
NS_IMPL_ADDREF(nsHTMLContentSinkStream)
NS_IMPL_RELEASE(nsHTMLContentSinkStream)
|
|
|
|
/**
|
|
* Construct a content sink stream.
|
|
* @update gess7/7/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsHTMLContentSinkStream::nsHTMLContentSinkStream()
|
|
{
|
|
NS_INIT_REFCNT();
|
|
mLowerCaseTags = PR_TRUE;
|
|
memset(mHTMLTagStack,0,sizeof(mHTMLTagStack));
|
|
memset(mDirtyStack,0,sizeof(mDirtyStack));
|
|
mHTMLStackPos = 0;
|
|
mColPos = 0;
|
|
mIndent = 0;
|
|
mInBody = PR_FALSE;
|
|
mBuffer = nsnull;
|
|
mBufferSize = 0;
|
|
mBufferLength = 0;
|
|
mFlags = 0;
|
|
mHasOpenHtmlTag=PR_FALSE;
|
|
}
|
|
|
|
/**
 * Configure the sink's output destinations and formatting options.
 *
 * @param aOutStream        stream to write to (stored in mStream)
 * @param aOutString        string to append to (stored in mString)
 * @param aCharsetOverride  optional charset name; when null the current
 *                          override is left untouched
 * @param aFlags            nsIDocumentEncoder output flags
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::Initialize(nsIOutputStream* aOutStream,
                                    nsString* aOutString,
                                    const nsString* aCharsetOverride,
                                    PRUint32 aFlags)
{
  // Destinations and raw flags.
  mStream = aOutStream;
  mString = aOutString;
  mFlags = aFlags;

  // Derived formatting switches.
  mDoFormat = PR_FALSE;
  if (aFlags & nsIDocumentEncoder::OutputFormatted)
    mDoFormat = PR_TRUE;

  mBodyOnly = PR_FALSE;
  if (aFlags & nsIDocumentEncoder::OutputBodyOnly)
    mBodyOnly = PR_TRUE;

  // The header is only produced for formatted, whole-document output
  // and can be suppressed explicitly with OutputNoDoctype.
  mDoHeader = PR_FALSE;
  if (!mBodyOnly && mDoFormat
      && !(aFlags & nsIDocumentEncoder::OutputNoDoctype))
    mDoHeader = PR_TRUE;

  mMaxColumn = 72;
  mPreLevel = 0;

  if (aCharsetOverride != nsnull)
    mCharsetOverride.AssignWithConversion(aCharsetOverride->GetUnicode());

  return NS_OK;
}
|
|
|
|
/**
 * Release the scratch conversion buffer.
 *
 * mBuffer is allocated with new char[] in EnsureBufferSize(), so it must
 * be released with delete [] (not with the nsMemory allocator).
 */
nsHTMLContentSinkStream::~nsHTMLContentSinkStream()
{
  delete [] mBuffer;   // deleting a null pointer is a no-op
  mBuffer = nsnull;
}
|
|
|
|
/**
|
|
* This method tells the sink whether or not it is
|
|
* encoding an HTML fragment or the whole document.
|
|
* By default, the entire document is encoded.
|
|
*
|
|
* @update 03/14/99 gpk
|
|
* @param aFlag set to true if only encoding a fragment
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::DoFragment(PRBool aFlag)
|
|
{
|
|
return NS_OK;
|
|
}
|
|
|
|
/**
|
|
* This gets called when handling illegal contents, especially
|
|
* in dealing with tables. This method creates a new context.
|
|
*
|
|
* @update 04/04/99 harishd
|
|
* @param aPosition - The position from where the new context begins.
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::BeginContext(PRInt32 aPosition)
|
|
{
|
|
return NS_OK;
|
|
}
|
|
|
|
/**
|
|
* This method terminates any new context that got created by
|
|
* BeginContext and switches back to the main context.
|
|
*
|
|
* @update 04/04/99 harishd
|
|
* @param aPosition - Validates the end of a context.
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::EndContext(PRInt32 aPosition)
|
|
{
|
|
return NS_OK;
|
|
}
|
|
|
|
/**
|
|
* Initialize the Unicode encoder with our current mCharsetOverride.
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::InitEncoders()
|
|
{
|
|
nsresult res;
|
|
|
|
// Initialize an entity encoder if we're using the string interface:
|
|
if (mString && (mFlags & nsIDocumentEncoder::OutputEncodeEntities))
|
|
res = nsComponentManager::CreateInstance(kEntityConverterCID, NULL,
|
|
NS_GET_IID(nsIEntityConverter),
|
|
getter_AddRefs(mEntityConverter));
|
|
|
|
// Initialize a charset encoder if we're using the stream interface
|
|
if (mStream)
|
|
{
|
|
nsAutoString charsetName; charsetName.AssignWithConversion(mCharsetOverride);
|
|
NS_WITH_SERVICE(nsICharsetAlias, calias, kCharsetAliasCID, &res);
|
|
if (NS_SUCCEEDED(res) && calias) {
|
|
nsAutoString temp; temp.AssignWithConversion(mCharsetOverride);
|
|
res = calias->GetPreferred(temp, charsetName);
|
|
}
|
|
if (NS_FAILED(res))
|
|
{
|
|
// failed - unknown alias , fallback to ISO-8859-1
|
|
charsetName.AssignWithConversion("ISO-8859-1");
|
|
}
|
|
|
|
res = nsComponentManager::CreateInstance(kSaveAsCharsetCID, NULL,
|
|
NS_GET_IID(nsISaveAsCharset),
|
|
getter_AddRefs(mCharsetEncoder));
|
|
if (NS_FAILED(res))
|
|
return res;
|
|
// SaveAsCharset requires a const char* in its first argument:
|
|
nsCAutoString charsetCString; charsetCString.AssignWithConversion(charsetName);
|
|
// For ISO-8859-1 only, convert to entity first (always generate entites like ).
|
|
res = mCharsetEncoder->Init(charsetCString,
|
|
charsetName.EqualsIgnoreCase("ISO-8859-1") ?
|
|
nsISaveAsCharset::attr_htmlTextDefault :
|
|
nsISaveAsCharset::attr_EntityAfterCharsetConv
|
|
+ nsISaveAsCharset::attr_FallbackDecimalNCR,
|
|
nsIEntityConverter::html40);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
/**
 * Make sure the scratch buffer mBuffer can hold at least aNewSize bytes.
 *
 * When the buffer has to grow it is replaced by a new one of
 * 2*aNewSize+1 bytes; the previous contents are NOT preserved and the
 * new buffer starts out as an empty C string.
 *
 * If the allocation yields a null pointer, mBuffer is left null and
 * mBufferSize is reset to 0 so that the recorded size never claims
 * storage that does not exist and a later call retries the allocation.
 *
 * @param aNewSize  minimum number of bytes required
 */
void nsHTMLContentSinkStream::EnsureBufferSize(PRInt32 aNewSize)
{
  if (mBufferSize >= aNewSize)
    return;   // already large enough

  delete [] mBuffer;   // deleting a null pointer is a no-op
  mBuffer = nsnull;
  mBufferSize = 0;

  const PRInt32 grownSize = 2*aNewSize+1; // make this twice as large
  mBuffer = new char[grownSize];
  if (mBuffer) {
    mBuffer[0] = 0;
    mBufferSize = grownSize;
  }
}
|
|
|
|
/**
|
|
* Writes to the buffer/stream.
|
|
* If we do both string and stream output, stream chars will override string.
|
|
*
|
|
* @param aString - the string to write.
|
|
* @return The number of characters written.
|
|
*/
|
|
PRInt32 nsHTMLContentSinkStream::Write(const nsString& aString)
|
|
{
|
|
if (mBodyOnly && !mInBody)
|
|
return 0;
|
|
|
|
int charsWritten = 0;
|
|
|
|
// For the string case, we don't want to do charset conversion,
|
|
// but we still want to encode entities.
|
|
if (mString)
|
|
{
|
|
if (!mEntityConverter && (mFlags & nsIDocumentEncoder::OutputEncodeEntities))
|
|
InitEncoders();
|
|
if (mEntityConverter && (mFlags & nsIDocumentEncoder::OutputEncodeEntities))
|
|
{
|
|
nsresult res;
|
|
PRUnichar *encodedBuffer = nsnull;
|
|
res = mEntityConverter->ConvertToEntities(aString.GetUnicode(),
|
|
nsIEntityConverter::html40Latin1,
|
|
&encodedBuffer);
|
|
if (NS_SUCCEEDED(res) && encodedBuffer)
|
|
{
|
|
PRInt32 len = nsCRT::strlen(encodedBuffer);
|
|
mString->Append(encodedBuffer, len);
|
|
nsCRT::free(encodedBuffer);
|
|
charsWritten = len;
|
|
}
|
|
else {
|
|
charsWritten = aString.Length();
|
|
mString->Append(aString);
|
|
}
|
|
}
|
|
else {
|
|
charsWritten = aString.Length();
|
|
mString->Append(aString);
|
|
}
|
|
}
|
|
|
|
if (!mStream)
|
|
return charsWritten;
|
|
|
|
// Now handle the stream case:
|
|
nsOutputStream out(mStream);
|
|
|
|
// If an encoder is being used then convert first convert the input string
|
|
char *encodedBuffer = nsnull;
|
|
nsresult res;
|
|
|
|
// Initialize the encoder if we haven't already
|
|
if (!mCharsetEncoder)
|
|
InitEncoders();
|
|
|
|
if (mCharsetEncoder)
|
|
{
|
|
// Call the converter to convert to the target charset.
|
|
// Convert() takes a char* output param even though it's writing unicode.
|
|
res = mCharsetEncoder->Convert(aString.GetUnicode(), &encodedBuffer);
|
|
if (NS_SUCCEEDED(res) && encodedBuffer)
|
|
{
|
|
charsWritten = nsCRT::strlen(encodedBuffer);
|
|
out.write(encodedBuffer, charsWritten);
|
|
nsCRT::free(encodedBuffer);
|
|
}
|
|
|
|
// If it didn't work, just write the unicode
|
|
else
|
|
{
|
|
const PRUnichar* unicode = aString.GetUnicode();
|
|
charsWritten = aString.Length();
|
|
out.write(unicode, charsWritten);
|
|
}
|
|
}
|
|
|
|
// If we couldn't get an encoder, just write the unicode
|
|
else
|
|
{
|
|
const PRUnichar* unicode = aString.GetUnicode();
|
|
charsWritten = aString.Length();
|
|
out.write(unicode, charsWritten);
|
|
}
|
|
|
|
return charsWritten;
|
|
}
|
|
|
|
void nsHTMLContentSinkStream::Write(const char* aData)
|
|
{
|
|
if (mBodyOnly && !mInBody)
|
|
return;
|
|
|
|
if (mStream)
|
|
{
|
|
nsOutputStream out(mStream);
|
|
out << aData;
|
|
}
|
|
if (mString)
|
|
{
|
|
mString->AppendWithConversion(aData);
|
|
}
|
|
}
|
|
|
|
void nsHTMLContentSinkStream::Write(char aData)
|
|
{
|
|
if (mBodyOnly && !mInBody)
|
|
return;
|
|
|
|
if (mStream)
|
|
{
|
|
nsOutputStream out(mStream);
|
|
out << aData;
|
|
}
|
|
if (mString)
|
|
{
|
|
mString->AppendWithConversion(aData);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Write the attributes of the current tag.
|
|
*
|
|
* @param aNode The parser node currently in play.
|
|
*/
|
|
void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode)
|
|
{
|
|
int theCount=aNode.GetAttributeCount();
|
|
if(theCount) {
|
|
int i=0;
|
|
for(i=0;i<theCount;i++){
|
|
nsString& key = (nsString&)aNode.GetKeyAt(i);
|
|
|
|
// See if there's an attribute:
|
|
// note that we copy here, because we're going to have to trim quotes.
|
|
nsAutoString value (aNode.GetValueAt(i));
|
|
|
|
// strip double quotes from beginning and end
|
|
value.Trim("\"", PR_TRUE, PR_TRUE);
|
|
|
|
//
|
|
// Filter out special case of <br type="_moz"> or <br _moz*>,
|
|
// used by the editor. Bug 16988. Yuck.
|
|
//
|
|
if ((eHTMLTags)aNode.GetNodeType() == eHTMLTag_br
|
|
&& ((key.EqualsWithConversion("type", PR_TRUE) && value.EqualsWithConversion("_moz"))
|
|
|| key.EqualsWithConversion("_moz", PR_TRUE, 4)))
|
|
continue;
|
|
|
|
//
|
|
// Filter out special case of _moz_dirty
|
|
//
|
|
if (key.Equals(gMozDirty))
|
|
continue;
|
|
|
|
if (mLowerCaseTags == PR_TRUE)
|
|
key.ToLowerCase();
|
|
else
|
|
key.ToUpperCase();
|
|
|
|
EnsureBufferSize(key.Length() + 1);
|
|
key.ToCString(mBuffer,mBufferSize);
|
|
|
|
// send to ouput " [KEY]="
|
|
Write(' ');
|
|
Write(mBuffer);
|
|
mColPos += 1 + strlen(mBuffer) + 1;
|
|
|
|
// Make all links absolute when converting only the selection:
|
|
if ((mFlags & nsIDocumentEncoder::OutputAbsoluteLinks)
|
|
&& (key.EqualsWithConversion("href", PR_TRUE) || key.EqualsWithConversion("src", PR_TRUE)
|
|
// Would be nice to handle OBJECT and APPLET tags,
|
|
// but that gets more complicated since we have to
|
|
// search the tag list for CODEBASE as well.
|
|
// For now, just leave them relative.
|
|
))
|
|
{
|
|
if (mURI)
|
|
{
|
|
nsAutoString absURI;
|
|
if (NS_SUCCEEDED(NS_MakeAbsoluteURI(absURI, value, mURI))
|
|
&& !absURI.IsEmpty())
|
|
value = absURI;
|
|
}
|
|
}
|
|
|
|
if (value.Length() > 0)
|
|
{
|
|
Write(char(kEqual));
|
|
mColPos += 1 + strlen(mBuffer) + 1;
|
|
|
|
// send to ouput "\"[VALUE]\""
|
|
Write('\"');
|
|
Write(value);
|
|
Write('\"');
|
|
}
|
|
|
|
mColPos += 1 + strlen(mBuffer) + 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* This method gets called by the parser when it encounters
|
|
* a title tag and wants to set the document title in the sink.
|
|
*
|
|
* @update 04/30/99 gpk
|
|
* @param nsString reference to new title value
|
|
* @return PR_TRUE if successful.
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::SetTitle(const nsString& aValue)
|
|
{
|
|
const char* tagName = GetTagName(eHTMLTag_title);
|
|
Write(kLessThan);
|
|
Write(tagName);
|
|
Write(kGreaterThan);
|
|
|
|
Write(aValue);
|
|
|
|
Write(kLessThan);
|
|
Write(kForwardSlash);
|
|
Write(tagName);
|
|
Write(kGreaterThan);
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
/**
|
|
* This method is used to open the outer HTML container.
|
|
*
|
|
* XXX OpenHTML never gets called; AddStartTag gets called on
|
|
* XXX the html tag from OpenContainer, from nsXIFDTD::StartTopOfStack,
|
|
* XXX from nsXIFDTD::HandleStartToken.
|
|
*
|
|
* @param nsIParserNode reference to parser node interface
|
|
* @return PR_TRUE if successful.
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::OpenHTML(const nsIParserNode& aNode)
|
|
{
|
|
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
|
|
if (tag == eHTMLTag_html)
|
|
{
|
|
if(!mHasOpenHtmlTag) {
|
|
AddStartTag(aNode);
|
|
mHasOpenHtmlTag=PR_TRUE;
|
|
}
|
|
else {
|
|
PRInt32 ac=aNode.GetAttributeCount();
|
|
if(ac>0) {
|
|
Write(kLessThan);
|
|
nsAutoString tagname;
|
|
tagname.AssignWithConversion(nsHTMLTags::GetStringValue(tag));
|
|
Write(tagname);
|
|
WriteAttributes(aNode);
|
|
Write(kGreaterThan);
|
|
}
|
|
}
|
|
}
|
|
return NS_OK;
|
|
}
|
|
|
|
/**
 * All these HTML-specific methods may be called, or may not,
 * depending on whether the parser is parsing XIF or HTML.
 * So we can't depend on them; instead, we have Open/CloseContainer
 * do all the specialized work, and the html-specific Open/Close
 * methods must call the more general methods.
 *
 * Since there are so many of them, make macros. Each generated method
 * forwards to AddStartTag()/AddEndTag() only when the node's type is the
 * expected tag, and always returns NS_OK.
 */

#define USE_GENERAL_OPEN_METHOD(methodname, tagtype)                          \
NS_IMETHODIMP nsHTMLContentSinkStream::methodname(const nsIParserNode& aNode) \
{                                                                             \
  const eHTMLTags nodeTag = (eHTMLTags)aNode.GetNodeType();                   \
  if (nodeTag == tagtype) {                                                   \
    AddStartTag(aNode);                                                       \
  }                                                                           \
  return NS_OK;                                                               \
}

#define USE_GENERAL_CLOSE_METHOD(methodname, tagtype)                         \
NS_IMETHODIMP nsHTMLContentSinkStream::methodname(const nsIParserNode& aNode) \
{                                                                             \
  const eHTMLTags nodeTag = (eHTMLTags)aNode.GetNodeType();                   \
  if (nodeTag == tagtype) {                                                   \
    AddEndTag(aNode);                                                         \
  }                                                                           \
  return NS_OK;                                                               \
}
|
|
|
|
// <html>: only the close method is generated; OpenHTML is hand-written.
USE_GENERAL_CLOSE_METHOD(CloseHTML, eHTMLTag_html)

// <head>
USE_GENERAL_OPEN_METHOD(OpenHead, eHTMLTag_head)
USE_GENERAL_CLOSE_METHOD(CloseHead, eHTMLTag_head)

// <body>
USE_GENERAL_OPEN_METHOD(OpenBody, eHTMLTag_body)
USE_GENERAL_CLOSE_METHOD(CloseBody, eHTMLTag_body)

// <form>
USE_GENERAL_OPEN_METHOD(OpenForm, eHTMLTag_form)
USE_GENERAL_CLOSE_METHOD(CloseForm, eHTMLTag_form)

// <map>
USE_GENERAL_OPEN_METHOD(OpenMap, eHTMLTag_map)
USE_GENERAL_CLOSE_METHOD(CloseMap, eHTMLTag_map)

// <frameset>
USE_GENERAL_OPEN_METHOD(OpenFrameset, eHTMLTag_frameset)
USE_GENERAL_CLOSE_METHOD(CloseFrameset, eHTMLTag_frameset)
|
|
|
|
/**
|
|
*
|
|
* Check whether a node has the attribute _moz_dirty.
|
|
* If it does, we'll prettyprint it, otherwise we adhere to the
|
|
* surrounding text/whitespace/newline nodes provide formatting.
|
|
*/
|
|
PRBool nsHTMLContentSinkStream::IsDirty(const nsIParserNode& aNode)
|
|
{
|
|
// Apparently there's no way to just ask for a particular attribute
|
|
// without looping over the list.
|
|
int theCount = aNode.GetAttributeCount();
|
|
if (theCount)
|
|
{
|
|
for(int i=0; i < theCount; i++)
|
|
{
|
|
nsString& key = (nsString&)aNode.GetKeyAt(i);
|
|
if (key.Equals(gMozDirty))
|
|
return PR_TRUE;
|
|
}
|
|
}
|
|
return PR_FALSE;
|
|
}
|
|
|
|
void nsHTMLContentSinkStream::AddIndent()
|
|
{
|
|
nsAutoString padding; padding.AssignWithConversion(" ");
|
|
for (PRInt32 i = mIndent; --i >= 0; )
|
|
{
|
|
Write(padding);
|
|
mColPos += 2;
|
|
}
|
|
}
|
|
|
|
void nsHTMLContentSinkStream::AddStartTag(const nsIParserNode& aNode)
|
|
{
|
|
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
|
|
PRBool isDirty = IsDirty(aNode);
|
|
|
|
const nsString& name = aNode.GetText();
|
|
nsAutoString tagName;
|
|
|
|
if (tag == eHTMLTag_body)
|
|
mInBody = PR_TRUE;
|
|
|
|
mHTMLTagStack[mHTMLStackPos] = tag;
|
|
mDirtyStack[mHTMLStackPos++] = isDirty;
|
|
tagName = name;
|
|
|
|
if (tag == eHTMLTag_markupDecl)
|
|
{
|
|
if (!(mFlags & nsIDocumentEncoder::OutputSelectionOnly))
|
|
{
|
|
Write("<!"); // mdo => Markup Declaration Open.
|
|
}
|
|
return;
|
|
}
|
|
// Quoted plaintext mail/news lives in a pre tag.
|
|
// The editor has substituted <br> tags for all the newlines in the pre,
|
|
// in order to get clickable blank lines.
|
|
// We can't emit these <br> tags formatted, or we'll get
|
|
// double-spacing (one for the br, one for the line break);
|
|
// but we can't emit them unformatted, either,
|
|
// because then long quoted passages will make html source lines
|
|
// too long for news servers (and some mail servers) to handle.
|
|
// So we map all <br> tags inside <pre> to line breaks.
|
|
// If this turns out to be a problem, we could do this only if gMozDirty.
|
|
else if (tag == eHTMLTag_br && mPreLevel > 0)
|
|
{
|
|
Write(NS_LINEBREAK);
|
|
return;
|
|
}
|
|
|
|
if (mLowerCaseTags == PR_TRUE)
|
|
tagName.ToLowerCase();
|
|
else
|
|
tagName.ToUpperCase();
|
|
|
|
#ifdef DEBUG_prettyprint
|
|
if (isDirty)
|
|
printf("AddStartTag(%s): BBO=%d, BAO=%d, BBC=%d, BAC=%d\n",
|
|
name.ToNewCString(),
|
|
BreakBeforeOpen(tag),
|
|
BreakAfterOpen(tag),
|
|
BreakBeforeClose(tag),
|
|
BreakAfterClose(tag));
|
|
#endif
|
|
|
|
if ((mDoFormat || isDirty) && mPreLevel == 0 && mColPos != 0
|
|
&& BreakBeforeOpen(tag))
|
|
{
|
|
Write(NS_LINEBREAK);
|
|
mColPos = 0;
|
|
}
|
|
if ((mDoFormat || isDirty) && mPreLevel == 0 && mColPos == 0)
|
|
AddIndent();
|
|
|
|
EnsureBufferSize(tagName.Length() + 1);
|
|
tagName.ToCString(mBuffer,mBufferSize);
|
|
|
|
Write(kLessThan);
|
|
Write(mBuffer);
|
|
|
|
mColPos += 1 + tagName.Length();
|
|
|
|
if ((mDoFormat || isDirty) && mPreLevel == 0 && tag == eHTMLTag_style)
|
|
{
|
|
Write(kGreaterThan);
|
|
Write(NS_LINEBREAK);
|
|
const nsString& data = aNode.GetSkippedContent();
|
|
PRInt32 size = data.Length();
|
|
char* buffer = new char[size+1];
|
|
if(buffer){
|
|
data.ToCString(buffer,size+1);
|
|
Write(buffer);
|
|
delete[] buffer;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
WriteAttributes(aNode);
|
|
Write(kGreaterThan);
|
|
mColPos += 1;
|
|
}
|
|
|
|
if (tag == eHTMLTag_pre)
|
|
++mPreLevel;
|
|
|
|
if (((mDoFormat || isDirty) && mPreLevel == 0 && BreakAfterOpen(tag)))
|
|
{
|
|
Write(NS_LINEBREAK);
|
|
mColPos = 0;
|
|
}
|
|
|
|
if (IndentChildren(tag))
|
|
mIndent++;
|
|
|
|
if (tag == eHTMLTag_head)
|
|
{
|
|
if(mDoHeader)
|
|
{
|
|
Write(gHeaderComment);
|
|
Write(NS_LINEBREAK);
|
|
Write(gDocTypeHeader);
|
|
Write(NS_LINEBREAK);
|
|
}
|
|
}
|
|
}
|
|
|
|
void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode)
|
|
{
|
|
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
|
|
nsAutoString tagName;
|
|
PRBool isDirty = mDirtyStack[mHTMLStackPos-1];
|
|
|
|
#ifdef DEBUG_prettyprint
|
|
if (isDirty)
|
|
printf("AddEndTag(%s): BBO=%d, BAO=%d, BBC=%d, BAC=%d\n",
|
|
aNode.GetText().ToNewCString(),
|
|
BreakBeforeOpen(tag),
|
|
BreakAfterOpen(tag),
|
|
BreakBeforeClose(tag),
|
|
BreakAfterClose(tag));
|
|
#endif
|
|
|
|
if (tag == eHTMLTag_unknown)
|
|
{
|
|
tagName.Assign(aNode.GetText());
|
|
}
|
|
else if (tag == eHTMLTag_pre)
|
|
{
|
|
--mPreLevel;
|
|
tagName.Assign(aNode.GetText());
|
|
}
|
|
else if (tag == eHTMLTag_comment)
|
|
{
|
|
tagName.AssignWithConversion("--");
|
|
}
|
|
else if (tag == eHTMLTag_markupDecl)
|
|
{
|
|
if (!(mFlags & nsIDocumentEncoder::OutputSelectionOnly))
|
|
{
|
|
Write(kGreaterThan);
|
|
Write(NS_LINEBREAK);
|
|
}
|
|
if ( mHTMLTagStack[mHTMLStackPos-1] == eHTMLTag_markupDecl)
|
|
{
|
|
mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown;
|
|
}
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
tagName.AssignWithConversion(nsHTMLTags::GetStringValue(tag));
|
|
}
|
|
if (mLowerCaseTags == PR_TRUE)
|
|
tagName.ToLowerCase();
|
|
// else
|
|
// tagName.ToUpperCase();
|
|
|
|
if (IndentChildren(tag))
|
|
mIndent--;
|
|
|
|
if ((mDoFormat || isDirty) && mPreLevel == 0 && BreakBeforeClose(tag))
|
|
{
|
|
if (mColPos != 0)
|
|
{
|
|
Write(NS_LINEBREAK);
|
|
mColPos = 0;
|
|
}
|
|
}
|
|
if ((mDoFormat || isDirty) && mPreLevel == 0 && mColPos == 0)
|
|
AddIndent();
|
|
|
|
EnsureBufferSize(tagName.Length() + 1);
|
|
tagName.ToCString(mBuffer,mBufferSize);
|
|
|
|
if (tag != eHTMLTag_comment)
|
|
{
|
|
Write(kLessThan);
|
|
Write(kForwardSlash);
|
|
mColPos += 1 + 1;
|
|
}
|
|
|
|
Write(mBuffer);
|
|
Write(kGreaterThan);
|
|
|
|
mColPos += strlen(mBuffer) + 1;
|
|
|
|
if (tag == eHTMLTag_body)
|
|
mInBody = PR_FALSE;
|
|
|
|
if (((mDoFormat || isDirty) && mPreLevel == 0 && BreakAfterClose(tag))
|
|
|| tag == eHTMLTag_body || tag == eHTMLTag_html)
|
|
{
|
|
Write(NS_LINEBREAK);
|
|
mColPos = 0;
|
|
}
|
|
mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown;
|
|
}
|
|
|
|
/**
|
|
* This gets called by the parser when you want to add
|
|
* a leaf node to the current container in the content
|
|
* model.
|
|
*/
|
|
nsresult
|
|
nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode)
|
|
{
|
|
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
|
|
eHTMLTags tag = eHTMLTag_unknown;
|
|
if (mHTMLStackPos > 0)
|
|
tag = mHTMLTagStack[mHTMLStackPos-1];
|
|
|
|
if (type == eHTMLTag_area ||
|
|
type == eHTMLTag_base ||
|
|
type == eHTMLTag_basefont ||
|
|
type == eHTMLTag_br ||
|
|
type == eHTMLTag_col ||
|
|
type == eHTMLTag_frame ||
|
|
type == eHTMLTag_hr ||
|
|
type == eHTMLTag_img ||
|
|
type == eHTMLTag_image ||
|
|
type == eHTMLTag_input ||
|
|
type == eHTMLTag_isindex ||
|
|
type == eHTMLTag_link ||
|
|
type == eHTMLTag_meta ||
|
|
type == eHTMLTag_param ||
|
|
type == eHTMLTag_sound)
|
|
{
|
|
AddStartTag(aNode);
|
|
mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown;
|
|
}
|
|
else if (type == eHTMLTag_entity)
|
|
{
|
|
Write('&');
|
|
const nsString& entity = aNode.GetText();
|
|
mColPos += Write(entity) + 1;
|
|
// Don't write the semicolon;
|
|
// rely on the DTD to include it if one is wanted.
|
|
}
|
|
else if (type == eHTMLTag_text)
|
|
{
|
|
if ((mHTMLStackPos > 0)
|
|
&& (mHTMLTagStack[mHTMLStackPos-1] == eHTMLTag_markupDecl)
|
|
&& (mFlags & nsIDocumentEncoder::OutputSelectionOnly))
|
|
return NS_OK;
|
|
|
|
const nsString& text = aNode.GetText();
|
|
if (mPreLevel > 0)
|
|
{
|
|
Write(text);
|
|
mColPos += text.Length();
|
|
}
|
|
else if (!mDoFormat)
|
|
{
|
|
if (HasLongLines(text))
|
|
{
|
|
WriteWrapped(text);
|
|
}
|
|
else
|
|
{
|
|
Write(text);
|
|
mColPos += text.Length();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
WriteWrapped(text);
|
|
}
|
|
}
|
|
else if (type == eHTMLTag_whitespace)
|
|
{
|
|
if (!mDoFormat || mPreLevel > 0)
|
|
{
|
|
const nsString& text = aNode.GetText();
|
|
Write(text);
|
|
mColPos += text.Length();
|
|
}
|
|
}
|
|
else if (type == eHTMLTag_newline)
|
|
{
|
|
if (!mDoFormat || mPreLevel > 0)
|
|
{
|
|
Write(NS_LINEBREAK);
|
|
mColPos = 0;
|
|
}
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
// See if the string has any lines longer than longLineLen:
|
|
// if so, we presume formatting is wonky (e.g. the node has been edited)
|
|
// and we'd better rewrap the whole text node.
|
|
PRBool nsHTMLContentSinkStream::HasLongLines(const nsString& text)
|
|
{
|
|
const PRUint32 longLineLen = 128;
|
|
nsString str = text;
|
|
PRUint32 start=0;
|
|
PRUint32 theLen=text.Length();
|
|
for (start = 0; start < theLen; )
|
|
{
|
|
PRInt32 eol = text.FindChar('\n', PR_FALSE, start);
|
|
if (eol < 0) eol = text.Length();
|
|
if ((PRUint32)(eol - start) > longLineLen)
|
|
return PR_TRUE;
|
|
start = eol+1;
|
|
}
|
|
return PR_FALSE;
|
|
}
|
|
|
|
void nsHTMLContentSinkStream::WriteWrapped(const nsString& text)
|
|
{
|
|
// 1. Determine the length of the input string
|
|
PRInt32 length = text.Length();
|
|
|
|
// 2. If the offset plus the length of the text is smaller
|
|
// than the max then just add it
|
|
if (mColPos + length < mMaxColumn)
|
|
{
|
|
Write(text);
|
|
mColPos += text.Length();
|
|
}
|
|
else
|
|
{
|
|
nsString str = text;
|
|
PRBool done = PR_FALSE;
|
|
PRInt32 indx = 0;
|
|
PRInt32 offset = mColPos;
|
|
|
|
while (!done)
|
|
{
|
|
// find the next break
|
|
PRInt32 start = mMaxColumn-offset;
|
|
if (start < 0)
|
|
start = 0;
|
|
|
|
indx = str.FindChar(' ', PR_FALSE, start);
|
|
|
|
// if there is no break than just add it
|
|
if (indx == kNotFound)
|
|
{
|
|
Write(str);
|
|
mColPos += str.Length();
|
|
done = PR_TRUE;
|
|
}
|
|
else
|
|
{
|
|
// make first equal to the str from the
|
|
// beginning to the index
|
|
nsString first = str;
|
|
|
|
first.Truncate(indx);
|
|
|
|
Write(first);
|
|
Write(NS_LINEBREAK);
|
|
mColPos = 0;
|
|
|
|
// cut the string from the beginning to the index
|
|
str.Cut(0,indx);
|
|
offset = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* This gets called by the parser when you want to add
|
|
* a PI node to the current container in the content
|
|
* model.
|
|
*
|
|
* @updated gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::AddProcessingInstruction(const nsIParserNode& aNode){
|
|
|
|
#ifdef VERBOSE_DEBUG
|
|
DebugDump("<",aNode.GetText(),(mNodeStackPos)*2);
|
|
#endif
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
/**
|
|
* This gets called by the parser when it encounters
|
|
* a DOCTYPE declaration in the HTML document.
|
|
*/
|
|
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::AddDocTypeDecl(const nsIParserNode& aNode, PRInt32 aMode)
|
|
{
|
|
#ifdef VERBOSE_DEBUG
|
|
DebugDump("<",aNode.GetText(),(mNodeStackPos)*2);
|
|
#endif
|
|
|
|
// Write("<!");
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
/**
|
|
* This gets called by the parser when you want to add
|
|
* a comment node to the current container in the content
|
|
* model.
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::AddComment(const nsIParserNode& aNode){
|
|
|
|
#ifdef VERBOSE_DEBUG
|
|
DebugDump("<",aNode.GetText(),(mNodeStackPos)*2);
|
|
#endif
|
|
|
|
Write(aNode.GetText());
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method is used to a general container.
|
|
* This includes: OL,UL,DIR,SPAN,TABLE,H[1..6],etc.
|
|
*
|
|
* @param nsIParserNode reference to parser node interface
|
|
* @return PR_TRUE if successful.
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::OpenContainer(const nsIParserNode& aNode)
|
|
{
|
|
// Look for XIF document_info tag. This has a type of userdefined;
|
|
// GetText() is slow, so don't call it unless we see the right node type.
|
|
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
|
|
if (tag == eHTMLTag_userdefined)
|
|
{
|
|
nsAutoString name; name.Assign(aNode.GetText());
|
|
if (name.EqualsWithConversion("document_info"))
|
|
{
|
|
PRInt32 count=aNode.GetAttributeCount();
|
|
for(PRInt32 i=0;i<count;i++)
|
|
{
|
|
const nsString& key=aNode.GetKeyAt(i);
|
|
|
|
if (key.EqualsWithConversion("charset"))
|
|
{
|
|
const nsString& value=aNode.GetValueAt(i);
|
|
if (mCharsetOverride.IsEmpty())
|
|
mCharsetOverride.AssignWithConversion(value.GetUnicode());
|
|
InitEncoders();
|
|
}
|
|
else if (key.EqualsWithConversion("uri"))
|
|
{
|
|
nsAutoString uristring; uristring.Assign(aNode.GetValueAt(i));
|
|
|
|
// strip double quotes from beginning and end
|
|
uristring.Trim("\"", PR_TRUE, PR_TRUE);
|
|
|
|
// And make it into a URI:
|
|
if (!uristring.IsEmpty())
|
|
NS_NewURI(getter_AddRefs(mURI), uristring);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
AddStartTag(aNode);
|
|
}
|
|
return NS_OK;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method is used to close a generic container.
|
|
*
|
|
* @update 04/30/99 gpk
|
|
* @param nsIParserNode reference to parser node interface
|
|
* @return PR_TRUE if successful.
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::CloseContainer(const nsIParserNode& aNode){
|
|
AddEndTag(aNode);
|
|
return NS_OK;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method gets called when the parser begins the process
|
|
* of building the content model via the content sink.
|
|
*
|
|
* @update 5/7/98 gess
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::WillBuildModel(void)
|
|
{
|
|
mTabLevel=-1;
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method gets called when the parser concludes the process
|
|
* of building the content model via the content sink.
|
|
*
|
|
* @param aQualityLevel describes how well formed the doc was.
|
|
* 0=GOOD; 1=FAIR; 2=POOR;
|
|
* @update 5/7/98 gess
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::DidBuildModel(PRInt32 aQualityLevel) {
|
|
return NS_OK;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method gets called when the parser gets i/o blocked,
|
|
* and wants to notify the sink that it may be a while before
|
|
* more data is available.
|
|
*
|
|
* @update 5/7/98 gess
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::WillInterrupt(void) {
|
|
return NS_OK;
|
|
}
|
|
|
|
|
|
/**
|
|
* This method gets called when the parser i/o gets unblocked,
|
|
* and we're about to start dumping content again to the sink.
|
|
*
|
|
* @update 5/7/98 gess
|
|
*/
|
|
NS_IMETHODIMP
|
|
nsHTMLContentSinkStream::WillResume(void) {
|
|
return NS_OK;
|
|
}
|
|
|
|
/**
 * Receives the parser driving this sink.
 * This implementation does not keep the pointer and does nothing.
 *
 * @param  aParser  the parser (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::SetParser(nsIParser* aParser)
{
  // Intentionally empty.
  return NS_OK;
}
|
|
|
|
/**
 * Receives a parser error notification.
 * This implementation ignores the error and does nothing.
 *
 * @param  aError  the reported error (unused)
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::NotifyError(const nsParserError* aError)
{
  // Intentionally empty.
  return NS_OK;
}
|
|
|
|
/////////////////////////////////////////////////////////////
|
|
//// Useful static methods
|
|
/////////////////////////////////////////////////////////////
|
|
|
|
/**
 * Tell whether aTag is one of the tags this serializer lays out inline.
 *
 * @return PR_TRUE for the tags listed below, PR_FALSE for all others
 */
static PRBool IsInline(eHTMLTags aTag)
{
  switch (aTag)
  {
    case eHTMLTag_a:
    case eHTMLTag_address:
    case eHTMLTag_big:
    case eHTMLTag_blink:
    case eHTMLTag_b:
    case eHTMLTag_br:
    case eHTMLTag_cite:
    case eHTMLTag_code:
    case eHTMLTag_dfn:
    case eHTMLTag_em:
    case eHTMLTag_font:
    case eHTMLTag_img:
    case eHTMLTag_i:
    case eHTMLTag_kbd:
    case eHTMLTag_keygen:
    case eHTMLTag_nobr:
    case eHTMLTag_samp:
    case eHTMLTag_small:
    case eHTMLTag_spacer:
    case eHTMLTag_span:
    case eHTMLTag_strike:
    case eHTMLTag_strong:
    case eHTMLTag_sub:
    case eHTMLTag_sup:
    case eHTMLTag_textarea:
    case eHTMLTag_tt:
    case eHTMLTag_u:
    case eHTMLTag_var:
    case eHTMLTag_wbr:
      return PR_TRUE;

    default:
      return PR_FALSE;
  }
}
|
|
|
|
/**
 * Every tag that IsInline() does not recognize counts as block level.
 */
static PRBool IsBlockLevel(eHTMLTags aTag)
{
  if (IsInline(aTag))
    return PR_FALSE;
  return PR_TRUE;
}
|
|
|
|
/**
|
|
* **** Pretty Printing Methods ******
|
|
*
|
|
*/
|
|
|
|
/**
|
|
* Desired line break state before the open tag.
|
|
*/
|
|
static PRBool BreakBeforeOpen(eHTMLTags aTag)
|
|
{
|
|
PRBool result = PR_FALSE;
|
|
switch (aTag)
|
|
{
|
|
case eHTMLTag_html:
|
|
result = PR_FALSE;
|
|
break;
|
|
|
|
default:
|
|
result = IsBlockLevel(aTag);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Desired line break state after the open tag.
|
|
*/
|
|
static PRBool BreakAfterOpen(eHTMLTags aTag)
|
|
{
|
|
PRBool result = PR_FALSE;
|
|
switch (aTag)
|
|
{
|
|
case eHTMLTag_html:
|
|
case eHTMLTag_body:
|
|
case eHTMLTag_ul:
|
|
case eHTMLTag_ol:
|
|
case eHTMLTag_table:
|
|
case eHTMLTag_tbody:
|
|
case eHTMLTag_style:
|
|
case eHTMLTag_br:
|
|
result = PR_TRUE;
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Desired line break state before the close tag.
|
|
*/
|
|
static PRBool BreakBeforeClose(eHTMLTags aTag)
|
|
{
|
|
PRBool result = PR_FALSE;
|
|
|
|
switch (aTag)
|
|
{
|
|
case eHTMLTag_html:
|
|
case eHTMLTag_head:
|
|
case eHTMLTag_body:
|
|
case eHTMLTag_ul:
|
|
case eHTMLTag_ol:
|
|
case eHTMLTag_table:
|
|
case eHTMLTag_tbody:
|
|
case eHTMLTag_style:
|
|
result = PR_TRUE;
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Desired line break state after the close tag.
|
|
*/
|
|
static PRBool BreakAfterClose(eHTMLTags aTag)
|
|
{
|
|
PRBool result = PR_FALSE;
|
|
|
|
switch (aTag)
|
|
{
|
|
case eHTMLTag_html:
|
|
case eHTMLTag_tr:
|
|
case eHTMLTag_th:
|
|
case eHTMLTag_td:
|
|
case eHTMLTag_pre:
|
|
result = PR_TRUE;
|
|
break;
|
|
|
|
default:
|
|
result = IsBlockLevel(aTag);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Indent/outdent when the open/close tags are encountered.
|
|
* This implies that BreakAfterOpen() and BreakBeforeClose()
|
|
* are true no matter what those methods return.
|
|
*/
|
|
static PRBool IndentChildren(eHTMLTags aTag)
|
|
{
|
|
PRBool result = PR_FALSE;
|
|
|
|
switch (aTag)
|
|
{
|
|
case eHTMLTag_table:
|
|
case eHTMLTag_ul:
|
|
case eHTMLTag_ol:
|
|
case eHTMLTag_tbody:
|
|
case eHTMLTag_form:
|
|
case eHTMLTag_frameset:
|
|
result = PR_TRUE;
|
|
break;
|
|
|
|
default:
|
|
result = PR_FALSE;
|
|
break;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
|