Mozilla/mozilla/parser/htmlparser/src/nsHTMLContentSinkStream.cpp
2000-06-09 05:17:00 +00:00

1338 lines
33 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is Netscape Communications
* Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All
* Rights Reserved.
*
* Contributor(s):
* Pierre Phaneuf <pp@ludusdesign.com>
*/
/**
* MODULE NOTES:
*
* This file declares the concrete HTMLContentSink class.
* This class is used during the parsing process as the
* primary interface between the parser and the content
* model.
*/
#include "nsHTMLContentSinkStream.h"
#include "nsIParserNode.h"
#include <ctype.h>
#include "nsString.h"
#include "nsIParser.h"
#include "nsICharsetAlias.h"
#include "nsIServiceManager.h"
#include "nsIEntityConverter.h"
#include "nsCRT.h"
#include "nsIDocumentEncoder.h" // for output flags
#include "nshtmlpars.h"
#include "nsIOutputStream.h"
#include "nsFileStream.h"
#include "nsNetUtil.h" // for NS_MakeAbsoluteURI
// Class IDs of the components instantiated in InitEncoders().
static NS_DEFINE_CID(kSaveAsCharsetCID, NS_SAVEASCHARSET_CID);
static NS_DEFINE_CID(kEntityConverterCID, NS_ENTITYCONVERTER_CID);

// Boilerplate written right after the <head> start tag when header output
// is enabled (see AddStartTag). These point at string literals, so the
// pointee is declared const.
static const char* gHeaderComment = "<!-- This page was created by the Gecko output system. -->";
static const char* gDocTypeHeader = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">";

// Presumably the width, in columns, of one indentation level.
const int gTabSize=2;

// Attribute name that marks a node as "dirty"; such nodes are
// pretty-printed and the attribute itself is never written out.
static const nsString gMozDirty = NS_ConvertToString("_moz_dirty");

// Forward declarations of the file-local formatting helpers defined at the
// bottom of this file. The return types match the definitions (PRBool).
static PRBool IsInline(eHTMLTags aTag);
static PRBool IsBlockLevel(eHTMLTags aTag);
static PRBool BreakBeforeOpen(eHTMLTags aTag);
static PRBool BreakAfterOpen(eHTMLTags aTag);
static PRBool BreakBeforeClose(eHTMLTags aTag);
static PRBool BreakAfterClose(eHTMLTags aTag);
static PRBool IndentChildren(eHTMLTags aTag);
/**
 * COM-style interface discovery for the sink.
 *
 * Answers for nsISupports, nsIContentSink, nsIHTMLContentSink and
 * nsIHTMLContentSinkStream. On success the object is AddRef'd.
 *
 * @update gess 4/8/98
 * @param aIID          id of the interface being requested
 * @param aInstancePtr  receives the interface pointer (set to 0 when the
 *                      interface is not supported)
 * @return NS_OK, NS_NOINTERFACE, or NS_ERROR_NULL_POINTER when
 *         aInstancePtr itself is null
 */
nsresult
nsHTMLContentSinkStream::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
  if (NULL == aInstancePtr) {
    return NS_ERROR_NULL_POINTER;
  }

  // nsISupports is answered through the nsIContentSink branch.
  if (aIID.Equals(NS_GET_IID(nsISupports)) ||
      aIID.Equals(NS_GET_IID(nsIContentSink))) {
    *aInstancePtr = (nsIContentSink*)(this);
    NS_ADDREF_THIS();
    return NS_OK;
  }
  if (aIID.Equals(NS_GET_IID(nsIHTMLContentSink))) {
    *aInstancePtr = (nsIHTMLContentSink*)(this);
    NS_ADDREF_THIS();
    return NS_OK;
  }
  if (aIID.Equals(NS_GET_IID(nsIHTMLContentSinkStream))) {
    *aInstancePtr = (nsIHTMLContentSinkStream*)(this);
    NS_ADDREF_THIS();
    return NS_OK;
  }

  *aInstancePtr=0;
  return NS_NOINTERFACE;
}
// Expand the NS_IMPL_ADDREF / NS_IMPL_RELEASE macros for this class
// (presumably the standard AddRef/Release bodies that pair with the
// NS_INIT_REFCNT() call in the constructor).
NS_IMPL_ADDREF(nsHTMLContentSinkStream)
NS_IMPL_RELEASE(nsHTMLContentSinkStream)
/**
 * Construct a content sink stream in its idle state.
 *
 * Only the bookkeeping members are reset here; the output targets, flags
 * and formatting options are assigned later by Initialize().
 *
 * @update gess7/7/98
 */
nsHTMLContentSinkStream::nsHTMLContentSinkStream()
{
  NS_INIT_REFCNT();

  // Scratch conversion buffer: nothing allocated yet.
  mBuffer = nsnull;
  mBufferSize = 0;
  mBufferLength = 0;

  // Open-element stacks start out empty.
  memset(mHTMLTagStack,0,sizeof(mHTMLTagStack));
  memset(mDirtyStack,0,sizeof(mDirtyStack));
  mHTMLStackPos = 0;

  // Output position / formatting state.
  mColPos = 0;
  mIndent = 0;
  mLowerCaseTags = PR_TRUE;

  // Document state.
  mInBody = PR_FALSE;
  mHasOpenHtmlTag=PR_FALSE;
  mFlags = 0;
}
/**
 * Configure the sink's output targets and formatting options.
 *
 * @param aOutStream        stream to write to (stored in mStream)
 * @param aOutString        string to append to (stored in mString)
 * @param aCharsetOverride  optional charset name; copied when non-null
 * @param aFlags            nsIDocumentEncoder output flags
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::Initialize(nsIOutputStream* aOutStream,
                                    nsString* aOutString,
                                    const nsString* aCharsetOverride,
                                    PRUint32 aFlags)
{
  // Derive the boolean options from the flag word.
  mDoFormat = PR_FALSE;
  if (aFlags & nsIDocumentEncoder::OutputFormatted)
    mDoFormat = PR_TRUE;

  mBodyOnly = PR_FALSE;
  if (aFlags & nsIDocumentEncoder::OutputBodyOnly)
    mBodyOnly = PR_TRUE;

  // The header is only emitted for a formatted, whole-document output
  // that has not asked for the doctype to be suppressed.
  mDoHeader = PR_FALSE;
  if (!mBodyOnly && mDoFormat
      && !(aFlags & nsIDocumentEncoder::OutputNoDoctype))
    mDoHeader = PR_TRUE;

  mMaxColumn = 72;
  mFlags = aFlags;

  mStream = aOutStream;
  mString = aOutString;

  if (nsnull != aCharsetOverride)
    mCharsetOverride.AssignWithConversion(aCharsetOverride->GetUnicode());

  mPreLevel = 0;
  return NS_OK;
}
/**
 * Release the scratch conversion buffer.
 *
 * mBuffer is allocated with new char[] in EnsureBufferSize(), so it must
 * be released with delete [] (not nsMemory::Free). delete [] on a null
 * pointer is a no-op, so no null check is required.
 */
nsHTMLContentSinkStream::~nsHTMLContentSinkStream()
{
  delete [] mBuffer;
}
/**
* Interface hook meant to tell the sink whether it is encoding an
* HTML fragment or the whole document.
*
* This implementation ignores aFlag and does no work.
*
* @update 03/14/99 gpk
* @param aFlag set to true if only encoding a fragment (unused here)
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::DoFragment(PRBool aFlag)
{
return NS_OK;
}
/**
* Interface hook called when handling illegal contents, especially
* in dealing with tables, to start a new context.
*
* This implementation ignores aPosition and does no work.
*
* @update 04/04/99 harishd
* @param aPosition - The position from where the new context begins (unused here).
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::BeginContext(PRInt32 aPosition)
{
return NS_OK;
}
/**
* Interface hook that terminates a context created by BeginContext
* and switches back to the main context.
*
* This implementation ignores aPosition and does no work.
*
* @update 04/04/99 harishd
* @param aPosition - Validates the end of a context (unused here).
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::EndContext(PRInt32 aPosition)
{
return NS_OK;
}
/**
 * Create the encoders needed for the configured output targets.
 *
 *  - String output with OutputEncodeEntities set: an entity converter.
 *  - Stream output: a charset encoder for mCharsetOverride, resolved
 *    through the charset alias service and falling back to ISO-8859-1
 *    when the alias lookup fails.
 *
 * @return NS_OK when nothing had to be created; otherwise the result of
 *         the last creation/initialisation step that ran. When a stream is
 *         present, a failure to create the entity converter is overwritten
 *         by the stream-encoder result.
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::InitEncoders()
{
  // Must start out defined: neither branch below may run, and the value
  // is returned to the caller.
  nsresult res = NS_OK;

  // Initialize an entity encoder if we're using the string interface:
  if (mString && (mFlags & nsIDocumentEncoder::OutputEncodeEntities))
    res = nsComponentManager::CreateInstance(kEntityConverterCID, NULL,
                                             NS_GET_IID(nsIEntityConverter),
                                             getter_AddRefs(mEntityConverter));

  // Initialize a charset encoder if we're using the stream interface
  if (mStream)
  {
    nsAutoString charsetName; charsetName.AssignWithConversion(mCharsetOverride);
    NS_WITH_SERVICE(nsICharsetAlias, calias, kCharsetAliasCID, &res);
    if (NS_SUCCEEDED(res) && calias) {
      // Separate input copy so the lookup's input and output do not alias.
      nsAutoString temp; temp.AssignWithConversion(mCharsetOverride);
      res = calias->GetPreferred(temp, charsetName);
    }
    if (NS_FAILED(res))
    {
      // failed - unknown alias , fallback to ISO-8859-1
      charsetName.AssignWithConversion("ISO-8859-1");
    }

    res = nsComponentManager::CreateInstance(kSaveAsCharsetCID, NULL,
                                             NS_GET_IID(nsISaveAsCharset),
                                             getter_AddRefs(mCharsetEncoder));
    if (NS_FAILED(res))
      return res;

    // SaveAsCharset requires a const char* in its first argument:
    nsCAutoString charsetCString; charsetCString.AssignWithConversion(charsetName);

    // For ISO-8859-1 only, convert to entity first (always generate
    // entities like &nbsp;). For every other charset, convert the charset
    // first and fall back to entities / decimal NCRs.
    res = mCharsetEncoder->Init(charsetCString,
                                charsetName.EqualsIgnoreCase("ISO-8859-1") ?
                                  nsISaveAsCharset::attr_htmlTextDefault :
                                  nsISaveAsCharset::attr_EntityAfterCharsetConv
                                  + nsISaveAsCharset::attr_FallbackDecimalNCR,
                                nsIEntityConverter::html40);
  }
  return res;
}
/**
 * Make sure the scratch buffer mBuffer can hold at least aNewSize chars.
 *
 * When the buffer has to grow, the old contents are discarded and the new
 * buffer starts out as an empty C string. If the allocation yields null,
 * mBufferSize is reset to 0 so that the recorded capacity never exceeds
 * what is actually allocated and a later call will try again.
 *
 * @param aNewSize required capacity in chars, including the terminator
 */
void nsHTMLContentSinkStream::EnsureBufferSize(PRInt32 aNewSize)
{
  if (mBufferSize >= aNewSize)
    return;

  delete [] mBuffer;                 // delete [] of null is a no-op
  mBufferSize = 2*aNewSize+1;        // make this twice as large
  mBuffer = new char[mBufferSize];
  if (mBuffer) {
    mBuffer[0] = 0;
  }
  else {
    mBufferSize = 0;
  }
}
/**
 * Write a unicode string to the configured output target(s).
 *
 * Nothing is written while in body-only mode outside of the body.
 * String output is entity-encoded when OutputEncodeEntities is set (no
 * charset conversion); stream output goes through the charset encoder.
 * If both targets are in use, the count reported is the stream's.
 *
 * @param aString - the string to write.
 * @return The number of characters written.
 */
PRInt32 nsHTMLContentSinkStream::Write(const nsString& aString)
{
  if (mBodyOnly && !mInBody)
    return 0;

  int charsWritten = 0;

  // ---- string target: entities only, no charset conversion ----
  if (mString)
  {
    const PRBool wantEntities =
      (mFlags & nsIDocumentEncoder::OutputEncodeEntities) ? PR_TRUE : PR_FALSE;
    if (wantEntities && !mEntityConverter)
      InitEncoders();

    PRBool appended = PR_FALSE;
    if (wantEntities && mEntityConverter)
    {
      PRUnichar *entityText = nsnull;
      nsresult rv = mEntityConverter->ConvertToEntities(aString.GetUnicode(),
                                                        nsIEntityConverter::html40Latin1,
                                                        &entityText);
      if (NS_SUCCEEDED(rv) && entityText)
      {
        PRInt32 entityLen = nsCRT::strlen(entityText);
        mString->Append(entityText, entityLen);
        nsCRT::free(entityText);
        charsWritten = entityLen;
        appended = PR_TRUE;
      }
    }

    // No converter, or the conversion failed: append the text untouched.
    if (!appended)
    {
      charsWritten = aString.Length();
      mString->Append(aString);
    }
  }

  if (!mStream)
    return charsWritten;

  // ---- stream target ----
  nsOutputStream out(mStream);

  // Initialize the encoder if we haven't already
  if (!mCharsetEncoder)
    InitEncoders();

  if (mCharsetEncoder)
  {
    // Convert() hands back a char* buffer in the target charset.
    char *converted = nsnull;
    nsresult rv = mCharsetEncoder->Convert(aString.GetUnicode(), &converted);
    if (NS_SUCCEEDED(rv) && converted)
    {
      charsWritten = nsCRT::strlen(converted);
      out.write(converted, charsWritten);
      nsCRT::free(converted);
      return charsWritten;
    }
  }

  // No encoder, or the conversion failed: just write the unicode.
  // NOTE(review): the count passed here is a character count while the
  // buffer holds PRUnichar units -- confirm what unit write() expects.
  const PRUnichar* rawText = aString.GetUnicode();
  charsWritten = aString.Length();
  out.write(rawText, charsWritten);
  return charsWritten;
}
void nsHTMLContentSinkStream::Write(const char* aData)
{
if (mBodyOnly && !mInBody)
return;
if (mStream)
{
nsOutputStream out(mStream);
out << aData;
}
if (mString)
{
mString->AppendWithConversion(aData);
}
}
void nsHTMLContentSinkStream::Write(char aData)
{
if (mBodyOnly && !mInBody)
return;
if (mStream)
{
nsOutputStream out(mStream);
out << aData;
}
if (mString)
{
mString->AppendWithConversion(aData);
}
}
/**
* Write the attributes of the current tag.
*
* @param aNode The parser node currently in play.
*/
void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode)
{
int theCount=aNode.GetAttributeCount();
if(theCount) {
int i=0;
for(i=0;i<theCount;i++){
nsString& key = (nsString&)aNode.GetKeyAt(i);
// See if there's an attribute:
// note that we copy here, because we're going to have to trim quotes.
nsAutoString value (aNode.GetValueAt(i));
// strip double quotes from beginning and end
value.Trim("\"", PR_TRUE, PR_TRUE);
//
// Filter out special case of <br type="_moz"> or <br _moz*>,
// used by the editor. Bug 16988. Yuck.
//
if ((eHTMLTags)aNode.GetNodeType() == eHTMLTag_br
&& ((key.EqualsWithConversion("type", PR_TRUE) && value.EqualsWithConversion("_moz"))
|| key.EqualsWithConversion("_moz", PR_TRUE, 4)))
continue;
//
// Filter out special case of _moz_dirty
//
if (key.Equals(gMozDirty))
continue;
if (mLowerCaseTags == PR_TRUE)
key.ToLowerCase();
else
key.ToUpperCase();
EnsureBufferSize(key.Length() + 1);
key.ToCString(mBuffer,mBufferSize);
// send to ouput " [KEY]="
Write(' ');
Write(mBuffer);
mColPos += 1 + strlen(mBuffer) + 1;
// Make all links absolute when converting only the selection:
if ((mFlags & nsIDocumentEncoder::OutputAbsoluteLinks)
&& (key.EqualsWithConversion("href", PR_TRUE) || key.EqualsWithConversion("src", PR_TRUE)
// Would be nice to handle OBJECT and APPLET tags,
// but that gets more complicated since we have to
// search the tag list for CODEBASE as well.
// For now, just leave them relative.
))
{
if (mURI)
{
nsAutoString absURI;
if (NS_SUCCEEDED(NS_MakeAbsoluteURI(absURI, value, mURI))
&& !absURI.IsEmpty())
value = absURI;
}
}
if (value.Length() > 0)
{
Write(char(kEqual));
mColPos += 1 + strlen(mBuffer) + 1;
// send to ouput "\"[VALUE]\""
Write('\"');
Write(value);
Write('\"');
}
mColPos += 1 + strlen(mBuffer) + 1;
}
}
}
/**
 * Called by the parser when it encounters a title tag; emits
 * `<title>`, the title text, and `</title>` to the output.
 *
 * @update 04/30/99 gpk
 * @param aValue the new title text
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::SetTitle(const nsString& aValue)
{
  const char* titleTag = GetTagName(eHTMLTag_title);

  // Opening tag.
  Write(kLessThan);
  Write(titleTag);
  Write(kGreaterThan);

  // Title text.
  Write(aValue);

  // Closing tag.
  Write(kLessThan);
  Write(kForwardSlash);
  Write(titleTag);
  Write(kGreaterThan);

  return NS_OK;
}
/**
 * Open the outer HTML container.
 *
 * The first <html> node goes through AddStartTag(). Any later <html> node
 * is written as a bare `<html ...>` tag, and only if it carries
 * attributes. Nodes of any other type are ignored.
 *
 * XXX OpenHTML never gets called; AddStartTag gets called on
 * XXX the html tag from OpenContainer, from nsXIFDTD::StartTopOfStack,
 * XXX from nsXIFDTD::HandleStartToken.
 *
 * @param aNode the parser node for the html tag
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenHTML(const nsIParserNode& aNode)
{
  const eHTMLTags nodeTag = (eHTMLTags)aNode.GetNodeType();
  if (nodeTag != eHTMLTag_html)
    return NS_OK;

  if (!mHasOpenHtmlTag) {
    AddStartTag(aNode);
    mHasOpenHtmlTag=PR_TRUE;
    return NS_OK;
  }

  // A repeated <html>: only worth emitting when it has attributes.
  const PRInt32 attrCount = aNode.GetAttributeCount();
  if (attrCount > 0) {
    nsAutoString htmlName;
    Write(kLessThan);
    htmlName.AssignWithConversion(nsHTMLTags::GetStringValue(nodeTag));
    Write(htmlName);
    WriteAttributes(aNode);
    Write(kGreaterThan);
  }
  return NS_OK;
}
/**
* All these HTML-specific methods may be called, or may not,
* depending on whether the parser is parsing XIF or HTML.
* So we can't depend on them; instead, we have Open/CloseContainer
* do all the specialized work, and the html-specific Open/Close
* methods must call the more general methods.
*
* Since there are so many of them, make macros:
*
* USE_GENERAL_OPEN_METHOD(methodname, tagtype) defines
* nsHTMLContentSinkStream::methodname so that it calls AddStartTag()
* when the node's type equals tagtype, and otherwise does nothing.
* USE_GENERAL_CLOSE_METHOD does the same with AddEndTag().
* Both generated methods always return NS_OK.
*/
#define USE_GENERAL_OPEN_METHOD(methodname, tagtype) \
NS_IMETHODIMP nsHTMLContentSinkStream::methodname(const nsIParserNode& aNode) \
{ \
if ((eHTMLTags)aNode.GetNodeType() == tagtype) \
AddStartTag(aNode); \
return NS_OK; \
}
#define USE_GENERAL_CLOSE_METHOD(methodname, tagtype) \
NS_IMETHODIMP nsHTMLContentSinkStream::methodname(const nsIParserNode& aNode) \
{ \
if ((eHTMLTags)aNode.GetNodeType() == tagtype) \
AddEndTag(aNode); \
return NS_OK; \
}
// Tag-specific open/close entry points generated from the macros above.
// OpenHTML is not generated here; it has a hand-written definition.
USE_GENERAL_CLOSE_METHOD(CloseHTML, eHTMLTag_html)
USE_GENERAL_OPEN_METHOD(OpenHead, eHTMLTag_head)
USE_GENERAL_CLOSE_METHOD(CloseHead, eHTMLTag_head)
USE_GENERAL_OPEN_METHOD(OpenBody, eHTMLTag_body)
USE_GENERAL_CLOSE_METHOD(CloseBody, eHTMLTag_body)
USE_GENERAL_OPEN_METHOD(OpenForm, eHTMLTag_form)
USE_GENERAL_CLOSE_METHOD(CloseForm, eHTMLTag_form)
USE_GENERAL_OPEN_METHOD(OpenMap, eHTMLTag_map)
USE_GENERAL_CLOSE_METHOD(CloseMap, eHTMLTag_map)
USE_GENERAL_OPEN_METHOD(OpenFrameset, eHTMLTag_frameset)
USE_GENERAL_CLOSE_METHOD(CloseFrameset, eHTMLTag_frameset)
/**
 * Report whether a node carries the `_moz_dirty` attribute.
 *
 * Dirty nodes get pretty-printed; for all others the surrounding
 * text/whitespace/newline nodes provide the formatting.
 *
 * @param aNode the node whose attribute keys are scanned
 * @return PR_TRUE if some attribute key equals `_moz_dirty`
 */
PRBool nsHTMLContentSinkStream::IsDirty(const nsIParserNode& aNode)
{
  // There is apparently no direct lookup for a named attribute, so walk
  // the whole list. A count of zero simply skips the loop.
  const int attrCount = aNode.GetAttributeCount();
  int idx = 0;
  while (idx < attrCount)
  {
    const nsString& attrName = aNode.GetKeyAt(idx);
    if (attrName.Equals(gMozDirty))
      return PR_TRUE;
    ++idx;
  }
  return PR_FALSE;
}
void nsHTMLContentSinkStream::AddIndent()
{
nsAutoString padding; padding.AssignWithConversion(" ");
for (PRInt32 i = mIndent; --i >= 0; )
{
Write(padding);
mColPos += 2;
}
}
/**
* Emit the start tag for aNode and update the formatting state.
*
* The node's tag and dirty flag are pushed on the parallel stacks
* mHTMLTagStack / mDirtyStack before anything is written. Line breaks
* and indentation are added only when formatting is on (mDoFormat) or
* the node is dirty, and never inside <pre>.
*
* NOTE(review): the push below is not bounds-checked against the size
* of the stack arrays -- confirm that nesting depth is limited elsewhere.
*
* @param aNode the parser node whose start tag is written
*/
void nsHTMLContentSinkStream::AddStartTag(const nsIParserNode& aNode)
{
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
PRBool isDirty = IsDirty(aNode);
const nsString& name = aNode.GetText();
nsAutoString tagName;
// Seeing <body> enables output in body-only mode (see Write()).
if (tag == eHTMLTag_body)
mInBody = PR_TRUE;
// Push onto both stacks; the early returns below leave the entry pushed.
mHTMLTagStack[mHTMLStackPos] = tag;
mDirtyStack[mHTMLStackPos++] = isDirty;
tagName = name;
if (tag == eHTMLTag_markupDecl)
{
if (!(mFlags & nsIDocumentEncoder::OutputSelectionOnly))
{
Write("<!"); // mdo => Markup Declaration Open.
}
return;
}
// Quoted plaintext mail/news lives in a pre tag.
// The editor has substituted <br> tags for all the newlines in the pre,
// in order to get clickable blank lines.
// We can't emit these <br> tags formatted, or we'll get
// double-spacing (one for the br, one for the line break);
// but we can't emit them unformatted, either,
// because then long quoted passages will make html source lines
// too long for news servers (and some mail servers) to handle.
// So we map all <br> tags inside <pre> to line breaks.
// If this turns out to be a problem, we could do this only if gMozDirty.
else if (tag == eHTMLTag_br && mPreLevel > 0)
{
Write(NS_LINEBREAK);
return;
}
// Case-fold the tag name according to mLowerCaseTags.
if (mLowerCaseTags == PR_TRUE)
tagName.ToLowerCase();
else
tagName.ToUpperCase();
#ifdef DEBUG_prettyprint
if (isDirty)
printf("AddStartTag(%s): BBO=%d, BAO=%d, BBC=%d, BAC=%d\n",
name.ToNewCString(),
BreakBeforeOpen(tag),
BreakAfterOpen(tag),
BreakBeforeClose(tag),
BreakAfterClose(tag));
#endif
// Start a fresh line first if this tag wants a break before it
// and we are not already at column 0.
if ((mDoFormat || isDirty) && mPreLevel == 0 && mColPos != 0
&& BreakBeforeOpen(tag))
{
Write(NS_LINEBREAK);
mColPos = 0;
}
// Indent only at the start of a line.
if ((mDoFormat || isDirty) && mPreLevel == 0 && mColPos == 0)
AddIndent();
// Write "<name" through the narrow-char scratch buffer.
EnsureBufferSize(tagName.Length() + 1);
tagName.ToCString(mBuffer,mBufferSize);
Write(kLessThan);
Write(mBuffer);
mColPos += 1 + tagName.Length();
// Formatted <style>: close the tag, break the line, then write the
// node's skipped content. Attributes are not written on this path and
// mColPos is not updated for what is written here.
if ((mDoFormat || isDirty) && mPreLevel == 0 && tag == eHTMLTag_style)
{
Write(kGreaterThan);
Write(NS_LINEBREAK);
const nsString& data = aNode.GetSkippedContent();
PRInt32 size = data.Length();
char* buffer = new char[size+1];
if(buffer){
data.ToCString(buffer,size+1);
Write(buffer);
delete[] buffer;
}
}
else
{
WriteAttributes(aNode);
Write(kGreaterThan);
mColPos += 1;
}
// Entering <pre> raises mPreLevel before the break-after check below,
// so that check is already suppressed for the <pre> tag itself.
if (tag == eHTMLTag_pre)
++mPreLevel;
if (((mDoFormat || isDirty) && mPreLevel == 0 && BreakAfterOpen(tag)))
{
Write(NS_LINEBREAK);
mColPos = 0;
}
if (IndentChildren(tag))
mIndent++;
// With header output enabled, the header comment and doctype are written
// immediately after the <head> start tag.
if (tag == eHTMLTag_head)
{
if(mDoHeader)
{
Write(gHeaderComment);
Write(NS_LINEBREAK);
Write(gDocTypeHeader);
Write(NS_LINEBREAK);
}
}
}
void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode)
{
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
nsAutoString tagName;
PRBool isDirty = mDirtyStack[mHTMLStackPos-1];
#ifdef DEBUG_prettyprint
if (isDirty)
printf("AddEndTag(%s): BBO=%d, BAO=%d, BBC=%d, BAC=%d\n",
aNode.GetText().ToNewCString(),
BreakBeforeOpen(tag),
BreakAfterOpen(tag),
BreakBeforeClose(tag),
BreakAfterClose(tag));
#endif
if (tag == eHTMLTag_unknown)
{
tagName.Assign(aNode.GetText());
}
else if (tag == eHTMLTag_pre)
{
--mPreLevel;
tagName.Assign(aNode.GetText());
}
else if (tag == eHTMLTag_comment)
{
tagName.AssignWithConversion("--");
}
else if (tag == eHTMLTag_markupDecl)
{
if (!(mFlags & nsIDocumentEncoder::OutputSelectionOnly))
{
Write(kGreaterThan);
Write(NS_LINEBREAK);
}
if ( mHTMLTagStack[mHTMLStackPos-1] == eHTMLTag_markupDecl)
{
mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown;
}
return;
}
else
{
tagName.AssignWithConversion(nsHTMLTags::GetStringValue(tag));
}
if (mLowerCaseTags == PR_TRUE)
tagName.ToLowerCase();
// else
// tagName.ToUpperCase();
if (IndentChildren(tag))
mIndent--;
if ((mDoFormat || isDirty) && mPreLevel == 0 && BreakBeforeClose(tag))
{
if (mColPos != 0)
{
Write(NS_LINEBREAK);
mColPos = 0;
}
}
if ((mDoFormat || isDirty) && mPreLevel == 0 && mColPos == 0)
AddIndent();
EnsureBufferSize(tagName.Length() + 1);
tagName.ToCString(mBuffer,mBufferSize);
if (tag != eHTMLTag_comment)
{
Write(kLessThan);
Write(kForwardSlash);
mColPos += 1 + 1;
}
Write(mBuffer);
Write(kGreaterThan);
mColPos += strlen(mBuffer) + 1;
if (tag == eHTMLTag_body)
mInBody = PR_FALSE;
if (((mDoFormat || isDirty) && mPreLevel == 0 && BreakAfterClose(tag))
|| tag == eHTMLTag_body || tag == eHTMLTag_html)
{
Write(NS_LINEBREAK);
mColPos = 0;
}
mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown;
}
/**
 * Called by the parser to add a leaf node to the current container.
 *
 * Handles empty elements (written as a start tag and immediately popped
 * from the tag stack), entities, text, whitespace and newlines. Any other
 * node type is ignored.
 *
 * @param aNode the leaf node
 * @return NS_OK always
 */
nsresult
nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode)
{
  const eHTMLTags leafType = (eHTMLTags)aNode.GetNodeType();

  switch (leafType)
  {
    // Elements without an end tag.
    case eHTMLTag_area:
    case eHTMLTag_base:
    case eHTMLTag_basefont:
    case eHTMLTag_br:
    case eHTMLTag_col:
    case eHTMLTag_frame:
    case eHTMLTag_hr:
    case eHTMLTag_img:
    case eHTMLTag_image:
    case eHTMLTag_input:
    case eHTMLTag_isindex:
    case eHTMLTag_link:
    case eHTMLTag_meta:
    case eHTMLTag_param:
    case eHTMLTag_sound:
    {
      // AddStartTag pushes the tag; there is no matching end tag, so undo
      // the push right away.
      AddStartTag(aNode);
      mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown;
      break;
    }

    case eHTMLTag_entity:
    {
      Write('&');
      const nsString& entityName = aNode.GetText();
      mColPos += Write(entityName) + 1;
      // Don't write the semicolon;
      // rely on the DTD to include it if one is wanted.
      break;
    }

    case eHTMLTag_text:
    {
      // Text inside a markup declaration is dropped for selection-only output.
      if ((mHTMLStackPos > 0)
          && (mHTMLTagStack[mHTMLStackPos-1] == eHTMLTag_markupDecl)
          && (mFlags & nsIDocumentEncoder::OutputSelectionOnly))
        return NS_OK;

      const nsString& content = aNode.GetText();
      // Written verbatim inside <pre>, or when unformatted and no line is
      // overly long; wrapped in every other case.
      const PRBool verbatim =
        (mPreLevel > 0) || (!mDoFormat && !HasLongLines(content));
      if (verbatim)
      {
        Write(content);
        mColPos += content.Length();
      }
      else
      {
        WriteWrapped(content);
      }
      break;
    }

    case eHTMLTag_whitespace:
    {
      // The formatter supplies its own whitespace, except inside <pre>.
      if (!mDoFormat || mPreLevel > 0)
      {
        const nsString& spaces = aNode.GetText();
        Write(spaces);
        mColPos += spaces.Length();
      }
      break;
    }

    case eHTMLTag_newline:
    {
      if (!mDoFormat || mPreLevel > 0)
      {
        Write(NS_LINEBREAK);
        mColPos = 0;
      }
      break;
    }

    default:
      break;
  }
  return NS_OK;
}
/**
 * See if the string has any line longer than 128 characters.
 * If so, we presume formatting is wonky (e.g. the node has been edited)
 * and we'd better rewrap the whole text node.
 *
 * The text is scanned in place; no copy is made.
 *
 * @param text the text to inspect; lines are separated by '\n'
 * @return PR_TRUE if some line exceeds the limit, PR_FALSE otherwise
 */
PRBool nsHTMLContentSinkStream::HasLongLines(const nsString& text)
{
  const PRUint32 longLineLen = 128;
  const PRUint32 theLen = text.Length();

  PRUint32 start = 0;
  while (start < theLen)
  {
    // End of the current line: the next newline, or the end of the text.
    PRInt32 eol = text.FindChar('\n', PR_FALSE, start);
    if (eol < 0)
      eol = (PRInt32)theLen;
    if ((PRUint32)(eol - start) > longLineLen)
      return PR_TRUE;
    start = eol+1;
  }
  return PR_FALSE;
}
void nsHTMLContentSinkStream::WriteWrapped(const nsString& text)
{
// 1. Determine the length of the input string
PRInt32 length = text.Length();
// 2. If the offset plus the length of the text is smaller
// than the max then just add it
if (mColPos + length < mMaxColumn)
{
Write(text);
mColPos += text.Length();
}
else
{
nsString str = text;
PRBool done = PR_FALSE;
PRInt32 indx = 0;
PRInt32 offset = mColPos;
while (!done)
{
// find the next break
PRInt32 start = mMaxColumn-offset;
if (start < 0)
start = 0;
indx = str.FindChar(' ', PR_FALSE, start);
// if there is no break than just add it
if (indx == kNotFound)
{
Write(str);
mColPos += str.Length();
done = PR_TRUE;
}
else
{
// make first equal to the str from the
// beginning to the index
nsString first = str;
first.Truncate(indx);
Write(first);
Write(NS_LINEBREAK);
mColPos = 0;
// cut the string from the beginning to the index
str.Cut(0,indx);
offset = 0;
}
}
}
}
/**
* This gets called by the parser when you want to add
* a PI node to the current container in the content
* model.
*
* This implementation writes nothing to the output; the node is only
* dumped when VERBOSE_DEBUG is defined.
*
* @updated gess 3/25/98
* @param aNode the processing-instruction node (unused outside debug)
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::AddProcessingInstruction(const nsIParserNode& aNode){
#ifdef VERBOSE_DEBUG
DebugDump("<",aNode.GetText(),(mNodeStackPos)*2);
#endif
return NS_OK;
}
/**
* This gets called by the parser when it encounters
* a DOCTYPE declaration in the HTML document.
*
* This implementation writes nothing to the output (the Write call
* below is commented out); the node is only dumped when VERBOSE_DEBUG
* is defined.
*
* @param aNode the DOCTYPE node (unused outside debug)
* @param aMode unused here
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::AddDocTypeDecl(const nsIParserNode& aNode, PRInt32 aMode)
{
#ifdef VERBOSE_DEBUG
DebugDump("<",aNode.GetText(),(mNodeStackPos)*2);
#endif
// Write("<!");
return NS_OK;
}
/**
* This gets called by the parser when you want to add
* a comment node to the current container in the content
* model.
*
* The node's text is written as is; mColPos is not updated.
*
* @param aNode the comment node
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::AddComment(const nsIParserNode& aNode){
#ifdef VERBOSE_DEBUG
DebugDump("<",aNode.GetText(),(mNodeStackPos)*2);
#endif
Write(aNode.GetText());
return NS_OK;
}
/**
 * Open a general container.
 * This includes: OL,UL,DIR,SPAN,TABLE,H[1..6],etc.
 *
 * User-defined nodes are not written. The XIF `document_info` node is
 * read for its `charset` attribute (used as the charset when no override
 * was supplied, after which the encoders are re-initialised) and its
 * `uri` attribute (turned into the base URI for absolute links).
 * Surrounding double quotes are stripped from both values.
 *
 * @param aNode the parser node for the container
 * @return NS_OK always
 */
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenContainer(const nsIParserNode& aNode)
{
  // Look for XIF document_info tag. This has a type of userdefined;
  // GetText() is slow, so don't call it unless we see the right node type.
  eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
  if (tag != eHTMLTag_userdefined)
  {
    AddStartTag(aNode);
    return NS_OK;
  }

  nsAutoString name; name.Assign(aNode.GetText());
  if (!name.EqualsWithConversion("document_info"))
    return NS_OK;

  const PRInt32 count = aNode.GetAttributeCount();
  for (PRInt32 i = 0; i < count; i++)
  {
    const nsString& key = aNode.GetKeyAt(i);
    if (key.EqualsWithConversion("charset"))
    {
      // Strip double quotes from beginning and end, as is done for the
      // uri below, so a quoted value still names a real charset.
      nsAutoString charset; charset.Assign(aNode.GetValueAt(i));
      charset.Trim("\"", PR_TRUE, PR_TRUE);
      // An explicit override from Initialize() wins over the document.
      if (mCharsetOverride.IsEmpty())
        mCharsetOverride.AssignWithConversion(charset.GetUnicode());
      InitEncoders();
    }
    else if (key.EqualsWithConversion("uri"))
    {
      nsAutoString uristring; uristring.Assign(aNode.GetValueAt(i));
      // strip double quotes from beginning and end
      uristring.Trim("\"", PR_TRUE, PR_TRUE);
      // And make it into a URI:
      if (!uristring.IsEmpty())
        NS_NewURI(getter_AddRefs(mURI), uristring);
    }
  }
  return NS_OK;
}
/**
* This method is used to close a generic container.
* It forwards every node to AddEndTag().
*
* NOTE(review): OpenContainer does not call AddStartTag for
* user-defined nodes, but this method calls AddEndTag for every node --
* confirm that the parser never closes such nodes through here.
*
* @update 04/30/99 gpk
* @param aNode the parser node for the container being closed
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::CloseContainer(const nsIParserNode& aNode){
AddEndTag(aNode);
return NS_OK;
}
/**
* This method gets called when the parser begins the process
* of building the content model via the content sink.
* It resets mTabLevel to -1 and does nothing else.
*
* @update 5/7/98 gess
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::WillBuildModel(void)
{
mTabLevel=-1;
return NS_OK;
}
/**
* This method gets called when the parser concludes the process
* of building the content model via the content sink.
* This implementation does no work.
*
* @param aQualityLevel describes how well formed the doc was.
* 0=GOOD; 1=FAIR; 2=POOR; (unused here)
* @update 5/7/98 gess
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::DidBuildModel(PRInt32 aQualityLevel) {
return NS_OK;
}
/**
* This method gets called when the parser gets i/o blocked,
* and wants to notify the sink that it may be a while before
* more data is available.
* This implementation does no work.
*
* @update 5/7/98 gess
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::WillInterrupt(void) {
return NS_OK;
}
/**
* This method gets called when the parser i/o gets unblocked,
* and we're about to start dumping content again to the sink.
* This implementation does no work.
*
* @update 5/7/98 gess
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::WillResume(void) {
return NS_OK;
}
/**
* Interface hook for handing the sink its parser.
* This implementation ignores aParser and keeps no reference to it.
*
* @param aParser the parser driving this sink (unused here)
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::SetParser(nsIParser* aParser) {
return NS_OK;
}
/**
* Interface hook for reporting a parser error to the sink.
* This implementation ignores the error and writes nothing.
*
* @param aError the error being reported (unused here)
* @return NS_OK always
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::NotifyError(const nsParserError* aError)
{
return NS_OK;
}
/////////////////////////////////////////////////////////////
//// Useful static methods
/////////////////////////////////////////////////////////////
/**
 * Classify a tag as inline for pretty-printing purposes.
 *
 * @param aTag the tag to classify
 * @return PR_TRUE for the tags listed below, PR_FALSE for all others
 */
static PRBool IsInline(eHTMLTags aTag)
{
  switch (aTag)
  {
    case eHTMLTag_a:
    case eHTMLTag_address:
    case eHTMLTag_big:
    case eHTMLTag_blink:
    case eHTMLTag_b:
    case eHTMLTag_br:
    case eHTMLTag_cite:
    case eHTMLTag_code:
    case eHTMLTag_dfn:
    case eHTMLTag_em:
    case eHTMLTag_font:
    case eHTMLTag_img:
    case eHTMLTag_i:
    case eHTMLTag_kbd:
    case eHTMLTag_keygen:
    case eHTMLTag_nobr:
    case eHTMLTag_samp:
    case eHTMLTag_small:
    case eHTMLTag_spacer:
    case eHTMLTag_span:
    case eHTMLTag_strike:
    case eHTMLTag_strong:
    case eHTMLTag_sub:
    case eHTMLTag_sup:
    case eHTMLTag_textarea:
    case eHTMLTag_tt:
    case eHTMLTag_u:
    case eHTMLTag_var:
    case eHTMLTag_wbr:
      return PR_TRUE;

    default:
      break;
  }
  return PR_FALSE;
}
/**
 * A tag is block level exactly when IsInline() does not list it.
 *
 * @param aTag the tag to classify
 * @return PR_TRUE if the tag is not inline
 */
static PRBool IsBlockLevel(eHTMLTags aTag)
{
  if (IsInline(aTag))
    return PR_FALSE;
  return PR_TRUE;
}
/**
* **** Pretty Printing Methods ******
*
*/
/**
 * Desired line break state before the open tag:
 * never before <html>, otherwise before every block-level tag.
 */
static PRBool BreakBeforeOpen(eHTMLTags aTag)
{
  if (aTag == eHTMLTag_html)
    return PR_FALSE;
  return IsBlockLevel(aTag);
}
/**
 * Desired line break state after the open tag:
 * true only for the container tags listed below and for <br>.
 */
static PRBool BreakAfterOpen(eHTMLTags aTag)
{
  switch (aTag)
  {
    case eHTMLTag_html:
    case eHTMLTag_body:
    case eHTMLTag_ul:
    case eHTMLTag_ol:
    case eHTMLTag_table:
    case eHTMLTag_tbody:
    case eHTMLTag_style:
    case eHTMLTag_br:
      return PR_TRUE;

    default:
      break;
  }
  return PR_FALSE;
}
/**
 * Desired line break state before the close tag:
 * true only for the container tags listed below.
 */
static PRBool BreakBeforeClose(eHTMLTags aTag)
{
  switch (aTag)
  {
    case eHTMLTag_html:
    case eHTMLTag_head:
    case eHTMLTag_body:
    case eHTMLTag_ul:
    case eHTMLTag_ol:
    case eHTMLTag_table:
    case eHTMLTag_tbody:
    case eHTMLTag_style:
      return PR_TRUE;

    default:
      break;
  }
  return PR_FALSE;
}
/**
 * Desired line break state after the close tag:
 * always for the tags listed explicitly, otherwise for every
 * block-level tag.
 */
static PRBool BreakAfterClose(eHTMLTags aTag)
{
  switch (aTag)
  {
    case eHTMLTag_html:
    case eHTMLTag_tr:
    case eHTMLTag_th:
    case eHTMLTag_td:
    case eHTMLTag_pre:
      return PR_TRUE;

    default:
      break;
  }
  return IsBlockLevel(aTag);
}
/**
 * Indent/outdent when the open/close tags are encountered.
 * This implies that BreakAfterOpen() and BreakBeforeClose()
 * are true no matter what those methods return.
 *
 * @param aTag the tag being opened or closed
 * @return PR_TRUE if the tag's children are indented one level deeper
 */
static PRBool IndentChildren(eHTMLTags aTag)
{
  switch (aTag)
  {
    case eHTMLTag_table:
    case eHTMLTag_ul:
    case eHTMLTag_ol:
    case eHTMLTag_tbody:
    case eHTMLTag_form:
    case eHTMLTag_frameset:
      return PR_TRUE;

    default:
      return PR_FALSE;
  }
}