Mozilla/mozilla/parser/htmlparser/src/nsHTMLContentSinkStream.cpp
akkana%netscape.com d444fffb40 Backing out change to write the override charset in a meta tag.
The charset menu should be changing the meta tag in the document,
which the output sink will pick up automatically without needing
any special code.  See bugs 12085 (on seeing two meta charset tags)
and 7849 (that the charset menu should change the actual meta tag
in the document).


git-svn-id: svn://10.0.0.236/trunk@46850 18797224-902f-48f8-a5cc-f745e15eee43
1999-09-10 22:32:32 +00:00

1576 lines
38 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is Netscape Communications
* Corporation. Portions created by Netscape are Copyright (C) 1998
* Netscape Communications Corporation. All Rights Reserved.
*/
/**
* MODULE NOTES:
* @update gess 4/1/98
*
* This file declares the concrete HTMLContentSink class.
* This class is used during the parsing process as the
* primary interface between the parser and the content
* model.
*/
#include "nsHTMLContentSinkStream.h"
#include "nsHTMLTokens.h"
#include <ctype.h>
#include "nsString.h"
#include "nsIParser.h"
#include "nsHTMLEntities.h"
#include "nsCRT.h"
#include "nsIDocumentEncoder.h" // for output flags
#include "nsIUnicodeEncoder.h"
#include "nsICharsetAlias.h"
#include "nsIServiceManager.h"
#include "nsICharsetConverterManager.h"
#include "nsIOutputStream.h"
#include "nsFileStream.h"
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
static char* gHeaderComment = "<!-- This page was created by the Gecko output system. -->";
static char* gDocTypeHeader = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">";
const int gTabSize=2;
/** PRETTY PRINTING PROTOTYPES **/
class nsTagFormat
{
public:
void Init(PRBool aBefore, PRBool aStart, PRBool aEnd, PRBool aAfter);
void SetIndentGroup(PRUint8 aGroup);
void SetFormat(PRBool aOnOff);
public:
PRBool mBreakBefore;
PRBool mBreakStart;
PRBool mBreakEnd;
PRBool mBreakAfter;
PRUint8 mIndentGroup; // zero for none
PRBool mFormat; // format (on|off)
};
void nsTagFormat::Init(PRBool aBefore, PRBool aStart, PRBool aEnd, PRBool aAfter)
{
mBreakBefore = aBefore;
mBreakStart = aStart;
mBreakEnd = aEnd;
mBreakAfter = aAfter;
mFormat = PR_TRUE;
}
void nsTagFormat::SetIndentGroup(PRUint8 aGroup)
{
mIndentGroup = aGroup;
}
void nsTagFormat::SetFormat(PRBool aOnOff)
{
mFormat = aOnOff;
}
class nsPrettyPrinter
{
public:
void Init(PRBool aIndentEnable = PR_TRUE, PRUint8 aColSize = 2, PRUint8 aTabSize = 8, PRBool aUseTabs = PR_FALSE );
PRBool mIndentEnable;
PRUint8 mIndentColSize;
PRUint8 mIndentTabSize;
PRBool mIndentUseTabs;
PRBool mAutowrapEnable;
PRUint32 mAutoWrapColWidth;
nsTagFormat mTagFormat[NS_HTML_TAG_MAX+1];
};
void nsPrettyPrinter::Init(PRBool aIndentEnable, PRUint8 aColSize, PRUint8 aTabSize, PRBool aUseTabs)
{
mIndentEnable = aIndentEnable;
mIndentColSize = aColSize;
mIndentTabSize = aTabSize;
mIndentUseTabs = aUseTabs;
mAutowrapEnable = PR_TRUE;
mAutoWrapColWidth = 72;
for (PRUint32 i = 0; i < NS_HTML_TAG_MAX; i++)
mTagFormat[i].Init(PR_FALSE,PR_FALSE,PR_FALSE,PR_FALSE);
mTagFormat[eHTMLTag_a].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_abbr].Init(PR_FALSE,PR_FALSE,PR_FALSE,PR_FALSE);
mTagFormat[eHTMLTag_applet].Init(PR_FALSE,PR_TRUE,PR_TRUE,PR_FALSE);
mTagFormat[eHTMLTag_area].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_b].Init(PR_FALSE,PR_FALSE,PR_FALSE,PR_FALSE);
mTagFormat[eHTMLTag_base].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_blockquote].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_body].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
mTagFormat[eHTMLTag_br].Init(PR_FALSE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_caption].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_center].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
mTagFormat[eHTMLTag_dd].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_dir].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_div].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
mTagFormat[eHTMLTag_dl].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_dt].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_embed].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_form].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
mTagFormat[eHTMLTag_frame].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_frameset].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_h1].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_h2].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_h3].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_h4].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_h5].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_h6].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_head].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
mTagFormat[eHTMLTag_hr].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_html].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
mTagFormat[eHTMLTag_ilayer].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
mTagFormat[eHTMLTag_input].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_isindex].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_layer].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
mTagFormat[eHTMLTag_li].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_link].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_map].Init(PR_FALSE,PR_TRUE,PR_TRUE,PR_FALSE);
mTagFormat[eHTMLTag_menu].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_meta].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_object].Init(PR_FALSE,PR_TRUE,PR_TRUE,PR_FALSE);
mTagFormat[eHTMLTag_ol].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
mTagFormat[eHTMLTag_option].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_p].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_param].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_pre].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_script].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_select].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
mTagFormat[eHTMLTag_style].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_table].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
mTagFormat[eHTMLTag_td].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_textarea].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_th].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_title].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_tr].Init(PR_TRUE,PR_FALSE,PR_FALSE,PR_TRUE);
mTagFormat[eHTMLTag_ul].Init(PR_TRUE,PR_TRUE,PR_TRUE,PR_TRUE);
}
static PRBool IsInline(eHTMLTags aTag);
static PRBool IsBlockLevel(eHTMLTags aTag);
static PRInt32 BreakBeforeOpen(eHTMLTags aTag);
static PRInt32 BreakAfterOpen(eHTMLTags aTag);
static PRInt32 BreakBeforeClose(eHTMLTags aTag);
static PRInt32 BreakAfterClose(eHTMLTags aTag);
static PRBool IndentChildren(eHTMLTags aTag);
static PRBool PreformattedChildren(eHTMLTags aTag);
static PRBool PermitWSBeforeOpen(eHTMLTags aTag);
#ifdef OBSOLETE
static PRBool EatOpen(eHTMLTags aTag);
static PRBool EatClose(eHTMLTags aTag);
static PRBool PermitWSAfterOpen(eHTMLTags aTag);
static PRBool PermitWSBeforeClose(eHTMLTags aTag);
static PRBool PermitWSAfterClose(eHTMLTags aTag);
static PRBool IgnoreWS(eHTMLTags aTag);
#endif // OBSOLETE
/**
* This method gets called as part of our COM-like interfaces.
* Its purpose is to create an interface to parser object
* of some type.
*
* @update gess 4/8/98
* @param nsIID id of object to discover
* @param aInstancePtr ptr to newly discovered interface
* @return NS_xxx result code
*/
nsresult
nsHTMLContentSinkStream::QueryInterface(const nsIID& aIID, void** aInstancePtr)
{
if (NULL == aInstancePtr) {
return NS_ERROR_NULL_POINTER;
}
if(aIID.Equals(kISupportsIID)) {
*aInstancePtr = (nsIContentSink*)(this);
}
else if(aIID.Equals(kIContentSinkIID)) {
*aInstancePtr = (nsIContentSink*)(this);
}
else if(aIID.Equals(kIHTMLContentSinkIID)) {
*aInstancePtr = (nsIHTMLContentSink*)(this);
}
else {
*aInstancePtr=0;
return NS_NOINTERFACE;
}
NS_ADDREF_THIS();
return NS_OK;
}
NS_IMPL_ADDREF(nsHTMLContentSinkStream)
NS_IMPL_RELEASE(nsHTMLContentSinkStream)
/**
* Create an new sink
*
* @update gpk 05/01/99
* @return NS_xxx error result
*/
NS_HTMLPARS nsresult
NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult,
nsIOutputStream* aOutStream,
const nsString* aCharsetOverride,
PRUint32 aFlags)
{
nsHTMLContentSinkStream* it = new nsHTMLContentSinkStream(aOutStream,
nsnull,
aCharsetOverride,
aFlags);
if (nsnull == it) {
return NS_ERROR_OUT_OF_MEMORY;
}
return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
}
/**
* Create an new sink
*
* @update gpk 05/01/99
* @return NS_xxx error result
*/
NS_HTMLPARS nsresult
NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult,
nsString* aOutString,
PRUint32 aFlags)
{
nsHTMLContentSinkStream* it = new nsHTMLContentSinkStream(nsnull,
aOutString,
nsnull,
aFlags);
if (nsnull == it) {
return NS_ERROR_OUT_OF_MEMORY;
}
return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
}
/**
* Inits the encoder instance variable for the sink based on the charset
*
* @update gpk 4/21/99
* @param aCharset
* @return NS_xxx error result
*/
nsresult nsHTMLContentSinkStream::InitEncoder(const nsString& aCharset)
{
nsresult res = NS_OK;
nsICharsetAlias* calias = nsnull;
res = nsServiceManager::GetService(kCharsetAliasCID,
kICharsetAliasIID,
(nsISupports**)&calias);
NS_ASSERTION( nsnull != calias, "cannot find charet alias");
nsAutoString charsetName = aCharset;
if( NS_SUCCEEDED(res) && (nsnull != calias))
{
res = calias->GetPreferred(aCharset, charsetName);
nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
if(NS_FAILED(res))
{
// failed - unknown alias , fallback to ISO-8859-1
charsetName = "ISO-8859-1";
}
nsICharsetConverterManager * ccm = nsnull;
res = nsServiceManager::GetService(kCharsetConverterManagerCID,
nsCOMTypeInfo<nsICharsetConverterManager>::GetIID(),
(nsISupports**)&ccm);
if(NS_SUCCEEDED(res) && (nsnull != ccm))
{
nsIUnicodeEncoder * encoder = nsnull;
res = ccm->GetUnicodeEncoder(&charsetName, &encoder);
if(NS_SUCCEEDED(res) && (nsnull != encoder))
{
NS_IF_RELEASE(mUnicodeEncoder);
mUnicodeEncoder = encoder;
}
nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
}
}
return res;
}
/**
* Construct a content sink stream.
* @update gess7/7/98
* @param
* @return
*/
nsHTMLContentSinkStream::nsHTMLContentSinkStream(nsIOutputStream* aOutStream,
nsString* aOutString,
const nsString* aCharsetOverride,
PRUint32 aFlags)
{
NS_INIT_REFCNT();
mLowerCaseTags = PR_TRUE;
memset(mHTMLTagStack,0,sizeof(mHTMLTagStack));
mHTMLStackPos = 0;
mColPos = 0;
mIndent = 0;
mDoFormat = (aFlags & nsIDocumentEncoder::OutputFormatted) ? PR_TRUE
: PR_FALSE;
mBodyOnly = (aFlags & nsIDocumentEncoder::OutputBodyOnly) ? PR_TRUE
: PR_FALSE;
mDoHeader = (!mBodyOnly) && (mDoFormat) &&
((aFlags & nsIDocumentEncoder::OutputNoDoctype) ? PR_FALSE
: PR_TRUE);
mBuffer = nsnull;
mBufferSize = 0;
mUnicodeEncoder = nsnull;
mStream = aOutStream;
mString = aOutString;
mInBody = PR_FALSE;
if (aCharsetOverride != nsnull)
mCharsetOverride = *aCharsetOverride;
}
/**
* This method tells the sink whether or not it is
* encoding an HTML fragment or the whole document.
* By default, the entire document is encoded.
*
* @update 03/14/99 gpk
* @param aFlag set to true if only encoding a fragment
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::DoFragment(PRBool aFlag)
{
return NS_OK;
}
/**
* This gets called when handling illegal contents, especially
* in dealing with tables. This method creates a new context.
*
* @update 04/04/99 harishd
* @param aPosition - The position from where the new context begins.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::BeginContext(PRInt32 aPosition)
{
return NS_OK;
}
/**
* This method terminates any new context that got created by
* BeginContext and switches back to the main context.
*
* @update 04/04/99 harishd
* @param aPosition - Validates the end of a context.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::EndContext(PRInt32 aPosition)
{
return NS_OK;
}
void nsHTMLContentSinkStream::EnsureBufferSize(PRInt32 aNewSize)
{
if (mBufferSize < aNewSize)
{
delete [] mBuffer;
mBufferSize = 2*aNewSize+1; // make the twice as large
mBuffer = new char[mBufferSize];
if(mBuffer){
mBuffer[0] = 0;
}
}
}
/*
* Entities are represented in the dom as single elements.
* Substitute them back into entity for (e.g. &acute;) here.
*/
void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc,
nsString& aDst)
{
PRInt32 length = aSrc.Length();
PRUnichar ch;
if (mUnicodeEncoder == nsnull)
InitEncoder("");
if (length > 0)
{
// Convert anything that maps to character entity
// to the entity value
EnsureBufferSize(length);
for (PRInt32 i = 0; i < length; i++)
{
ch = aSrc.CharAt(i);
const nsCString& entity = nsHTMLEntities::UnicodeToEntity(ch);
if (0 < entity.Length())
{
aDst.Append('&');
aDst.Append(entity);
aDst.Append(';');
}
else
{
aDst.Append(ch);
}
}
}
}
void nsHTMLContentSinkStream::EncodeToBuffer(const nsString& aSrc)
{
nsString htmlstr;
UnicodeToHTMLString(aSrc, htmlstr);
NS_VERIFY(mUnicodeEncoder != nsnull,"The unicode encoder needs to be initialized");
if (mUnicodeEncoder == nsnull)
return;
PRInt32 length = htmlstr.Length();
nsresult result;
if (mUnicodeEncoder != nsnull && length > 0)
{
EnsureBufferSize(length);
mBufferLength = mBufferSize;
mUnicodeEncoder->Reset();
result = mUnicodeEncoder->Convert(htmlstr.GetUnicode(), &length,
mBuffer, &mBufferLength);
mBuffer[mBufferLength] = 0;
PRInt32 temp = mBufferLength;
if (NS_SUCCEEDED(result))
result = mUnicodeEncoder->Finish(mBuffer,&temp);
#if 0
// Do some conversions to make up for the unicode encoder's foibles:
PRInt32 nbsp = nsHTMLEntities::EntityToUnicode(nsCAutoString("nbsp"));
PRInt32 quot = nsHTMLEntities::EntityToUnicode(nsCAutoString("quot"));
for (PRInt32 i = 0; i < mBufferLength; i++)
{
if (mBuffer[i] == quot)
mBuffer[i] = '"';
// I don't know why this nbsp mapping was here ...
else if (mBuffer[i] == nbsp)
mBuffer[i] = ' ';
}
#endif
}
}
void nsHTMLContentSinkStream::Write(const nsString& aString)
{
if (mBodyOnly && !mInBody)
return;
// No need to re-encode strings, since they're going from UCS2 to UCS2
if (mString)
mString->Append(aString);
if (!mStream)
return;
// Now handle the stream case:
nsOutputStream out(mStream);
// If a encoder is being used then convert first convert the input string
if (mUnicodeEncoder)
{
EncodeToBuffer(aString);
out.write(mBuffer, mBufferLength);
}
// else just write the unicode
else
{
const PRUnichar* unicode = aString.GetUnicode();
out.write(unicode, aString.Length());
}
}
void nsHTMLContentSinkStream::Write(const char* aData)
{
if (mBodyOnly && !mInBody)
return;
if (mStream)
{
nsOutputStream out(mStream);
out << aData;
}
if (mString)
{
mString->Append(aData);
}
}
void nsHTMLContentSinkStream::Write(char aData)
{
if (mBodyOnly && !mInBody)
return;
if (mStream)
{
nsOutputStream out(mStream);
out << aData;
}
if (mString)
{
mString->Append(aData);
}
}
/**
*
* @update 04/30/99 gpk
* @param
* @return
*/
nsHTMLContentSinkStream::~nsHTMLContentSinkStream() {
NS_IF_RELEASE(mUnicodeEncoder);
}
/**
*
* @update gess7/7/98
* @param
* @return
*/
void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode) {
int theCount=aNode.GetAttributeCount();
if(theCount) {
int i=0;
for(i=0;i<theCount;i++){
const nsString& temp=aNode.GetKeyAt(i);
nsString key = temp;
if (mLowerCaseTags == PR_TRUE)
key.ToLowerCase();
else
key.ToUpperCase();
EnsureBufferSize(key.Length());
key.ToCString(mBuffer,mBufferSize);
// send to ouput " [KEY]="
Write(' ');
Write(mBuffer);
mColPos += 1 + strlen(mBuffer) + 1;
// See if there's an attribute:
const nsString& value=aNode.GetValueAt(i);
if (value.Length() > 0)
{
Write(char(kEqual));
mColPos += 1 + strlen(mBuffer) + 1;
// send to ouput "\"[VALUE]\""
Write('\"');
Write(value);
Write('\"');
}
mColPos += 1 + strlen(mBuffer) + 1;
}
}
}
/**
* This method gets called by the parser when it encounters
* a title tag and wants to set the document title in the sink.
*
* @update 04/30/99 gpk
* @param nsString reference to new title value
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::SetTitle(const nsString& aValue){
const char* tagName = GetTagName(eHTMLTag_title);
Write(kLessThan);
Write(tagName);
Write(kGreaterThan);
Write(aValue);
Write(kLessThan);
Write(kForwardSlash);
Write(tagName);
Write(kGreaterThan);
return NS_OK;
}
// XXX OpenHTML never gets called; AddStartTag gets called on
// XXX the html tag from OpenContainer, from nsXIFDTD::StartTopOfStack,
// XXX from nsXIFDTD::HandleStartToken.
/**
* This method is used to open the outer HTML container.
*
* @update 04/30/99 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenHTML(const nsIParserNode& aNode){
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
if (tag == eHTMLTag_html)
AddStartTag(aNode);
return NS_OK;
}
/**
* This method is used to close the outer HTML container.
*
* @update 04/30/99 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::CloseHTML(const nsIParserNode& aNode){
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
if (tag == eHTMLTag_html)
AddEndTag(aNode);
return NS_OK;
}
/**
* This method is used to open the only HEAD container.
*
* @update 04/30/99 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenHead(const nsIParserNode& aNode)
{
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
if (tag == eHTMLTag_head)
AddStartTag(aNode);
return NS_OK;
}
/**
* This method is used to close the only HEAD container.
*
* @update 04/30/99 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::CloseHead(const nsIParserNode& aNode){
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
if (tag == eHTMLTag_head)
AddEndTag(aNode);
return NS_OK;
}
/**
* This method is used to open the main BODY container.
*
* @update 04/30/99 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenBody(const nsIParserNode& aNode){
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
if (tag == eHTMLTag_body)
AddStartTag(aNode);
return NS_OK;
}
/**
* This method is used to close the main BODY container.
*
* @update 04/30/99 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::CloseBody(const nsIParserNode& aNode){
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
if (tag == eHTMLTag_body)
AddEndTag(aNode);
return NS_OK;
}
/**
* This method is used to open a new FORM container.
*
* @update 07/12/98 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenForm(const nsIParserNode& aNode){
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
if (tag == eHTMLTag_form)
AddStartTag(aNode);
return NS_OK;
}
/**
* This method is used to close the outer FORM container.
*
* @update 07/12/98 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::CloseForm(const nsIParserNode& aNode){
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
if (tag == eHTMLTag_form)
AddEndTag(aNode);
return NS_OK;
}
/**
* This method is used to open a new FORM container.
*
* @update 07/12/98 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenMap(const nsIParserNode& aNode){
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
if (tag == eHTMLTag_map)
AddStartTag(aNode);
return NS_OK;
}
/**
* This method is used to close the outer FORM container.
*
* @update 07/12/98 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::CloseMap(const nsIParserNode& aNode){
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
if (tag == eHTMLTag_map)
AddEndTag(aNode);
return NS_OK;
}
/**
* This method is used to open the FRAMESET container.
*
* @update 07/12/98 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenFrameset(const nsIParserNode& aNode){
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
if (tag == eHTMLTag_frameset)
AddStartTag(aNode);
return NS_OK;
}
/**
* This method is used to close the FRAMESET container.
*
* @update 07/12/98 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::CloseFrameset(const nsIParserNode& aNode){
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
if (tag == eHTMLTag_frameset)
AddEndTag(aNode);
return NS_OK;
}
void nsHTMLContentSinkStream::AddIndent()
{
nsString padding(" ");
for (PRInt32 i = mIndent; --i >= 0; )
{
Write(padding);
mColPos += 2;
}
}
void nsHTMLContentSinkStream::AddStartTag(const nsIParserNode& aNode)
{
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
const nsString& name = aNode.GetText();
nsString tagName;
if (tag == eHTMLTag_body)
mInBody = PR_TRUE;
mHTMLTagStack[mHTMLStackPos++] = tag;
tagName = name;
if (mLowerCaseTags == PR_TRUE)
tagName.ToLowerCase();
else
tagName.ToUpperCase();
if ((mDoFormat || !mInBody) && mColPos != 0 && BreakBeforeOpen(tag))
{
Write(NS_LINEBREAK);
mColPos = 0;
}
if ((mDoFormat || !mInBody) && PermitWSBeforeOpen(tag))
AddIndent();
EnsureBufferSize(tagName.Length());
tagName.ToCString(mBuffer,mBufferSize);
Write(kLessThan);
Write(mBuffer);
mColPos += 1 + tagName.Length();
if (mDoFormat && tag == eHTMLTag_style)
{
Write(kGreaterThan);
Write(NS_LINEBREAK);
const nsString& data = aNode.GetSkippedContent();
PRInt32 size = data.Length();
char* buffer = new char[size+1];
if(buffer){
data.ToCString(buffer,size+1);
Write(buffer);
delete[] buffer;
}
}
else
{
WriteAttributes(aNode);
Write(kGreaterThan);
mColPos += 1;
}
if ((mDoFormat && BreakAfterOpen(tag)) || (tag == eHTMLTag_pre))
{
Write(NS_LINEBREAK);
mColPos = 0;
}
if (IndentChildren(tag))
mIndent++;
if (tag == eHTMLTag_head)
{
if(mDoHeader)
{
Write(gHeaderComment);
Write(NS_LINEBREAK);
Write(gDocTypeHeader);
Write(NS_LINEBREAK);
}
}
}
void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode)
{
eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
// const nsString& name = aNode.GetText();
nsAutoString tagName;
if (tag == eHTMLTag_unknown)
{
tagName = aNode.GetText();
}
else if (tag == eHTMLTag_comment)
{
tagName = "--";
}
else
{
tagName = nsHTMLTags::GetStringValue(tag);
}
if (mLowerCaseTags == PR_TRUE)
tagName.ToLowerCase();
else
tagName.ToUpperCase();
if (IndentChildren(tag))
mIndent--;
if ((mDoFormat || !mInBody) && BreakBeforeClose(tag))
{
if (mColPos != 0)
{
Write(NS_LINEBREAK);
mColPos = 0;
}
AddIndent();
}
EnsureBufferSize(tagName.Length());
tagName.ToCString(mBuffer,mBufferSize);
if (tag != eHTMLTag_comment)
{
Write(kLessThan);
Write(kForwardSlash);
mColPos += 1 + 1;
}
Write(mBuffer);
Write(kGreaterThan);
mColPos += strlen(mBuffer) + 1;
if (tag == eHTMLTag_body)
mInBody = PR_FALSE;
if ((mDoFormat || !mInBody) && BreakAfterClose(tag))
{
Write(NS_LINEBREAK);
mColPos = 0;
}
mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown;
}
/**
* This gets called by the parser when you want to add
* a leaf node to the current container in the content
* model.
*
* @updated gpk 06/18/98
* @param
* @return
*/
nsresult
nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode){
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
eHTMLTags tag = eHTMLTag_unknown;
if (mHTMLStackPos > 0)
tag = mHTMLTagStack[mHTMLStackPos-1];
PRBool preformatted = PR_FALSE;
for (PRInt32 i = mHTMLStackPos-1; i >= 0; i--)
{
preformatted |= PreformattedChildren(mHTMLTagStack[i]);
if (preformatted)
break;
}
if (type == eHTMLTag_br ||
type == eHTMLTag_hr ||
type == eHTMLTag_meta ||
type == eHTMLTag_style)
{
AddStartTag(aNode);
mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown;
}
else if (type == eHTMLTag_entity)
{
const nsString& entity = aNode.GetText();
EncodeToBuffer(entity);
Write('&');
Write(mBuffer);
Write(';');
mColPos += entity.Length() + 2;
}
else if (type == eHTMLTag_text)
{
const nsString& text = aNode.GetText();
if (!mDoFormat || preformatted)
{
Write(text);
mColPos += text.Length();
}
else
{
PRInt32 mMaxColumn = 72;
// 1. Determine the length of the input string
PRInt32 length = text.Length();
// 2. If the offset plus the length of the text is smaller
// than the max then just add it
if (mColPos + length < mMaxColumn)
{
Write(text);
mColPos += text.Length();
}
else
{
nsString str = text;
PRBool done = PR_FALSE;
PRInt32 indx = 0;
PRInt32 offset = mColPos;
while (!done)
{
// find the next break
PRInt32 start = mMaxColumn-offset;
if (start < 0)
start = 0;
indx = str.FindChar(' ',PR_FALSE,start);
// if there is no break than just add it
if (indx == kNotFound)
{
Write(str);
mColPos += str.Length();
done = PR_TRUE;
}
else
{
// make first equal to the str from the
// beginning to the index
nsString first = str;
first.Truncate(indx);
Write(first);
Write(NS_LINEBREAK);
mColPos = 0;
// cut the string from the beginning to the index
str.Cut(0,indx);
offset = 0;
}
}
}
}
}
else if (type == eHTMLTag_whitespace)
{
if (!mDoFormat || preformatted)
{
const nsString& text = aNode.GetText();
Write(text);
mColPos += text.Length();
}
}
else if (type == eHTMLTag_newline)
{
if (!mDoFormat || preformatted)
{
Write(NS_LINEBREAK);
mColPos = 0;
}
}
return NS_OK;
}
/**
* This gets called by the parser when you want to add
* a PI node to the current container in the content
* model.
*
* @updated gess 3/25/98
* @param
* @return
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::AddProcessingInstruction(const nsIParserNode& aNode){
#ifdef VERBOSE_DEBUG
DebugDump("<",aNode.GetText(),(mNodeStackPos)*2);
#endif
return NS_OK;
}
/**
* This gets called by the parser when it encounters
* a DOCTYPE declaration in the HTML document.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::AddDocTypeDecl(const nsIParserNode& aNode, PRInt32 aMode)
{
#ifdef VERBOSE_DEBUG
DebugDump("<",aNode.GetText(),(mNodeStackPos)*2);
#endif
return NS_OK;
}
/**
* This gets called by the parser when you want to add
* a comment node to the current container in the content
* model.
*
* @updated gess 3/25/98
* @param
* @return
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::AddComment(const nsIParserNode& aNode){
#ifdef VERBOSE_DEBUG
DebugDump("<",aNode.GetText(),(mNodeStackPos)*2);
#endif
Write("<!--");
return NS_OK;
}
/**
* This method is used to a general container.
* This includes: OL,UL,DIR,SPAN,TABLE,H[1..6],etc.
*
* @update 07/12/98 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenContainer(const nsIParserNode& aNode){
const nsString& name = aNode.GetText();
if (name.Equals("XIF_DOC_INFO"))
{
PRInt32 count=aNode.GetAttributeCount();
for(PRInt32 i=0;i<count;i++)
{
const nsString& key=aNode.GetKeyAt(i);
const nsString& value=aNode.GetValueAt(i);
if (key.Equals("charset"))
{
if (mCharsetOverride.Length() == 0)
InitEncoder(value);
else
InitEncoder(mCharsetOverride);
}
}
}
else
{
AddStartTag(aNode);
}
return NS_OK;
}
/**
* This method is used to close a generic container.
*
* @update 04/30/99 gpk
* @param nsIParserNode reference to parser node interface
* @return PR_TRUE if successful.
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::CloseContainer(const nsIParserNode& aNode){
AddEndTag(aNode);
return NS_OK;
}
/**
* This method gets called when the parser begins the process
* of building the content model via the content sink.
*
* @update 5/7/98 gess
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::WillBuildModel(void)
{
mTabLevel=-1;
return NS_OK;
}
/**
* This method gets called when the parser concludes the process
* of building the content model via the content sink.
*
* @param aQualityLevel describes how well formed the doc was.
* 0=GOOD; 1=FAIR; 2=POOR;
* @update 5/7/98 gess
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::DidBuildModel(PRInt32 aQualityLevel) {
return NS_OK;
}
/**
* This method gets called when the parser gets i/o blocked,
* and wants to notify the sink that it may be a while before
* more data is available.
*
* @update 5/7/98 gess
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::WillInterrupt(void) {
return NS_OK;
}
/**
* This method gets called when the parser i/o gets unblocked,
* and we're about to start dumping content again to the sink.
*
* @update 5/7/98 gess
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::WillResume(void) {
return NS_OK;
}
NS_IMETHODIMP
nsHTMLContentSinkStream::SetParser(nsIParser* aParser) {
return NS_OK;
}
NS_IMETHODIMP
nsHTMLContentSinkStream::NotifyError(const nsParserError* aError)
{
return NS_OK;
}
/**
* **** Pretty Printing Methods ******
*
*/
PRBool IsInline(eHTMLTags aTag)
{
PRBool result = PR_FALSE;
switch (aTag)
{
case eHTMLTag_a:
case eHTMLTag_address:
case eHTMLTag_big:
case eHTMLTag_blink:
case eHTMLTag_b:
case eHTMLTag_br:
case eHTMLTag_cite:
case eHTMLTag_code:
case eHTMLTag_dfn:
case eHTMLTag_em:
case eHTMLTag_font:
case eHTMLTag_img:
case eHTMLTag_i:
case eHTMLTag_kbd:
case eHTMLTag_keygen:
case eHTMLTag_nobr:
case eHTMLTag_samp:
case eHTMLTag_small:
case eHTMLTag_spacer:
case eHTMLTag_span:
case eHTMLTag_strike:
case eHTMLTag_strong:
case eHTMLTag_sub:
case eHTMLTag_sup:
case eHTMLTag_td:
case eHTMLTag_textarea:
case eHTMLTag_tt:
case eHTMLTag_var:
case eHTMLTag_wbr:
result = PR_TRUE;
break;
default:
break;
}
return result;
}
PRBool IsBlockLevel(eHTMLTags aTag)
{
return !IsInline(aTag);
}
/**
* Desired line break state before the open tag.
*/
PRBool BreakBeforeOpen(eHTMLTags aTag) {
PRBool result = PR_FALSE;
switch (aTag)
{
case eHTMLTag_html:
result = PR_FALSE;
break;
default:
result = IsBlockLevel(aTag);
}
return result;
}
/**
* Desired line break state after the open tag.
*/
PRBool BreakAfterOpen(eHTMLTags aTag) {
PRBool result = PR_FALSE;
switch (aTag)
{
case eHTMLTag_html:
case eHTMLTag_body:
case eHTMLTag_ul:
case eHTMLTag_ol:
case eHTMLTag_table:
case eHTMLTag_tbody:
case eHTMLTag_style:
result = PR_TRUE;
break;
default:
break;
}
return result;
}
/**
* Desired line break state before the close tag.
*/
PRBool BreakBeforeClose(eHTMLTags aTag) {
PRBool result = PR_FALSE;
switch (aTag)
{
case eHTMLTag_html:
case eHTMLTag_head:
case eHTMLTag_body:
case eHTMLTag_ul:
case eHTMLTag_ol:
case eHTMLTag_table:
case eHTMLTag_tbody:
case eHTMLTag_style:
result = PR_TRUE;
break;
default:
break;
}
return result;
}
/**
* Desired line break state after the close tag.
*/
PRBool BreakAfterClose(eHTMLTags aTag) {
PRBool result = PR_FALSE;
switch (aTag)
{
case eHTMLTag_html:
result = PR_TRUE;
break;
default:
result = IsBlockLevel(aTag);
}
return result;
}
/**
* Indent/outdent when the open/close tags are encountered.
* This implies that BreakAfterOpen() and BreakBeforeClose()
* are true no matter what those methods return.
*/
PRBool IndentChildren(eHTMLTags aTag) {
PRBool result = PR_FALSE;
switch (aTag)
{
case eHTMLTag_table:
case eHTMLTag_ul:
case eHTMLTag_ol:
case eHTMLTag_tbody:
case eHTMLTag_form:
case eHTMLTag_frameset:
result = PR_TRUE;
break;
default:
result = PR_FALSE;
break;
}
return result;
}
/**
* All tags after this tag and before the closing tag will be output with no
* formatting.
*/
PRBool PreformattedChildren(eHTMLTags aTag) {
PRBool result = PR_FALSE;
if (aTag == eHTMLTag_pre)
{
result = PR_TRUE;
}
return result;
}
/**
* Are we allowed to insert new white space before the open tag.
*
* Returning false does not prevent inserting WS
* before the tag if WS insertion is allowed for another reason,
* e.g. there is already WS there or we are after a tag that
* has PermitWSAfter*().
*/
PRBool PermitWSBeforeOpen(eHTMLTags aTag) {
PRBool result = IsInline(aTag) == PR_FALSE;
return result;
}
#ifdef OBSOLETE
/**
* Eat the open tag. Pretty much just for <P*>.
*/
PRBool EatOpen(eHTMLTags aTag) {
return PR_FALSE;
}
/**
* Eat the close tag. Pretty much just for </P>.
*/
PRBool EatClose(eHTMLTags aTag) {
return PR_FALSE;
}
/** @see PermitWSBeforeOpen */
PRBool PermitWSAfterOpen(eHTMLTags aTag) {
if (aTag == eHTMLTag_pre)
{
return PR_FALSE;
}
return PR_TRUE;
}
/** @see PermitWSBeforeOpen */
PRBool PermitWSBeforeClose(eHTMLTags aTag) {
if (aTag == eHTMLTag_pre)
{
return PR_FALSE;
}
return PR_TRUE;
}
/** @see PermitWSBeforeOpen */
PRBool PermitWSAfterClose(eHTMLTags aTag) {
return PR_TRUE;
}
/** @see PermitWSBeforeOpen */
PRBool IgnoreWS(eHTMLTags aTag) {
PRBool result = PR_FALSE;
switch (aTag)
{
case eHTMLTag_html:
case eHTMLTag_head:
case eHTMLTag_body:
case eHTMLTag_ul:
case eHTMLTag_ol:
case eHTMLTag_li:
case eHTMLTag_table:
case eHTMLTag_tbody:
case eHTMLTag_style:
result = PR_TRUE;
break;
default:
break;
}
return result;
}
#endif /* OBSOLETE */