Added support to encode the document as either a nsString or an nsIOutputStream.

In the case of the stream, the document is encoded using the document defined charset
or the user may override the charset.

Added constructors for creating a sink using a nsIOutputStream or an nsString.

Removed all HTML and TEXT encoding which relied on references to ostream. Using John McMullen's
platform independent streaming code in its place. :)


git-svn-id: svn://10.0.0.236/trunk@29963 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
kostello%netscape.com
1999-05-03 18:41:23 +00:00
parent 6bff10b32e
commit 472d53575b
8 changed files with 960 additions and 934 deletions

View File

@@ -39,6 +39,8 @@
#include "nsICharsetAlias.h"
#include "nsIServiceManager.h"
#include "nsICharsetConverterManager.h"
#include "nsIOutputStream.h"
#include "nsFileStream.h"
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
@@ -60,7 +62,19 @@ static PRBool IsBlockLevel(eHTMLTags aTag);
nsresult nsHTMLToTXTSinkStream::InitEncoder(const nsString& aCharset)
{
nsresult res = NS_OK;
// If the converter is ucs2, then do not use a converter
nsString ucs2("ucs2");
if (aCharset.Equals(ucs2))
{
NS_IF_RELEASE(mUnicodeEncoder);
return res;
}
nsICharsetAlias* calias = nsnull;
res = nsServiceManager::GetService(kCharsetAliasCID,
@@ -141,58 +155,71 @@ NS_IMPL_RELEASE(nsHTMLToTXTSinkStream)
/**
* This method is defined in nsIParser. It is used to
* cause the COM-like construction of an nsParser.
* This method creates a new sink, it sets the stream used
* for the sink to aStream
*
* @update gpk02/03/99
* @param nsIParser** ptr to newly instantiated parser
* @return NS_xxx error result
* @update gpk 04/30/99
*/
NS_HTMLPARS nsresult
NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult) {
nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream();
NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult,
nsIOutputStream* aStream,
const nsString* aCharsetOverride) {
NS_ASSERTION(aStream != nsnull, "a valid stream is required");
nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream(aStream,nsnull);
if (nsnull == it) {
return NS_ERROR_OUT_OF_MEMORY;
}
if (aCharsetOverride != nsnull)
it->SetCharsetOverride(aCharsetOverride);
return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
}
/**
* Construct a content sink stream.
* @update gpk02/03/99
* @param
* @return
* This method creates a new sink, it sets the stream used
* for the sink to aStream
*
* @update gpk 04/30/99
*/
nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream() {
NS_INIT_REFCNT();
mOutput=&cout;
mColPos = 0;
mIndent = 0;
mDoOutput = PR_FALSE;
mBufferSize = 0;
mBuffer = nsnull;
mUnicodeEncoder = nsnull;
NS_HTMLPARS nsresult
NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult,
nsString* aString) {
NS_ASSERTION(aString != nsnull, "a valid stream is required");
nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream(nsnull,aString);
if (nsnull == it) {
return NS_ERROR_OUT_OF_MEMORY;
}
nsString ucs2("ucs2");
it->SetCharsetOverride(&ucs2);
return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
}
/**
* Construct a content sink stream.
* @update gpk02/03/99
* @param
* @return
*/
nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) {
nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(nsIOutputStream* aStream, nsString* aString) {
NS_INIT_REFCNT();
mOutput = &aStream;
mStream = aStream;
mColPos = 0;
mIndent = 0;
mDoOutput = PR_FALSE;
mBufferSize = 0;
mBufferLength = 0;
mBuffer = nsnull;
mUnicodeEncoder = nsnull;
mStream = aStream;
mString = aString;
}
/**
*
* @update gpk02/03/99
@@ -200,7 +227,6 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) {
* @return
*/
nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() {
mOutput=0; //we don't own the stream we're given; just forget it.
delete [] mBuffer;
NS_IF_RELEASE(mUnicodeEncoder);
}
@@ -208,50 +234,24 @@ nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() {
/**
*
* @update gpk02/03/99
* @update gpk04/30/99
* @param
* @return
*/
NS_IMETHODIMP_(void)
nsHTMLToTXTSinkStream::SetOutputStream(ostream& aStream){
mOutput=&aStream;
NS_IMETHODIMP
nsHTMLToTXTSinkStream::SetCharsetOverride(const nsString* aCharset)
{
if (aCharset)
{
mCharsetOverride = *aCharset;
InitEncoder(mCharsetOverride);
}
return NS_OK;
}
/**
*
* @update gpk02/03/99
* @param
* @return
*/
static
void OpenTagWithAttributes(const char* theTag,const nsIParserNode& aNode,int tab,ostream& aStream,PRBool aNewline) {
}
/**
*
* @update gpk02/03/99
* @param
* @return
*/
static
void OpenTag(const char* theTag,int tab,ostream& aStream,PRBool aNewline) {
}
/**
*
* @update gpk02/03/99
* @param
* @return
*/
static
void CloseTag(const char* theTag,int tab,ostream& aStream) {
}
/**
* This method gets called by the parser when it encounters
* a title tag and wants to set the document title in the sink.
@@ -463,38 +463,38 @@ void nsHTMLToTXTSinkStream::EnsureBufferSize(PRInt32 aNewSize)
mBufferSize = 2*aNewSize+1; // make the twice as large
mBuffer = new char[mBufferSize];
mBuffer[0] = 0;
mBufferLength = 0;
}
}
void nsHTMLToTXTSinkStream::UnicodeToTXTString(const nsString& aSrc)
void nsHTMLToTXTSinkStream::EncodeToBuffer(const nsString& aSrc)
{
NS_ASSERTION(mUnicodeEncoder != nsnull,"The unicode encoder needs to be initialized");
if (mUnicodeEncoder == nsnull)
return;
#define CH_NBSP 160
PRInt32 length = aSrc.Length();
nsresult result;
PRInt32 bufferLength;
if (mUnicodeEncoder == nsnull)
InitEncoder("");
if (length > 0)
if (mUnicodeEncoder != nsnull && length > 0)
{
EnsureBufferSize(length);
bufferLength = mBufferSize;
mBufferLength = mBufferSize;
mUnicodeEncoder->Reset();
result = mUnicodeEncoder->Convert(aSrc.GetUnicode(), &length, mBuffer, &bufferLength);
mBuffer[bufferLength] = 0;
PRInt32 temp = bufferLength;
result = mUnicodeEncoder->Convert(aSrc.GetUnicode(), &length, mBuffer, &mBufferLength);
mBuffer[mBufferLength] = 0;
PRInt32 temp = mBufferLength;
if (NS_SUCCEEDED(result))
result = mUnicodeEncoder->Finish(mBuffer,&temp);
for (PRInt32 i = 0; i < bufferLength; i++)
for (PRInt32 i = 0; i < mBufferLength; i++)
{
if (mBuffer[i] == char(CH_NBSP))
mBuffer[i] = ' ';
@@ -504,76 +504,47 @@ void nsHTMLToTXTSinkStream::UnicodeToTXTString(const nsString& aSrc)
}
NS_IMETHODIMP
nsHTMLToTXTSinkStream::GetStringBuffer(nsString & aStrBuffer)
{
aStrBuffer = mStrBuffer;
return NS_OK;
}
/**
* This gets called by the parser when you want to add
* a leaf node to the current container in the content
* model.
* Write places the contents of aString into either the output stream
* or the output string.
* When going to the stream, all data is run through the encoder
*
* @updated gpk 06/18/98
* @updated gpk02/03/99
* @param
* @return
*/
nsresult
nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream)
void nsHTMLToTXTSinkStream::Write(const nsString& aString)
{
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
const nsString& text = aNode.GetText();
if (mDoOutput == PR_FALSE)
return NS_OK;
if (type == eHTMLTag_text) {
UnicodeToTXTString(text);
aStream << mBuffer;
mStrBuffer.Append(mBuffer);
mColPos += text.Length();
}
else if (type == eHTMLTag_entity)
// If a encoder is being used then convert first convert the input string
if (mUnicodeEncoder != nsnull)
{
const nsString& text = aNode.GetText();
UnicodeToTXTString(text);
PRInt32 entity = NS_EntityToUnicode(mBuffer);
if (entity < 256)
EncodeToBuffer(aString);
if (mStream != nsnull)
{
char ch = (char)entity;
aStream << ch;
mColPos++;
nsOutputStream out(mStream);
out.write(mBuffer,mBufferLength);
}
if (mString != nsnull)
{
mString->Append(mBuffer);
}
}
else if (type == eHTMLTag_whitespace)
else
{
if (PR_TRUE)
if (mStream != nsnull)
{
const nsString& text = aNode.GetText();
UnicodeToTXTString(text);
aStream << mBuffer;
mStrBuffer.Append(mBuffer);
mColPos += text.Length();
nsOutputStream out(mStream);
const PRUnichar* unicode = aString.GetUnicode();
PRUint32 length = aString.Length();
out.write(unicode,length);
}
else
{
mString->Append(aString);
}
}
else if (type == eHTMLTag_br)
{
if (PR_TRUE)
{
aStream << endl;
mStrBuffer.Append("\n");
mColPos += 1;
}
}
return NS_OK;
}
@@ -628,7 +599,10 @@ nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){
const nsString& value=aNode.GetValueAt(i);
if (key.Equals("charset"))
InitEncoder(value);
{
if (mCharsetOverride.Length() == 0)
InitEncoder(value);
}
}
}
@@ -657,9 +631,8 @@ nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode){
{
if (mColPos != 0)
{
if (mOutput)
*mOutput << endl;
mStrBuffer.Append("\n");
nsString temp("\n");
Write(temp);
mColPos = 0;
}
}
@@ -677,11 +650,53 @@ nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode){
*/
NS_IMETHODIMP
nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode){
nsresult result = NS_OK;
if(mOutput) {
result = AddLeaf(aNode,*mOutput);
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
const nsString& text = aNode.GetText();
if (mDoOutput == PR_FALSE)
return NS_OK;
if (type == eHTMLTag_text) {
Write(text);
mColPos += text.Length();
}
else if (type == eHTMLTag_entity)
{
const nsString& text = aNode.GetText();
EncodeToBuffer(text);
PRUnichar entity = NS_EntityToUnicode(mBuffer);
nsString temp;
temp.Append(entity);
Write(temp);
mColPos++;
}
return result;
else if (type == eHTMLTag_whitespace)
{
if (PR_TRUE)
{
const nsString& text = aNode.GetText();
Write(text);
mColPos += text.Length();
}
}
else if (type == eHTMLTag_br)
{
nsString temp("\n");
Write(text);
mColPos++;
}
else if (type == eHTMLTag_newline)
{
nsString temp("\n");
Write(text);
mColPos++;
}
return NS_OK;
}