diff --git a/mozilla/htmlparser/src/nsHTMLContentSinkStream.cpp b/mozilla/htmlparser/src/nsHTMLContentSinkStream.cpp
index c7743085442..ac20b9d799b 100644
--- a/mozilla/htmlparser/src/nsHTMLContentSinkStream.cpp
+++ b/mozilla/htmlparser/src/nsHTMLContentSinkStream.cpp
@@ -36,6 +36,14 @@
#include "nsIParser.h"
#include "nsHTMLEntities.h"
+
+
+#include "nsIUnicodeEncoder.h"
+#include "nsICharsetAlias.h"
+#include "nsIServiceManager.h"
+#include "nsICharsetConverterManager.h"
+
+
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
@@ -343,6 +351,58 @@ NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult,
return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
}
+
+
+/**
+ * Inits the encoder instance variable (mUnicodeEncoder) for the sink
+ * based on the charset.
+ *
+ * The requested name is first mapped to its preferred charset name through
+ * the charset alias service. If the alias service is unavailable or does
+ * not know the name, ISO-8859-1 is used instead. A previously held encoder
+ * is released only once a replacement has actually been obtained.
+ *
+ * @update  gpk 4/21/99
+ * @param   aCharset  charset name (or alias) the output should be encoded in
+ * @return  NS_OK when mUnicodeEncoder has been (re)initialized; otherwise
+ *          the failure code of the converter lookup, in which case
+ *          mUnicodeEncoder is left untouched
+ */
+nsresult nsHTMLContentSinkStream::InitEncoder(const nsString& aCharset)
+{
+  nsAutoString charsetName = aCharset;
+
+  // Step 1. Resolve the alias to the preferred charset name.
+  nsICharsetAlias* calias = nsnull;
+  nsresult res = nsServiceManager::GetService(kCharsetAliasCID,
+                                              kICharsetAliasIID,
+                                              (nsISupports**)&calias);
+
+  NS_ASSERTION( nsnull != calias, "cannot find charet alias");
+  if (NS_SUCCEEDED(res) && (nsnull != calias))
+  {
+    res = calias->GetPreferred(aCharset, charsetName);
+    nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
+  }
+  else if (NS_SUCCEEDED(res))
+  {
+    // the service manager claimed success but handed back nothing
+    res = NS_ERROR_FAILURE;
+  }
+
+  if (NS_FAILED(res))
+  {
+    // failed - no alias service or unknown alias, fallback to ISO-8859-1
+    // (previously a missing alias service left the sink without any encoder)
+    charsetName = "ISO-8859-1";
+  }
+
+  // Step 2. Ask the converter manager for an encoder for that charset.
+  nsICharsetConverterManager* ccm = nsnull;
+  res = nsServiceManager::GetService(kCharsetConverterManagerCID,
+                                     kICharsetConverterManagerIID,
+                                     (nsISupports**)&ccm);
+  if (NS_FAILED(res))
+    return res;
+  if (nsnull == ccm)
+    return NS_ERROR_FAILURE;
+
+  nsIUnicodeEncoder* encoder = nsnull;
+  res = ccm->GetUnicodeEncoder(&charsetName, &encoder);
+  if (NS_SUCCEEDED(res))
+  {
+    if (nsnull != encoder)
+    {
+      NS_IF_RELEASE(mUnicodeEncoder);
+      mUnicodeEncoder = encoder;
+    }
+    else
+    {
+      // do not report success when no encoder was produced
+      res = NS_ERROR_FAILURE;
+    }
+  }
+  nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
+
+  return res;
+}
+
+
/**
* Construct a content sink stream.
* @update gess7/7/98
@@ -361,6 +421,7 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream(PRBool aDoFormat,PRBool aDoHead
mDoHeader = aDoHeader;
mBuffer = nsnull;
mBufferSize = 0;
+ mUnicodeEncoder = nsnull;
}
/**
@@ -381,6 +442,7 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream(ostream& aStream,PRBool aDoForm
mDoHeader = aDoHeader;
mBuffer = nsnull;
mBufferSize = 0;
+ mUnicodeEncoder = nsnull;
}
@@ -443,9 +505,16 @@ void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc)
const char* entity = nsnull;
PRUint32 offset = 0;
PRUint32 addedLength = 0;
+ nsAutoString data;
+
+
+ if (mUnicodeEncoder == nsnull)
+ InitEncoder("");
if (length > 0)
{
+ // Step 1. Convert anything that maps to character entity to
+ // the entity value
EnsureBufferSize(length);
for (PRInt32 i = 0; i < length; i++)
{
@@ -454,29 +523,31 @@ void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc)
entity = UnicodeToEntity(ch);
if (entity)
{
- PRUint32 size = strlen(entity);
- addedLength += size;
- EnsureBufferSize(length+addedLength+1);
- mBuffer[offset++] = '&';
- mBuffer[offset] = 0;
- strcat(mBuffer,entity);
-
- PRUint32 temp = offset + size;
- while (offset < temp)
- {
- mBuffer[offset] = tolower(mBuffer[offset]);
- offset++;
- }
- mBuffer[offset++] = ';';
- mBuffer[offset] = 0;
+ nsAutoString temp(entity);
+
+ temp.ToLowerCase();
+ data.Append('&');
+ data.Append(temp);
+ data.Append(';');
}
- else if (ch < 128)
+ else
{
- mBuffer[offset++] = (unsigned char)ch;
- mBuffer[offset] = 0;
+ data.Append(ch);
}
}
- }
+
+ // Step 2. Run the result through the converter
+ length = data.Length();
+ EnsureBufferSize(length);
+ PRInt32 bufferLength = mBufferSize;
+
+ mUnicodeEncoder->Reset();
+ nsresult result = mUnicodeEncoder->Convert(data, &length, mBuffer, &bufferLength);
+ mBuffer[bufferLength] = 0;
+ PRInt32 temp = bufferLength;
+ if (NS_SUCCEEDED(result))
+ result = mUnicodeEncoder->Finish(mBuffer,&temp);
+ }
}
@@ -487,6 +558,7 @@ void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc)
* @return
*/
nsHTMLContentSinkStream::~nsHTMLContentSinkStream() {
+ // Drop this sink's reference to the charset encoder, if one was ever set.
+ NS_IF_RELEASE(mUnicodeEncoder);
+ // NOTE(review): mBuffer is not freed here, whereas ~nsHTMLToTXTSinkStream
+ // does "delete [] mBuffer" -- confirm it is released elsewhere.
mOutput=0; //we don't own the stream we're given; just forget it.
}
@@ -526,8 +598,8 @@ void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode,ostream
key.ToUpperCase();
-
- key.ToCString(mBuffer,sizeof(gBuffer)-1);
+ EnsureBufferSize(key.Length());
+ key.ToCString(mBuffer,mBufferSize);
aStream << " " << mBuffer << char(kEqual);
mColPos += 1 + strlen(mBuffer) + 1;
@@ -993,7 +1065,14 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){
AddStartTag(aNode,aStream);
mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown;
}
- if (type == eHTMLTag_text)
+ else if (type == eHTMLTag_entity)
+ {
+ const nsString& entity = aNode.GetText();
+ UnicodeToHTMLString(entity);
+ aStream << '&' << mBuffer << ';';
+ mColPos += entity.Length() + 2;
+ }
+ else if (type == eHTMLTag_text)
{
const nsString& text = aNode.GetText();
if ((mDoFormat == PR_FALSE) || preformatted == PR_TRUE)
@@ -1136,9 +1215,25 @@ nsHTMLContentSinkStream::AddComment(const nsIParserNode& aNode){
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenContainer(const nsIParserNode& aNode){
- if(mOutput) {
- AddStartTag(aNode,*mOutput);
-// eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
+ if(mOutput)
+ {
+ const nsString& name = aNode.GetText();
+ if (name.Equals("XIF_DOC_INFO"))
+ {
+ PRInt32 count=aNode.GetAttributeCount();
+ for(PRInt32 i=0;iGetPreferred(aCharset, charsetName);
+ nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
+
+ if(NS_FAILED(res))
+ {
+ // failed - unknown alias , fallback to ISO-8859-1
+ charsetName = "ISO-8859-1";
+ }
+
+ nsICharsetConverterManager * ccm = nsnull;
+ res = nsServiceManager::GetService(kCharsetConverterManagerCID,
+ kICharsetConverterManagerIID,
+ (nsISupports**)&ccm);
+ if(NS_SUCCEEDED(res) && (nsnull != ccm))
+ {
+ nsIUnicodeEncoder * encoder = nsnull;
+ res = ccm->GetUnicodeEncoder(&charsetName, &encoder);
+ if(NS_SUCCEEDED(res) && (nsnull != encoder))
+ {
+ NS_IF_RELEASE(mUnicodeEncoder);
+ mUnicodeEncoder = encoder;
+ }
+ nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
+ }
+ }
+ return res;
+}
+
@@ -117,6 +172,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream() {
mDoOutput = PR_FALSE;
mBufferSize = 0;
mBuffer = nsnull;
+ mUnicodeEncoder = nsnull;
}
/**
@@ -133,6 +189,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) {
mDoOutput = PR_FALSE;
mBufferSize = 0;
mBuffer = nsnull;
+ mUnicodeEncoder = nsnull;
}
@@ -145,6 +202,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) {
nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() {
mOutput=0; //we don't own the stream we're given; just forget it.
delete [] mBuffer;
+ // Drop this sink's reference to the charset encoder, if one was ever set
+ // (the constructors initialize the member to nsnull).
+ NS_IF_RELEASE(mUnicodeEncoder);
}
@@ -409,42 +467,40 @@ void nsHTMLToTXTSinkStream::EnsureBufferSize(PRInt32 aNewSize)
}
+
+/**
+ * Encodes aSrc into mBuffer with mUnicodeEncoder (creating the encoder on
+ * first use), zero-terminates the result and then replaces every byte equal
+ * to CH_NBSP with a plain space. Nothing is written when aSrc is empty.
+ *
+ * @param aSrc  the unicode text to convert
+ */
void nsHTMLToTXTSinkStream::UnicodeToTXTString(const nsString& aSrc)
{
+
+
#define CH_NBSP 160
-#define CH_QUOT 34
-#define CH_AMP 38
-#define CH_LT 60
-#define CH_GT 62
PRInt32 length = aSrc.Length();
- PRUnichar ch;
- const char* entity = nsnull;
- PRUint32 offset = 0;
- PRUint32 addedLength = 0;
+ nsresult result;
+ PRInt32 bufferLength;
+
+ // Lazily create the encoder; an empty charset name is passed.
+ // NOTE(review): the result of InitEncoder is ignored and mUnicodeEncoder is
+ // dereferenced below even if it is still null -- confirm it cannot fail.
+ if (mUnicodeEncoder == nsnull)
+ InitEncoder("");
if (length > 0)
{
EnsureBufferSize(length);
- for (PRInt32 i = 0; i < length; i++)
- {
- ch = aSrc.CharAt(i);
- switch (ch)
- {
- case CH_QUOT: ch = '"'; break;
- case CH_AMP: ch = '&'; break;
- case CH_GT: ch = '>'; break;
- case CH_LT: ch = '<'; break;
- case CH_NBSP: ch = ' '; break;
- }
+ // Offer the whole buffer to the encoder; bufferLength is passed by address
+ // and is used afterwards as the number of bytes produced.
+ bufferLength = mBufferSize;
+
+ mUnicodeEncoder->Reset();
+ result = mUnicodeEncoder->Convert(aSrc, &length, mBuffer, &bufferLength);
+ // NOTE(review): if Convert fills all mBufferSize bytes this index equals
+ // mBufferSize -- confirm EnsureBufferSize allocates a spare byte.
+ mBuffer[bufferLength] = 0;
+ PRInt32 temp = bufferLength;
+ // NOTE(review): Finish is handed the start of mBuffer rather than the end
+ // of the converted data -- confirm it never emits bytes for the charsets
+ // in use, otherwise it would overwrite the output.
+ if (NS_SUCCEEDED(result))
+ result = mUnicodeEncoder->Finish(mBuffer,&temp);
- if (ch < 128)
- {
- mBuffer[offset++] = (unsigned char)ch;
- mBuffer[offset] = 0;
- }
+
+ // Turn non-breaking spaces into ordinary spaces. This runs on the encoded
+ // bytes; NOTE(review): presumably only safe for single-byte charsets where
+ // 160 is NBSP -- verify for multi-byte encodings.
+ for (PRInt32 i = 0; i < bufferLength; i++)
+ {
+ if (mBuffer[i] == char(CH_NBSP))
+ mBuffer[i] = ' ';
}
}
+
}
@@ -483,6 +539,18 @@ nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream)
mStrBuffer.Append(mBuffer);
mColPos += text.Length();
}
+ else if (type == eHTMLTag_entity)
+ {
+ const nsString& text = aNode.GetText();
+ UnicodeToTXTString(text);
+ PRInt32 entity = NS_EntityToUnicode(mBuffer);
+ if (entity < 256)
+ {
+ char ch = (char)entity;
+ aStream << ch;
+ mColPos++;
+ }
+ }
else if (type == eHTMLTag_whitespace)
{
if (PR_TRUE)
@@ -551,6 +619,18 @@ NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
const nsString& name = aNode.GetText();
+ if (name.Equals("XIF_DOC_INFO"))
+ {
+ PRInt32 count=aNode.GetAttributeCount();
+ for(PRInt32 i=0;i");
- if (offset != -1)
- aBuffer.Cut(0,offset);
aContentType= kXIFTextContentType;
result=ePrimaryDetect;
}
}
+
+ nsString charset ="ISO-8859-1";
+ PRInt32 offset;
+ offset = aBuffer.Find(kXIFDocInfo);
+ if(kNotFound!=offset)
+ {
+ offset = aBuffer.Find(kXIFCharset);
+ if (kNotFound!=offset)
+ {
+ PRInt32 start = aBuffer.Find('"',offset);
+ PRInt32 end = aBuffer.Find('"',start+1);
+
+ if ((start != kNotFound) && (end != kNotFound))
+ {
+ charset = "";
+ for (PRInt32 i = start+1; i < end; i++)
+ {
+ PRUnichar ch = aBuffer[i];
+ charset.Append(ch);
+ }
+ }
+ }
+ }
+ mCharset = charset;
+
return result;
}
@@ -638,6 +667,11 @@ nsresult nsXIFDTD::HandleStartToken(CToken* aToken) {
result = OpenContainer(node);
break;
+ case eXIFTag_entity:
+ StartTopOfStack();
+ ProcessEntityTag(node);
+ break;
+
case eXIFTag_content:
StartTopOfStack();
mInContent = PR_TRUE;
@@ -647,6 +681,10 @@ nsresult nsXIFDTD::HandleStartToken(CToken* aToken) {
ProcessEncodeTag(node);
break;
+ case eXIFTag_document_info:
+ ProcessDocumentInfoTag(node);
+ break;
+
case eXIFTag_attr:
AddAttribute(node);
@@ -1355,8 +1393,8 @@ void nsXIFDTD::BeginStartTag(const nsIParserNode& aNode)
if (type == eXIFTag_container)
PushHTMLTag(tag,tagName);
- CToken* token = new CStartToken(tagName);
- nsCParserNode* node = new nsCParserNode(token);
+// CToken* token = new CStartToken(tagName);
+// nsCParserNode* node = new nsCParserNode(token);
PushNodeAndToken(tagName);
break;
}
@@ -1629,6 +1667,38 @@ void nsXIFDTD::ProcessEncodeTag(const nsIParserNode& aNode)
}
+/**
+ * Handles an XIF entity tag: reads its "value" attribute, wraps it in a
+ * CEntityToken and hands a parser node built around that token to the
+ * sink as a leaf. Does nothing when the attribute is missing.
+ *
+ * NOTE(review): nothing in this method deletes the token -- confirm who
+ * owns it after AddLeaf returns.
+ *
+ * @param aNode  the XIF node carrying the entity's "value" attribute
+ */
+void nsXIFDTD::ProcessEntityTag(const nsIParserNode& aNode)
+{
+  nsString entityValue;
+
+  // no "value" attribute, nothing to emit
+  if (!GetAttribute(aNode,nsString("value"),entityValue))
+    return;
+
+  CEntityToken* entityToken = new CEntityToken(entityValue);
+  nsCParserNode leaf((CToken*)entityToken);
+  mSink->AddLeaf(leaf);
+}
+
+
+/**
+ * Handles the XIF document-info tag: when it carries a "charset" attribute,
+ * pushes a node named "XIF_DOC_INFO" and attaches the charset to it as an
+ * attribute. The stream sinks look for a container with that name in their
+ * OpenContainer methods. Does nothing when the attribute is missing.
+ *
+ * @param aNode  the XIF node carrying the "charset" attribute
+ */
+void nsXIFDTD::ProcessDocumentInfoTag(const nsIParserNode& aNode)
+{
+  nsString value;
+  nsString key("charset");
+
+  if (GetAttribute(aNode,key,value))
+  {
+    PushNodeAndToken(nsString("XIF_DOC_INFO"));
+    nsIParserNode* top = PeekNode();
+    if (top != nsnull)
+    {
+      // Allocate the token only once there is a node to take it; creating it
+      // before the null check leaked it whenever PeekNode() returned null.
+      CAttributeToken* attribute = new CAttributeToken(key,value);
+      ((nsCParserNode*)top)->AddAttribute(attribute);
+    }
+  }
+}
+
+
+
+
/*** CSS Methods ****/
void nsXIFDTD::BeginCSSStyleSheet(const nsIParserNode& aNode)
diff --git a/mozilla/htmlparser/src/nsXIFDTD.h b/mozilla/htmlparser/src/nsXIFDTD.h
index d10224cbf3f..ce76bdf1a03 100644
--- a/mozilla/htmlparser/src/nsXIFDTD.h
+++ b/mozilla/htmlparser/src/nsXIFDTD.h
@@ -71,7 +71,9 @@ enum eXIFTags
eXIFTag_css_stylesheet,
eXIFTag_doctype,
- eXIFTag_encode,
+ eXIFTag_document_info,
+ eXIFTag_encode,
+ eXIFTag_entity,
eXIFTag_import,
eXIFTag_leaf,
eXIFTag_link,
@@ -490,6 +492,8 @@ private:
private:
void ProcessEncodeTag(const nsIParserNode& aNode);
+ void ProcessEntityTag(const nsIParserNode& aNode);
+ void ProcessDocumentInfoTag(const nsIParserNode& aNode);
void BeginCSSStyleSheet(const nsIParserNode& aNode);
void EndCSSStyleSheet(const nsIParserNode& aNode);
@@ -556,6 +560,7 @@ protected:
PRBool mLowerCaseTags;
PRBool mLowerCaseAttributes;
nsITokenizer* mTokenizer;
+ nsString mCharset;
};
diff --git a/mozilla/parser/htmlparser/src/nsHTMLContentSinkStream.cpp b/mozilla/parser/htmlparser/src/nsHTMLContentSinkStream.cpp
index c7743085442..ac20b9d799b 100644
--- a/mozilla/parser/htmlparser/src/nsHTMLContentSinkStream.cpp
+++ b/mozilla/parser/htmlparser/src/nsHTMLContentSinkStream.cpp
@@ -36,6 +36,14 @@
#include "nsIParser.h"
#include "nsHTMLEntities.h"
+
+
+#include "nsIUnicodeEncoder.h"
+#include "nsICharsetAlias.h"
+#include "nsIServiceManager.h"
+#include "nsICharsetConverterManager.h"
+
+
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
@@ -343,6 +351,58 @@ NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult,
return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
}
+
+
+/**
+ * Inits the encoder instance variable (mUnicodeEncoder) for the sink
+ * based on the charset.
+ *
+ * The requested name is first mapped to its preferred charset name through
+ * the charset alias service. If the alias service is unavailable or does
+ * not know the name, ISO-8859-1 is used instead. A previously held encoder
+ * is released only once a replacement has actually been obtained.
+ *
+ * @update  gpk 4/21/99
+ * @param   aCharset  charset name (or alias) the output should be encoded in
+ * @return  NS_OK when mUnicodeEncoder has been (re)initialized; otherwise
+ *          the failure code of the converter lookup, in which case
+ *          mUnicodeEncoder is left untouched
+ */
+nsresult nsHTMLContentSinkStream::InitEncoder(const nsString& aCharset)
+{
+  nsAutoString charsetName = aCharset;
+
+  // Step 1. Resolve the alias to the preferred charset name.
+  nsICharsetAlias* calias = nsnull;
+  nsresult res = nsServiceManager::GetService(kCharsetAliasCID,
+                                              kICharsetAliasIID,
+                                              (nsISupports**)&calias);
+
+  NS_ASSERTION( nsnull != calias, "cannot find charet alias");
+  if (NS_SUCCEEDED(res) && (nsnull != calias))
+  {
+    res = calias->GetPreferred(aCharset, charsetName);
+    nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
+  }
+  else if (NS_SUCCEEDED(res))
+  {
+    // the service manager claimed success but handed back nothing
+    res = NS_ERROR_FAILURE;
+  }
+
+  if (NS_FAILED(res))
+  {
+    // failed - no alias service or unknown alias, fallback to ISO-8859-1
+    // (previously a missing alias service left the sink without any encoder)
+    charsetName = "ISO-8859-1";
+  }
+
+  // Step 2. Ask the converter manager for an encoder for that charset.
+  nsICharsetConverterManager* ccm = nsnull;
+  res = nsServiceManager::GetService(kCharsetConverterManagerCID,
+                                     kICharsetConverterManagerIID,
+                                     (nsISupports**)&ccm);
+  if (NS_FAILED(res))
+    return res;
+  if (nsnull == ccm)
+    return NS_ERROR_FAILURE;
+
+  nsIUnicodeEncoder* encoder = nsnull;
+  res = ccm->GetUnicodeEncoder(&charsetName, &encoder);
+  if (NS_SUCCEEDED(res))
+  {
+    if (nsnull != encoder)
+    {
+      NS_IF_RELEASE(mUnicodeEncoder);
+      mUnicodeEncoder = encoder;
+    }
+    else
+    {
+      // do not report success when no encoder was produced
+      res = NS_ERROR_FAILURE;
+    }
+  }
+  nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
+
+  return res;
+}
+
+
/**
* Construct a content sink stream.
* @update gess7/7/98
@@ -361,6 +421,7 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream(PRBool aDoFormat,PRBool aDoHead
mDoHeader = aDoHeader;
mBuffer = nsnull;
mBufferSize = 0;
+ mUnicodeEncoder = nsnull;
}
/**
@@ -381,6 +442,7 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream(ostream& aStream,PRBool aDoForm
mDoHeader = aDoHeader;
mBuffer = nsnull;
mBufferSize = 0;
+ mUnicodeEncoder = nsnull;
}
@@ -443,9 +505,16 @@ void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc)
const char* entity = nsnull;
PRUint32 offset = 0;
PRUint32 addedLength = 0;
+ nsAutoString data;
+
+
+ if (mUnicodeEncoder == nsnull)
+ InitEncoder("");
if (length > 0)
{
+ // Step 1. Convert anything that maps to character entity to
+ // the entity value
EnsureBufferSize(length);
for (PRInt32 i = 0; i < length; i++)
{
@@ -454,29 +523,31 @@ void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc)
entity = UnicodeToEntity(ch);
if (entity)
{
- PRUint32 size = strlen(entity);
- addedLength += size;
- EnsureBufferSize(length+addedLength+1);
- mBuffer[offset++] = '&';
- mBuffer[offset] = 0;
- strcat(mBuffer,entity);
-
- PRUint32 temp = offset + size;
- while (offset < temp)
- {
- mBuffer[offset] = tolower(mBuffer[offset]);
- offset++;
- }
- mBuffer[offset++] = ';';
- mBuffer[offset] = 0;
+ nsAutoString temp(entity);
+
+ temp.ToLowerCase();
+ data.Append('&');
+ data.Append(temp);
+ data.Append(';');
}
- else if (ch < 128)
+ else
{
- mBuffer[offset++] = (unsigned char)ch;
- mBuffer[offset] = 0;
+ data.Append(ch);
}
}
- }
+
+ // Step 2. Run the result through the converter
+ length = data.Length();
+ EnsureBufferSize(length);
+ PRInt32 bufferLength = mBufferSize;
+
+ mUnicodeEncoder->Reset();
+ nsresult result = mUnicodeEncoder->Convert(data, &length, mBuffer, &bufferLength);
+ mBuffer[bufferLength] = 0;
+ PRInt32 temp = bufferLength;
+ if (NS_SUCCEEDED(result))
+ result = mUnicodeEncoder->Finish(mBuffer,&temp);
+ }
}
@@ -487,6 +558,7 @@ void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc)
* @return
*/
nsHTMLContentSinkStream::~nsHTMLContentSinkStream() {
+ // Drop this sink's reference to the charset encoder, if one was ever set.
+ NS_IF_RELEASE(mUnicodeEncoder);
+ // NOTE(review): mBuffer is not freed here, whereas ~nsHTMLToTXTSinkStream
+ // does "delete [] mBuffer" -- confirm it is released elsewhere.
mOutput=0; //we don't own the stream we're given; just forget it.
}
@@ -526,8 +598,8 @@ void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode,ostream
key.ToUpperCase();
-
- key.ToCString(mBuffer,sizeof(gBuffer)-1);
+ EnsureBufferSize(key.Length());
+ key.ToCString(mBuffer,mBufferSize);
aStream << " " << mBuffer << char(kEqual);
mColPos += 1 + strlen(mBuffer) + 1;
@@ -993,7 +1065,14 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){
AddStartTag(aNode,aStream);
mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown;
}
- if (type == eHTMLTag_text)
+ else if (type == eHTMLTag_entity)
+ {
+ const nsString& entity = aNode.GetText();
+ UnicodeToHTMLString(entity);
+ aStream << '&' << mBuffer << ';';
+ mColPos += entity.Length() + 2;
+ }
+ else if (type == eHTMLTag_text)
{
const nsString& text = aNode.GetText();
if ((mDoFormat == PR_FALSE) || preformatted == PR_TRUE)
@@ -1136,9 +1215,25 @@ nsHTMLContentSinkStream::AddComment(const nsIParserNode& aNode){
*/
NS_IMETHODIMP
nsHTMLContentSinkStream::OpenContainer(const nsIParserNode& aNode){
- if(mOutput) {
- AddStartTag(aNode,*mOutput);
-// eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
+ if(mOutput)
+ {
+ const nsString& name = aNode.GetText();
+ if (name.Equals("XIF_DOC_INFO"))
+ {
+ PRInt32 count=aNode.GetAttributeCount();
+ for(PRInt32 i=0;iGetPreferred(aCharset, charsetName);
+ nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
+
+ if(NS_FAILED(res))
+ {
+ // failed - unknown alias , fallback to ISO-8859-1
+ charsetName = "ISO-8859-1";
+ }
+
+ nsICharsetConverterManager * ccm = nsnull;
+ res = nsServiceManager::GetService(kCharsetConverterManagerCID,
+ kICharsetConverterManagerIID,
+ (nsISupports**)&ccm);
+ if(NS_SUCCEEDED(res) && (nsnull != ccm))
+ {
+ nsIUnicodeEncoder * encoder = nsnull;
+ res = ccm->GetUnicodeEncoder(&charsetName, &encoder);
+ if(NS_SUCCEEDED(res) && (nsnull != encoder))
+ {
+ NS_IF_RELEASE(mUnicodeEncoder);
+ mUnicodeEncoder = encoder;
+ }
+ nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
+ }
+ }
+ return res;
+}
+
@@ -117,6 +172,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream() {
mDoOutput = PR_FALSE;
mBufferSize = 0;
mBuffer = nsnull;
+ mUnicodeEncoder = nsnull;
}
/**
@@ -133,6 +189,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) {
mDoOutput = PR_FALSE;
mBufferSize = 0;
mBuffer = nsnull;
+ mUnicodeEncoder = nsnull;
}
@@ -145,6 +202,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) {
nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() {
mOutput=0; //we don't own the stream we're given; just forget it.
delete [] mBuffer;
+ // Drop this sink's reference to the charset encoder, if one was ever set
+ // (the constructors initialize the member to nsnull).
+ NS_IF_RELEASE(mUnicodeEncoder);
}
@@ -409,42 +467,40 @@ void nsHTMLToTXTSinkStream::EnsureBufferSize(PRInt32 aNewSize)
}
+
+/**
+ * Encodes aSrc into mBuffer with mUnicodeEncoder (creating the encoder on
+ * first use), zero-terminates the result and then replaces every byte equal
+ * to CH_NBSP with a plain space. Nothing is written when aSrc is empty.
+ *
+ * @param aSrc  the unicode text to convert
+ */
void nsHTMLToTXTSinkStream::UnicodeToTXTString(const nsString& aSrc)
{
+
+
#define CH_NBSP 160
-#define CH_QUOT 34
-#define CH_AMP 38
-#define CH_LT 60
-#define CH_GT 62
PRInt32 length = aSrc.Length();
- PRUnichar ch;
- const char* entity = nsnull;
- PRUint32 offset = 0;
- PRUint32 addedLength = 0;
+ nsresult result;
+ PRInt32 bufferLength;
+
+ // Lazily create the encoder; an empty charset name is passed.
+ // NOTE(review): the result of InitEncoder is ignored and mUnicodeEncoder is
+ // dereferenced below even if it is still null -- confirm it cannot fail.
+ if (mUnicodeEncoder == nsnull)
+ InitEncoder("");
if (length > 0)
{
EnsureBufferSize(length);
- for (PRInt32 i = 0; i < length; i++)
- {
- ch = aSrc.CharAt(i);
- switch (ch)
- {
- case CH_QUOT: ch = '"'; break;
- case CH_AMP: ch = '&'; break;
- case CH_GT: ch = '>'; break;
- case CH_LT: ch = '<'; break;
- case CH_NBSP: ch = ' '; break;
- }
+ // Offer the whole buffer to the encoder; bufferLength is passed by address
+ // and is used afterwards as the number of bytes produced.
+ bufferLength = mBufferSize;
+
+ mUnicodeEncoder->Reset();
+ result = mUnicodeEncoder->Convert(aSrc, &length, mBuffer, &bufferLength);
+ // NOTE(review): if Convert fills all mBufferSize bytes this index equals
+ // mBufferSize -- confirm EnsureBufferSize allocates a spare byte.
+ mBuffer[bufferLength] = 0;
+ PRInt32 temp = bufferLength;
+ // NOTE(review): Finish is handed the start of mBuffer rather than the end
+ // of the converted data -- confirm it never emits bytes for the charsets
+ // in use, otherwise it would overwrite the output.
+ if (NS_SUCCEEDED(result))
+ result = mUnicodeEncoder->Finish(mBuffer,&temp);
- if (ch < 128)
- {
- mBuffer[offset++] = (unsigned char)ch;
- mBuffer[offset] = 0;
- }
+
+ // Turn non-breaking spaces into ordinary spaces. This runs on the encoded
+ // bytes; NOTE(review): presumably only safe for single-byte charsets where
+ // 160 is NBSP -- verify for multi-byte encodings.
+ for (PRInt32 i = 0; i < bufferLength; i++)
+ {
+ if (mBuffer[i] == char(CH_NBSP))
+ mBuffer[i] = ' ';
}
}
+
}
@@ -483,6 +539,18 @@ nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream)
mStrBuffer.Append(mBuffer);
mColPos += text.Length();
}
+ else if (type == eHTMLTag_entity)
+ {
+ const nsString& text = aNode.GetText();
+ UnicodeToTXTString(text);
+ PRInt32 entity = NS_EntityToUnicode(mBuffer);
+ if (entity < 256)
+ {
+ char ch = (char)entity;
+ aStream << ch;
+ mColPos++;
+ }
+ }
else if (type == eHTMLTag_whitespace)
{
if (PR_TRUE)
@@ -551,6 +619,18 @@ NS_IMETHODIMP
nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
const nsString& name = aNode.GetText();
+ if (name.Equals("XIF_DOC_INFO"))
+ {
+ PRInt32 count=aNode.GetAttributeCount();
+ for(PRInt32 i=0;i");
- if (offset != -1)
- aBuffer.Cut(0,offset);
aContentType= kXIFTextContentType;
result=ePrimaryDetect;
}
}
+
+ nsString charset ="ISO-8859-1";
+ PRInt32 offset;
+ offset = aBuffer.Find(kXIFDocInfo);
+ if(kNotFound!=offset)
+ {
+ offset = aBuffer.Find(kXIFCharset);
+ if (kNotFound!=offset)
+ {
+ PRInt32 start = aBuffer.Find('"',offset);
+ PRInt32 end = aBuffer.Find('"',start+1);
+
+ if ((start != kNotFound) && (end != kNotFound))
+ {
+ charset = "";
+ for (PRInt32 i = start+1; i < end; i++)
+ {
+ PRUnichar ch = aBuffer[i];
+ charset.Append(ch);
+ }
+ }
+ }
+ }
+ mCharset = charset;
+
return result;
}
@@ -638,6 +667,11 @@ nsresult nsXIFDTD::HandleStartToken(CToken* aToken) {
result = OpenContainer(node);
break;
+ case eXIFTag_entity:
+ StartTopOfStack();
+ ProcessEntityTag(node);
+ break;
+
case eXIFTag_content:
StartTopOfStack();
mInContent = PR_TRUE;
@@ -647,6 +681,10 @@ nsresult nsXIFDTD::HandleStartToken(CToken* aToken) {
ProcessEncodeTag(node);
break;
+ case eXIFTag_document_info:
+ ProcessDocumentInfoTag(node);
+ break;
+
case eXIFTag_attr:
AddAttribute(node);
@@ -1355,8 +1393,8 @@ void nsXIFDTD::BeginStartTag(const nsIParserNode& aNode)
if (type == eXIFTag_container)
PushHTMLTag(tag,tagName);
- CToken* token = new CStartToken(tagName);
- nsCParserNode* node = new nsCParserNode(token);
+// CToken* token = new CStartToken(tagName);
+// nsCParserNode* node = new nsCParserNode(token);
PushNodeAndToken(tagName);
break;
}
@@ -1629,6 +1667,38 @@ void nsXIFDTD::ProcessEncodeTag(const nsIParserNode& aNode)
}
+/**
+ * Handles an XIF entity tag: reads its "value" attribute, wraps it in a
+ * CEntityToken and hands a parser node built around that token to the
+ * sink as a leaf. Does nothing when the attribute is missing.
+ *
+ * NOTE(review): nothing in this method deletes the token -- confirm who
+ * owns it after AddLeaf returns.
+ *
+ * @param aNode  the XIF node carrying the entity's "value" attribute
+ */
+void nsXIFDTD::ProcessEntityTag(const nsIParserNode& aNode)
+{
+  nsString entityValue;
+
+  // no "value" attribute, nothing to emit
+  if (!GetAttribute(aNode,nsString("value"),entityValue))
+    return;
+
+  CEntityToken* entityToken = new CEntityToken(entityValue);
+  nsCParserNode leaf((CToken*)entityToken);
+  mSink->AddLeaf(leaf);
+}
+
+
+/**
+ * Handles the XIF document-info tag: when it carries a "charset" attribute,
+ * pushes a node named "XIF_DOC_INFO" and attaches the charset to it as an
+ * attribute. The stream sinks look for a container with that name in their
+ * OpenContainer methods. Does nothing when the attribute is missing.
+ *
+ * @param aNode  the XIF node carrying the "charset" attribute
+ */
+void nsXIFDTD::ProcessDocumentInfoTag(const nsIParserNode& aNode)
+{
+  nsString value;
+  nsString key("charset");
+
+  if (GetAttribute(aNode,key,value))
+  {
+    PushNodeAndToken(nsString("XIF_DOC_INFO"));
+    nsIParserNode* top = PeekNode();
+    if (top != nsnull)
+    {
+      // Allocate the token only once there is a node to take it; creating it
+      // before the null check leaked it whenever PeekNode() returned null.
+      CAttributeToken* attribute = new CAttributeToken(key,value);
+      ((nsCParserNode*)top)->AddAttribute(attribute);
+    }
+  }
+}
+
+
+
+
/*** CSS Methods ****/
void nsXIFDTD::BeginCSSStyleSheet(const nsIParserNode& aNode)
diff --git a/mozilla/parser/htmlparser/src/nsXIFDTD.h b/mozilla/parser/htmlparser/src/nsXIFDTD.h
index d10224cbf3f..ce76bdf1a03 100644
--- a/mozilla/parser/htmlparser/src/nsXIFDTD.h
+++ b/mozilla/parser/htmlparser/src/nsXIFDTD.h
@@ -71,7 +71,9 @@ enum eXIFTags
eXIFTag_css_stylesheet,
eXIFTag_doctype,
- eXIFTag_encode,
+ eXIFTag_document_info,
+ eXIFTag_encode,
+ eXIFTag_entity,
eXIFTag_import,
eXIFTag_leaf,
eXIFTag_link,
@@ -490,6 +492,8 @@ private:
private:
void ProcessEncodeTag(const nsIParserNode& aNode);
+ void ProcessEntityTag(const nsIParserNode& aNode);
+ void ProcessDocumentInfoTag(const nsIParserNode& aNode);
void BeginCSSStyleSheet(const nsIParserNode& aNode);
void EndCSSStyleSheet(const nsIParserNode& aNode);
@@ -556,6 +560,7 @@ protected:
PRBool mLowerCaseTags;
PRBool mLowerCaseAttributes;
nsITokenizer* mTokenizer;
+ nsString mCharset;
};