From 2dd996accfdcbb79c5553f8ac3447b445298b962 Mon Sep 17 00:00:00 2001 From: "nhotta%netscape.com" Date: Tue, 13 Feb 2001 01:00:14 +0000 Subject: [PATCH] Changed to control entity (CER) generation by a document charset, bug 65324, r=jst, sr=vidur. git-svn-id: svn://10.0.0.236/trunk@86864 18797224-902f-48f8-a5cc-f745e15eee43 --- .../base/public/nsIContentSerializer.h | 4 +- .../content/base/src/nsDocumentEncoder.cpp | 41 ++++++++++++++----- .../base/src/nsHTMLContentSerializer.cpp | 15 ++++++- .../base/src/nsHTMLContentSerializer.h | 5 ++- .../base/src/nsPlainTextSerializer.cpp | 5 ++- .../content/base/src/nsPlainTextSerializer.h | 3 +- .../base/src/nsXMLContentSerializer.cpp | 3 +- .../content/base/src/nsXMLContentSerializer.h | 3 +- .../layout/base/public/nsIContentSerializer.h | 4 +- mozilla/layout/base/src/nsDocumentEncoder.cpp | 41 ++++++++++++++----- .../base/src/nsHTMLContentSerializer.cpp | 15 ++++++- .../layout/base/src/nsHTMLContentSerializer.h | 5 ++- .../layout/base/src/nsPlainTextSerializer.cpp | 5 ++- .../layout/base/src/nsPlainTextSerializer.h | 3 +- .../base/src/nsXMLContentSerializer.cpp | 3 +- .../layout/base/src/nsXMLContentSerializer.h | 3 +- 16 files changed, 120 insertions(+), 38 deletions(-) diff --git a/mozilla/content/base/public/nsIContentSerializer.h b/mozilla/content/base/public/nsIContentSerializer.h index 7e3a968b164..df4db0a033e 100644 --- a/mozilla/content/base/public/nsIContentSerializer.h +++ b/mozilla/content/base/public/nsIContentSerializer.h @@ -25,6 +25,7 @@ #include "nsISupports.h" #include "nsAWritableString.h" +#include "nsIAtom.h" class nsIDOMText; /* forward declaration */ class nsIDOMCDATASection; /* forward declaration */ @@ -45,7 +46,8 @@ class nsIContentSerializer : public nsISupports { NS_DEFINE_STATIC_IID_ACCESSOR(NS_ICONTENTSERIALIZER_IID) - NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn) = 0; + NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn, + nsIAtom* aCharSet) = 0; NS_IMETHOD AppendText(nsIDOMText* aText, PRInt32 aStartOffset, PRInt32 aEndOffset, nsAWritableString& aStr) = 0; diff --git a/mozilla/content/base/src/nsDocumentEncoder.cpp b/mozilla/content/base/src/nsDocumentEncoder.cpp index f2162e354e0..d985b0ee2aa 100644 --- a/mozilla/content/base/src/nsDocumentEncoder.cpp +++ b/mozilla/content/base/src/nsDocumentEncoder.cpp @@ -45,6 +45,7 @@ #include "nsIDOMRange.h" #include "nsRange.h" #include "nsICharsetConverterManager.h" +#include "nsICharsetConverterManager2.h" #include "nsHTMLAtoms.h" #include "nsITextContent.h" #include "nsIEnumerator.h" @@ -130,6 +131,7 @@ protected: nsCOMPtr mUnicodeEncoder; nsCOMPtr mCommonParent; nsCOMPtr mNodeFixup; + nsCOMPtr mCharsetConverterManager; nsString mMimeType; nsString mCharset; @@ -429,6 +431,15 @@ ConvertAndWrite(nsAReadableString& aString, // If the converter couldn't convert a chraacer we replace the // character with a characre entity. if (convert_rv == NS_ERROR_UENC_NOMAPPING) { + // Finishes the conversion. + // The converter has the possibility to write some extra data and flush its final state. + char finish_buf[32]; + charLength = 32; + rv = aEncoder->Finish(finish_buf, &charLength); + NS_ENSURE_SUCCESS(rv, rv); + rv = aStream->Write(finish_buf, charLength, &written); + NS_ENSURE_SUCCESS(rv, rv); + nsCAutoString entString("&#"); entString.AppendInt(unicodeBuf[unicodeLength - 1]); entString.Append(';'); @@ -853,10 +864,19 @@ nsDocumentEncoder::EncodeToString(nsAWritableString& aOutputString) mSerializer = do_CreateInstance(NS_STATIC_CAST(const char *, progId)); NS_ENSURE_TRUE(mSerializer, NS_ERROR_NOT_IMPLEMENTED); - mSerializer->Init(mFlags, mWrapColumn); - nsresult rv = NS_OK; + nsCOMPtr charsetAtom; + if (!mCharset.IsEmpty()) { + if (!mCharsetConverterManager) { + mCharsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); + NS_ENSURE_SUCCESS(rv, rv); + } + rv = mCharsetConverterManager->GetCharsetAtom(mCharset.GetUnicode(), getter_AddRefs(charsetAtom)); + NS_ENSURE_SUCCESS(rv, rv); + } + mSerializer->Init(mFlags, mWrapColumn, charsetAtom); + if (mSelection) { nsCOMPtr range; PRInt32 i, count = 0; @@ -896,16 +916,17 @@ nsDocumentEncoder::EncodeToStream(nsIOutputStream* aStream) if (!mDocument) return NS_ERROR_NOT_INITIALIZED; - NS_WITH_SERVICE(nsICharsetConverterManager, - charsetConv, - kCharsetConverterManagerCID, - &rv); + if (!mCharsetConverterManager) { + mCharsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); + NS_ENSURE_SUCCESS(rv, rv); + } + + nsCOMPtr charsetAtom; + rv = mCharsetConverterManager->GetCharsetAtom(mCharset.GetUnicode(), getter_AddRefs(charsetAtom)); NS_ENSURE_SUCCESS(rv, rv); - nsAutoString charsetStr; - charsetStr.Assign(mCharset); - rv = charsetConv->GetUnicodeEncoder(&charsetStr, - getter_AddRefs(mUnicodeEncoder)); + rv = mCharsetConverterManager->GetUnicodeEncoder(charsetAtom, + getter_AddRefs(mUnicodeEncoder)); NS_ENSURE_SUCCESS(rv, rv); // xxx Also make sure mString is a mime type "text/html" or "text/plain" diff --git a/mozilla/content/base/src/nsHTMLContentSerializer.cpp b/mozilla/content/base/src/nsHTMLContentSerializer.cpp index 523bd2e8bac..d8dc13b904c 100644 --- a/mozilla/content/base/src/nsHTMLContentSerializer.cpp +++ b/mozilla/content/base/src/nsHTMLContentSerializer.cpp @@ -83,7 +83,8 @@ nsHTMLContentSerializer::GetParserService(nsIParserService** aParserService) } NS_IMETHODIMP -nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn) +nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn, + nsIAtom* aCharSet) { mFlags = aFlags; if (!aWrapColumn) { @@ -114,6 +115,16 @@ nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn) mPreLevel = 0; + mIsLatin1 = PR_FALSE; + if (aCharSet) { + const PRUnichar *charset; + aCharSet->GetUnicode(&charset); + + if (NS_LITERAL_STRING("ISO-8859-1").Equals(charset)) { + mIsLatin1 = PR_TRUE; + } + } + return NS_OK; } @@ -526,7 +537,7 @@ nsHTMLContentSerializer::AppendToString(const nsAReadableString& aStr, if ((val <= kGTVal) && (entityTable[val][0] != 0)) { entityText = entityTable[val]; break; - } else if (val > 127) { + } else if (mIsLatin1 && val > 127 && val < 256) { parserService->HTMLConvertUnicodeToEntity(val, entityReplacement); if (entityReplacement.Length() > 0) { diff --git a/mozilla/content/base/src/nsHTMLContentSerializer.h b/mozilla/content/base/src/nsHTMLContentSerializer.h index 4f9d553f88a..c1e6e054596 100644 --- a/mozilla/content/base/src/nsHTMLContentSerializer.h +++ b/mozilla/content/base/src/nsHTMLContentSerializer.h @@ -35,7 +35,8 @@ class nsHTMLContentSerializer : public nsXMLContentSerializer { nsHTMLContentSerializer(); virtual ~nsHTMLContentSerializer(); - NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn); + NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn, + nsIAtom* aCharSet); NS_IMETHOD AppendText(nsIDOMText* aText, PRInt32 aStartOffset, @@ -104,6 +105,8 @@ class nsHTMLContentSerializer : public nsXMLContentSerializer { PRInt32 mMaxColumn; nsString mLineBreak; + + PRBool mIsLatin1; }; extern nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer); diff --git a/mozilla/content/base/src/nsPlainTextSerializer.cpp b/mozilla/content/base/src/nsPlainTextSerializer.cpp index 80190494405..bf349798f82 100644 --- a/mozilla/content/base/src/nsPlainTextSerializer.cpp +++ b/mozilla/content/base/src/nsPlainTextSerializer.cpp @@ -126,7 +126,8 @@ NS_IMPL_ISUPPORTS4(nsPlainTextSerializer, NS_IMETHODIMP -nsPlainTextSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn) +nsPlainTextSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn, + nsIAtom* aCharSet) { #ifdef DEBUG // Check if the major control flags are set correctly. @@ -185,7 +186,7 @@ NS_IMETHODIMP nsPlainTextSerializer::Initialize(nsAWritableString* aOutString, PRUint32 aFlags, PRUint32 aWrapCol) { - nsresult rv = Init(aFlags, aWrapCol); + nsresult rv = Init(aFlags, aWrapCol, nsnull); NS_ENSURE_SUCCESS(rv, rv); // XXX This is wrong. It violates XPCOM string ownership rules. diff --git a/mozilla/content/base/src/nsPlainTextSerializer.h b/mozilla/content/base/src/nsPlainTextSerializer.h index 8608f897f51..a627d294ce7 100644 --- a/mozilla/content/base/src/nsPlainTextSerializer.h +++ b/mozilla/content/base/src/nsPlainTextSerializer.h @@ -47,7 +47,8 @@ public: NS_DECL_ISUPPORTS // nsIContentSerializer - NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn); + NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn, + nsIAtom* aCharSet); NS_IMETHOD AppendText(nsIDOMText* aText, PRInt32 aStartOffset, PRInt32 aEndOffset, nsAWritableString& aStr); diff --git a/mozilla/content/base/src/nsXMLContentSerializer.cpp b/mozilla/content/base/src/nsXMLContentSerializer.cpp index 10895ac8fdb..6108183420e 100644 --- a/mozilla/content/base/src/nsXMLContentSerializer.cpp +++ b/mozilla/content/base/src/nsXMLContentSerializer.cpp @@ -66,7 +66,8 @@ nsXMLContentSerializer::~nsXMLContentSerializer() NS_IMPL_ISUPPORTS1(nsXMLContentSerializer, nsIContentSerializer) NS_IMETHODIMP -nsXMLContentSerializer::Init(PRUint32 flags, PRUint32 aWrapColumn) +nsXMLContentSerializer::Init(PRUint32 flags, PRUint32 aWrapColumn, + nsIAtom* aCharSet) { return NS_OK; } diff --git a/mozilla/content/base/src/nsXMLContentSerializer.h b/mozilla/content/base/src/nsXMLContentSerializer.h index 5406169196d..2f28058da4b 100644 --- a/mozilla/content/base/src/nsXMLContentSerializer.h +++ b/mozilla/content/base/src/nsXMLContentSerializer.h @@ -37,7 +37,8 @@ class nsXMLContentSerializer : public nsIContentSerializer { NS_DECL_ISUPPORTS - NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn); + NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn, + nsIAtom* aCharSet); NS_IMETHOD AppendText(nsIDOMText* aText, PRInt32 aStartOffset, PRInt32 aEndOffset, nsAWritableString& aStr); diff --git a/mozilla/layout/base/public/nsIContentSerializer.h b/mozilla/layout/base/public/nsIContentSerializer.h index 7e3a968b164..df4db0a033e 100644 --- a/mozilla/layout/base/public/nsIContentSerializer.h +++ b/mozilla/layout/base/public/nsIContentSerializer.h @@ -25,6 +25,7 @@ #include "nsISupports.h" #include "nsAWritableString.h" +#include "nsIAtom.h" class nsIDOMText; /* forward declaration */ class nsIDOMCDATASection; /* forward declaration */ @@ -45,7 +46,8 @@ class nsIContentSerializer : public nsISupports { NS_DEFINE_STATIC_IID_ACCESSOR(NS_ICONTENTSERIALIZER_IID) - NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn) = 0; + NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn, + nsIAtom* aCharSet) = 0; NS_IMETHOD AppendText(nsIDOMText* aText, PRInt32 aStartOffset, PRInt32 aEndOffset, nsAWritableString& aStr) = 0; diff --git a/mozilla/layout/base/src/nsDocumentEncoder.cpp b/mozilla/layout/base/src/nsDocumentEncoder.cpp index f2162e354e0..d985b0ee2aa 100644 --- a/mozilla/layout/base/src/nsDocumentEncoder.cpp +++ b/mozilla/layout/base/src/nsDocumentEncoder.cpp @@ -45,6 +45,7 @@ #include "nsIDOMRange.h" #include "nsRange.h" #include "nsICharsetConverterManager.h" +#include "nsICharsetConverterManager2.h" #include "nsHTMLAtoms.h" #include "nsITextContent.h" #include "nsIEnumerator.h" @@ -130,6 +131,7 @@ protected: nsCOMPtr mUnicodeEncoder; nsCOMPtr mCommonParent; nsCOMPtr mNodeFixup; + nsCOMPtr mCharsetConverterManager; nsString mMimeType; nsString mCharset; @@ -429,6 +431,15 @@ ConvertAndWrite(nsAReadableString& aString, // If the converter couldn't convert a chraacer we replace the // character with a characre entity. if (convert_rv == NS_ERROR_UENC_NOMAPPING) { + // Finishes the conversion. + // The converter has the possibility to write some extra data and flush its final state. + char finish_buf[32]; + charLength = 32; + rv = aEncoder->Finish(finish_buf, &charLength); + NS_ENSURE_SUCCESS(rv, rv); + rv = aStream->Write(finish_buf, charLength, &written); + NS_ENSURE_SUCCESS(rv, rv); + nsCAutoString entString("&#"); entString.AppendInt(unicodeBuf[unicodeLength - 1]); entString.Append(';'); @@ -853,10 +864,19 @@ nsDocumentEncoder::EncodeToString(nsAWritableString& aOutputString) mSerializer = do_CreateInstance(NS_STATIC_CAST(const char *, progId)); NS_ENSURE_TRUE(mSerializer, NS_ERROR_NOT_IMPLEMENTED); - mSerializer->Init(mFlags, mWrapColumn); - nsresult rv = NS_OK; + nsCOMPtr charsetAtom; + if (!mCharset.IsEmpty()) { + if (!mCharsetConverterManager) { + mCharsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); + NS_ENSURE_SUCCESS(rv, rv); + } + rv = mCharsetConverterManager->GetCharsetAtom(mCharset.GetUnicode(), getter_AddRefs(charsetAtom)); + NS_ENSURE_SUCCESS(rv, rv); + } + mSerializer->Init(mFlags, mWrapColumn, charsetAtom); + if (mSelection) { nsCOMPtr range; PRInt32 i, count = 0; @@ -896,16 +916,17 @@ nsDocumentEncoder::EncodeToStream(nsIOutputStream* aStream) if (!mDocument) return NS_ERROR_NOT_INITIALIZED; - NS_WITH_SERVICE(nsICharsetConverterManager, - charsetConv, - kCharsetConverterManagerCID, - &rv); + if (!mCharsetConverterManager) { + mCharsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); + NS_ENSURE_SUCCESS(rv, rv); + } + + nsCOMPtr charsetAtom; + rv = mCharsetConverterManager->GetCharsetAtom(mCharset.GetUnicode(), getter_AddRefs(charsetAtom)); NS_ENSURE_SUCCESS(rv, rv); - nsAutoString charsetStr; - charsetStr.Assign(mCharset); - rv = charsetConv->GetUnicodeEncoder(&charsetStr, - getter_AddRefs(mUnicodeEncoder)); + rv = mCharsetConverterManager->GetUnicodeEncoder(charsetAtom, + getter_AddRefs(mUnicodeEncoder)); NS_ENSURE_SUCCESS(rv, rv); // xxx Also make sure mString is a mime type "text/html" or "text/plain" diff --git a/mozilla/layout/base/src/nsHTMLContentSerializer.cpp b/mozilla/layout/base/src/nsHTMLContentSerializer.cpp index 523bd2e8bac..d8dc13b904c 100644 --- a/mozilla/layout/base/src/nsHTMLContentSerializer.cpp +++ b/mozilla/layout/base/src/nsHTMLContentSerializer.cpp @@ -83,7 +83,8 @@ nsHTMLContentSerializer::GetParserService(nsIParserService** aParserService) } NS_IMETHODIMP -nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn) +nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn, + nsIAtom* aCharSet) { mFlags = aFlags; if (!aWrapColumn) { @@ -114,6 +115,16 @@ nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn) mPreLevel = 0; + mIsLatin1 = PR_FALSE; + if (aCharSet) { + const PRUnichar *charset; + aCharSet->GetUnicode(&charset); + + if (NS_LITERAL_STRING("ISO-8859-1").Equals(charset)) { + mIsLatin1 = PR_TRUE; + } + } + return NS_OK; } @@ -526,7 +537,7 @@ nsHTMLContentSerializer::AppendToString(const nsAReadableString& aStr, if ((val <= kGTVal) && (entityTable[val][0] != 0)) { entityText = entityTable[val]; break; - } else if (val > 127) { + } else if (mIsLatin1 && val > 127 && val < 256) { parserService->HTMLConvertUnicodeToEntity(val, entityReplacement); if (entityReplacement.Length() > 0) { diff --git a/mozilla/layout/base/src/nsHTMLContentSerializer.h b/mozilla/layout/base/src/nsHTMLContentSerializer.h index 4f9d553f88a..c1e6e054596 100644 --- a/mozilla/layout/base/src/nsHTMLContentSerializer.h +++ b/mozilla/layout/base/src/nsHTMLContentSerializer.h @@ -35,7 +35,8 @@ class nsHTMLContentSerializer : public nsXMLContentSerializer { nsHTMLContentSerializer(); virtual ~nsHTMLContentSerializer(); - NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn); + NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn, + nsIAtom* aCharSet); NS_IMETHOD AppendText(nsIDOMText* aText, PRInt32 aStartOffset, @@ -104,6 +105,8 @@ class nsHTMLContentSerializer : public nsXMLContentSerializer { PRInt32 mMaxColumn; nsString mLineBreak; + + PRBool mIsLatin1; }; extern nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer); diff --git a/mozilla/layout/base/src/nsPlainTextSerializer.cpp b/mozilla/layout/base/src/nsPlainTextSerializer.cpp index 80190494405..bf349798f82 100644 --- a/mozilla/layout/base/src/nsPlainTextSerializer.cpp +++ b/mozilla/layout/base/src/nsPlainTextSerializer.cpp @@ -126,7 +126,8 @@ NS_IMPL_ISUPPORTS4(nsPlainTextSerializer, NS_IMETHODIMP -nsPlainTextSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn) +nsPlainTextSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn, + nsIAtom* aCharSet) { #ifdef DEBUG // Check if the major control flags are set correctly. @@ -185,7 +186,7 @@ NS_IMETHODIMP nsPlainTextSerializer::Initialize(nsAWritableString* aOutString, PRUint32 aFlags, PRUint32 aWrapCol) { - nsresult rv = Init(aFlags, aWrapCol); + nsresult rv = Init(aFlags, aWrapCol, nsnull); NS_ENSURE_SUCCESS(rv, rv); // XXX This is wrong. It violates XPCOM string ownership rules. diff --git a/mozilla/layout/base/src/nsPlainTextSerializer.h b/mozilla/layout/base/src/nsPlainTextSerializer.h index 8608f897f51..a627d294ce7 100644 --- a/mozilla/layout/base/src/nsPlainTextSerializer.h +++ b/mozilla/layout/base/src/nsPlainTextSerializer.h @@ -47,7 +47,8 @@ public: NS_DECL_ISUPPORTS // nsIContentSerializer - NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn); + NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn, + nsIAtom* aCharSet); NS_IMETHOD AppendText(nsIDOMText* aText, PRInt32 aStartOffset, PRInt32 aEndOffset, nsAWritableString& aStr); diff --git a/mozilla/layout/base/src/nsXMLContentSerializer.cpp b/mozilla/layout/base/src/nsXMLContentSerializer.cpp index 10895ac8fdb..6108183420e 100644 --- a/mozilla/layout/base/src/nsXMLContentSerializer.cpp +++ b/mozilla/layout/base/src/nsXMLContentSerializer.cpp @@ -66,7 +66,8 @@ nsXMLContentSerializer::~nsXMLContentSerializer() NS_IMPL_ISUPPORTS1(nsXMLContentSerializer, nsIContentSerializer) NS_IMETHODIMP -nsXMLContentSerializer::Init(PRUint32 flags, PRUint32 aWrapColumn) +nsXMLContentSerializer::Init(PRUint32 flags, PRUint32 aWrapColumn, + nsIAtom* aCharSet) { return NS_OK; } diff --git a/mozilla/layout/base/src/nsXMLContentSerializer.h b/mozilla/layout/base/src/nsXMLContentSerializer.h index 5406169196d..2f28058da4b 100644 --- a/mozilla/layout/base/src/nsXMLContentSerializer.h +++ b/mozilla/layout/base/src/nsXMLContentSerializer.h @@ -37,7 +37,8 @@ class nsXMLContentSerializer : public nsIContentSerializer { NS_DECL_ISUPPORTS - NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn); + NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn, + nsIAtom* aCharSet); NS_IMETHOD AppendText(nsIDOMText* aText, PRInt32 aStartOffset, PRInt32 aEndOffset, nsAWritableString& aStr);