From 76ce81179416c6573ef4cec70b6cf7261554525f Mon Sep 17 00:00:00 2001 From: "rbs%maths.uq.edu.au" Date: Fri, 13 Dec 2002 22:12:10 +0000 Subject: [PATCH] Refine the conversion of entities during serialization, b=169590, r=nhotta, sr=jst git-svn-id: svn://10.0.0.236/trunk@135264 18797224-902f-48f8-a5cc-f745e15eee43 --- .../content/base/public/nsIDocumentEncoder.h | 22 ++++++++++++--- .../base/src/nsHTMLContentSerializer.cpp | 18 +++++------- .../base/src/nsHTMLContentSerializer.h | 1 - .../html/content/src/nsGenericHTMLElement.cpp | 2 +- .../ui/composer/content/ComposerCommands.js | 5 +++- mozilla/editor/ui/composer/content/editor.js | 4 ++- .../ui/dialogs/content/EdConvertToTable.js | 1 - .../public/nsIWebBrowserPersist.idl | 28 +++++++++++++++++-- .../src/nsWebBrowserPersist.cpp | 10 +++++-- .../xmlextras/base/src/nsDOMSerializer.cpp | 2 +- mozilla/layout/generic/nsObjectFrame.cpp | 2 +- .../layout/html/base/src/nsObjectFrame.cpp | 2 +- 12 files changed, 70 insertions(+), 27 deletions(-) diff --git a/mozilla/content/base/public/nsIDocumentEncoder.h b/mozilla/content/base/public/nsIDocumentEncoder.h index bb4e60ace5f..b9a6329bfa6 100644 --- a/mozilla/content/base/public/nsIDocumentEncoder.h +++ b/mozilla/content/base/public/nsIDocumentEncoder.h @@ -140,9 +140,11 @@ public: // Convert links, image src, and script src to absolute URLs when possible OutputAbsoluteLinks = 128, - // Encode entities when outputting to a string. - // E.g. If set, we'll output   if clear, we'll output 0xa0. - OutputEncodeEntities = 256, + // Attempt to encode entities standardized at W3C (HTML, MathML, etc). + // This is a catch-all flag for documents with mixed contents. Beware of + // interoperability issues. See below for other flags which might likely + // do what you want. + OutputEncodeW3CEntities = 256, // LineBreak processing: we can do either platform line breaks, // CR, LF, or CRLF. 
If neither of these flags is set, then we @@ -160,7 +162,19 @@ public: // Don't allow any formatting nodes (e.g. <br>, <b>) inside a <pre>.
     // This is used primarily by mail.
-    OutputNoFormattingInPre = 8192
+    OutputNoFormattingInPre = 8192,
+
+    // Encode entities when outputting to a string.
+    // E.g. If set, we'll output &nbsp; if clear, we'll output 0xa0.
+    // The basic set is just &nbsp; &amp; &lt; &gt; &quot; for interoperability
+    // with older products that don't support &alpha; and friends.
+    // The Latin1 entity set additionally includes 8bit accented letters
+    // between 128 and 255.
+    // The HTML entity set additionally includes accented letters, greek
+    // letters, and other special markup symbols as defined in HTML4.
+    OutputEncodeBasicEntities = 16384,
+    OutputEncodeLatin1Entities = 32768,
+    OutputEncodeHTMLEntities = 65536
   };
 
   /**
diff --git a/mozilla/content/base/src/nsHTMLContentSerializer.cpp b/mozilla/content/base/src/nsHTMLContentSerializer.cpp
index 128831b8268..7fa0d84a42b 100644
--- a/mozilla/content/base/src/nsHTMLContentSerializer.cpp
+++ b/mozilla/content/base/src/nsHTMLContentSerializer.cpp
@@ -135,15 +135,6 @@ nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
   mPreLevel = 0;
 
   mCharSet = aCharSet;
-  mIsLatin1 = PR_FALSE;
-  if (aCharSet) {
-    const PRUnichar *charset;
-    aCharSet->GetUnicode(&charset);
-
-    if (NS_LITERAL_STRING("ISO-8859-1").Equals(charset)) {
-      mIsLatin1 = PR_TRUE;
-    }
-  }
 
   return NS_OK;
 }
@@ -811,7 +802,9 @@ nsHTMLContentSerializer::AppendToString(const nsAString& aStr,
   }
 
   if (aTranslateEntities && !mInCDATA) {
-    if (mFlags & nsIDocumentEncoder::OutputEncodeEntities) {
+    if (mFlags & nsIDocumentEncoder::OutputEncodeBasicEntities ||
+        mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities ||
+        mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities) {
       nsIParserService* parserService =
         nsContentUtils::GetParserServiceWeakRef();
 
@@ -851,7 +844,10 @@ nsHTMLContentSerializer::AppendToString(const nsAString& aStr,
           else if ((val <= kGTVal) && (entityTable[val][0] != 0)) {
             entityText = entityTable[val];
             break;
-          } else if (mIsLatin1 && val > 127 && val < 256) {
+          } else if (val > 127 &&
+                    ((val < 256 &&
+                      mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
+                      mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
             parserService->HTMLConvertUnicodeToEntity(val, entityReplacement);
 
             if (!entityReplacement.IsEmpty()) {
diff --git a/mozilla/content/base/src/nsHTMLContentSerializer.h b/mozilla/content/base/src/nsHTMLContentSerializer.h
index e5089deb7b9..744c18ca3a8 100644
--- a/mozilla/content/base/src/nsHTMLContentSerializer.h
+++ b/mozilla/content/base/src/nsHTMLContentSerializer.h
@@ -135,7 +135,6 @@ class nsHTMLContentSerializer : public nsXMLContentSerializer {
    * what so ever.
    */
   PRPackedBool mInCDATA;
-  PRPackedBool mIsLatin1;
 
   PRInt32   mMaxColumn;
 
diff --git a/mozilla/content/html/content/src/nsGenericHTMLElement.cpp b/mozilla/content/html/content/src/nsGenericHTMLElement.cpp
index 00aef0706a1..1ecc6022c55 100644
--- a/mozilla/content/html/content/src/nsGenericHTMLElement.cpp
+++ b/mozilla/content/html/content/src/nsGenericHTMLElement.cpp
@@ -878,7 +878,7 @@ nsGenericHTMLElement::GetInnerHTML(nsAString& aInnerHTML)
   NS_ENSURE_TRUE(docEncoder, NS_ERROR_FAILURE);
 
   docEncoder->Init(doc, NS_LITERAL_STRING("text/html"),
-                   nsIDocumentEncoder::OutputEncodeEntities);
+                   nsIDocumentEncoder::OutputEncodeBasicEntities);
 
   nsCOMPtr range(new nsRange);
   NS_ENSURE_TRUE(range, NS_ERROR_OUT_OF_MEMORY);
diff --git a/mozilla/editor/ui/composer/content/ComposerCommands.js b/mozilla/editor/ui/composer/content/ComposerCommands.js
index 9068e1fe20c..9298ac01e0f 100644
--- a/mozilla/editor/ui/composer/content/ComposerCommands.js
+++ b/mozilla/editor/ui/composer/content/ComposerCommands.js
@@ -986,7 +986,10 @@ function OutputFileWithPersistAPI(editorDoc, aDestinationLocation, aRelatedFiles
 // returns output flags based on mimetype, wrapCol and prefs
 function GetOutputFlags(aMimeType, aWrapColumn)
 {
-  var outputFlags = webPersist.ENCODE_FLAGS_ENCODE_ENTITIES;
+  var editor = GetCurrentEditor();
+  var outputFlags = (editor && editor.documentCharacterSet == "ISO-8859-1")
+    ? webPersist.ENCODE_FLAGS_ENCODE_LATIN1_ENTITIES
+    : webPersist.ENCODE_FLAGS_ENCODE_BASIC_ENTITIES;
   if (aMimeType == "text/plain")
   {
     // When saving in "text/plain" format, always do formatting
diff --git a/mozilla/editor/ui/composer/content/editor.js b/mozilla/editor/ui/composer/content/editor.js
index 275ea9ff18a..1b408e01bf2 100644
--- a/mozilla/editor/ui/composer/content/editor.js
+++ b/mozilla/editor/ui/composer/content/editor.js
@@ -1575,7 +1575,9 @@ function SetEditMode(mode)
     }
     // Get the entire document's source string
 
-    var flags = 256; // OutputEncodeEntities;
+    var flags = (editor.documentCharacterSet == "ISO-8859-1")
+      ? 32768  // OutputEncodeLatin1Entities
+      : 16384; // OutputEncodeBasicEntities
 
     try { 
       var prettyPrint = gPrefs.getBoolPref("editor.prettyprint");
diff --git a/mozilla/editor/ui/dialogs/content/EdConvertToTable.js b/mozilla/editor/ui/dialogs/content/EdConvertToTable.js
index 2d1d7b1cee0..c45b1a60e88 100644
--- a/mozilla/editor/ui/dialogs/content/EdConvertToTable.js
+++ b/mozilla/editor/ui/dialogs/content/EdConvertToTable.js
@@ -109,7 +109,6 @@ function onAccept()
   var str;
   try {
     // 1 = OutputSelectionOnly, 1024 = OutputLFLineBreak
-    // 256 = OutputEncodeEntities
     str = editor.outputToString("text/html", 1+1024);
   } catch (e) {}
   if (!str)
diff --git a/mozilla/embedding/components/webbrowserpersist/public/nsIWebBrowserPersist.idl b/mozilla/embedding/components/webbrowserpersist/public/nsIWebBrowserPersist.idl
index 7695da0b2b0..71757dd6b95 100644
--- a/mozilla/embedding/components/webbrowserpersist/public/nsIWebBrowserPersist.idl
+++ b/mozilla/embedding/components/webbrowserpersist/public/nsIWebBrowserPersist.idl
@@ -145,8 +145,15 @@ interface nsIWebBrowserPersist : nsISupports
   const unsigned long ENCODE_FLAGS_FORMAT_FLOWED = 64;
   /** Convert links to absolute links where possible. */
   const unsigned long ENCODE_FLAGS_ABSOLUTE_LINKS = 128;
-  /** Encode entities, e.g. output &nbsp; instead of character code 0xa0. */
-  const unsigned long ENCODE_FLAGS_ENCODE_ENTITIES = 256;
+
+  /** 
+   * Attempt to encode entities standardized at W3C (HTML, MathML, etc).
+   * This is a catch-all flag for documents with mixed contents. Beware of
+   * interoperability issues. See below for other flags which might likely
+   * do what you want.
+   */
+  const unsigned long ENCODE_FLAGS_ENCODE_W3C_ENTITIES = 256;
+
   /**
    * Output with carriage return line breaks. May also be combined with
    * ENCODE_FLAGS_LF_LINEBREAKS and if neither is specified, the platform
@@ -164,6 +171,23 @@ interface nsIWebBrowserPersist : nsISupports
   /** For plaintext output. Output the content of noframes elements. */
   const unsigned long ENCODE_FLAGS_NOFRAMES_CONTENT = 4096;
 
+  /**
+   * Encode basic entities, e.g. output &nbsp; instead of character code 0xa0. 
+   * The basic set is just &nbsp; &amp; &lt; &gt; &quot; for interoperability
+   * with older products that don't support &alpha; and friends.
+   */
+  const unsigned long ENCODE_FLAGS_ENCODE_BASIC_ENTITIES = 8192;
+  /**
+   * Encode Latin1 entities. This includes the basic set and
+   * accented letters between 128 and 255.
+   */
+  const unsigned long ENCODE_FLAGS_ENCODE_LATIN1_ENTITIES = 16384;
+  /**
+   * Encode HTML4 entities. This includes the basic set, accented
+   * letters, greek letters and certain special markup symbols.
+   */
+  const unsigned long ENCODE_FLAGS_ENCODE_HTML_ENTITIES = 32768;
+
   /**
    * Save the specified DOM document to file and optionally all linked files
    * (e.g. images, CSS, JS & subframes). Do not call this method until the
diff --git a/mozilla/embedding/components/webbrowserpersist/src/nsWebBrowserPersist.cpp b/mozilla/embedding/components/webbrowserpersist/src/nsWebBrowserPersist.cpp
index 704d0e9319b..68a0b524d2e 100644
--- a/mozilla/embedding/components/webbrowserpersist/src/nsWebBrowserPersist.cpp
+++ b/mozilla/embedding/components/webbrowserpersist/src/nsWebBrowserPersist.cpp
@@ -395,8 +395,14 @@ NS_IMETHODIMP nsWebBrowserPersist::SaveDocument(
         mEncodingFlags |= nsIDocumentEncoder::OutputFormatFlowed;
     if (aEncodingFlags & ENCODE_FLAGS_ABSOLUTE_LINKS)
         mEncodingFlags |= nsIDocumentEncoder::OutputAbsoluteLinks;
-    if (aEncodingFlags & ENCODE_FLAGS_ENCODE_ENTITIES)
-        mEncodingFlags |= nsIDocumentEncoder::OutputEncodeEntities;
+    if (aEncodingFlags & ENCODE_FLAGS_ENCODE_BASIC_ENTITIES)
+        mEncodingFlags |= nsIDocumentEncoder::OutputEncodeBasicEntities;
+    if (aEncodingFlags & ENCODE_FLAGS_ENCODE_LATIN1_ENTITIES)
+        mEncodingFlags |= nsIDocumentEncoder::OutputEncodeLatin1Entities;
+    if (aEncodingFlags & ENCODE_FLAGS_ENCODE_HTML_ENTITIES)
+        mEncodingFlags |= nsIDocumentEncoder::OutputEncodeHTMLEntities;
+    if (aEncodingFlags & ENCODE_FLAGS_ENCODE_W3C_ENTITIES)
+        mEncodingFlags |= nsIDocumentEncoder::OutputEncodeW3CEntities;
     if (aEncodingFlags & ENCODE_FLAGS_CR_LINEBREAKS)
         mEncodingFlags |= nsIDocumentEncoder::OutputCRLineBreak;
     if (aEncodingFlags & ENCODE_FLAGS_LF_LINEBREAKS)
diff --git a/mozilla/extensions/xmlextras/base/src/nsDOMSerializer.cpp b/mozilla/extensions/xmlextras/base/src/nsDOMSerializer.cpp
index 17a4a7ce66c..675f4dadd96 100644
--- a/mozilla/extensions/xmlextras/base/src/nsDOMSerializer.cpp
+++ b/mozilla/extensions/xmlextras/base/src/nsDOMSerializer.cpp
@@ -100,7 +100,7 @@ static nsresult SetUpEncoder(nsIDOMNode *aRoot, const char* aCharset, nsIDocumen
   }
 
   // This method will fail if no document
-  rv = encoder->Init(document,NS_LITERAL_STRING("text/xml"),nsIDocumentEncoder::OutputEncodeEntities);
+  rv = encoder->Init(document,NS_LITERAL_STRING("text/xml"),nsIDocumentEncoder::OutputEncodeBasicEntities);
   if (NS_FAILED(rv))
     return rv;
 
diff --git a/mozilla/layout/generic/nsObjectFrame.cpp b/mozilla/layout/generic/nsObjectFrame.cpp
index da5023daa9d..c13845f4ec6 100644
--- a/mozilla/layout/generic/nsObjectFrame.cpp
+++ b/mozilla/layout/generic/nsObjectFrame.cpp
@@ -2590,7 +2590,7 @@ NS_IMETHODIMP nsPluginInstanceOwner::GetTagText(const char* *result)
         nsCOMPtr docEncoder(do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/html", &rv));
         if (NS_FAILED(rv))
             return rv;
-        rv = docEncoder->Init(document, NS_LITERAL_STRING("text/html"), nsIDocumentEncoder::OutputEncodeEntities);
+        rv = docEncoder->Init(document, NS_LITERAL_STRING("text/html"), nsIDocumentEncoder::OutputEncodeBasicEntities);
         if (NS_FAILED(rv))
             return rv;
 
diff --git a/mozilla/layout/html/base/src/nsObjectFrame.cpp b/mozilla/layout/html/base/src/nsObjectFrame.cpp
index da5023daa9d..c13845f4ec6 100644
--- a/mozilla/layout/html/base/src/nsObjectFrame.cpp
+++ b/mozilla/layout/html/base/src/nsObjectFrame.cpp
@@ -2590,7 +2590,7 @@ NS_IMETHODIMP nsPluginInstanceOwner::GetTagText(const char* *result)
         nsCOMPtr docEncoder(do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/html", &rv));
         if (NS_FAILED(rv))
             return rv;
-        rv = docEncoder->Init(document, NS_LITERAL_STRING("text/html"), nsIDocumentEncoder::OutputEncodeEntities);
+        rv = docEncoder->Init(document, NS_LITERAL_STRING("text/html"), nsIDocumentEncoder::OutputEncodeBasicEntities);
         if (NS_FAILED(rv))
             return rv;