From 11ea188ef27acae0fa4fc48c58146bd6633017a7 Mon Sep 17 00:00:00 2001
From: "waterson%netscape.com"
 <waterson%netscape.com@18797224-902f-48f8-a5cc-f745e15eee43>
Date: Fri, 26 May 2000 22:24:34 +0000
Subject: [PATCH] Bug 40461. Implement NS_ConvertUCS2toUTF8, r=scc,brendan

git-svn-id: svn://10.0.0.236/trunk@70949 18797224-902f-48f8-a5cc-f745e15eee43
---
 mozilla/string/obsolete/nsString.cpp        | 89 ++++++++++++++++++++-
 mozilla/string/obsolete/nsString.h          | 28 +++++++
 mozilla/string/obsolete/nsString2.cpp       | 81 ++++---------------
 mozilla/xpcom/ds/nsString.cpp               | 89 ++++++++++++++++++++-
 mozilla/xpcom/ds/nsString.h                 | 28 +++++++
 mozilla/xpcom/ds/nsString2.cpp              | 81 ++++---------------
 mozilla/xpcom/string/obsolete/nsString.cpp  | 89 ++++++++++++++++++++-
 mozilla/xpcom/string/obsolete/nsString.h    | 28 +++++++
 mozilla/xpcom/string/obsolete/nsString2.cpp | 81 ++++---------------
 9 files changed, 390 insertions(+), 204 deletions(-)

diff --git a/mozilla/string/obsolete/nsString.cpp b/mozilla/string/obsolete/nsString.cpp
index 127827e8a46..524e02bc5e2 100644
--- a/mozilla/string/obsolete/nsString.cpp
+++ b/mozilla/string/obsolete/nsString.cpp
@@ -1,4 +1,3 @@
-
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
  *
  * The contents of this file are subject to the Netscape Public
@@ -1862,6 +1861,94 @@ void nsCString::DebugDump(void) const {
   }
 }
        
+//----------------------------------------------------------------------
+// Convert aString (at most aLength units, stopping at a null) to UTF-8 in mStr.
+NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
+  {
+    // Pass 1: compute an upper bound on the number of UTF-8 bytes we need.
+    const PRUnichar* p;
+    PRInt32 count, utf8len;
+    for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++)
+      {
+        if (! ((*p) & 0xFF80))
+          utf8len += 1; // 0000 0000 - 0000 007F
+        else if (! ((*p) & 0xF800))
+          utf8len += 2; // 0000 0080 - 0000 07FF
+        else
+          utf8len += 3; // 0000 0800 - 0000 FFFF
+        // Note: A surrogate pair needs 4 bytes, but in this calculation
+        // we count it as 6 bytes. It will waste 2 bytes per surrogate pair.
+      }
+
+    // Make sure our buffer's big enough (including the terminating null),
+    // so we don't need to do multiple allocations.
+    if((utf8len+1) > sizeof(mBuffer))
+      SetCapacity(utf8len+1);
+
+    char* out = mStr;
+    PRUint32 ucs4=0;
+    // Pass 2: encode. ucs4 is non-zero only while a high surrogate is pending.
+    for (p = aString, utf8len=0, count = aLength; 0 != count && 0 != (*p); count--, p++)
+      {
+        if (0 == ucs4)
+          {
+            if (! ((*p) & 0xFF80))
+              {
+                *out++ = (char)*p;
+              }
+            else if (! ((*p) & 0xF800))
+              {
+                *out++ = 0xC0 | (char)((*p) >> 6);
+                *out++ = 0x80 | (char)(0x003F & (*p));
+              }
+            else
+              {
+                if (0xD800 == (0xFC00 & (*p)))
+                  {
+                    // D800-DBFF: High Surrogate. N = 10000 + (H - D800) * 400 + ...
+                    // Must add, not OR: the shifted bits can already include bit 16.
+                    ucs4 = 0x10000 + ((0x03FF & (*p)) << 10);
+                  }
+                else if (0xDC00 == (0xFC00 & (*p)))
+                  {
+                    // DC00-DFFF: Low Surrogate.
+                    // Error here: we should have hit a High Surrogate first.
+                    // Do not output anything in this case.
+                  }
+                else
+                  {
+                    *out++ = 0xE0 | (char)((*p) >> 12);
+                    *out++ = 0x80 | (char)(0x003F & (*p >> 6));
+                    *out++ = 0x80 | (char)(0x003F & (*p) );
+                  }
+              }
+          }
+        else
+          {
+            if (0xDC00 == (0xFC00 & (*p)))
+              {
+                // DC00-DFFF: Low Surrogate completes the pending pair.
+                // N += (L - DC00); the low 10 bits of ucs4 are zero, so OR is safe.
+                ucs4 |= (0x03FF & (*p));
+
+                // 0001 0000-001F FFFF
+                *out++ = 0xF0 | (char)(ucs4 >> 18);
+                *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
+                *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
+                *out++ = 0x80 | (char)(0x003F & ucs4) ;
+              }
+            else
+              {
+                // Got a High Surrogate but no Low Surrogate:
+                // output nothing (the current unit is dropped as well).
+              }
+            ucs4 = 0;
+          }
+      }
+    // NOTE(review): only mStr is written; confirm whether the string's length field must be set too.
+    *out = '\0'; // null terminate
+  }
+
 
 /***********************************************************************
   IMPLEMENTATION NOTES: AUTOSTRING...
diff --git a/mozilla/string/obsolete/nsString.h b/mozilla/string/obsolete/nsString.h
index 1eff5732764..00c6200a2fe 100644
--- a/mozilla/string/obsolete/nsString.h
+++ b/mozilla/string/obsolete/nsString.h
@@ -870,6 +870,34 @@ NS_DEF_STRING_COMPARISON_OPERATORS(nsCAutoString, char)
 NS_DEF_DERIVED_STRING_OPERATOR_PLUS(nsCAutoString, char)
 #endif
 
+/**
+ * A helper class that converts a UCS2 string to UTF8
+ */
+class NS_COM NS_ConvertUCS2toUTF8
+      : public nsCAutoString
+    /*
+      Every constructor passes its input to |Init|, which performs the conversion.
+    */
+  {
+    public:
+      NS_ConvertUCS2toUTF8( const PRUnichar* aString )
+        { Init( aString, ~PRUint32(0) /* largest PRUint32: |Init| stops at the null */); }
+
+      NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
+        { Init( aString, aLength ); }
+
+      NS_ConvertUCS2toUTF8( PRUnichar aChar )
+        { Init( &aChar, 1 ); }
+
+    protected:
+      Init( const PRUnichar* aString, PRUint32 aLength );
+
+    private:
+        // NOT TO BE IMPLEMENTED: private and undefined; presumably blocks accidental use with a plain |char|
+      NS_ConvertUCS2toUTF8( char );
+  };
+
+
 /***************************************************************
   The subsumestr class is very unusual. 
   It differs from a normal string in that it doesn't use normal
diff --git a/mozilla/string/obsolete/nsString2.cpp b/mozilla/string/obsolete/nsString2.cpp
index 76e2dd67c1c..aed93b24b5a 100644
--- a/mozilla/string/obsolete/nsString2.cpp
+++ b/mozilla/string/obsolete/nsString2.cpp
@@ -1,4 +1,3 @@
-
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
  *
  * The contents of this file are subject to the Netscape Public
@@ -737,75 +736,23 @@ char* nsString::ToNewCString() const {
  * http://www.cis.ohio-state.edu/htbin/rfc/rfc2279.html
  */
 char* nsString::ToNewUTF8String() const {
-  nsCString temp("");
-  temp.SetCapacity(8); //ensure that we get an allocated buffer instead of the common empty one.
+  NS_ConvertUCS2toUTF8 temp(mUStr);
 
-  // Caculate how many bytes we need
-  PRUnichar* p;
-  PRInt32 utf8len;
-  for(p = this->mUStr, utf8len=0; 0 != (*p);p++)
-  {
-     if(0x0000 == ((*p) & 0x007F))
-        utf8len += 1; // 0000 0000 - 0000 007F
-     else if(0x0000 == ((*p) & 0x07FF))
-        utf8len += 2; // 0000 0080 - 0000 07FF
-     else 
-        utf8len += 3; // 0000 0800 - 0000 FFFF
-     // Note: Surrogate pair need 4 bytes, but in this caculation
-     // we count as 6 bytes. It will wast 2 bytes per surrogate pair
+  char* result;
+  if (temp.mOwnsBuffer) {
+    // temp allocated its buffer: hand that buffer to the caller and detach
+    // it from temp so temp will not free it (avoids an extra allocation).
+    result = temp.mStr;
+
+    temp.mStr=0;
+    temp.mOwnsBuffer = PR_FALSE;
+  }
+  else {
+    // temp did not allocate a buffer, so the text sits in the
+    // nsCAutoString's own storage; return a copy of it instead.
+    result = nsCRT::strdup(temp.mStr);
   }
 
-  if((utf8len+1) > 8)
-     temp.SetCapacity(utf8len+1); 
-
-  char* result=temp.mStr;
-  char* out = result;
-  PRUint32 ucs4=0;
-
-  for(p = this->mUStr, utf8len=0; 0 != (*p);p++)
-  {
-     if(0 == ucs4) {
-       if(0x0000 == ((*p) & 0xFF80)) {
-          *out++ = (char)*p;
-       } else if(0x0000 == ((*p) & 0xF800)) {
-          *out++ = 0xC0 | (char)((*p) >> 6);
-          *out++ = 0x80 | (char)(0x003F & (*p));
-       } else {
-          if( 0xD800 == ( 0xFC00 & (*p))) 
-          { // D800- DBFF - High Surrogate 
-            // N = (H- D800) *400 + 10000 + ...
-            ucs4 = 0x10000 | ((0x03FF & (*p)) << 10);
-          } else if( 0xDC00 == ( 0xFC00 & (*p))) { 
-            // DC00- DFFF - Low Surrogate 
-            // error here. We should hit High Surrogate first
-            // Do not output any thing in this case
-          } else {
-            *out++ = 0xE0 | (char)((*p) >> 12);
-            *out++ = 0x80 | (char)(0x003F & (*p >> 6));
-            *out++ = 0x80 | (char)(0x003F & (*p) );
-          }
-       }
-     } else {
-       if( 0xDC00 == (0xFC00 & (*p))) { 
-         // DC00- DFFF - Low Surrogate 
-         // N += ( L - DC00 )  
-         ucs4 |= (0x03FF & (*p));
-         // 0001 0000-001F FFFF
-         *out++ = 0xF0 | (char)(ucs4 >> 18);
-         *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
-         *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
-         *out++ = 0x80 | (char)(0x003F & ucs4) ;
-       } else {
-         // Got a High Surrogate but no low surrogate
-         // output nothing.
-       }
-       ucs4 = 0;
-     }
-  }
-  *out = '\0'; // null terminate
-  temp.mStr=0;
-  temp.mOwnsBuffer=PR_FALSE;
-  
   return result;
 }
 
diff --git a/mozilla/xpcom/ds/nsString.cpp b/mozilla/xpcom/ds/nsString.cpp
index 127827e8a46..524e02bc5e2 100644
--- a/mozilla/xpcom/ds/nsString.cpp
+++ b/mozilla/xpcom/ds/nsString.cpp
@@ -1,4 +1,3 @@
-
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
  *
  * The contents of this file are subject to the Netscape Public
@@ -1862,6 +1861,94 @@ void nsCString::DebugDump(void) const {
   }
 }
        
+//----------------------------------------------------------------------
+// Convert aString (at most aLength units, stopping at a null) to UTF-8 in mStr.
+NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
+  {
+    // Pass 1: compute an upper bound on the number of UTF-8 bytes we need.
+    const PRUnichar* p;
+    PRInt32 count, utf8len;
+    for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++)
+      {
+        if (! ((*p) & 0xFF80))
+          utf8len += 1; // 0000 0000 - 0000 007F
+        else if (! ((*p) & 0xF800))
+          utf8len += 2; // 0000 0080 - 0000 07FF
+        else
+          utf8len += 3; // 0000 0800 - 0000 FFFF
+        // Note: A surrogate pair needs 4 bytes, but in this calculation
+        // we count it as 6 bytes. It will waste 2 bytes per surrogate pair.
+      }
+
+    // Make sure our buffer's big enough (including the terminating null),
+    // so we don't need to do multiple allocations.
+    if((utf8len+1) > sizeof(mBuffer))
+      SetCapacity(utf8len+1);
+
+    char* out = mStr;
+    PRUint32 ucs4=0;
+    // Pass 2: encode. ucs4 is non-zero only while a high surrogate is pending.
+    for (p = aString, utf8len=0, count = aLength; 0 != count && 0 != (*p); count--, p++)
+      {
+        if (0 == ucs4)
+          {
+            if (! ((*p) & 0xFF80))
+              {
+                *out++ = (char)*p;
+              }
+            else if (! ((*p) & 0xF800))
+              {
+                *out++ = 0xC0 | (char)((*p) >> 6);
+                *out++ = 0x80 | (char)(0x003F & (*p));
+              }
+            else
+              {
+                if (0xD800 == (0xFC00 & (*p)))
+                  {
+                    // D800-DBFF: High Surrogate. N = 10000 + (H - D800) * 400 + ...
+                    // Must add, not OR: the shifted bits can already include bit 16.
+                    ucs4 = 0x10000 + ((0x03FF & (*p)) << 10);
+                  }
+                else if (0xDC00 == (0xFC00 & (*p)))
+                  {
+                    // DC00-DFFF: Low Surrogate.
+                    // Error here: we should have hit a High Surrogate first.
+                    // Do not output anything in this case.
+                  }
+                else
+                  {
+                    *out++ = 0xE0 | (char)((*p) >> 12);
+                    *out++ = 0x80 | (char)(0x003F & (*p >> 6));
+                    *out++ = 0x80 | (char)(0x003F & (*p) );
+                  }
+              }
+          }
+        else
+          {
+            if (0xDC00 == (0xFC00 & (*p)))
+              {
+                // DC00-DFFF: Low Surrogate completes the pending pair.
+                // N += (L - DC00); the low 10 bits of ucs4 are zero, so OR is safe.
+                ucs4 |= (0x03FF & (*p));
+
+                // 0001 0000-001F FFFF
+                *out++ = 0xF0 | (char)(ucs4 >> 18);
+                *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
+                *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
+                *out++ = 0x80 | (char)(0x003F & ucs4) ;
+              }
+            else
+              {
+                // Got a High Surrogate but no Low Surrogate:
+                // output nothing (the current unit is dropped as well).
+              }
+            ucs4 = 0;
+          }
+      }
+    // NOTE(review): only mStr is written; confirm whether the string's length field must be set too.
+    *out = '\0'; // null terminate
+  }
+
 
 /***********************************************************************
   IMPLEMENTATION NOTES: AUTOSTRING...
diff --git a/mozilla/xpcom/ds/nsString.h b/mozilla/xpcom/ds/nsString.h
index 1eff5732764..00c6200a2fe 100644
--- a/mozilla/xpcom/ds/nsString.h
+++ b/mozilla/xpcom/ds/nsString.h
@@ -870,6 +870,34 @@ NS_DEF_STRING_COMPARISON_OPERATORS(nsCAutoString, char)
 NS_DEF_DERIVED_STRING_OPERATOR_PLUS(nsCAutoString, char)
 #endif
 
+/**
+ * A helper class that converts a UCS2 string to UTF8
+ */
+class NS_COM NS_ConvertUCS2toUTF8
+      : public nsCAutoString
+    /*
+      Every constructor passes its input to |Init|, which performs the conversion.
+    */
+  {
+    public:
+      NS_ConvertUCS2toUTF8( const PRUnichar* aString )
+        { Init( aString, ~PRUint32(0) /* largest PRUint32: |Init| stops at the null */); }
+
+      NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
+        { Init( aString, aLength ); }
+
+      NS_ConvertUCS2toUTF8( PRUnichar aChar )
+        { Init( &aChar, 1 ); }
+
+    protected:
+      Init( const PRUnichar* aString, PRUint32 aLength );
+
+    private:
+        // NOT TO BE IMPLEMENTED: private and undefined; presumably blocks accidental use with a plain |char|
+      NS_ConvertUCS2toUTF8( char );
+  };
+
+
 /***************************************************************
   The subsumestr class is very unusual. 
   It differs from a normal string in that it doesn't use normal
diff --git a/mozilla/xpcom/ds/nsString2.cpp b/mozilla/xpcom/ds/nsString2.cpp
index 76e2dd67c1c..aed93b24b5a 100644
--- a/mozilla/xpcom/ds/nsString2.cpp
+++ b/mozilla/xpcom/ds/nsString2.cpp
@@ -1,4 +1,3 @@
-
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
  *
  * The contents of this file are subject to the Netscape Public
@@ -737,75 +736,23 @@ char* nsString::ToNewCString() const {
  * http://www.cis.ohio-state.edu/htbin/rfc/rfc2279.html
  */
 char* nsString::ToNewUTF8String() const {
-  nsCString temp("");
-  temp.SetCapacity(8); //ensure that we get an allocated buffer instead of the common empty one.
+  NS_ConvertUCS2toUTF8 temp(mUStr);
 
-  // Caculate how many bytes we need
-  PRUnichar* p;
-  PRInt32 utf8len;
-  for(p = this->mUStr, utf8len=0; 0 != (*p);p++)
-  {
-     if(0x0000 == ((*p) & 0x007F))
-        utf8len += 1; // 0000 0000 - 0000 007F
-     else if(0x0000 == ((*p) & 0x07FF))
-        utf8len += 2; // 0000 0080 - 0000 07FF
-     else 
-        utf8len += 3; // 0000 0800 - 0000 FFFF
-     // Note: Surrogate pair need 4 bytes, but in this caculation
-     // we count as 6 bytes. It will wast 2 bytes per surrogate pair
+  char* result;
+  if (temp.mOwnsBuffer) {
+    // temp allocated its buffer: hand that buffer to the caller and detach
+    // it from temp so temp will not free it (avoids an extra allocation).
+    result = temp.mStr;
+
+    temp.mStr=0;
+    temp.mOwnsBuffer = PR_FALSE;
+  }
+  else {
+    // temp did not allocate a buffer, so the text sits in the
+    // nsCAutoString's own storage; return a copy of it instead.
+    result = nsCRT::strdup(temp.mStr);
   }
 
-  if((utf8len+1) > 8)
-     temp.SetCapacity(utf8len+1); 
-
-  char* result=temp.mStr;
-  char* out = result;
-  PRUint32 ucs4=0;
-
-  for(p = this->mUStr, utf8len=0; 0 != (*p);p++)
-  {
-     if(0 == ucs4) {
-       if(0x0000 == ((*p) & 0xFF80)) {
-          *out++ = (char)*p;
-       } else if(0x0000 == ((*p) & 0xF800)) {
-          *out++ = 0xC0 | (char)((*p) >> 6);
-          *out++ = 0x80 | (char)(0x003F & (*p));
-       } else {
-          if( 0xD800 == ( 0xFC00 & (*p))) 
-          { // D800- DBFF - High Surrogate 
-            // N = (H- D800) *400 + 10000 + ...
-            ucs4 = 0x10000 | ((0x03FF & (*p)) << 10);
-          } else if( 0xDC00 == ( 0xFC00 & (*p))) { 
-            // DC00- DFFF - Low Surrogate 
-            // error here. We should hit High Surrogate first
-            // Do not output any thing in this case
-          } else {
-            *out++ = 0xE0 | (char)((*p) >> 12);
-            *out++ = 0x80 | (char)(0x003F & (*p >> 6));
-            *out++ = 0x80 | (char)(0x003F & (*p) );
-          }
-       }
-     } else {
-       if( 0xDC00 == (0xFC00 & (*p))) { 
-         // DC00- DFFF - Low Surrogate 
-         // N += ( L - DC00 )  
-         ucs4 |= (0x03FF & (*p));
-         // 0001 0000-001F FFFF
-         *out++ = 0xF0 | (char)(ucs4 >> 18);
-         *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
-         *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
-         *out++ = 0x80 | (char)(0x003F & ucs4) ;
-       } else {
-         // Got a High Surrogate but no low surrogate
-         // output nothing.
-       }
-       ucs4 = 0;
-     }
-  }
-  *out = '\0'; // null terminate
-  temp.mStr=0;
-  temp.mOwnsBuffer=PR_FALSE;
-  
   return result;
 }
 
diff --git a/mozilla/xpcom/string/obsolete/nsString.cpp b/mozilla/xpcom/string/obsolete/nsString.cpp
index 127827e8a46..524e02bc5e2 100644
--- a/mozilla/xpcom/string/obsolete/nsString.cpp
+++ b/mozilla/xpcom/string/obsolete/nsString.cpp
@@ -1,4 +1,3 @@
-
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
  *
  * The contents of this file are subject to the Netscape Public
@@ -1862,6 +1861,94 @@ void nsCString::DebugDump(void) const {
   }
 }
        
+//----------------------------------------------------------------------
+// Convert aString (at most aLength units, stopping at a null) to UTF-8 in mStr.
+NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
+  {
+    // Pass 1: compute an upper bound on the number of UTF-8 bytes we need.
+    const PRUnichar* p;
+    PRInt32 count, utf8len;
+    for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++)
+      {
+        if (! ((*p) & 0xFF80))
+          utf8len += 1; // 0000 0000 - 0000 007F
+        else if (! ((*p) & 0xF800))
+          utf8len += 2; // 0000 0080 - 0000 07FF
+        else
+          utf8len += 3; // 0000 0800 - 0000 FFFF
+        // Note: A surrogate pair needs 4 bytes, but in this calculation
+        // we count it as 6 bytes. It will waste 2 bytes per surrogate pair.
+      }
+
+    // Make sure our buffer's big enough (including the terminating null),
+    // so we don't need to do multiple allocations.
+    if((utf8len+1) > sizeof(mBuffer))
+      SetCapacity(utf8len+1);
+
+    char* out = mStr;
+    PRUint32 ucs4=0;
+    // Pass 2: encode. ucs4 is non-zero only while a high surrogate is pending.
+    for (p = aString, utf8len=0, count = aLength; 0 != count && 0 != (*p); count--, p++)
+      {
+        if (0 == ucs4)
+          {
+            if (! ((*p) & 0xFF80))
+              {
+                *out++ = (char)*p;
+              }
+            else if (! ((*p) & 0xF800))
+              {
+                *out++ = 0xC0 | (char)((*p) >> 6);
+                *out++ = 0x80 | (char)(0x003F & (*p));
+              }
+            else
+              {
+                if (0xD800 == (0xFC00 & (*p)))
+                  {
+                    // D800-DBFF: High Surrogate. N = 10000 + (H - D800) * 400 + ...
+                    // Must add, not OR: the shifted bits can already include bit 16.
+                    ucs4 = 0x10000 + ((0x03FF & (*p)) << 10);
+                  }
+                else if (0xDC00 == (0xFC00 & (*p)))
+                  {
+                    // DC00-DFFF: Low Surrogate.
+                    // Error here: we should have hit a High Surrogate first.
+                    // Do not output anything in this case.
+                  }
+                else
+                  {
+                    *out++ = 0xE0 | (char)((*p) >> 12);
+                    *out++ = 0x80 | (char)(0x003F & (*p >> 6));
+                    *out++ = 0x80 | (char)(0x003F & (*p) );
+                  }
+              }
+          }
+        else
+          {
+            if (0xDC00 == (0xFC00 & (*p)))
+              {
+                // DC00-DFFF: Low Surrogate completes the pending pair.
+                // N += (L - DC00); the low 10 bits of ucs4 are zero, so OR is safe.
+                ucs4 |= (0x03FF & (*p));
+
+                // 0001 0000-001F FFFF
+                *out++ = 0xF0 | (char)(ucs4 >> 18);
+                *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
+                *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
+                *out++ = 0x80 | (char)(0x003F & ucs4) ;
+              }
+            else
+              {
+                // Got a High Surrogate but no Low Surrogate:
+                // output nothing (the current unit is dropped as well).
+              }
+            ucs4 = 0;
+          }
+      }
+    // NOTE(review): only mStr is written; confirm whether the string's length field must be set too.
+    *out = '\0'; // null terminate
+  }
+
 
 /***********************************************************************
   IMPLEMENTATION NOTES: AUTOSTRING...
diff --git a/mozilla/xpcom/string/obsolete/nsString.h b/mozilla/xpcom/string/obsolete/nsString.h
index 1eff5732764..00c6200a2fe 100644
--- a/mozilla/xpcom/string/obsolete/nsString.h
+++ b/mozilla/xpcom/string/obsolete/nsString.h
@@ -870,6 +870,34 @@ NS_DEF_STRING_COMPARISON_OPERATORS(nsCAutoString, char)
 NS_DEF_DERIVED_STRING_OPERATOR_PLUS(nsCAutoString, char)
 #endif
 
+/**
+ * A helper class that converts a UCS2 string to UTF8
+ */
+class NS_COM NS_ConvertUCS2toUTF8
+      : public nsCAutoString
+    /*
+      Every constructor passes its input to |Init|, which performs the conversion.
+    */
+  {
+    public:
+      NS_ConvertUCS2toUTF8( const PRUnichar* aString )
+        { Init( aString, ~PRUint32(0) /* largest PRUint32: |Init| stops at the null */); }
+
+      NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
+        { Init( aString, aLength ); }
+
+      NS_ConvertUCS2toUTF8( PRUnichar aChar )
+        { Init( &aChar, 1 ); }
+
+    protected:
+      Init( const PRUnichar* aString, PRUint32 aLength );
+
+    private:
+        // NOT TO BE IMPLEMENTED: private and undefined; presumably blocks accidental use with a plain |char|
+      NS_ConvertUCS2toUTF8( char );
+  };
+
+
 /***************************************************************
   The subsumestr class is very unusual. 
   It differs from a normal string in that it doesn't use normal
diff --git a/mozilla/xpcom/string/obsolete/nsString2.cpp b/mozilla/xpcom/string/obsolete/nsString2.cpp
index 76e2dd67c1c..aed93b24b5a 100644
--- a/mozilla/xpcom/string/obsolete/nsString2.cpp
+++ b/mozilla/xpcom/string/obsolete/nsString2.cpp
@@ -1,4 +1,3 @@
-
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
  *
  * The contents of this file are subject to the Netscape Public
@@ -737,75 +736,23 @@ char* nsString::ToNewCString() const {
  * http://www.cis.ohio-state.edu/htbin/rfc/rfc2279.html
  */
 char* nsString::ToNewUTF8String() const {
-  nsCString temp("");
-  temp.SetCapacity(8); //ensure that we get an allocated buffer instead of the common empty one.
+  NS_ConvertUCS2toUTF8 temp(mUStr);
 
-  // Caculate how many bytes we need
-  PRUnichar* p;
-  PRInt32 utf8len;
-  for(p = this->mUStr, utf8len=0; 0 != (*p);p++)
-  {
-     if(0x0000 == ((*p) & 0x007F))
-        utf8len += 1; // 0000 0000 - 0000 007F
-     else if(0x0000 == ((*p) & 0x07FF))
-        utf8len += 2; // 0000 0080 - 0000 07FF
-     else 
-        utf8len += 3; // 0000 0800 - 0000 FFFF
-     // Note: Surrogate pair need 4 bytes, but in this caculation
-     // we count as 6 bytes. It will wast 2 bytes per surrogate pair
+  char* result;
+  if (temp.mOwnsBuffer) {
+    // temp allocated its buffer: hand that buffer to the caller and detach
+    // it from temp so temp will not free it (avoids an extra allocation).
+    result = temp.mStr;
+
+    temp.mStr=0;
+    temp.mOwnsBuffer = PR_FALSE;
+  }
+  else {
+    // temp did not allocate a buffer, so the text sits in the
+    // nsCAutoString's own storage; return a copy of it instead.
+    result = nsCRT::strdup(temp.mStr);
   }
 
-  if((utf8len+1) > 8)
-     temp.SetCapacity(utf8len+1); 
-
-  char* result=temp.mStr;
-  char* out = result;
-  PRUint32 ucs4=0;
-
-  for(p = this->mUStr, utf8len=0; 0 != (*p);p++)
-  {
-     if(0 == ucs4) {
-       if(0x0000 == ((*p) & 0xFF80)) {
-          *out++ = (char)*p;
-       } else if(0x0000 == ((*p) & 0xF800)) {
-          *out++ = 0xC0 | (char)((*p) >> 6);
-          *out++ = 0x80 | (char)(0x003F & (*p));
-       } else {
-          if( 0xD800 == ( 0xFC00 & (*p))) 
-          { // D800- DBFF - High Surrogate 
-            // N = (H- D800) *400 + 10000 + ...
-            ucs4 = 0x10000 | ((0x03FF & (*p)) << 10);
-          } else if( 0xDC00 == ( 0xFC00 & (*p))) { 
-            // DC00- DFFF - Low Surrogate 
-            // error here. We should hit High Surrogate first
-            // Do not output any thing in this case
-          } else {
-            *out++ = 0xE0 | (char)((*p) >> 12);
-            *out++ = 0x80 | (char)(0x003F & (*p >> 6));
-            *out++ = 0x80 | (char)(0x003F & (*p) );
-          }
-       }
-     } else {
-       if( 0xDC00 == (0xFC00 & (*p))) { 
-         // DC00- DFFF - Low Surrogate 
-         // N += ( L - DC00 )  
-         ucs4 |= (0x03FF & (*p));
-         // 0001 0000-001F FFFF
-         *out++ = 0xF0 | (char)(ucs4 >> 18);
-         *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
-         *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
-         *out++ = 0x80 | (char)(0x003F & ucs4) ;
-       } else {
-         // Got a High Surrogate but no low surrogate
-         // output nothing.
-       }
-       ucs4 = 0;
-     }
-  }
-  *out = '\0'; // null terminate
-  temp.mStr=0;
-  temp.mOwnsBuffer=PR_FALSE;
-  
   return result;
 }