Move the core of NS_ConvertUCS2toUTF8 into character sinks in nsUTF8Utils.h, and use them to make ToNewUTF8String faster. Fix bug in surrogate handling in the moved code. Fix null-termination bug in UTF8ToNewUnicode. b=206682 r=jag sr=alecf a=brendan

git-svn-id: svn://10.0.0.236/trunk@142764 18797224-902f-48f8-a5cc-f745e15eee43
2003-05-22 21:25:43 +00:00
parent 4013205ecb
commit ec7a16fd6d
10 changed files with 424 additions and 258 deletions
--- a/mozilla/xpcom/string/obsolete/nsString.cpp
+++ b/mozilla/xpcom/string/obsolete/nsString.cpp
@@ -45,6 +45,7 @@
 #include "nsString.h"
 #include "nsReadableUtils.h"
 #include "nsDebug.h"
+#include "nsUTF8Utils.h"

 #ifndef nsCharTraits_h___
 #include "nsCharTraits.h"
@@ -54,8 +55,10 @@
 #include "prdtoa.h"
 #endif

+#ifdef DEBUG
 static const char* kPossibleNull = "Error: possible unintended null in string";
 static const char* kNullPointerError = "Error: unexpected null ptr";
+#endif
 static const char* kWhitespace="\b\t\r\n ";

 const nsBufferHandle<char>*
@@ -1084,111 +1087,47 @@ PRBool nsCString::EqualsWithConversion(const char* aCString,PRBool aIgnoreCase,P

 //----------------------------------------------------------------------

-NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString )
+NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString )
  {
-    nsAString::const_iterator start; aString.BeginReading(start);
-    nsAString::const_iterator end;   aString.EndReading(end);
-    
-    while (start != end) {
-      nsReadableFragment<PRUnichar> frag(start.fragment());
-      Append(frag.mStart, frag.mEnd - frag.mStart);
-      start.advance(start.size_forward());
-    }
+    if (!aString)
+      // Leave us as an uninitialized nsCAutoString.
+      return;
+    Init(nsDependentString(aString));
  }

-void
-NS_ConvertUCS2toUTF8::Append( const PRUnichar* aString, PRUint32 aLength )
+NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
  {
-    // Handle null string by just leaving us as a brand-new
-    // uninitialized nsCAutoString.
-    if (! aString)
+    if (!aString)
+      // Leave us as an uninitialized nsCAutoString.
      return;
+    Init(Substring(aString, aString + aLength));
+  }

-    // Calculate how many bytes we need
-    const PRUnichar* p;
-    PRInt32 count, utf8len;
-    for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++)
-      {
-        if (! ((*p) & 0xFF80))
-          utf8len += 1; // 0000 0000 - 0000 007F
-        else if (! ((*p) & 0xF800))
-          utf8len += 2; // 0000 0080 - 0000 07FF
-        else 
-          utf8len += 3; // 0000 0800 - 0000 FFFF
-        // Note: Surrogate pair needs 4 bytes, but in this calcuation
-        // we count it as 6 bytes. It will waste 2 bytes per surrogate pair
+void NS_ConvertUCS2toUTF8::Init( const nsAString& aString )
+  {
+    // Compute space required: do this once so we don't incur multiple
+    // allocations. This "optimization" is probably of dubious value...
+
+    nsAString::const_iterator start, end;
+    CalculateUTF8Size calculator;
+    copy_string(aString.BeginReading(start), aString.EndReading(end), calculator);
+
+    PRUint32 count = calculator.Size();
+
+    if (count) {
+      // Grow the buffer if we need to.
+      SetLength(count);
+
+      // All ready? Time to convert
+
+      ConvertUCS2toUTF8 converter(mStr);
+      copy_string(aString.BeginReading(start), aString.EndReading(end), converter);
+      mLength = converter.Size();
+      if (mLength != count) {
+        NS_ERROR("Input invalid or incorrect length was calculated");
+        Truncate();
      }
-
-    // Make sure our buffer's big enough, so we don't need to do
-    // multiple allocations.
-    if(mLength+PRUint32(utf8len+1) > sizeof(mBuffer))
-      SetCapacity(mLength+utf8len+1);
-    // |SetCapacity| normally doesn't guarantee the use we are putting it to here (see its interface comment in nsAString.h),
-    //  we can only use it since our local implementation, |nsCString::SetCapacity|, is known to do what we want
-
-    char* out = mStr+mLength;
-    PRUint32 ucs4=0;
-
-    for (p = aString, count = aLength; 0 != count && 0 != (*p); count--, p++)
-      {
-        if (0 == ucs4)
-          {
-            if (! ((*p) & 0xFF80))
-              {
-                *out++ = (char)*p;
-              } 
-            else if (! ((*p) & 0xF800))
-              {
-                *out++ = 0xC0 | (char)((*p) >> 6);
-                *out++ = 0x80 | (char)(0x003F & (*p));
-              }
-            else
-              {
-                if (0xD800 == (0xFC00 & (*p))) 
-                  {
-                    // D800- DBFF - High Surrogate 
-                    // N = (H- D800) *400 + 10000 + ...
-                    ucs4 = 0x10000 | ((0x03FF & (*p)) << 10);
-                  }
-                else if (0xDC00 == (0xFC00 & (*p)))
-                  { 
-                    // DC00- DFFF - Low Surrogate 
-                    // error here. We should hit High Surrogate first
-                    // Do not output any thing in this case
-                  }
-                else
-                  {
-                    *out++ = 0xE0 | (char)((*p) >> 12);
-                    *out++ = 0x80 | (char)(0x003F & (*p >> 6));
-                    *out++ = 0x80 | (char)(0x003F & (*p) );
-                  }
-              }
-          }
-        else
-          {
-            if (0xDC00 == (0xFC00 & (*p)))
-              { 
-                // DC00- DFFF - Low Surrogate 
-                // N += ( L - DC00 )  
-                ucs4 |= (0x03FF & (*p));
-
-                // 0001 0000-001F FFFF
-                *out++ = 0xF0 | (char)(ucs4 >> 18);
-                *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
-                *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
-                *out++ = 0x80 | (char)(0x003F & ucs4) ;
-              }
-            else
-              {
-                // Got a High Surrogate but no low surrogate
-                // output nothing.
-              }
-            ucs4 = 0;
-          }
-      }
-
-    *out = '\0'; // null terminate
-    mLength += utf8len;
+    }
  }

 NS_LossyConvertUCS2toASCII::NS_LossyConvertUCS2toASCII( const nsAString& aString )