diff --git a/mozilla/string/obsolete/nsString.cpp b/mozilla/string/obsolete/nsString.cpp index e816e7beee8..d7e982f177f 100644 --- a/mozilla/string/obsolete/nsString.cpp +++ b/mozilla/string/obsolete/nsString.cpp @@ -45,6 +45,7 @@ #include "nsString.h" #include "nsReadableUtils.h" #include "nsDebug.h" +#include "nsUTF8Utils.h" #ifndef nsCharTraits_h___ #include "nsCharTraits.h" @@ -54,8 +55,10 @@ #include "prdtoa.h" #endif +#ifdef DEBUG static const char* kPossibleNull = "Error: possible unintended null in string"; static const char* kNullPointerError = "Error: unexpected null ptr"; +#endif static const char* kWhitespace="\b\t\r\n "; const nsBufferHandle* @@ -1084,111 +1087,47 @@ PRBool nsCString::EqualsWithConversion(const char* aCString,PRBool aIgnoreCase,P //---------------------------------------------------------------------- -NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString ) +NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString ) { - nsAString::const_iterator start; aString.BeginReading(start); - nsAString::const_iterator end; aString.EndReading(end); - - while (start != end) { - nsReadableFragment frag(start.fragment()); - Append(frag.mStart, frag.mEnd - frag.mStart); - start.advance(start.size_forward()); - } + if (!aString) + // Leave us as an uninitialized nsCAutoString. + return; + Init(nsDependentString(aString)); } -void -NS_ConvertUCS2toUTF8::Append( const PRUnichar* aString, PRUint32 aLength ) +NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength ) { - // Handle null string by just leaving us as a brand-new - // uninitialized nsCAutoString. - if (! aString) + if (!aString) + // Leave us as an uninitialized nsCAutoString. return; + Init(Substring(aString, aString + aLength)); + } - // Calculate how many bytes we need - const PRUnichar* p; - PRInt32 count, utf8len; - for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++) - { - if (! ((*p) & 0xFF80)) - utf8len += 1; // 0000 0000 - 0000 007F - else if (! ((*p) & 0xF800)) - utf8len += 2; // 0000 0080 - 0000 07FF - else - utf8len += 3; // 0000 0800 - 0000 FFFF - // Note: Surrogate pair needs 4 bytes, but in this calcuation - // we count it as 6 bytes. It will waste 2 bytes per surrogate pair +void NS_ConvertUCS2toUTF8::Init( const nsAString& aString ) + { + // Compute space required: do this once so we don't incur multiple + // allocations. This "optimization" is probably of dubious value... + + nsAString::const_iterator start, end; + CalculateUTF8Size calculator; + copy_string(aString.BeginReading(start), aString.EndReading(end), calculator); + + PRUint32 count = calculator.Size(); + + if (count) { + // Grow the buffer if we need to. + SetLength(count); + + // All ready? Time to convert + + ConvertUCS2toUTF8 converter(mStr); + copy_string(aString.BeginReading(start), aString.EndReading(end), converter); + mLength = converter.Size(); + if (mLength != count) { + NS_ERROR("Input invalid or incorrect length was calculated"); + Truncate(); } - - // Make sure our buffer's big enough, so we don't need to do - // multiple allocations. - if(mLength+PRUint32(utf8len+1) > sizeof(mBuffer)) - SetCapacity(mLength+utf8len+1); - // |SetCapacity| normally doesn't guarantee the use we are putting it to here (see its interface comment in nsAString.h), - // we can only use it since our local implementation, |nsCString::SetCapacity|, is known to do what we want - - char* out = mStr+mLength; - PRUint32 ucs4=0; - - for (p = aString, count = aLength; 0 != count && 0 != (*p); count--, p++) - { - if (0 == ucs4) - { - if (! ((*p) & 0xFF80)) - { - *out++ = (char)*p; - } - else if (! ((*p) & 0xF800)) - { - *out++ = 0xC0 | (char)((*p) >> 6); - *out++ = 0x80 | (char)(0x003F & (*p)); - } - else - { - if (0xD800 == (0xFC00 & (*p))) - { - // D800- DBFF - High Surrogate - // N = (H- D800) *400 + 10000 + ... - ucs4 = 0x10000 | ((0x03FF & (*p)) << 10); - } - else if (0xDC00 == (0xFC00 & (*p))) - { - // DC00- DFFF - Low Surrogate - // error here. We should hit High Surrogate first - // Do not output any thing in this case - } - else - { - *out++ = 0xE0 | (char)((*p) >> 12); - *out++ = 0x80 | (char)(0x003F & (*p >> 6)); - *out++ = 0x80 | (char)(0x003F & (*p) ); - } - } - } - else - { - if (0xDC00 == (0xFC00 & (*p))) - { - // DC00- DFFF - Low Surrogate - // N += ( L - DC00 ) - ucs4 |= (0x03FF & (*p)); - - // 0001 0000-001F FFFF - *out++ = 0xF0 | (char)(ucs4 >> 18); - *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12)); - *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6)); - *out++ = 0x80 | (char)(0x003F & ucs4) ; - } - else - { - // Got a High Surrogate but no low surrogate - // output nothing. - } - ucs4 = 0; - } - } - - *out = '\0'; // null terminate - mLength += utf8len; + } } NS_LossyConvertUCS2toASCII::NS_LossyConvertUCS2toASCII( const nsAString& aString ) diff --git a/mozilla/string/obsolete/nsString.h b/mozilla/string/obsolete/nsString.h index 5ce0fe71a1e..26253147f63 100644 --- a/mozilla/string/obsolete/nsString.h +++ b/mozilla/string/obsolete/nsString.h @@ -431,24 +431,15 @@ class NS_COM NS_ConvertUCS2toUTF8 */ { public: - friend NS_COM char* ToNewUTF8String( const nsAString& aSource ); - - public: - explicit - NS_ConvertUCS2toUTF8( const PRUnichar* aString ) + explicit NS_ConvertUCS2toUTF8( const PRUnichar* aString ); + NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength ); + explicit NS_ConvertUCS2toUTF8( const nsAString& aString ) { - Append( aString, ~PRUint32(0) /* MAXINT */); + Init(aString); } - NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength ) - { - Append( aString, aLength ); - } - - explicit NS_ConvertUCS2toUTF8( const nsAString& aString ); - protected: - void Append( const PRUnichar* aString, PRUint32 aLength ); + void Init( const nsAString& aString ); private: // NOT TO BE IMPLEMENTED diff --git a/mozilla/string/obsolete/nsString2.cpp b/mozilla/string/obsolete/nsString2.cpp index c0edfa08f75..ef304455a1e 100644 --- a/mozilla/string/obsolete/nsString2.cpp +++ b/mozilla/string/obsolete/nsString2.cpp @@ -54,8 +54,10 @@ #include "prdtoa.h" #endif +#ifdef DEBUG static const char* kPossibleNull = "Error: possible unintended null in string"; static const char* kNullPointerError = "Error: unexpected null ptr"; +#endif static const char* kWhitespace="\b\t\r\n "; const nsBufferHandle* diff --git a/mozilla/string/public/nsUTF8Utils.h b/mozilla/string/public/nsUTF8Utils.h index 7d3db81d58d..a575564198a 100644 --- a/mozilla/string/public/nsUTF8Utils.h +++ b/mozilla/string/public/nsUTF8Utils.h @@ -54,6 +54,10 @@ class UTF8traits #define PLANE1_BASE 0x00010000 #define UCS2_REPLACEMENT_CHAR 0xfffd +/** + * A character sink (see |copy_string| in nsAlgorithm.h) for converting + * UTF-8 to UCS2 (really UTF-16). + */ class ConvertUTF8toUCS2 { public: @@ -181,12 +185,21 @@ class ConvertUTF8toUCS2 return p - start; } + void write_terminator() + { + *mBuffer = buffer_type(0); + } + private: buffer_type* mStart; buffer_type* mBuffer; PRBool mErrorEncountered; }; +/** + * A character sink (see |copy_string| in nsAlgorithm.h) for computing + * the length of a UTF-8 string. + */ class CalculateUTF8Length { public: @@ -242,4 +255,148 @@ class CalculateUTF8Length PRBool mErrorEncountered; }; +/** + * A character sink (see |copy_string| in nsAlgorithm.h) for converting + * UCS2 (really UTF-16) to UTF-8. + */ +class ConvertUCS2toUTF8 + { + public: + typedef nsAString::char_type value_type; + typedef nsACString::char_type buffer_type; + + // The error handling here is more lenient than that in + // |ConvertUTF8toUCS2|, but it's that way for backwards + // compatibility. + + ConvertUCS2toUTF8( buffer_type* aBuffer ) + : mStart(aBuffer), mBuffer(aBuffer) {} + + size_t Size() const { return mBuffer - mStart; } + + PRUint32 write( const value_type* start, PRUint32 N ) + { + for (const value_type *p = start, *end = start + N; p < end; ++p ) + { + value_type c = *p; + if (! (c & 0xFF80)) // U+0000 - U+007F + { + *mBuffer++ = (char)c; + } + else if (! (c & 0xF800)) // U+0100 - U+07FF + { + *mBuffer++ = 0xC0 | (char)(c >> 6); + *mBuffer++ = 0x80 | (char)(0x003F & c); + } + else if (0xD800 == (0xFC00 & c)) // U+D800 - U+DBFF + { + // D800- DBFF - High Surrogate + // N = (H- D800) *400 + 10000 + ... + PRUint32 ucs4 = 0x10000 + ((0x03FF & c) << 10); + + ++p; + if (p == end) + { + NS_ERROR("Surrogate pair split between fragments"); + return N; + } + c = *p; + + if (0xDC00 == (0xFC00 & c)) + { + // DC00- DFFF - Low Surrogate + // N += ( L - DC00 ) + ucs4 |= (0x03FF & c); + + // 0001 0000-001F FFFF + *mBuffer++ = 0xF0 | (char)(ucs4 >> 18); + *mBuffer++ = 0x80 | (char)(0x003F & (ucs4 >> 12)); + *mBuffer++ = 0x80 | (char)(0x003F & (ucs4 >> 6)); + *mBuffer++ = 0x80 | (char)(0x003F & ucs4) ; + } + else + { + NS_ERROR("got a High Surrogate but no low surrogate"); + // output nothing. + } + } + else if (0xDC00 == (0xFC00 & c)) // U+DC00 - U+DFFF + { + // DC00- DFFF - Low Surrogate + NS_ERROR("got a low Surrogate but no high surrogate"); + // output nothing. + } + else // U+0800 - U+D7FF, U+E000 - U+FFFF + { + *mBuffer++ = 0xE0 | (char)(c >> 12); + *mBuffer++ = 0x80 | (char)(0x003F & (c >> 6)); + *mBuffer++ = 0x80 | (char)(0x003F & c ); + } + } + + return N; + } + + void write_terminator() + { + *mBuffer = buffer_type(0); + } + + private: + buffer_type* mStart; + buffer_type* mBuffer; + }; + +/** + * A character sink (see |copy_string| in nsAlgorithm.h) for computing + * the number of bytes a UCS2 (really UTF-16) would occupy in UTF-8. + */ +class CalculateUTF8Size + { + public: + typedef nsAString::char_type value_type; + + CalculateUTF8Size() + : mSize(0) { } + + size_t Size() const { return mSize; } + + PRUint32 write( const value_type* start, PRUint32 N ) + { + // Assume UCS2 surrogate pairs won't be spread across fragments. + for (const value_type *p = start, *end = start + N; p < end; ++p ) + { + value_type c = *p; + if (! (c & 0xFF80)) // U+0000 - U+007F + mSize += 1; + else if (! (c & 0xF800)) // U+0100 - U+07FF + mSize += 2; + else if (0xD800 == (0xFC00 & c)) // U+D800 - U+DBFF + { + ++p; + if (p == end) + { + NS_ERROR("Surrogate pair split between fragments"); + return N; + } + c = *p; + + if (0xDC00 == (0xFC00 & c)) + mSize += 4; + else + NS_ERROR("got a high Surrogate but no low surrogate"); + } + else if (0xDC00 == (0xFC00 & c)) // U+DC00 - U+DFFF + NS_ERROR("got a low Surrogate but no high surrogate"); + else // U+0800 - U+D7FF, U+E000 - U+FFFF + mSize += 3; + } + + return N; + } + + private: + size_t mSize; + }; + #endif /* !defined(nsUTF8Utils_h_) */ diff --git a/mozilla/string/src/nsReadableUtils.cpp b/mozilla/string/src/nsReadableUtils.cpp index b9a74ec62dc..0cf14c09dd2 100755 --- a/mozilla/string/src/nsReadableUtils.cpp +++ b/mozilla/string/src/nsReadableUtils.cpp @@ -209,24 +209,18 @@ NS_COM char* ToNewUTF8String( const nsAString& aSource ) { - // XXX The conversion code in NS_ConvertUCS2toUTF8 needs to be - // refactored so that we can use it here without a double-copy. - NS_ConvertUCS2toUTF8 temp(aSource); + nsAString::const_iterator start, end; + CalculateUTF8Size calculator; + copy_string(aSource.BeginReading(start), aSource.EndReading(end), + calculator); - char* result; - if (temp.GetOwnsBuffer()) { - // We allocated. Trick the string into not freeing its buffer to - // avoid an extra allocation. - result = temp.mStr; + char *result = NS_STATIC_CAST(char*, + nsMemory::Alloc(calculator.Size() + 1)); - temp.mStr=0; - temp.SetOwnsBuffer(PR_FALSE); - } - else { - // We didn't allocate a buffer, so we need to copy it out of the - // nsCAutoString's storage. - result = ToNewCString(temp); - } + ConvertUCS2toUTF8 converter(result); + copy_string(aSource.BeginReading(start), aSource.EndReading(end), + converter).write_terminator(); + NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch"); return result; } @@ -285,7 +279,7 @@ UTF8ToNewUnicode( const nsACString& aSource ) ConvertUTF8toUCS2 converter(result); copy_string(aSource.BeginReading(start), aSource.EndReading(end), - converter); + converter).write_terminator(); NS_ASSERTION(calculator.Length() == converter.Length(), "length mismatch"); return result; diff --git a/mozilla/xpcom/string/obsolete/nsString.cpp b/mozilla/xpcom/string/obsolete/nsString.cpp index e816e7beee8..d7e982f177f 100644 --- a/mozilla/xpcom/string/obsolete/nsString.cpp +++ b/mozilla/xpcom/string/obsolete/nsString.cpp @@ -45,6 +45,7 @@ #include "nsString.h" #include "nsReadableUtils.h" #include "nsDebug.h" +#include "nsUTF8Utils.h" #ifndef nsCharTraits_h___ #include "nsCharTraits.h" @@ -54,8 +55,10 @@ #include "prdtoa.h" #endif +#ifdef DEBUG static const char* kPossibleNull = "Error: possible unintended null in string"; static const char* kNullPointerError = "Error: unexpected null ptr"; +#endif static const char* kWhitespace="\b\t\r\n "; const nsBufferHandle* @@ -1084,111 +1087,47 @@ PRBool nsCString::EqualsWithConversion(const char* aCString,PRBool aIgnoreCase,P //---------------------------------------------------------------------- -NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString ) +NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString ) { - nsAString::const_iterator start; aString.BeginReading(start); - nsAString::const_iterator end; aString.EndReading(end); - - while (start != end) { - nsReadableFragment frag(start.fragment()); - Append(frag.mStart, frag.mEnd - frag.mStart); - start.advance(start.size_forward()); - } + if (!aString) + // Leave us as an uninitialized nsCAutoString. + return; + Init(nsDependentString(aString)); } -void -NS_ConvertUCS2toUTF8::Append( const PRUnichar* aString, PRUint32 aLength ) +NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength ) { - // Handle null string by just leaving us as a brand-new - // uninitialized nsCAutoString. - if (! aString) + if (!aString) + // Leave us as an uninitialized nsCAutoString. return; + Init(Substring(aString, aString + aLength)); + } - // Calculate how many bytes we need - const PRUnichar* p; - PRInt32 count, utf8len; - for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++) - { - if (! ((*p) & 0xFF80)) - utf8len += 1; // 0000 0000 - 0000 007F - else if (! ((*p) & 0xF800)) - utf8len += 2; // 0000 0080 - 0000 07FF - else - utf8len += 3; // 0000 0800 - 0000 FFFF - // Note: Surrogate pair needs 4 bytes, but in this calcuation - // we count it as 6 bytes. It will waste 2 bytes per surrogate pair +void NS_ConvertUCS2toUTF8::Init( const nsAString& aString ) + { + // Compute space required: do this once so we don't incur multiple + // allocations. This "optimization" is probably of dubious value... + + nsAString::const_iterator start, end; + CalculateUTF8Size calculator; + copy_string(aString.BeginReading(start), aString.EndReading(end), calculator); + + PRUint32 count = calculator.Size(); + + if (count) { + // Grow the buffer if we need to. + SetLength(count); + + // All ready? Time to convert + + ConvertUCS2toUTF8 converter(mStr); + copy_string(aString.BeginReading(start), aString.EndReading(end), converter); + mLength = converter.Size(); + if (mLength != count) { + NS_ERROR("Input invalid or incorrect length was calculated"); + Truncate(); } - - // Make sure our buffer's big enough, so we don't need to do - // multiple allocations. - if(mLength+PRUint32(utf8len+1) > sizeof(mBuffer)) - SetCapacity(mLength+utf8len+1); - // |SetCapacity| normally doesn't guarantee the use we are putting it to here (see its interface comment in nsAString.h), - // we can only use it since our local implementation, |nsCString::SetCapacity|, is known to do what we want - - char* out = mStr+mLength; - PRUint32 ucs4=0; - - for (p = aString, count = aLength; 0 != count && 0 != (*p); count--, p++) - { - if (0 == ucs4) - { - if (! ((*p) & 0xFF80)) - { - *out++ = (char)*p; - } - else if (! ((*p) & 0xF800)) - { - *out++ = 0xC0 | (char)((*p) >> 6); - *out++ = 0x80 | (char)(0x003F & (*p)); - } - else - { - if (0xD800 == (0xFC00 & (*p))) - { - // D800- DBFF - High Surrogate - // N = (H- D800) *400 + 10000 + ... - ucs4 = 0x10000 | ((0x03FF & (*p)) << 10); - } - else if (0xDC00 == (0xFC00 & (*p))) - { - // DC00- DFFF - Low Surrogate - // error here. We should hit High Surrogate first - // Do not output any thing in this case - } - else - { - *out++ = 0xE0 | (char)((*p) >> 12); - *out++ = 0x80 | (char)(0x003F & (*p >> 6)); - *out++ = 0x80 | (char)(0x003F & (*p) ); - } - } - } - else - { - if (0xDC00 == (0xFC00 & (*p))) - { - // DC00- DFFF - Low Surrogate - // N += ( L - DC00 ) - ucs4 |= (0x03FF & (*p)); - - // 0001 0000-001F FFFF - *out++ = 0xF0 | (char)(ucs4 >> 18); - *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12)); - *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6)); - *out++ = 0x80 | (char)(0x003F & ucs4) ; - } - else - { - // Got a High Surrogate but no low surrogate - // output nothing. - } - ucs4 = 0; - } - } - - *out = '\0'; // null terminate - mLength += utf8len; + } } NS_LossyConvertUCS2toASCII::NS_LossyConvertUCS2toASCII( const nsAString& aString ) diff --git a/mozilla/xpcom/string/obsolete/nsString.h b/mozilla/xpcom/string/obsolete/nsString.h index 5ce0fe71a1e..26253147f63 100644 --- a/mozilla/xpcom/string/obsolete/nsString.h +++ b/mozilla/xpcom/string/obsolete/nsString.h @@ -431,24 +431,15 @@ class NS_COM NS_ConvertUCS2toUTF8 */ { public: - friend NS_COM char* ToNewUTF8String( const nsAString& aSource ); - - public: - explicit - NS_ConvertUCS2toUTF8( const PRUnichar* aString ) + explicit NS_ConvertUCS2toUTF8( const PRUnichar* aString ); + NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength ); + explicit NS_ConvertUCS2toUTF8( const nsAString& aString ) { - Append( aString, ~PRUint32(0) /* MAXINT */); + Init(aString); } - NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength ) - { - Append( aString, aLength ); - } - - explicit NS_ConvertUCS2toUTF8( const nsAString& aString ); - protected: - void Append( const PRUnichar* aString, PRUint32 aLength ); + void Init( const nsAString& aString ); private: // NOT TO BE IMPLEMENTED diff --git a/mozilla/xpcom/string/obsolete/nsString2.cpp b/mozilla/xpcom/string/obsolete/nsString2.cpp index c0edfa08f75..ef304455a1e 100644 --- a/mozilla/xpcom/string/obsolete/nsString2.cpp +++ b/mozilla/xpcom/string/obsolete/nsString2.cpp @@ -54,8 +54,10 @@ #include "prdtoa.h" #endif +#ifdef DEBUG static const char* kPossibleNull = "Error: possible unintended null in string"; static const char* kNullPointerError = "Error: unexpected null ptr"; +#endif static const char* kWhitespace="\b\t\r\n "; const nsBufferHandle* diff --git a/mozilla/xpcom/string/public/nsUTF8Utils.h b/mozilla/xpcom/string/public/nsUTF8Utils.h index 7d3db81d58d..a575564198a 100644 --- a/mozilla/xpcom/string/public/nsUTF8Utils.h +++ b/mozilla/xpcom/string/public/nsUTF8Utils.h @@ -54,6 +54,10 @@ class UTF8traits #define PLANE1_BASE 0x00010000 #define UCS2_REPLACEMENT_CHAR 0xfffd +/** + * A character sink (see |copy_string| in nsAlgorithm.h) for converting + * UTF-8 to UCS2 (really UTF-16). + */ class ConvertUTF8toUCS2 { public: @@ -181,12 +185,21 @@ class ConvertUTF8toUCS2 return p - start; } + void write_terminator() + { + *mBuffer = buffer_type(0); + } + private: buffer_type* mStart; buffer_type* mBuffer; PRBool mErrorEncountered; }; +/** + * A character sink (see |copy_string| in nsAlgorithm.h) for computing + * the length of a UTF-8 string. + */ class CalculateUTF8Length { public: @@ -242,4 +255,148 @@ class CalculateUTF8Length PRBool mErrorEncountered; }; +/** + * A character sink (see |copy_string| in nsAlgorithm.h) for converting + * UCS2 (really UTF-16) to UTF-8. + */ +class ConvertUCS2toUTF8 + { + public: + typedef nsAString::char_type value_type; + typedef nsACString::char_type buffer_type; + + // The error handling here is more lenient than that in + // |ConvertUTF8toUCS2|, but it's that way for backwards + // compatibility. + + ConvertUCS2toUTF8( buffer_type* aBuffer ) + : mStart(aBuffer), mBuffer(aBuffer) {} + + size_t Size() const { return mBuffer - mStart; } + + PRUint32 write( const value_type* start, PRUint32 N ) + { + for (const value_type *p = start, *end = start + N; p < end; ++p ) + { + value_type c = *p; + if (! (c & 0xFF80)) // U+0000 - U+007F + { + *mBuffer++ = (char)c; + } + else if (! (c & 0xF800)) // U+0100 - U+07FF + { + *mBuffer++ = 0xC0 | (char)(c >> 6); + *mBuffer++ = 0x80 | (char)(0x003F & c); + } + else if (0xD800 == (0xFC00 & c)) // U+D800 - U+DBFF + { + // D800- DBFF - High Surrogate + // N = (H- D800) *400 + 10000 + ... + PRUint32 ucs4 = 0x10000 + ((0x03FF & c) << 10); + + ++p; + if (p == end) + { + NS_ERROR("Surrogate pair split between fragments"); + return N; + } + c = *p; + + if (0xDC00 == (0xFC00 & c)) + { + // DC00- DFFF - Low Surrogate + // N += ( L - DC00 ) + ucs4 |= (0x03FF & c); + + // 0001 0000-001F FFFF + *mBuffer++ = 0xF0 | (char)(ucs4 >> 18); + *mBuffer++ = 0x80 | (char)(0x003F & (ucs4 >> 12)); + *mBuffer++ = 0x80 | (char)(0x003F & (ucs4 >> 6)); + *mBuffer++ = 0x80 | (char)(0x003F & ucs4) ; + } + else + { + NS_ERROR("got a High Surrogate but no low surrogate"); + // output nothing. + } + } + else if (0xDC00 == (0xFC00 & c)) // U+DC00 - U+DFFF + { + // DC00- DFFF - Low Surrogate + NS_ERROR("got a low Surrogate but no high surrogate"); + // output nothing. + } + else // U+0800 - U+D7FF, U+E000 - U+FFFF + { + *mBuffer++ = 0xE0 | (char)(c >> 12); + *mBuffer++ = 0x80 | (char)(0x003F & (c >> 6)); + *mBuffer++ = 0x80 | (char)(0x003F & c ); + } + } + + return N; + } + + void write_terminator() + { + *mBuffer = buffer_type(0); + } + + private: + buffer_type* mStart; + buffer_type* mBuffer; + }; + +/** + * A character sink (see |copy_string| in nsAlgorithm.h) for computing + * the number of bytes a UCS2 (really UTF-16) would occupy in UTF-8. + */ +class CalculateUTF8Size + { + public: + typedef nsAString::char_type value_type; + + CalculateUTF8Size() + : mSize(0) { } + + size_t Size() const { return mSize; } + + PRUint32 write( const value_type* start, PRUint32 N ) + { + // Assume UCS2 surrogate pairs won't be spread across fragments. + for (const value_type *p = start, *end = start + N; p < end; ++p ) + { + value_type c = *p; + if (! (c & 0xFF80)) // U+0000 - U+007F + mSize += 1; + else if (! (c & 0xF800)) // U+0100 - U+07FF + mSize += 2; + else if (0xD800 == (0xFC00 & c)) // U+D800 - U+DBFF + { + ++p; + if (p == end) + { + NS_ERROR("Surrogate pair split between fragments"); + return N; + } + c = *p; + + if (0xDC00 == (0xFC00 & c)) + mSize += 4; + else + NS_ERROR("got a high Surrogate but no low surrogate"); + } + else if (0xDC00 == (0xFC00 & c)) // U+DC00 - U+DFFF + NS_ERROR("got a low Surrogate but no high surrogate"); + else // U+0800 - U+D7FF, U+E000 - U+FFFF + mSize += 3; + } + + return N; + } + + private: + size_t mSize; + }; + #endif /* !defined(nsUTF8Utils_h_) */ diff --git a/mozilla/xpcom/string/src/nsReadableUtils.cpp b/mozilla/xpcom/string/src/nsReadableUtils.cpp index b9a74ec62dc..0cf14c09dd2 100755 --- a/mozilla/xpcom/string/src/nsReadableUtils.cpp +++ b/mozilla/xpcom/string/src/nsReadableUtils.cpp @@ -209,24 +209,18 @@ NS_COM char* ToNewUTF8String( const nsAString& aSource ) { - // XXX The conversion code in NS_ConvertUCS2toUTF8 needs to be - // refactored so that we can use it here without a double-copy. - NS_ConvertUCS2toUTF8 temp(aSource); + nsAString::const_iterator start, end; + CalculateUTF8Size calculator; + copy_string(aSource.BeginReading(start), aSource.EndReading(end), + calculator); - char* result; - if (temp.GetOwnsBuffer()) { - // We allocated. Trick the string into not freeing its buffer to - // avoid an extra allocation. - result = temp.mStr; + char *result = NS_STATIC_CAST(char*, + nsMemory::Alloc(calculator.Size() + 1)); - temp.mStr=0; - temp.SetOwnsBuffer(PR_FALSE); - } - else { - // We didn't allocate a buffer, so we need to copy it out of the - // nsCAutoString's storage. - result = ToNewCString(temp); - } + ConvertUCS2toUTF8 converter(result); + copy_string(aSource.BeginReading(start), aSource.EndReading(end), + converter).write_terminator(); + NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch"); return result; } @@ -285,7 +279,7 @@ UTF8ToNewUnicode( const nsACString& aSource ) ConvertUTF8toUCS2 converter(result); copy_string(aSource.BeginReading(start), aSource.EndReading(end), - converter); + converter).write_terminator(); NS_ASSERTION(calculator.Length() == converter.Length(), "length mismatch"); return result;