From 11ea188ef27acae0fa4fc48c58146bd6633017a7 Mon Sep 17 00:00:00 2001 From: "waterson%netscape.com" Date: Fri, 26 May 2000 22:24:34 +0000 Subject: [PATCH] Bug 40461. Implement NS_ConvertUCS2toUTF8, r=scc,brendan git-svn-id: svn://10.0.0.236/trunk@70949 18797224-902f-48f8-a5cc-f745e15eee43 --- mozilla/string/obsolete/nsString.cpp | 89 ++++++++++++++++++++- mozilla/string/obsolete/nsString.h | 28 +++++++ mozilla/string/obsolete/nsString2.cpp | 81 ++++--------------- mozilla/xpcom/ds/nsString.cpp | 89 ++++++++++++++++++++- mozilla/xpcom/ds/nsString.h | 28 +++++++ mozilla/xpcom/ds/nsString2.cpp | 81 ++++--------------- mozilla/xpcom/string/obsolete/nsString.cpp | 89 ++++++++++++++++++++- mozilla/xpcom/string/obsolete/nsString.h | 28 +++++++ mozilla/xpcom/string/obsolete/nsString2.cpp | 81 ++++--------------- 9 files changed, 390 insertions(+), 204 deletions(-) diff --git a/mozilla/string/obsolete/nsString.cpp b/mozilla/string/obsolete/nsString.cpp index 127827e8a46..524e02bc5e2 100644 --- a/mozilla/string/obsolete/nsString.cpp +++ b/mozilla/string/obsolete/nsString.cpp @@ -1,4 +1,3 @@ - /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The contents of this file are subject to the Netscape Public @@ -1862,6 +1861,94 @@ void nsCString::DebugDump(void) const { } } +//---------------------------------------------------------------------- + +NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength ) + { + // Caculate how many bytes we need + const PRUnichar* p; + PRInt32 count, utf8len; + for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++) + { + if (! ((*p) & 0xFF80)) + utf8len += 1; // 0000 0000 - 0000 007F + else if (! ((*p) & 0xF800)) + utf8len += 2; // 0000 0080 - 0000 07FF + else + utf8len += 3; // 0000 0800 - 0000 FFFF + // Note: Surrogate pair needs 4 bytes, but in this calcuation + // we count it as 6 bytes. It will waste 2 bytes per surrogate pair + } + + // Make sure our buffer's big enough, so we don't need to do + // multiple allocations. + if((utf8len+1) > sizeof(mBuffer)) + SetCapacity(utf8len+1); + + char* out = mStr; + PRUint32 ucs4=0; + + for (p = aString, utf8len=0, count = aLength; 0 != count && 0 != (*p); count--, p++) + { + if (0 == ucs4) + { + if (! ((*p) & 0xFF80)) + { + *out++ = (char)*p; + } + else if (! ((*p) & 0xF800)) + { + *out++ = 0xC0 | (char)((*p) >> 6); + *out++ = 0x80 | (char)(0x003F & (*p)); + } + else + { + if (0xD800 == (0xFC00 & (*p))) + { + // D800- DBFF - High Surrogate + // N = (H- D800) *400 + 10000 + ... + ucs4 = 0x10000 | ((0x03FF & (*p)) << 10); + } + else if (0xDC00 == (0xFC00 & (*p))) + { + // DC00- DFFF - Low Surrogate + // error here. We should hit High Surrogate first + // Do not output any thing in this case + } + else + { + *out++ = 0xE0 | (char)((*p) >> 12); + *out++ = 0x80 | (char)(0x003F & (*p >> 6)); + *out++ = 0x80 | (char)(0x003F & (*p) ); + } + } + } + else + { + if (0xDC00 == (0xFC00 & (*p))) + { + // DC00- DFFF - Low Surrogate + // N += ( L - DC00 ) + ucs4 |= (0x03FF & (*p)); + + // 0001 0000-001F FFFF + *out++ = 0xF0 | (char)(ucs4 >> 18); + *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12)); + *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6)); + *out++ = 0x80 | (char)(0x003F & ucs4) ; + } + else + { + // Got a High Surrogate but no low surrogate + // output nothing. + } + ucs4 = 0; + } + } + + *out = '\0'; // null terminate + } + /*********************************************************************** IMPLEMENTATION NOTES: AUTOSTRING... diff --git a/mozilla/string/obsolete/nsString.h b/mozilla/string/obsolete/nsString.h index 1eff5732764..00c6200a2fe 100644 --- a/mozilla/string/obsolete/nsString.h +++ b/mozilla/string/obsolete/nsString.h @@ -870,6 +870,34 @@ NS_DEF_STRING_COMPARISON_OPERATORS(nsCAutoString, char) NS_DEF_DERIVED_STRING_OPERATOR_PLUS(nsCAutoString, char) #endif +/** + * A helper class that converts a UCS2 string to UTF8 + */ +class NS_COM NS_ConvertUCS2toUTF8 + : public nsCAutoString + /* + ... + */ + { + public: + NS_ConvertUCS2toUTF8( const PRUnichar* aString ) + { Init( aString, ~PRUint32(0) /* MAXINT */); } + + NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength ) + { Init( aString, aLength ); } + + NS_ConvertUCS2toUTF8( PRUnichar aChar ) + { Init( &aChar, 1 ); } + + protected: + Init( const PRUnichar* aString, PRUint32 aLength ); + + private: + // NOT TO BE IMPLEMENTED + NS_ConvertUCS2toUTF8( char ); + }; + + /*************************************************************** The subsumestr class is very unusual. It differs from a normal string in that it doesn't use normal diff --git a/mozilla/string/obsolete/nsString2.cpp b/mozilla/string/obsolete/nsString2.cpp index 76e2dd67c1c..aed93b24b5a 100644 --- a/mozilla/string/obsolete/nsString2.cpp +++ b/mozilla/string/obsolete/nsString2.cpp @@ -1,4 +1,3 @@ - /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The contents of this file are subject to the Netscape Public @@ -737,75 +736,23 @@ char* nsString::ToNewCString() const { * http://www.cis.ohio-state.edu/htbin/rfc/rfc2279.html */ char* nsString::ToNewUTF8String() const { - nsCString temp(""); - temp.SetCapacity(8); //ensure that we get an allocated buffer instead of the common empty one. + NS_ConvertUCS2toUTF8 temp(mUStr); - // Caculate how many bytes we need - PRUnichar* p; - PRInt32 utf8len; - for(p = this->mUStr, utf8len=0; 0 != (*p);p++) - { - if(0x0000 == ((*p) & 0x007F)) - utf8len += 1; // 0000 0000 - 0000 007F - else if(0x0000 == ((*p) & 0x07FF)) - utf8len += 2; // 0000 0080 - 0000 07FF - else - utf8len += 3; // 0000 0800 - 0000 FFFF - // Note: Surrogate pair need 4 bytes, but in this caculation - // we count as 6 bytes. It will wast 2 bytes per surrogate pair + char* result; + if (temp.mOwnsBuffer) { + // We allocated. Trick the string into not freeing its buffer to + // avoid an extra allocation. + result = temp.mStr; + + temp.mStr=0; + temp.mOwnsBuffer = PR_FALSE; + } + else { + // We didn't allocate a buffer, so we need to copy it out of the + // nsCAutoString's storage. + result = nsCRT::strdup(temp.mStr); } - if((utf8len+1) > 8) - temp.SetCapacity(utf8len+1); - - char* result=temp.mStr; - char* out = result; - PRUint32 ucs4=0; - - for(p = this->mUStr, utf8len=0; 0 != (*p);p++) - { - if(0 == ucs4) { - if(0x0000 == ((*p) & 0xFF80)) { - *out++ = (char)*p; - } else if(0x0000 == ((*p) & 0xF800)) { - *out++ = 0xC0 | (char)((*p) >> 6); - *out++ = 0x80 | (char)(0x003F & (*p)); - } else { - if( 0xD800 == ( 0xFC00 & (*p))) - { // D800- DBFF - High Surrogate - // N = (H- D800) *400 + 10000 + ... - ucs4 = 0x10000 | ((0x03FF & (*p)) << 10); - } else if( 0xDC00 == ( 0xFC00 & (*p))) { - // DC00- DFFF - Low Surrogate - // error here. We should hit High Surrogate first - // Do not output any thing in this case - } else { - *out++ = 0xE0 | (char)((*p) >> 12); - *out++ = 0x80 | (char)(0x003F & (*p >> 6)); - *out++ = 0x80 | (char)(0x003F & (*p) ); - } - } - } else { - if( 0xDC00 == (0xFC00 & (*p))) { - // DC00- DFFF - Low Surrogate - // N += ( L - DC00 ) - ucs4 |= (0x03FF & (*p)); - // 0001 0000-001F FFFF - *out++ = 0xF0 | (char)(ucs4 >> 18); - *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12)); - *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6)); - *out++ = 0x80 | (char)(0x003F & ucs4) ; - } else { - // Got a High Surrogate but no low surrogate - // output nothing. - } - ucs4 = 0; - } - } - *out = '\0'; // null terminate - temp.mStr=0; - temp.mOwnsBuffer=PR_FALSE; - return result; } diff --git a/mozilla/xpcom/ds/nsString.cpp b/mozilla/xpcom/ds/nsString.cpp index 127827e8a46..524e02bc5e2 100644 --- a/mozilla/xpcom/ds/nsString.cpp +++ b/mozilla/xpcom/ds/nsString.cpp @@ -1,4 +1,3 @@ - /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The contents of this file are subject to the Netscape Public @@ -1862,6 +1861,94 @@ void nsCString::DebugDump(void) const { } } +//---------------------------------------------------------------------- + +NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength ) + { + // Caculate how many bytes we need + const PRUnichar* p; + PRInt32 count, utf8len; + for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++) + { + if (! ((*p) & 0xFF80)) + utf8len += 1; // 0000 0000 - 0000 007F + else if (! ((*p) & 0xF800)) + utf8len += 2; // 0000 0080 - 0000 07FF + else + utf8len += 3; // 0000 0800 - 0000 FFFF + // Note: Surrogate pair needs 4 bytes, but in this calcuation + // we count it as 6 bytes. It will waste 2 bytes per surrogate pair + } + + // Make sure our buffer's big enough, so we don't need to do + // multiple allocations. + if((utf8len+1) > sizeof(mBuffer)) + SetCapacity(utf8len+1); + + char* out = mStr; + PRUint32 ucs4=0; + + for (p = aString, utf8len=0, count = aLength; 0 != count && 0 != (*p); count--, p++) + { + if (0 == ucs4) + { + if (! ((*p) & 0xFF80)) + { + *out++ = (char)*p; + } + else if (! ((*p) & 0xF800)) + { + *out++ = 0xC0 | (char)((*p) >> 6); + *out++ = 0x80 | (char)(0x003F & (*p)); + } + else + { + if (0xD800 == (0xFC00 & (*p))) + { + // D800- DBFF - High Surrogate + // N = (H- D800) *400 + 10000 + ... + ucs4 = 0x10000 | ((0x03FF & (*p)) << 10); + } + else if (0xDC00 == (0xFC00 & (*p))) + { + // DC00- DFFF - Low Surrogate + // error here. We should hit High Surrogate first + // Do not output any thing in this case + } + else + { + *out++ = 0xE0 | (char)((*p) >> 12); + *out++ = 0x80 | (char)(0x003F & (*p >> 6)); + *out++ = 0x80 | (char)(0x003F & (*p) ); + } + } + } + else + { + if (0xDC00 == (0xFC00 & (*p))) + { + // DC00- DFFF - Low Surrogate + // N += ( L - DC00 ) + ucs4 |= (0x03FF & (*p)); + + // 0001 0000-001F FFFF + *out++ = 0xF0 | (char)(ucs4 >> 18); + *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12)); + *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6)); + *out++ = 0x80 | (char)(0x003F & ucs4) ; + } + else + { + // Got a High Surrogate but no low surrogate + // output nothing. + } + ucs4 = 0; + } + } + + *out = '\0'; // null terminate + } + /*********************************************************************** IMPLEMENTATION NOTES: AUTOSTRING... diff --git a/mozilla/xpcom/ds/nsString.h b/mozilla/xpcom/ds/nsString.h index 1eff5732764..00c6200a2fe 100644 --- a/mozilla/xpcom/ds/nsString.h +++ b/mozilla/xpcom/ds/nsString.h @@ -870,6 +870,34 @@ NS_DEF_STRING_COMPARISON_OPERATORS(nsCAutoString, char) NS_DEF_DERIVED_STRING_OPERATOR_PLUS(nsCAutoString, char) #endif +/** + * A helper class that converts a UCS2 string to UTF8 + */ +class NS_COM NS_ConvertUCS2toUTF8 + : public nsCAutoString + /* + ... + */ + { + public: + NS_ConvertUCS2toUTF8( const PRUnichar* aString ) + { Init( aString, ~PRUint32(0) /* MAXINT */); } + + NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength ) + { Init( aString, aLength ); } + + NS_ConvertUCS2toUTF8( PRUnichar aChar ) + { Init( &aChar, 1 ); } + + protected: + Init( const PRUnichar* aString, PRUint32 aLength ); + + private: + // NOT TO BE IMPLEMENTED + NS_ConvertUCS2toUTF8( char ); + }; + + /*************************************************************** The subsumestr class is very unusual. It differs from a normal string in that it doesn't use normal diff --git a/mozilla/xpcom/ds/nsString2.cpp b/mozilla/xpcom/ds/nsString2.cpp index 76e2dd67c1c..aed93b24b5a 100644 --- a/mozilla/xpcom/ds/nsString2.cpp +++ b/mozilla/xpcom/ds/nsString2.cpp @@ -1,4 +1,3 @@ - /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The contents of this file are subject to the Netscape Public @@ -737,75 +736,23 @@ char* nsString::ToNewCString() const { * http://www.cis.ohio-state.edu/htbin/rfc/rfc2279.html */ char* nsString::ToNewUTF8String() const { - nsCString temp(""); - temp.SetCapacity(8); //ensure that we get an allocated buffer instead of the common empty one. + NS_ConvertUCS2toUTF8 temp(mUStr); - // Caculate how many bytes we need - PRUnichar* p; - PRInt32 utf8len; - for(p = this->mUStr, utf8len=0; 0 != (*p);p++) - { - if(0x0000 == ((*p) & 0x007F)) - utf8len += 1; // 0000 0000 - 0000 007F - else if(0x0000 == ((*p) & 0x07FF)) - utf8len += 2; // 0000 0080 - 0000 07FF - else - utf8len += 3; // 0000 0800 - 0000 FFFF - // Note: Surrogate pair need 4 bytes, but in this caculation - // we count as 6 bytes. It will wast 2 bytes per surrogate pair + char* result; + if (temp.mOwnsBuffer) { + // We allocated. Trick the string into not freeing its buffer to + // avoid an extra allocation. + result = temp.mStr; + + temp.mStr=0; + temp.mOwnsBuffer = PR_FALSE; + } + else { + // We didn't allocate a buffer, so we need to copy it out of the + // nsCAutoString's storage. + result = nsCRT::strdup(temp.mStr); } - if((utf8len+1) > 8) - temp.SetCapacity(utf8len+1); - - char* result=temp.mStr; - char* out = result; - PRUint32 ucs4=0; - - for(p = this->mUStr, utf8len=0; 0 != (*p);p++) - { - if(0 == ucs4) { - if(0x0000 == ((*p) & 0xFF80)) { - *out++ = (char)*p; - } else if(0x0000 == ((*p) & 0xF800)) { - *out++ = 0xC0 | (char)((*p) >> 6); - *out++ = 0x80 | (char)(0x003F & (*p)); - } else { - if( 0xD800 == ( 0xFC00 & (*p))) - { // D800- DBFF - High Surrogate - // N = (H- D800) *400 + 10000 + ... - ucs4 = 0x10000 | ((0x03FF & (*p)) << 10); - } else if( 0xDC00 == ( 0xFC00 & (*p))) { - // DC00- DFFF - Low Surrogate - // error here. We should hit High Surrogate first - // Do not output any thing in this case - } else { - *out++ = 0xE0 | (char)((*p) >> 12); - *out++ = 0x80 | (char)(0x003F & (*p >> 6)); - *out++ = 0x80 | (char)(0x003F & (*p) ); - } - } - } else { - if( 0xDC00 == (0xFC00 & (*p))) { - // DC00- DFFF - Low Surrogate - // N += ( L - DC00 ) - ucs4 |= (0x03FF & (*p)); - // 0001 0000-001F FFFF - *out++ = 0xF0 | (char)(ucs4 >> 18); - *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12)); - *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6)); - *out++ = 0x80 | (char)(0x003F & ucs4) ; - } else { - // Got a High Surrogate but no low surrogate - // output nothing. - } - ucs4 = 0; - } - } - *out = '\0'; // null terminate - temp.mStr=0; - temp.mOwnsBuffer=PR_FALSE; - return result; } diff --git a/mozilla/xpcom/string/obsolete/nsString.cpp b/mozilla/xpcom/string/obsolete/nsString.cpp index 127827e8a46..524e02bc5e2 100644 --- a/mozilla/xpcom/string/obsolete/nsString.cpp +++ b/mozilla/xpcom/string/obsolete/nsString.cpp @@ -1,4 +1,3 @@ - /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The contents of this file are subject to the Netscape Public @@ -1862,6 +1861,94 @@ void nsCString::DebugDump(void) const { } } +//---------------------------------------------------------------------- + +NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength ) + { + // Caculate how many bytes we need + const PRUnichar* p; + PRInt32 count, utf8len; + for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++) + { + if (! ((*p) & 0xFF80)) + utf8len += 1; // 0000 0000 - 0000 007F + else if (! ((*p) & 0xF800)) + utf8len += 2; // 0000 0080 - 0000 07FF + else + utf8len += 3; // 0000 0800 - 0000 FFFF + // Note: Surrogate pair needs 4 bytes, but in this calcuation + // we count it as 6 bytes. It will waste 2 bytes per surrogate pair + } + + // Make sure our buffer's big enough, so we don't need to do + // multiple allocations. + if((utf8len+1) > sizeof(mBuffer)) + SetCapacity(utf8len+1); + + char* out = mStr; + PRUint32 ucs4=0; + + for (p = aString, utf8len=0, count = aLength; 0 != count && 0 != (*p); count--, p++) + { + if (0 == ucs4) + { + if (! ((*p) & 0xFF80)) + { + *out++ = (char)*p; + } + else if (! ((*p) & 0xF800)) + { + *out++ = 0xC0 | (char)((*p) >> 6); + *out++ = 0x80 | (char)(0x003F & (*p)); + } + else + { + if (0xD800 == (0xFC00 & (*p))) + { + // D800- DBFF - High Surrogate + // N = (H- D800) *400 + 10000 + ... + ucs4 = 0x10000 | ((0x03FF & (*p)) << 10); + } + else if (0xDC00 == (0xFC00 & (*p))) + { + // DC00- DFFF - Low Surrogate + // error here. We should hit High Surrogate first + // Do not output any thing in this case + } + else + { + *out++ = 0xE0 | (char)((*p) >> 12); + *out++ = 0x80 | (char)(0x003F & (*p >> 6)); + *out++ = 0x80 | (char)(0x003F & (*p) ); + } + } + } + else + { + if (0xDC00 == (0xFC00 & (*p))) + { + // DC00- DFFF - Low Surrogate + // N += ( L - DC00 ) + ucs4 |= (0x03FF & (*p)); + + // 0001 0000-001F FFFF + *out++ = 0xF0 | (char)(ucs4 >> 18); + *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12)); + *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6)); + *out++ = 0x80 | (char)(0x003F & ucs4) ; + } + else + { + // Got a High Surrogate but no low surrogate + // output nothing. + } + ucs4 = 0; + } + } + + *out = '\0'; // null terminate + } + /*********************************************************************** IMPLEMENTATION NOTES: AUTOSTRING... diff --git a/mozilla/xpcom/string/obsolete/nsString.h b/mozilla/xpcom/string/obsolete/nsString.h index 1eff5732764..00c6200a2fe 100644 --- a/mozilla/xpcom/string/obsolete/nsString.h +++ b/mozilla/xpcom/string/obsolete/nsString.h @@ -870,6 +870,34 @@ NS_DEF_STRING_COMPARISON_OPERATORS(nsCAutoString, char) NS_DEF_DERIVED_STRING_OPERATOR_PLUS(nsCAutoString, char) #endif +/** + * A helper class that converts a UCS2 string to UTF8 + */ +class NS_COM NS_ConvertUCS2toUTF8 + : public nsCAutoString + /* + ... + */ + { + public: + NS_ConvertUCS2toUTF8( const PRUnichar* aString ) + { Init( aString, ~PRUint32(0) /* MAXINT */); } + + NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength ) + { Init( aString, aLength ); } + + NS_ConvertUCS2toUTF8( PRUnichar aChar ) + { Init( &aChar, 1 ); } + + protected: + Init( const PRUnichar* aString, PRUint32 aLength ); + + private: + // NOT TO BE IMPLEMENTED + NS_ConvertUCS2toUTF8( char ); + }; + + /*************************************************************** The subsumestr class is very unusual. It differs from a normal string in that it doesn't use normal diff --git a/mozilla/xpcom/string/obsolete/nsString2.cpp b/mozilla/xpcom/string/obsolete/nsString2.cpp index 76e2dd67c1c..aed93b24b5a 100644 --- a/mozilla/xpcom/string/obsolete/nsString2.cpp +++ b/mozilla/xpcom/string/obsolete/nsString2.cpp @@ -1,4 +1,3 @@ - /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The contents of this file are subject to the Netscape Public @@ -737,75 +736,23 @@ char* nsString::ToNewCString() const { * http://www.cis.ohio-state.edu/htbin/rfc/rfc2279.html */ char* nsString::ToNewUTF8String() const { - nsCString temp(""); - temp.SetCapacity(8); //ensure that we get an allocated buffer instead of the common empty one. + NS_ConvertUCS2toUTF8 temp(mUStr); - // Caculate how many bytes we need - PRUnichar* p; - PRInt32 utf8len; - for(p = this->mUStr, utf8len=0; 0 != (*p);p++) - { - if(0x0000 == ((*p) & 0x007F)) - utf8len += 1; // 0000 0000 - 0000 007F - else if(0x0000 == ((*p) & 0x07FF)) - utf8len += 2; // 0000 0080 - 0000 07FF - else - utf8len += 3; // 0000 0800 - 0000 FFFF - // Note: Surrogate pair need 4 bytes, but in this caculation - // we count as 6 bytes. It will wast 2 bytes per surrogate pair + char* result; + if (temp.mOwnsBuffer) { + // We allocated. Trick the string into not freeing its buffer to + // avoid an extra allocation. + result = temp.mStr; + + temp.mStr=0; + temp.mOwnsBuffer = PR_FALSE; + } + else { + // We didn't allocate a buffer, so we need to copy it out of the + // nsCAutoString's storage. + result = nsCRT::strdup(temp.mStr); } - if((utf8len+1) > 8) - temp.SetCapacity(utf8len+1); - - char* result=temp.mStr; - char* out = result; - PRUint32 ucs4=0; - - for(p = this->mUStr, utf8len=0; 0 != (*p);p++) - { - if(0 == ucs4) { - if(0x0000 == ((*p) & 0xFF80)) { - *out++ = (char)*p; - } else if(0x0000 == ((*p) & 0xF800)) { - *out++ = 0xC0 | (char)((*p) >> 6); - *out++ = 0x80 | (char)(0x003F & (*p)); - } else { - if( 0xD800 == ( 0xFC00 & (*p))) - { // D800- DBFF - High Surrogate - // N = (H- D800) *400 + 10000 + ... - ucs4 = 0x10000 | ((0x03FF & (*p)) << 10); - } else if( 0xDC00 == ( 0xFC00 & (*p))) { - // DC00- DFFF - Low Surrogate - // error here. We should hit High Surrogate first - // Do not output any thing in this case - } else { - *out++ = 0xE0 | (char)((*p) >> 12); - *out++ = 0x80 | (char)(0x003F & (*p >> 6)); - *out++ = 0x80 | (char)(0x003F & (*p) ); - } - } - } else { - if( 0xDC00 == (0xFC00 & (*p))) { - // DC00- DFFF - Low Surrogate - // N += ( L - DC00 ) - ucs4 |= (0x03FF & (*p)); - // 0001 0000-001F FFFF - *out++ = 0xF0 | (char)(ucs4 >> 18); - *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12)); - *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6)); - *out++ = 0x80 | (char)(0x003F & ucs4) ; - } else { - // Got a High Surrogate but no low surrogate - // output nothing. - } - ucs4 = 0; - } - } - *out = '\0'; // null terminate - temp.mStr=0; - temp.mOwnsBuffer=PR_FALSE; - return result; }