diff --git a/mozilla/string/obsolete/nsString.cpp b/mozilla/string/obsolete/nsString.cpp
index dfcbc351a5b..38f5624e0de 100644
--- a/mozilla/string/obsolete/nsString.cpp
+++ b/mozilla/string/obsolete/nsString.cpp
@@ -652,11 +652,7 @@ nsCString* nsCString::ToNewString() const {
  * @return ptr to new ascii string
  */
 char* nsCString::ToNewCString() const {
-  nsCString temp(*this); //construct nsCString with alloc on heap (which we'll steal in a moment)
-  temp.SetCapacity(8); //force it to have an allocated buffer, even if this is empty.
-  char* result=temp.mStr; //steal temp's buffer
-  temp.mStr=0; //clear temp's buffer to prevent deallocation
-  return result; //and return the char*
+  return nsCRT::strdup(mStr);
 }
 
 /**
@@ -666,12 +662,12 @@ char* nsCString::ToNewCString() const {
  * @return ptr to new ascii string
  */
 PRUnichar* nsCString::ToNewUnicode() const {
-  nsString temp;
-  temp.AssignWithConversion(*this); //construct nsCString with alloc on heap (which we'll steal in a moment)
-  temp.SetCapacity(8); //force temp to have an allocated buffer, even if this is empty.
-  PRUnichar* result=temp.mUStr; //steal temp's buffer
-  temp.mStr=0; //now clear temp's buffer to prevent deallocation
-  temp.mOwnsBuffer=PR_FALSE; //and return the PRUnichar*
+  PRUnichar* result = NS_STATIC_CAST(PRUnichar*, nsAllocator::Alloc(sizeof(PRUnichar) * (mLength + 1)));
+  if (result) {
+    CBufDescriptor desc(result, PR_TRUE, mLength + 1, 0);
+    nsAutoString temp(desc);
+    temp.AssignWithConversion(*this);
+  }
   return result;
 }
 
@@ -1948,6 +1944,7 @@ NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
     }
 
     *out = '\0'; // null terminate
+    mLength = utf8len;
   }
 
 
diff --git a/mozilla/string/obsolete/nsString2.cpp b/mozilla/string/obsolete/nsString2.cpp
index aed93b24b5a..af9d4aff01f 100644
--- a/mozilla/string/obsolete/nsString2.cpp
+++ b/mozilla/string/obsolete/nsString2.cpp
@@ -719,13 +719,13 @@ nsString* nsString::ToNewString() const {
  * @return ptr to new ascii string
  */
 char* nsString::ToNewCString() const {
-
-  nsCString temp;
-  temp.AssignWithConversion(GetUnicode(), Length()); //construct nsCString with alloc on heap (which we'll steal in a moment)
-  temp.SetCapacity(8); //force it to have an allocated buffer, even if this is empty.
-  char* result=temp.mStr; //steal temp's buffer
-  temp.mStr=0; //clear temp's buffer to prevent deallocation
-  return result; //and return the char*
+  char* result = NS_STATIC_CAST(char*, nsAllocator::Alloc(mLength + 1));
+  if (result) {
+    CBufDescriptor desc(result, PR_TRUE, mLength + 1, 0);
+    nsCAutoString temp(desc);
+    temp.AssignWithConversion(*this);
+  }
+  return result;
 }
 
 /**
@@ -763,13 +763,7 @@ char* nsString::ToNewUTF8String() const {
  * @return ptr to new ascii string
  */
 PRUnichar* nsString::ToNewUnicode() const {
-
-  nsString temp(*this); //construct nsString with alloc on heap (which we'll steal in a moment)
-  temp.SetCapacity(8); //force it to have an allocated buffer, even if this is empty.
-  PRUnichar* result=temp.mUStr; //steal temp's buffer
-  temp.mStr=0; //clear temp's buffer to prevent deallocation
-  temp.mOwnsBuffer=PR_FALSE;
-  return result; //and return the PRUnichar* to the caller
+  return nsCRT::strdup(mUStr);
 }
 
 /**
@@ -2308,6 +2302,151 @@ NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const nsCString& )
 #endif
 #endif
 
+
+void
+NS_ConvertUTF8toUCS2::Init( const char* aCString, PRUint32 aLength )
+{
+  // Handle null string by just leaving us as a brand-new
+  // uninitialized nsAutoString.
+  if (! aCString)
+    return;
+
+  // |aLength| limits how many *bytes* of |aCString| are consumed;
+  // conversion also stops at the first NUL byte. The byte budget is
+  // always checked before |*p| is read, so the input need not be
+  // NUL-terminated when |aLength| is accurate (e.g. the |char| ctor).
+  const char* p = aCString;
+  PRUint32 remaining = aLength;
+
+  // Pass 1: compute an upper bound on the number of PRUnichars needed
+  // so we allocate at most once. A 4 byte sequence can expand to a
+  // surrogate pair (two PRUnichars); everything else yields at most one.
+  PRUint32 units = 0;
+  while (remaining && *p) {
+    PRUint32 trail;
+    if ( 0 == (*p & 0x80) )
+      trail = 0; // ASCII
+    else if ( 0xC0 == (*p & 0xE0) )
+      trail = 1; // 2 byte UTF8
+    else if ( 0xE0 == (*p & 0xF0) )
+      trail = 2; // 3 byte UTF8
+    else if ( 0xF0 == (*p & 0xF8) )
+      trail = 3; // 4 byte UTF8
+    else if ( 0xF8 == (*p & 0xFC) )
+      trail = 4; // 5 byte UTF8
+    else if ( 0xFC == (*p & 0xFE) )
+      trail = 5; // 6 byte UTF8
+    else {
+      NS_ERROR("not a UTF-8 string");
+      return;
+    }
+
+    units += (3 == trail) ? 2 : 1;
+    ++p;
+    --remaining;
+
+    // Step over the trail bytes one at a time rather than jumping by
+    // the nominal sequence length: a truncated sequence must not carry
+    // |p| past the terminating NUL or the end of the byte budget.
+    while (trail && remaining && 0x80 == (0xC0 & *p)) {
+      ++p;
+      --remaining;
+      --trail;
+    }
+  }
+
+  // Grow the buffer if we need to.
+  if ((units * sizeof(PRUnichar)) >= sizeof(mBuffer))
+    SetCapacity(units + 1);
+
+  // We'll write directly into the new string's buffer
+  PRUnichar* out = mUStr;
+
+  // Pass 2: convert the characters.
+  p = aCString;
+  remaining = aLength;
+  while (remaining && *p) {
+    char c = *p++;
+    --remaining;
+
+    if ( 0 == (0x80 & c) ) { // ASCII
+      *out++ = PRUnichar(c);
+      continue;
+    }
+
+    PRUint32 ucs4;
+    PRInt32 state = 0;
+
+    if ( 0xC0 == (0xE0 & c) ) { // 2 bytes UTF8
+      ucs4 = (PRUint32(c) << 6) & 0x000007C0L;
+      state = 1;
+    }
+    else if ( 0xE0 == (0xF0 & c) ) { // 3 bytes UTF8
+      ucs4 = (PRUint32(c) << 12) & 0x0000F000L;
+      state = 2;
+    }
+    else if ( 0xF0 == (0xF8 & c) ) { // 4 bytes UTF8
+      ucs4 = (PRUint32(c) << 18) & 0x001F0000L;
+      state = 3;
+    }
+    else if ( 0xF8 == (0xFC & c) ) { // 5 bytes UTF8
+      ucs4 = (PRUint32(c) << 24) & 0x03000000L;
+      state = 4;
+    }
+    else if ( 0xFC == (0xFE & c) ) { // 6 bytes UTF8
+      ucs4 = (PRUint32(c) << 30) & 0x40000000L;
+      state = 5;
+    }
+    else {
+      NS_ERROR("not a UTF8 string");
+      break;
+    }
+
+    while (state--) {
+      if (! remaining) {
+        // Sequence cut short by the byte budget.
+        NS_ERROR("not a UTF8 string");
+        goto done; // so we minimally clean up
+      }
+
+      c = *p++;
+      --remaining;
+
+      if ( 0x80 == (0xC0 & c) ) {
+        PRInt32 shift = state * 6;
+        ucs4 |= (PRUint32(c) & 0x3F) << shift;
+      }
+      else {
+        NS_ERROR("not a UTF8 string");
+        goto done; // so we minimally clean up
+      }
+    }
+
+    if (ucs4 >= 0x00010000) {
+      if (ucs4 >= 0x00110000) {
+        // Beyond U+10FFFF: not representable as a surrogate pair.
+        *out++ = 0xFFFD;
+      }
+      else {
+        ucs4 -= 0x00010000;
+        *out++ = 0xD800 | (0x000003FF & (ucs4 >> 10));
+        *out++ = 0xDC00 | (0x000003FF & ucs4);
+      }
+    }
+    else {
+      if (0xfeff != ucs4) // ignore BOM
+        *out++ = ucs4;
+    }
+  }
+
+ done:
+  *out = '\0'; // null terminate
+
+  // Surrogate pairs add a code unit and a skipped BOM removes one, so
+  // the length is what was actually written, not the characters read.
+  mLength = PRUint32(out - mUStr);
+}
+
 #if 0
 /**
  * Copy construct from ascii c-string
diff --git a/mozilla/string/obsolete/nsString2.h b/mozilla/string/obsolete/nsString2.h
index 58bb62d3c97..227b9291029 100644
--- a/mozilla/string/obsolete/nsString2.h
+++ b/mozilla/string/obsolete/nsString2.h
@@ -1002,6 +1002,32 @@ NS_ConvertToString( const char* aCString, PRUint32 aLength )
   }
 #endif
 
+
+/**
+ * Converts a UTF-8 encoded C string into a UCS2/UTF-16 |nsAutoString|.
+ * |aLength| is the maximum number of bytes of |aCString| to consume;
+ * conversion also stops at the first NUL byte.
+ */
+class NS_COM NS_ConvertUTF8toUCS2
+      : public nsAutoString
+  {
+    public:
+      NS_ConvertUTF8toUCS2( const char* aCString )
+        { Init( aCString, ~PRUint32(0) /* MAXINT */ ); }
+
+      NS_ConvertUTF8toUCS2( const char* aCString, PRUint32 aLength )
+        { Init( aCString, aLength ); }
+
+      NS_ConvertUTF8toUCS2( char aChar )
+        { Init( &aChar, 1 ); }
+
+    protected:
+      void Init( const char* aCString, PRUint32 aLength );
+
+    private:
+      NS_ConvertUTF8toUCS2( PRUnichar );
+  };
+
 /***************************************************************
   The subsumestr class is very unusual.
   It differs from a normal string in that it doesn't use normal
diff --git a/mozilla/xpcom/ds/nsString.cpp b/mozilla/xpcom/ds/nsString.cpp
index dfcbc351a5b..38f5624e0de 100644
--- a/mozilla/xpcom/ds/nsString.cpp
+++ b/mozilla/xpcom/ds/nsString.cpp
@@ -652,11 +652,7 @@ nsCString* nsCString::ToNewString() const {
  * @return ptr to new ascii string
  */
 char* nsCString::ToNewCString() const {
-  nsCString temp(*this); //construct nsCString with alloc on heap (which we'll steal in a moment)
-  temp.SetCapacity(8); //force it to have an allocated buffer, even if this is empty.
-  char* result=temp.mStr; //steal temp's buffer
-  temp.mStr=0; //clear temp's buffer to prevent deallocation
-  return result; //and return the char*
+  return nsCRT::strdup(mStr);
 }
 
 /**
@@ -666,12 +662,12 @@ char* nsCString::ToNewCString() const {
  * @return ptr to new ascii string
  */
 PRUnichar* nsCString::ToNewUnicode() const {
-  nsString temp;
-  temp.AssignWithConversion(*this); //construct nsCString with alloc on heap (which we'll steal in a moment)
-  temp.SetCapacity(8); //force temp to have an allocated buffer, even if this is empty.
-  PRUnichar* result=temp.mUStr; //steal temp's buffer
-  temp.mStr=0; //now clear temp's buffer to prevent deallocation
-  temp.mOwnsBuffer=PR_FALSE; //and return the PRUnichar*
+  PRUnichar* result = NS_STATIC_CAST(PRUnichar*, nsAllocator::Alloc(sizeof(PRUnichar) * (mLength + 1)));
+  if (result) {
+    CBufDescriptor desc(result, PR_TRUE, mLength + 1, 0);
+    nsAutoString temp(desc);
+    temp.AssignWithConversion(*this);
+  }
   return result;
 }
 
@@ -1948,6 +1944,7 @@ NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
     }
 
     *out = '\0'; // null terminate
+    mLength = utf8len;
   }
 
 
diff --git a/mozilla/xpcom/ds/nsString2.cpp b/mozilla/xpcom/ds/nsString2.cpp
index aed93b24b5a..af9d4aff01f 100644
--- a/mozilla/xpcom/ds/nsString2.cpp
+++ b/mozilla/xpcom/ds/nsString2.cpp
@@ -719,13 +719,13 @@ nsString* nsString::ToNewString() const {
  * @return ptr to new ascii string
  */
 char* nsString::ToNewCString() const {
-
-  nsCString temp;
-  temp.AssignWithConversion(GetUnicode(), Length()); //construct nsCString with alloc on heap (which we'll steal in a moment)
-  temp.SetCapacity(8); //force it to have an allocated buffer, even if this is empty.
-  char* result=temp.mStr; //steal temp's buffer
-  temp.mStr=0; //clear temp's buffer to prevent deallocation
-  return result; //and return the char*
+  char* result = NS_STATIC_CAST(char*, nsAllocator::Alloc(mLength + 1));
+  if (result) {
+    CBufDescriptor desc(result, PR_TRUE, mLength + 1, 0);
+    nsCAutoString temp(desc);
+    temp.AssignWithConversion(*this);
+  }
+  return result;
 }
 
 /**
@@ -763,13 +763,7 @@ char* nsString::ToNewUTF8String() const {
  * @return ptr to new ascii string
  */
 PRUnichar* nsString::ToNewUnicode() const {
-
-  nsString temp(*this); //construct nsString with alloc on heap (which we'll steal in a moment)
-  temp.SetCapacity(8); //force it to have an allocated buffer, even if this is empty.
-  PRUnichar* result=temp.mUStr; //steal temp's buffer
-  temp.mStr=0; //clear temp's buffer to prevent deallocation
-  temp.mOwnsBuffer=PR_FALSE;
-  return result; //and return the PRUnichar* to the caller
+  return nsCRT::strdup(mUStr);
 }
 
 /**
@@ -2308,6 +2302,151 @@ NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const nsCString& )
 #endif
 #endif
 
+
+void
+NS_ConvertUTF8toUCS2::Init( const char* aCString, PRUint32 aLength )
+{
+  // Handle null string by just leaving us as a brand-new
+  // uninitialized nsAutoString.
+  if (! aCString)
+    return;
+
+  // |aLength| limits how many *bytes* of |aCString| are consumed;
+  // conversion also stops at the first NUL byte. The byte budget is
+  // always checked before |*p| is read, so the input need not be
+  // NUL-terminated when |aLength| is accurate (e.g. the |char| ctor).
+  const char* p = aCString;
+  PRUint32 remaining = aLength;
+
+  // Pass 1: compute an upper bound on the number of PRUnichars needed
+  // so we allocate at most once. A 4 byte sequence can expand to a
+  // surrogate pair (two PRUnichars); everything else yields at most one.
+  PRUint32 units = 0;
+  while (remaining && *p) {
+    PRUint32 trail;
+    if ( 0 == (*p & 0x80) )
+      trail = 0; // ASCII
+    else if ( 0xC0 == (*p & 0xE0) )
+      trail = 1; // 2 byte UTF8
+    else if ( 0xE0 == (*p & 0xF0) )
+      trail = 2; // 3 byte UTF8
+    else if ( 0xF0 == (*p & 0xF8) )
+      trail = 3; // 4 byte UTF8
+    else if ( 0xF8 == (*p & 0xFC) )
+      trail = 4; // 5 byte UTF8
+    else if ( 0xFC == (*p & 0xFE) )
+      trail = 5; // 6 byte UTF8
+    else {
+      NS_ERROR("not a UTF-8 string");
+      return;
+    }
+
+    units += (3 == trail) ? 2 : 1;
+    ++p;
+    --remaining;
+
+    // Step over the trail bytes one at a time rather than jumping by
+    // the nominal sequence length: a truncated sequence must not carry
+    // |p| past the terminating NUL or the end of the byte budget.
+    while (trail && remaining && 0x80 == (0xC0 & *p)) {
+      ++p;
+      --remaining;
+      --trail;
+    }
+  }
+
+  // Grow the buffer if we need to.
+  if ((units * sizeof(PRUnichar)) >= sizeof(mBuffer))
+    SetCapacity(units + 1);
+
+  // We'll write directly into the new string's buffer
+  PRUnichar* out = mUStr;
+
+  // Pass 2: convert the characters.
+  p = aCString;
+  remaining = aLength;
+  while (remaining && *p) {
+    char c = *p++;
+    --remaining;
+
+    if ( 0 == (0x80 & c) ) { // ASCII
+      *out++ = PRUnichar(c);
+      continue;
+    }
+
+    PRUint32 ucs4;
+    PRInt32 state = 0;
+
+    if ( 0xC0 == (0xE0 & c) ) { // 2 bytes UTF8
+      ucs4 = (PRUint32(c) << 6) & 0x000007C0L;
+      state = 1;
+    }
+    else if ( 0xE0 == (0xF0 & c) ) { // 3 bytes UTF8
+      ucs4 = (PRUint32(c) << 12) & 0x0000F000L;
+      state = 2;
+    }
+    else if ( 0xF0 == (0xF8 & c) ) { // 4 bytes UTF8
+      ucs4 = (PRUint32(c) << 18) & 0x001F0000L;
+      state = 3;
+    }
+    else if ( 0xF8 == (0xFC & c) ) { // 5 bytes UTF8
+      ucs4 = (PRUint32(c) << 24) & 0x03000000L;
+      state = 4;
+    }
+    else if ( 0xFC == (0xFE & c) ) { // 6 bytes UTF8
+      ucs4 = (PRUint32(c) << 30) & 0x40000000L;
+      state = 5;
+    }
+    else {
+      NS_ERROR("not a UTF8 string");
+      break;
+    }
+
+    while (state--) {
+      if (! remaining) {
+        // Sequence cut short by the byte budget.
+        NS_ERROR("not a UTF8 string");
+        goto done; // so we minimally clean up
+      }
+
+      c = *p++;
+      --remaining;
+
+      if ( 0x80 == (0xC0 & c) ) {
+        PRInt32 shift = state * 6;
+        ucs4 |= (PRUint32(c) & 0x3F) << shift;
+      }
+      else {
+        NS_ERROR("not a UTF8 string");
+        goto done; // so we minimally clean up
+      }
+    }
+
+    if (ucs4 >= 0x00010000) {
+      if (ucs4 >= 0x00110000) {
+        // Beyond U+10FFFF: not representable as a surrogate pair.
+        *out++ = 0xFFFD;
+      }
+      else {
+        ucs4 -= 0x00010000;
+        *out++ = 0xD800 | (0x000003FF & (ucs4 >> 10));
+        *out++ = 0xDC00 | (0x000003FF & ucs4);
+      }
+    }
+    else {
+      if (0xfeff != ucs4) // ignore BOM
+        *out++ = ucs4;
+    }
+  }
+
+ done:
+  *out = '\0'; // null terminate
+
+  // Surrogate pairs add a code unit and a skipped BOM removes one, so
+  // the length is what was actually written, not the characters read.
+  mLength = PRUint32(out - mUStr);
+}
+
 #if 0
 /**
  * Copy construct from ascii c-string
diff --git a/mozilla/xpcom/ds/nsString2.h b/mozilla/xpcom/ds/nsString2.h
index 58bb62d3c97..227b9291029 100644
--- a/mozilla/xpcom/ds/nsString2.h
+++ b/mozilla/xpcom/ds/nsString2.h
@@ -1002,6 +1002,32 @@ NS_ConvertToString( const char* aCString, PRUint32 aLength )
   }
 #endif
 
+
+/**
+ * Converts a UTF-8 encoded C string into a UCS2/UTF-16 |nsAutoString|.
+ * |aLength| is the maximum number of bytes of |aCString| to consume;
+ * conversion also stops at the first NUL byte.
+ */
+class NS_COM NS_ConvertUTF8toUCS2
+      : public nsAutoString
+  {
+    public:
+      NS_ConvertUTF8toUCS2( const char* aCString )
+        { Init( aCString, ~PRUint32(0) /* MAXINT */ ); }
+
+      NS_ConvertUTF8toUCS2( const char* aCString, PRUint32 aLength )
+        { Init( aCString, aLength ); }
+
+      NS_ConvertUTF8toUCS2( char aChar )
+        { Init( &aChar, 1 ); }
+
+    protected:
+      void Init( const char* aCString, PRUint32 aLength );
+
+    private:
+      NS_ConvertUTF8toUCS2( PRUnichar );
+  };
+
 /***************************************************************
   The subsumestr class is very unusual.
   It differs from a normal string in that it doesn't use normal
diff --git a/mozilla/xpcom/string/obsolete/nsString.cpp b/mozilla/xpcom/string/obsolete/nsString.cpp
index dfcbc351a5b..38f5624e0de 100644
--- a/mozilla/xpcom/string/obsolete/nsString.cpp
+++ b/mozilla/xpcom/string/obsolete/nsString.cpp
@@ -652,11 +652,7 @@ nsCString* nsCString::ToNewString() const {
  * @return ptr to new ascii string
  */
 char* nsCString::ToNewCString() const {
-  nsCString temp(*this); //construct nsCString with alloc on heap (which we'll steal in a moment)
-  temp.SetCapacity(8); //force it to have an allocated buffer, even if this is empty.
-  char* result=temp.mStr; //steal temp's buffer
-  temp.mStr=0; //clear temp's buffer to prevent deallocation
-  return result; //and return the char*
+  return nsCRT::strdup(mStr);
 }
 
 /**
@@ -666,12 +662,12 @@ char* nsCString::ToNewCString() const {
  * @return ptr to new ascii string
  */
 PRUnichar* nsCString::ToNewUnicode() const {
-  nsString temp;
-  temp.AssignWithConversion(*this); //construct nsCString with alloc on heap (which we'll steal in a moment)
-  temp.SetCapacity(8); //force temp to have an allocated buffer, even if this is empty.
-  PRUnichar* result=temp.mUStr; //steal temp's buffer
-  temp.mStr=0; //now clear temp's buffer to prevent deallocation
-  temp.mOwnsBuffer=PR_FALSE; //and return the PRUnichar*
+  PRUnichar* result = NS_STATIC_CAST(PRUnichar*, nsAllocator::Alloc(sizeof(PRUnichar) * (mLength + 1)));
+  if (result) {
+    CBufDescriptor desc(result, PR_TRUE, mLength + 1, 0);
+    nsAutoString temp(desc);
+    temp.AssignWithConversion(*this);
+  }
   return result;
 }
 
@@ -1948,6 +1944,7 @@ NS_ConvertUCS2toUTF8::Init( const PRUnichar* aString, PRUint32 aLength )
     }
 
     *out = '\0'; // null terminate
+    mLength = utf8len;
   }
 
 
diff --git a/mozilla/xpcom/string/obsolete/nsString2.cpp b/mozilla/xpcom/string/obsolete/nsString2.cpp
index aed93b24b5a..af9d4aff01f 100644
--- a/mozilla/xpcom/string/obsolete/nsString2.cpp
+++ b/mozilla/xpcom/string/obsolete/nsString2.cpp
@@ -719,13 +719,13 @@ nsString* nsString::ToNewString() const {
  * @return ptr to new ascii string
  */
 char* nsString::ToNewCString() const {
-
-  nsCString temp;
-  temp.AssignWithConversion(GetUnicode(), Length()); //construct nsCString with alloc on heap (which we'll steal in a moment)
-  temp.SetCapacity(8); //force it to have an allocated buffer, even if this is empty.
-  char* result=temp.mStr; //steal temp's buffer
-  temp.mStr=0; //clear temp's buffer to prevent deallocation
-  return result; //and return the char*
+  char* result = NS_STATIC_CAST(char*, nsAllocator::Alloc(mLength + 1));
+  if (result) {
+    CBufDescriptor desc(result, PR_TRUE, mLength + 1, 0);
+    nsCAutoString temp(desc);
+    temp.AssignWithConversion(*this);
+  }
+  return result;
 }
 
 /**
@@ -763,13 +763,7 @@ char* nsString::ToNewUTF8String() const {
  * @return ptr to new ascii string
  */
 PRUnichar* nsString::ToNewUnicode() const {
-
-  nsString temp(*this); //construct nsString with alloc on heap (which we'll steal in a moment)
-  temp.SetCapacity(8); //force it to have an allocated buffer, even if this is empty.
-  PRUnichar* result=temp.mUStr; //steal temp's buffer
-  temp.mStr=0; //clear temp's buffer to prevent deallocation
-  temp.mOwnsBuffer=PR_FALSE;
-  return result; //and return the PRUnichar* to the caller
+  return nsCRT::strdup(mUStr);
 }
 
 /**
@@ -2308,6 +2302,151 @@ NS_ConvertASCIItoUCS2::NS_ConvertASCIItoUCS2( const nsCString& )
 #endif
 #endif
 
+
+void
+NS_ConvertUTF8toUCS2::Init( const char* aCString, PRUint32 aLength )
+{
+  // Handle null string by just leaving us as a brand-new
+  // uninitialized nsAutoString.
+  if (! aCString)
+    return;
+
+  // |aLength| limits how many *bytes* of |aCString| are consumed;
+  // conversion also stops at the first NUL byte. The byte budget is
+  // always checked before |*p| is read, so the input need not be
+  // NUL-terminated when |aLength| is accurate (e.g. the |char| ctor).
+  const char* p = aCString;
+  PRUint32 remaining = aLength;
+
+  // Pass 1: compute an upper bound on the number of PRUnichars needed
+  // so we allocate at most once. A 4 byte sequence can expand to a
+  // surrogate pair (two PRUnichars); everything else yields at most one.
+  PRUint32 units = 0;
+  while (remaining && *p) {
+    PRUint32 trail;
+    if ( 0 == (*p & 0x80) )
+      trail = 0; // ASCII
+    else if ( 0xC0 == (*p & 0xE0) )
+      trail = 1; // 2 byte UTF8
+    else if ( 0xE0 == (*p & 0xF0) )
+      trail = 2; // 3 byte UTF8
+    else if ( 0xF0 == (*p & 0xF8) )
+      trail = 3; // 4 byte UTF8
+    else if ( 0xF8 == (*p & 0xFC) )
+      trail = 4; // 5 byte UTF8
+    else if ( 0xFC == (*p & 0xFE) )
+      trail = 5; // 6 byte UTF8
+    else {
+      NS_ERROR("not a UTF-8 string");
+      return;
+    }
+
+    units += (3 == trail) ? 2 : 1;
+    ++p;
+    --remaining;
+
+    // Step over the trail bytes one at a time rather than jumping by
+    // the nominal sequence length: a truncated sequence must not carry
+    // |p| past the terminating NUL or the end of the byte budget.
+    while (trail && remaining && 0x80 == (0xC0 & *p)) {
+      ++p;
+      --remaining;
+      --trail;
+    }
+  }
+
+  // Grow the buffer if we need to.
+  if ((units * sizeof(PRUnichar)) >= sizeof(mBuffer))
+    SetCapacity(units + 1);
+
+  // We'll write directly into the new string's buffer
+  PRUnichar* out = mUStr;
+
+  // Pass 2: convert the characters.
+  p = aCString;
+  remaining = aLength;
+  while (remaining && *p) {
+    char c = *p++;
+    --remaining;
+
+    if ( 0 == (0x80 & c) ) { // ASCII
+      *out++ = PRUnichar(c);
+      continue;
+    }
+
+    PRUint32 ucs4;
+    PRInt32 state = 0;
+
+    if ( 0xC0 == (0xE0 & c) ) { // 2 bytes UTF8
+      ucs4 = (PRUint32(c) << 6) & 0x000007C0L;
+      state = 1;
+    }
+    else if ( 0xE0 == (0xF0 & c) ) { // 3 bytes UTF8
+      ucs4 = (PRUint32(c) << 12) & 0x0000F000L;
+      state = 2;
+    }
+    else if ( 0xF0 == (0xF8 & c) ) { // 4 bytes UTF8
+      ucs4 = (PRUint32(c) << 18) & 0x001F0000L;
+      state = 3;
+    }
+    else if ( 0xF8 == (0xFC & c) ) { // 5 bytes UTF8
+      ucs4 = (PRUint32(c) << 24) & 0x03000000L;
+      state = 4;
+    }
+    else if ( 0xFC == (0xFE & c) ) { // 6 bytes UTF8
+      ucs4 = (PRUint32(c) << 30) & 0x40000000L;
+      state = 5;
+    }
+    else {
+      NS_ERROR("not a UTF8 string");
+      break;
+    }
+
+    while (state--) {
+      if (! remaining) {
+        // Sequence cut short by the byte budget.
+        NS_ERROR("not a UTF8 string");
+        goto done; // so we minimally clean up
+      }
+
+      c = *p++;
+      --remaining;
+
+      if ( 0x80 == (0xC0 & c) ) {
+        PRInt32 shift = state * 6;
+        ucs4 |= (PRUint32(c) & 0x3F) << shift;
+      }
+      else {
+        NS_ERROR("not a UTF8 string");
+        goto done; // so we minimally clean up
+      }
+    }
+
+    if (ucs4 >= 0x00010000) {
+      if (ucs4 >= 0x00110000) {
+        // Beyond U+10FFFF: not representable as a surrogate pair.
+        *out++ = 0xFFFD;
+      }
+      else {
+        ucs4 -= 0x00010000;
+        *out++ = 0xD800 | (0x000003FF & (ucs4 >> 10));
+        *out++ = 0xDC00 | (0x000003FF & ucs4);
+      }
+    }
+    else {
+      if (0xfeff != ucs4) // ignore BOM
+        *out++ = ucs4;
+    }
+  }
+
+ done:
+  *out = '\0'; // null terminate
+
+  // Surrogate pairs add a code unit and a skipped BOM removes one, so
+  // the length is what was actually written, not the characters read.
+  mLength = PRUint32(out - mUStr);
+}
+
 #if 0
 /**
  * Copy construct from ascii c-string
diff --git a/mozilla/xpcom/string/obsolete/nsString2.h b/mozilla/xpcom/string/obsolete/nsString2.h
index 58bb62d3c97..227b9291029 100644
--- a/mozilla/xpcom/string/obsolete/nsString2.h
+++ b/mozilla/xpcom/string/obsolete/nsString2.h
@@ -1002,6 +1002,32 @@ NS_ConvertToString( const char* aCString, PRUint32 aLength )
   }
 #endif
 
+
+/**
+ * Converts a UTF-8 encoded C string into a UCS2/UTF-16 |nsAutoString|.
+ * |aLength| is the maximum number of bytes of |aCString| to consume;
+ * conversion also stops at the first NUL byte.
+ */
+class NS_COM NS_ConvertUTF8toUCS2
+      : public nsAutoString
+  {
+    public:
+      NS_ConvertUTF8toUCS2( const char* aCString )
+        { Init( aCString, ~PRUint32(0) /* MAXINT */ ); }
+
+      NS_ConvertUTF8toUCS2( const char* aCString, PRUint32 aLength )
+        { Init( aCString, aLength ); }
+
+      NS_ConvertUTF8toUCS2( char aChar )
+        { Init( &aChar, 1 ); }
+
+    protected:
+      void Init( const char* aCString, PRUint32 aLength );
+
+    private:
+      NS_ConvertUTF8toUCS2( PRUnichar );
+  };
+
 /***************************************************************
   The subsumestr class is very unusual.
   It differs from a normal string in that it doesn't use normal