Backing out my checkin for bug 206682 due to performance regression (slowing down NS_ConvertUCS2toUTF8).

git-svn-id: svn://10.0.0.236/trunk@142777 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
dbaron%dbaron.org 2003-05-23 00:32:54 +00:00
parent 5305218645
commit bce712ae10
8 changed files with 266 additions and 398 deletions

View File

@ -45,7 +45,6 @@
#include "nsString.h"
#include "nsReadableUtils.h"
#include "nsDebug.h"
#include "nsUTF8Utils.h"
#ifndef nsCharTraits_h___
#include "nsCharTraits.h"
@ -1087,49 +1086,113 @@ PRBool nsCString::EqualsWithConversion(const char* aCString,PRBool aIgnoreCase,P
//----------------------------------------------------------------------
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString )
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString )
{
if (!aString)
// Leave us as an uninitialized nsCAutoString.
return;
Init(nsDependentString(aString));
}
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
{
if (!aString)
// Leave us as an uninitialized nsCAutoString.
return;
Init(Substring(aString, aString + aLength));
}
void NS_ConvertUCS2toUTF8::Init( const nsAString& aString )
{
// Compute space required: do this once so we don't incur multiple
// allocations. This "optimization" is probably of dubious value...
nsAString::const_iterator start, end;
CalculateUTF8Size calculator;
copy_string(aString.BeginReading(start), aString.EndReading(end), calculator);
PRUint32 count = calculator.Size();
if (count) {
// Grow the buffer if we need to.
SetLength(count);
// All ready? Time to convert
ConvertUCS2toUTF8 converter(mStr);
copy_string(aString.BeginReading(start), aString.EndReading(end), converter);
mLength = converter.Size();
if (mLength != count) {
NS_ERROR("Input invalid or incorrect length was calculated");
Truncate();
}
nsAString::const_iterator start; aString.BeginReading(start);
nsAString::const_iterator end; aString.EndReading(end);
while (start != end) {
nsReadableFragment<PRUnichar> frag(start.fragment());
Append(frag.mStart, frag.mEnd - frag.mStart);
start.advance(start.size_forward());
}
}
/**
 * Converts a run of UTF-16 code units to UTF-8 and appends the bytes to
 * this string.
 *
 * @param aString  first code unit to convert; a null pointer is ignored
 *                 and leaves the string untouched.
 * @param aLength  maximum number of code units to read.  Conversion also
 *                 stops at the first zero code unit, whichever comes first.
 *
 * Malformed input is handled leniently: a low surrogate that is not
 * preceded by a high surrogate produces no output, and a high surrogate
 * that is not followed by a low surrogate produces no output either (the
 * code unit that follows it is dropped as well).
 */
void
NS_ConvertUCS2toUTF8::Append( const PRUnichar* aString, PRUint32 aLength )
  {
    // Handle null string by just leaving us as a brand-new
    // uninitialized nsCAutoString.
    if (! aString)
      return;

    // Pass 1: compute an upper bound on the number of bytes we need.
    const PRUnichar* p;
    PRInt32 count, utf8len;
    for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++)
      {
        if (! ((*p) & 0xFF80))
          utf8len += 1; // 0000 0000 - 0000 007F
        else if (! ((*p) & 0xF800))
          utf8len += 2; // 0000 0080 - 0000 07FF
        else
          utf8len += 3; // 0000 0800 - 0000 FFFF
        // Note: a surrogate pair needs 4 bytes, but in this calculation
        // each half is counted as 3 bytes (6 in total), so |utf8len| can
        // over-estimate by 2 bytes per pair.  Unpaired surrogates are
        // counted as well even though they produce no output.
      }

    // Make sure our buffer's big enough, so we don't need to do
    // multiple allocations.
    if(mLength+PRUint32(utf8len+1) > sizeof(mBuffer))
      SetCapacity(mLength+utf8len+1);
    // |SetCapacity| normally doesn't guarantee the use we are putting it to here (see its interface comment in nsAString.h),
    // we can only use it since our local implementation, |nsCString::SetCapacity|, is known to do what we want

    // Pass 2: encode.  |ucs4| is non-zero only while a high surrogate is
    // pending and waiting for its low half.
    char* out = mStr+mLength;
    PRUint32 ucs4=0;

    for (p = aString, count = aLength; 0 != count && 0 != (*p); count--, p++)
      {
        if (0 == ucs4)
          {
            if (! ((*p) & 0xFF80))
              {
                *out++ = (char)*p;
              }
            else if (! ((*p) & 0xF800))
              {
                *out++ = 0xC0 | (char)((*p) >> 6);
                *out++ = 0x80 | (char)(0x003F & (*p));
              }
            else
              {
                if (0xD800 == (0xFC00 & (*p)))
                  {
                    // D800- DBFF - High Surrogate
                    // N = (H- D800) *400 + 10000 + ...
                    // This has to be an addition: for H >= D840 the
                    // shifted value already has bit 16 set, so OR-ing
                    // in 0x10000 would silently drop the offset.
                    ucs4 = 0x10000 + ((0x03FF & (*p)) << 10);
                  }
                else if (0xDC00 == (0xFC00 & (*p)))
                  {
                    // DC00- DFFF - Low Surrogate
                    // error here. We should hit High Surrogate first
                    // Do not output any thing in this case
                  }
                else
                  {
                    *out++ = 0xE0 | (char)((*p) >> 12);
                    *out++ = 0x80 | (char)(0x003F & (*p >> 6));
                    *out++ = 0x80 | (char)(0x003F & (*p) );
                  }
              }
          }
        else
          {
            if (0xDC00 == (0xFC00 & (*p)))
              {
                // DC00- DFFF - Low Surrogate
                // N += ( L - DC00 )
                ucs4 |= (0x03FF & (*p));

                // 0001 0000-001F FFFF
                *out++ = 0xF0 | (char)(ucs4 >> 18);
                *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
                *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
                *out++ = 0x80 | (char)(0x003F & ucs4) ;
              }
            else
              {
                // Got a High Surrogate but no low surrogate
                // output nothing.
              }
            ucs4 = 0;
          }
      }

    *out = '\0'; // null terminate

    // Record the number of bytes actually written.  |utf8len| is only an
    // upper bound (see pass 1), so adding it would leave |mLength|
    // pointing past the terminator whenever surrogates were present.
    mLength = PRUint32(out - mStr);
  }
NS_LossyConvertUCS2toASCII::NS_LossyConvertUCS2toASCII( const nsAString& aString )
{
SetCapacity(aString.Length());

View File

@ -431,15 +431,24 @@ class NS_COM NS_ConvertUCS2toUTF8
*/
{
public:
explicit NS_ConvertUCS2toUTF8( const PRUnichar* aString );
NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength );
explicit NS_ConvertUCS2toUTF8( const nsAString& aString )
friend NS_COM char* ToNewUTF8String( const nsAString& aSource );
public:
explicit
NS_ConvertUCS2toUTF8( const PRUnichar* aString )
{
Init(aString);
Append( aString, ~PRUint32(0) /* MAXINT */);
}
NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
{
Append( aString, aLength );
}
explicit NS_ConvertUCS2toUTF8( const nsAString& aString );
protected:
void Init( const nsAString& aString );
void Append( const PRUnichar* aString, PRUint32 aLength );
private:
// NOT TO BE IMPLEMENTED

View File

@ -255,148 +255,4 @@ class CalculateUTF8Length
PRBool mErrorEncountered;
};
/**
 * Character sink (for use with |copy_string| from nsAlgorithm.h) that
 * encodes UCS2 (in practice UTF-16) code units as UTF-8 into a buffer
 * supplied by the caller.  The sink does no bounds checking of its own;
 * the caller has to size the buffer beforehand.
 */
class ConvertUCS2toUTF8
  {
    public:
      typedef nsAString::char_type  value_type;
      typedef nsACString::char_type buffer_type;

      // Error handling is intentionally more forgiving here than in
      // |ConvertUTF8toUCS2|; it stays that way for backwards
      // compatibility.

      ConvertUCS2toUTF8( buffer_type* aBuffer )
          : mStart(aBuffer), mBuffer(aBuffer) {}

      // Number of bytes emitted so far.
      size_t Size() const { return mBuffer - mStart; }

      // Encodes the |N| code units starting at |start|.  Always reports
      // all |N| units as consumed, including on malformed input.
      PRUint32 write( const value_type* start, PRUint32 N )
        {
          const value_type* const limit = start + N;
          const value_type* cur = start;

          while (cur < limit)
            {
              value_type unit = *cur++;

              // U+0000 - U+007F: one byte
              if ((unit & 0xFF80) == 0)
                {
                  *mBuffer++ = (char)unit;
                  continue;
                }

              // U+0080 - U+07FF: two bytes
              if ((unit & 0xF800) == 0)
                {
                  *mBuffer++ = 0xC0 | (char)(unit >> 6);
                  *mBuffer++ = 0x80 | (char)(0x003F & unit);
                  continue;
                }

              // U+D800 - U+DBFF: high surrogate, needs a low one next
              if ((unit & 0xFC00) == 0xD800)
                {
                  // N = (H - D800) * 400 + 10000 + (L - DC00)
                  PRUint32 ucs4 = 0x10000 + ((0x03FF & unit) << 10);

                  if (cur == limit)
                    {
                      NS_ERROR("Surrogate pair split between fragments");
                      return N;
                    }

                  // The following unit is consumed whether or not it
                  // turns out to be a low surrogate.
                  unit = *cur++;
                  if ((unit & 0xFC00) == 0xDC00)
                    {
                      ucs4 |= (0x03FF & unit);

                      // U+10000 - U+10FFFF: four bytes
                      *mBuffer++ = 0xF0 | (char)(ucs4 >> 18);
                      *mBuffer++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
                      *mBuffer++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
                      *mBuffer++ = 0x80 | (char)(0x003F & ucs4);
                    }
                  else
                    {
                      NS_ERROR("got a High Surrogate but no low surrogate");
                      // nothing is emitted for the broken pair
                    }
                  continue;
                }

              // U+DC00 - U+DFFF: low surrogate without a leading high one
              if ((unit & 0xFC00) == 0xDC00)
                {
                  NS_ERROR("got a low Surrogate but no high surrogate");
                  // nothing is emitted
                  continue;
                }

              // U+0800 - U+D7FF, U+E000 - U+FFFF: three bytes
              *mBuffer++ = 0xE0 | (char)(unit >> 12);
              *mBuffer++ = 0x80 | (char)(0x003F & (unit >> 6));
              *mBuffer++ = 0x80 | (char)(0x003F & unit);
            }

          return N;
        }

      // Stores a zero at the current write position without advancing it.
      void write_terminator()
        {
          *mBuffer = buffer_type(0);
        }

    private:
      buffer_type* mStart;   // beginning of the output buffer
      buffer_type* mBuffer;  // next position to write
  };
/**
 * Character sink (for use with |copy_string| from nsAlgorithm.h) that
 * adds up how many bytes a UCS2 (in practice UTF-16) string would take
 * once encoded as UTF-8.  Nothing is written anywhere; only the running
 * total is kept.
 */
class CalculateUTF8Size
  {
    public:
      typedef nsAString::char_type value_type;

      CalculateUTF8Size()
          : mSize(0) { }

      // Byte total accumulated so far.
      size_t Size() const { return mSize; }

      // Accounts for the |N| code units starting at |start|.  Always
      // reports all |N| units as consumed.
      PRUint32 write( const value_type* start, PRUint32 N )
        {
          // Surrogate pairs are assumed never to straddle two fragments.
          const value_type* const limit = start + N;
          const value_type* cur = start;

          while (cur < limit)
            {
              value_type unit = *cur++;

              if ((unit & 0xFF80) == 0)            // U+0000 - U+007F
                {
                  mSize += 1;
                }
              else if ((unit & 0xF800) == 0)       // U+0080 - U+07FF
                {
                  mSize += 2;
                }
              else if ((unit & 0xFC00) == 0xD800)  // U+D800 - U+DBFF
                {
                  if (cur == limit)
                    {
                      NS_ERROR("Surrogate pair split between fragments");
                      return N;
                    }

                  // The following unit is consumed either way; only a
                  // well-formed pair contributes to the total.
                  unit = *cur++;
                  if ((unit & 0xFC00) == 0xDC00)
                    {
                      mSize += 4;
                    }
                  else
                    {
                      NS_ERROR("got a high Surrogate but no low surrogate");
                    }
                }
              else if ((unit & 0xFC00) == 0xDC00)  // U+DC00 - U+DFFF
                {
                  NS_ERROR("got a low Surrogate but no high surrogate");
                }
              else                                 // U+0800 - U+D7FF, U+E000 - U+FFFF
                {
                  mSize += 3;
                }
            }

          return N;
        }

    private:
      size_t mSize;  // running UTF-8 byte count
  };
#endif /* !defined(nsUTF8Utils_h_) */

View File

@ -209,18 +209,24 @@ NS_COM
char*
ToNewUTF8String( const nsAString& aSource )
{
nsAString::const_iterator start, end;
CalculateUTF8Size calculator;
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
calculator);
// XXX The conversion code in NS_ConvertUCS2toUTF8 needs to be
// refactored so that we can use it here without a double-copy.
NS_ConvertUCS2toUTF8 temp(aSource);
char *result = NS_STATIC_CAST(char*,
nsMemory::Alloc(calculator.Size() + 1));
char* result;
if (temp.GetOwnsBuffer()) {
// We allocated. Trick the string into not freeing its buffer to
// avoid an extra allocation.
result = temp.mStr;
ConvertUCS2toUTF8 converter(result);
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
converter).write_terminator();
NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
temp.mStr=0;
temp.SetOwnsBuffer(PR_FALSE);
}
else {
// We didn't allocate a buffer, so we need to copy it out of the
// nsCAutoString's storage.
result = ToNewCString(temp);
}
return result;
}

View File

@ -45,7 +45,6 @@
#include "nsString.h"
#include "nsReadableUtils.h"
#include "nsDebug.h"
#include "nsUTF8Utils.h"
#ifndef nsCharTraits_h___
#include "nsCharTraits.h"
@ -1087,49 +1086,113 @@ PRBool nsCString::EqualsWithConversion(const char* aCString,PRBool aIgnoreCase,P
//----------------------------------------------------------------------
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString )
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const nsAString& aString )
{
if (!aString)
// Leave us as an uninitialized nsCAutoString.
return;
Init(nsDependentString(aString));
}
NS_ConvertUCS2toUTF8::NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
{
if (!aString)
// Leave us as an uninitialized nsCAutoString.
return;
Init(Substring(aString, aString + aLength));
}
void NS_ConvertUCS2toUTF8::Init( const nsAString& aString )
{
// Compute space required: do this once so we don't incur multiple
// allocations. This "optimization" is probably of dubious value...
nsAString::const_iterator start, end;
CalculateUTF8Size calculator;
copy_string(aString.BeginReading(start), aString.EndReading(end), calculator);
PRUint32 count = calculator.Size();
if (count) {
// Grow the buffer if we need to.
SetLength(count);
// All ready? Time to convert
ConvertUCS2toUTF8 converter(mStr);
copy_string(aString.BeginReading(start), aString.EndReading(end), converter);
mLength = converter.Size();
if (mLength != count) {
NS_ERROR("Input invalid or incorrect length was calculated");
Truncate();
}
nsAString::const_iterator start; aString.BeginReading(start);
nsAString::const_iterator end; aString.EndReading(end);
while (start != end) {
nsReadableFragment<PRUnichar> frag(start.fragment());
Append(frag.mStart, frag.mEnd - frag.mStart);
start.advance(start.size_forward());
}
}
/**
 * Converts a run of UTF-16 code units to UTF-8 and appends the bytes to
 * this string.
 *
 * @param aString  first code unit to convert; a null pointer is ignored
 *                 and leaves the string untouched.
 * @param aLength  maximum number of code units to read.  Conversion also
 *                 stops at the first zero code unit, whichever comes first.
 *
 * Malformed input is handled leniently: a low surrogate that is not
 * preceded by a high surrogate produces no output, and a high surrogate
 * that is not followed by a low surrogate produces no output either (the
 * code unit that follows it is dropped as well).
 */
void
NS_ConvertUCS2toUTF8::Append( const PRUnichar* aString, PRUint32 aLength )
  {
    // Handle null string by just leaving us as a brand-new
    // uninitialized nsCAutoString.
    if (! aString)
      return;

    // Pass 1: compute an upper bound on the number of bytes we need.
    const PRUnichar* p;
    PRInt32 count, utf8len;
    for (p = aString, utf8len = 0, count = aLength; 0 != count && 0 != (*p); count--, p++)
      {
        if (! ((*p) & 0xFF80))
          utf8len += 1; // 0000 0000 - 0000 007F
        else if (! ((*p) & 0xF800))
          utf8len += 2; // 0000 0080 - 0000 07FF
        else
          utf8len += 3; // 0000 0800 - 0000 FFFF
        // Note: a surrogate pair needs 4 bytes, but in this calculation
        // each half is counted as 3 bytes (6 in total), so |utf8len| can
        // over-estimate by 2 bytes per pair.  Unpaired surrogates are
        // counted as well even though they produce no output.
      }

    // Make sure our buffer's big enough, so we don't need to do
    // multiple allocations.
    if(mLength+PRUint32(utf8len+1) > sizeof(mBuffer))
      SetCapacity(mLength+utf8len+1);
    // |SetCapacity| normally doesn't guarantee the use we are putting it to here (see its interface comment in nsAString.h),
    // we can only use it since our local implementation, |nsCString::SetCapacity|, is known to do what we want

    // Pass 2: encode.  |ucs4| is non-zero only while a high surrogate is
    // pending and waiting for its low half.
    char* out = mStr+mLength;
    PRUint32 ucs4=0;

    for (p = aString, count = aLength; 0 != count && 0 != (*p); count--, p++)
      {
        if (0 == ucs4)
          {
            if (! ((*p) & 0xFF80))
              {
                *out++ = (char)*p;
              }
            else if (! ((*p) & 0xF800))
              {
                *out++ = 0xC0 | (char)((*p) >> 6);
                *out++ = 0x80 | (char)(0x003F & (*p));
              }
            else
              {
                if (0xD800 == (0xFC00 & (*p)))
                  {
                    // D800- DBFF - High Surrogate
                    // N = (H- D800) *400 + 10000 + ...
                    // This has to be an addition: for H >= D840 the
                    // shifted value already has bit 16 set, so OR-ing
                    // in 0x10000 would silently drop the offset.
                    ucs4 = 0x10000 + ((0x03FF & (*p)) << 10);
                  }
                else if (0xDC00 == (0xFC00 & (*p)))
                  {
                    // DC00- DFFF - Low Surrogate
                    // error here. We should hit High Surrogate first
                    // Do not output any thing in this case
                  }
                else
                  {
                    *out++ = 0xE0 | (char)((*p) >> 12);
                    *out++ = 0x80 | (char)(0x003F & (*p >> 6));
                    *out++ = 0x80 | (char)(0x003F & (*p) );
                  }
              }
          }
        else
          {
            if (0xDC00 == (0xFC00 & (*p)))
              {
                // DC00- DFFF - Low Surrogate
                // N += ( L - DC00 )
                ucs4 |= (0x03FF & (*p));

                // 0001 0000-001F FFFF
                *out++ = 0xF0 | (char)(ucs4 >> 18);
                *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
                *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
                *out++ = 0x80 | (char)(0x003F & ucs4) ;
              }
            else
              {
                // Got a High Surrogate but no low surrogate
                // output nothing.
              }
            ucs4 = 0;
          }
      }

    *out = '\0'; // null terminate

    // Record the number of bytes actually written.  |utf8len| is only an
    // upper bound (see pass 1), so adding it would leave |mLength|
    // pointing past the terminator whenever surrogates were present.
    mLength = PRUint32(out - mStr);
  }
NS_LossyConvertUCS2toASCII::NS_LossyConvertUCS2toASCII( const nsAString& aString )
{
SetCapacity(aString.Length());

View File

@ -431,15 +431,24 @@ class NS_COM NS_ConvertUCS2toUTF8
*/
{
public:
explicit NS_ConvertUCS2toUTF8( const PRUnichar* aString );
NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength );
explicit NS_ConvertUCS2toUTF8( const nsAString& aString )
friend NS_COM char* ToNewUTF8String( const nsAString& aSource );
public:
explicit
NS_ConvertUCS2toUTF8( const PRUnichar* aString )
{
Init(aString);
Append( aString, ~PRUint32(0) /* MAXINT */);
}
NS_ConvertUCS2toUTF8( const PRUnichar* aString, PRUint32 aLength )
{
Append( aString, aLength );
}
explicit NS_ConvertUCS2toUTF8( const nsAString& aString );
protected:
void Init( const nsAString& aString );
void Append( const PRUnichar* aString, PRUint32 aLength );
private:
// NOT TO BE IMPLEMENTED

View File

@ -255,148 +255,4 @@ class CalculateUTF8Length
PRBool mErrorEncountered;
};
/**
 * Character sink (for use with |copy_string| from nsAlgorithm.h) that
 * encodes UCS2 (in practice UTF-16) code units as UTF-8 into a buffer
 * supplied by the caller.  The sink does no bounds checking of its own;
 * the caller has to size the buffer beforehand.
 */
class ConvertUCS2toUTF8
  {
    public:
      typedef nsAString::char_type  value_type;
      typedef nsACString::char_type buffer_type;

      // Error handling is intentionally more forgiving here than in
      // |ConvertUTF8toUCS2|; it stays that way for backwards
      // compatibility.

      ConvertUCS2toUTF8( buffer_type* aBuffer )
          : mStart(aBuffer), mBuffer(aBuffer) {}

      // Number of bytes emitted so far.
      size_t Size() const { return mBuffer - mStart; }

      // Encodes the |N| code units starting at |start|.  Always reports
      // all |N| units as consumed, including on malformed input.
      PRUint32 write( const value_type* start, PRUint32 N )
        {
          const value_type* const limit = start + N;
          const value_type* cur = start;

          while (cur < limit)
            {
              value_type unit = *cur++;

              // U+0000 - U+007F: one byte
              if ((unit & 0xFF80) == 0)
                {
                  *mBuffer++ = (char)unit;
                  continue;
                }

              // U+0080 - U+07FF: two bytes
              if ((unit & 0xF800) == 0)
                {
                  *mBuffer++ = 0xC0 | (char)(unit >> 6);
                  *mBuffer++ = 0x80 | (char)(0x003F & unit);
                  continue;
                }

              // U+D800 - U+DBFF: high surrogate, needs a low one next
              if ((unit & 0xFC00) == 0xD800)
                {
                  // N = (H - D800) * 400 + 10000 + (L - DC00)
                  PRUint32 ucs4 = 0x10000 + ((0x03FF & unit) << 10);

                  if (cur == limit)
                    {
                      NS_ERROR("Surrogate pair split between fragments");
                      return N;
                    }

                  // The following unit is consumed whether or not it
                  // turns out to be a low surrogate.
                  unit = *cur++;
                  if ((unit & 0xFC00) == 0xDC00)
                    {
                      ucs4 |= (0x03FF & unit);

                      // U+10000 - U+10FFFF: four bytes
                      *mBuffer++ = 0xF0 | (char)(ucs4 >> 18);
                      *mBuffer++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
                      *mBuffer++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
                      *mBuffer++ = 0x80 | (char)(0x003F & ucs4);
                    }
                  else
                    {
                      NS_ERROR("got a High Surrogate but no low surrogate");
                      // nothing is emitted for the broken pair
                    }
                  continue;
                }

              // U+DC00 - U+DFFF: low surrogate without a leading high one
              if ((unit & 0xFC00) == 0xDC00)
                {
                  NS_ERROR("got a low Surrogate but no high surrogate");
                  // nothing is emitted
                  continue;
                }

              // U+0800 - U+D7FF, U+E000 - U+FFFF: three bytes
              *mBuffer++ = 0xE0 | (char)(unit >> 12);
              *mBuffer++ = 0x80 | (char)(0x003F & (unit >> 6));
              *mBuffer++ = 0x80 | (char)(0x003F & unit);
            }

          return N;
        }

      // Stores a zero at the current write position without advancing it.
      void write_terminator()
        {
          *mBuffer = buffer_type(0);
        }

    private:
      buffer_type* mStart;   // beginning of the output buffer
      buffer_type* mBuffer;  // next position to write
  };
/**
 * Character sink (for use with |copy_string| from nsAlgorithm.h) that
 * adds up how many bytes a UCS2 (in practice UTF-16) string would take
 * once encoded as UTF-8.  Nothing is written anywhere; only the running
 * total is kept.
 */
class CalculateUTF8Size
  {
    public:
      typedef nsAString::char_type value_type;

      CalculateUTF8Size()
          : mSize(0) { }

      // Byte total accumulated so far.
      size_t Size() const { return mSize; }

      // Accounts for the |N| code units starting at |start|.  Always
      // reports all |N| units as consumed.
      PRUint32 write( const value_type* start, PRUint32 N )
        {
          // Surrogate pairs are assumed never to straddle two fragments.
          const value_type* const limit = start + N;
          const value_type* cur = start;

          while (cur < limit)
            {
              value_type unit = *cur++;

              if ((unit & 0xFF80) == 0)            // U+0000 - U+007F
                {
                  mSize += 1;
                }
              else if ((unit & 0xF800) == 0)       // U+0080 - U+07FF
                {
                  mSize += 2;
                }
              else if ((unit & 0xFC00) == 0xD800)  // U+D800 - U+DBFF
                {
                  if (cur == limit)
                    {
                      NS_ERROR("Surrogate pair split between fragments");
                      return N;
                    }

                  // The following unit is consumed either way; only a
                  // well-formed pair contributes to the total.
                  unit = *cur++;
                  if ((unit & 0xFC00) == 0xDC00)
                    {
                      mSize += 4;
                    }
                  else
                    {
                      NS_ERROR("got a high Surrogate but no low surrogate");
                    }
                }
              else if ((unit & 0xFC00) == 0xDC00)  // U+DC00 - U+DFFF
                {
                  NS_ERROR("got a low Surrogate but no high surrogate");
                }
              else                                 // U+0800 - U+D7FF, U+E000 - U+FFFF
                {
                  mSize += 3;
                }
            }

          return N;
        }

    private:
      size_t mSize;  // running UTF-8 byte count
  };
#endif /* !defined(nsUTF8Utils_h_) */

View File

@ -209,18 +209,24 @@ NS_COM
char*
ToNewUTF8String( const nsAString& aSource )
{
nsAString::const_iterator start, end;
CalculateUTF8Size calculator;
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
calculator);
// XXX The conversion code in NS_ConvertUCS2toUTF8 needs to be
// refactored so that we can use it here without a double-copy.
NS_ConvertUCS2toUTF8 temp(aSource);
char *result = NS_STATIC_CAST(char*,
nsMemory::Alloc(calculator.Size() + 1));
char* result;
if (temp.GetOwnsBuffer()) {
// We allocated. Trick the string into not freeing its buffer to
// avoid an extra allocation.
result = temp.mStr;
ConvertUCS2toUTF8 converter(result);
copy_string(aSource.BeginReading(start), aSource.EndReading(end),
converter).write_terminator();
NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
temp.mStr=0;
temp.SetOwnsBuffer(PR_FALSE);
}
else {
// We didn't allocate a buffer, so we need to copy it out of the
// nsCAutoString's storage.
result = ToNewCString(temp);
}
return result;
}