update copy_string() to the single-fragment string world, and optimize write() to not return a value. b=282083, r+sr=jst, a=schrep

git-svn-id: svn://10.0.0.236/trunk@242333 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
dwitte%stanford.edu 2008-01-04 00:07:07 +00:00
parent 59f190655b
commit a8f5008a68
11 changed files with 123 additions and 170 deletions

View File

@ -579,7 +579,7 @@ class CopyNormalizeNewlines
return mLastCharCR;
}
PRUint32 write(const typename OutputIterator::value_type* aSource, PRUint32 aSourceLength) {
void write(const typename OutputIterator::value_type* aSource, PRUint32 aSourceLength) {
const typename OutputIterator::value_type* done_writing = aSource + aSourceLength;
@ -615,7 +615,6 @@ class CopyNormalizeNewlines
}
mWritten += num_written;
return aSourceLength;
}
private:

View File

@ -100,11 +100,11 @@ public:
typedef PRUnichar value_type;
txStringToDouble(): mState(eWhitestart), mSign(ePositive) {}
PRUint32
void
write(const input_type* aSource, PRUint32 aSourceLength)
{
if (mState == eIllegal) {
return aSourceLength;
return;
}
PRUint32 i = 0;
PRUnichar c;
@ -126,7 +126,7 @@ public:
}
else if (!XMLUtils::isWhitespace(c)) {
mState = eIllegal;
return aSourceLength;
return;
}
break;
case eDecimal:
@ -142,7 +142,7 @@ public:
}
else {
mState = eIllegal;
return aSourceLength;
return;
}
break;
case eMantissa:
@ -160,14 +160,13 @@ public:
case eWhiteend:
if (!XMLUtils::isWhitespace(c)) {
mState = eIllegal;
return aSourceLength;
return;
}
break;
default:
break;
}
}
return aSourceLength;
}
double

View File

@ -86,7 +86,7 @@ class ConvertToLowerCase
public:
typedef PRUnichar value_type;
PRUint32 write( const PRUnichar* aSource, PRUint32 aSourceLength)
void write( const PRUnichar* aSource, PRUint32 aSourceLength)
{
PRUnichar* cp = const_cast<PRUnichar*>(aSource);
const PRUnichar* end = aSource + aSourceLength;
@ -96,7 +96,6 @@ public:
*cp = ch + ('a' - 'A');
++cp;
}
return aSourceLength;
}
};
@ -120,7 +119,7 @@ public:
{
}
PRUint32 write(const PRUnichar* aSource, PRUint32 aSourceLength)
void write(const PRUnichar* aSource, PRUint32 aSourceLength)
{
PRUint32 len = PR_MIN(PRUint32(mIter.size_forward()), aSourceLength);
PRUnichar* cp = mIter.get();
@ -135,7 +134,6 @@ public:
++cp;
}
mIter.advance(len);
return len;
}
protected:

View File

@ -483,6 +483,25 @@ nsScannerSharedSubstring::MakeMutable()
* utils -- based on code from nsReadableUtils.cpp
*/
// Private helper: copies the characters in [first, last) into |result|,
// one readable run at a time, and returns |result|.  |first| is advanced
// as runs are consumed, so when the loop finishes it compares equal to
// |last|.
static inline
nsAString::iterator&
copy_multifragment_string( nsScannerIterator& first, const nsScannerIterator& last, nsAString::iterator& result )
{
  typedef nsCharSourceTraits<nsScannerIterator> reader_traits;
  typedef nsCharSinkTraits<nsAString::iterator> writer_traits;

  for ( ; first != last; )
  {
    // Number of characters that can be read from |first| in one go
    // without running past |last|.
    const PRUint32 distance = reader_traits::readable_distance(first, last);

    writer_traits::write(result, reader_traits::read(first), distance);

    // A zero-length run would leave |first| untouched and spin forever.
    NS_ASSERTION(distance > 0, "|copy_multifragment_string| will never terminate");

    reader_traits::advance(first, distance);
  }

  return result;
}
void
CopyUnicodeTo( const nsScannerIterator& aSrcStart,
const nsScannerIterator& aSrcEnd,
@ -496,7 +515,7 @@ CopyUnicodeTo( const nsScannerIterator& aSrcStart,
aDest.BeginWriting(writer);
nsScannerIterator fromBegin(aSrcStart);
copy_string(fromBegin, aSrcEnd, writer);
copy_multifragment_string(fromBegin, aSrcEnd, writer);
}
void
@ -527,7 +546,7 @@ AppendUnicodeTo( const nsScannerIterator& aSrcStart,
aDest.BeginWriting(writer).advance(oldLength);
nsScannerIterator fromBegin(aSrcStart);
copy_string(fromBegin, aSrcEnd, writer);
copy_multifragment_string(fromBegin, aSrcEnd, writer);
}
PRBool

View File

@ -234,7 +234,7 @@ AllocConvertUTF8toUTF16(const char *arg)
return NULL;
ConvertUTF8toUTF16 convert(s);
len = convert.write(arg, len);
convert.write(arg, len);
s[len] = '\0';
return s;
}

View File

@ -22,7 +22,7 @@ AllocConvertUTF16toUTF8(const WCHAR *arg)
return NULL;
ConvertUTF16toUTF8 convert(s);
len = convert.write(arg, len);
convert.write(arg, len);
s[len] = '\0';
return s;
}

View File

@ -85,46 +85,12 @@ NS_COUNT( InputIterator& first, const InputIterator& last, const T& value )
template <class InputIterator, class OutputIterator>
inline
OutputIterator&
copy_string( InputIterator& first, const InputIterator& last, OutputIterator& result )
copy_string( const InputIterator& first, const InputIterator& last, OutputIterator& result )
{
typedef nsCharSourceTraits<InputIterator> source_traits;
typedef nsCharSinkTraits<OutputIterator> sink_traits;
while ( first != last )
{
PRInt32 count_copied = PRInt32(sink_traits::write(result, source_traits::read(first), source_traits::readable_distance(first, last)));
NS_ASSERTION(count_copied > 0, "|copy_string| will never terminate");
source_traits::advance(first, count_copied);
}
return result;
}
template <class InputIterator, class OutputIterator>
OutputIterator&
copy_string_backward( const InputIterator& first, InputIterator& last, OutputIterator& result )
{
while ( first != last )
{
last.normalize_backward();
result.normalize_backward();
PRUint32 lengthToCopy = PRUint32( NS_MIN(last.size_backward(), result.size_backward()) );
if ( first.fragment().mStart == last.fragment().mStart )
lengthToCopy = NS_MIN(lengthToCopy, PRUint32(last.get() - first.get()));
NS_ASSERTION(lengthToCopy, "|copy_string_backward| will never terminate");
#ifdef _MSC_VER
// XXX Visual C++ can't stomach 'typename' where it rightfully should
nsCharTraits<OutputIterator::value_type>::move(result.get()-lengthToCopy, last.get()-lengthToCopy, lengthToCopy);
#else
nsCharTraits<typename OutputIterator::value_type>::move(result.get()-lengthToCopy, last.get()-lengthToCopy, lengthToCopy);
#endif
last.advance( -PRInt32(lengthToCopy) );
result.advance( -PRInt32(lengthToCopy) );
}
sink_traits::write(result, source_traits::read(first), source_traits::readable_distance(first, last));
return result;
}

View File

@ -798,10 +798,10 @@ template <class OutputIterator>
struct nsCharSinkTraits
{
static
PRUint32
void
write( OutputIterator& iter, const typename OutputIterator::value_type* s, PRUint32 n )
{
return iter.write(s, n);
iter.write(s, n);
}
};
@ -811,12 +811,11 @@ template <class CharT>
struct nsCharSinkTraits<CharT*>
{
static
PRUint32
void
write( CharT*& iter, const CharT* s, PRUint32 n )
{
nsCharTraits<CharT>::move(iter, s, n);
iter += n;
return n;
}
};
@ -826,12 +825,11 @@ NS_SPECIALIZE_TEMPLATE
struct nsCharSinkTraits<char*>
{
static
PRUint32
void
write( char*& iter, const char* s, PRUint32 n )
{
nsCharTraits<char>::move(iter, s, n);
iter += n;
return n;
}
};
@ -839,12 +837,11 @@ NS_SPECIALIZE_TEMPLATE
struct nsCharSinkTraits<PRUnichar*>
{
static
PRUint32
void
write( PRUnichar*& iter, const PRUnichar* s, PRUint32 n )
{
nsCharTraits<PRUnichar>::move(iter, s, n);
iter += n;
return n;
}
};

View File

@ -326,14 +326,13 @@ class nsWritingIterator
return *this;
}
PRUint32
void
write( const value_type* s, PRUint32 n )
{
NS_ASSERTION(size_forward() > 0, "You can't |write| into an |nsWritingIterator| with no space!");
nsCharTraits<value_type>::move(mPosition, s, n);
advance( difference_type(n) );
return n;
}
};

View File

@ -467,10 +467,10 @@ class ConvertUTF8toUTF16
size_t Length() const { return mBuffer - mStart; }
PRUint32 NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )
void NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )
{
if ( mErrorEncountered )
return N;
return;
// algorithm assumes utf8 units won't
// be spread across fragments
@ -487,7 +487,7 @@ class ConvertUTF8toUTF16
{
mErrorEncountered = PR_TRUE;
mBuffer = out;
return N;
return;
}
if ( overlong )
@ -524,7 +524,6 @@ class ConvertUTF8toUTF16
}
}
mBuffer = out;
return p - start;
}
void write_terminator()
@ -551,11 +550,11 @@ class CalculateUTF8Length
size_t Length() const { return mLength; }
PRUint32 NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )
void NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )
{
// ignore any further requests
if ( mErrorEncountered )
return N;
return;
// algorithm assumes utf8 units won't
// be spread across fragments
@ -596,9 +595,7 @@ class CalculateUTF8Length
{
NS_ERROR("Not a UTF-8 string. This code should only be used for converting from known UTF-8 strings.");
mErrorEncountered = PR_TRUE;
return N;
}
return p - start;
}
private:
@ -626,7 +623,7 @@ class ConvertUTF16toUTF8
size_t Size() const { return mBuffer - mStart; }
PRUint32 NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )
void NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )
{
buffer_type *out = mBuffer; // gcc isn't smart enough to do this!
@ -707,7 +704,6 @@ class ConvertUTF16toUTF8
}
mBuffer = out;
return N;
}
void write_terminator()
@ -735,7 +731,7 @@ class CalculateUTF8Size
size_t Size() const { return mSize; }
PRUint32 NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )
void NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )
{
// Assume UCS2 surrogate pairs won't be spread across fragments.
for (const value_type *p = start, *end = start + N; p < end; ++p )
@ -784,8 +780,6 @@ class CalculateUTF8Size
NS_WARNING("got a low Surrogate but no high surrogate");
}
}
return N;
}
private:
@ -811,13 +805,12 @@ class LossyConvertEncoding
public:
LossyConvertEncoding( output_type* aDestination ) : mDestination(aDestination) { }
PRUint32
void
write( const input_type* aSource, PRUint32 aSourceLength )
{
const input_type* done_writing = aSource + aSourceLength;
while ( aSource < done_writing )
*mDestination++ = (output_type)(unsigned_input_type)(*aSource++); // use old-style cast to mimic old |ns[C]String| behavior
return aSourceLength;
}
void

View File

@ -498,22 +498,17 @@ IsASCII( const nsAString& aString )
// Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
nsAString::const_iterator done_reading;
nsAString::const_iterator iter, done_reading;
aString.BeginReading(iter);
aString.EndReading(done_reading);
// for each chunk of |aString|...
PRUint32 fragmentLength = 0;
nsAString::const_iterator iter;
for ( aString.BeginReading(iter); iter != done_reading; iter.advance( PRInt32(fragmentLength) ) )
const PRUnichar* c = iter.get();
const PRUnichar* end = done_reading.get();
while ( c < end )
{
fragmentLength = PRUint32(iter.size_forward());
const PRUnichar* c = iter.get();
const PRUnichar* fragmentEnd = c + fragmentLength;
// for each character in this chunk...
while ( c < fragmentEnd )
if ( *c++ & NOT_ASCII )
return PR_FALSE;
if ( *c++ & NOT_ASCII )
return PR_FALSE;
}
return PR_TRUE;
@ -528,22 +523,17 @@ IsASCII( const nsACString& aString )
// Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
nsACString::const_iterator done_reading;
nsACString::const_iterator iter, done_reading;
aString.BeginReading(iter);
aString.EndReading(done_reading);
// for each chunk of |aString|...
PRUint32 fragmentLength = 0;
nsACString::const_iterator iter;
for ( aString.BeginReading(iter); iter != done_reading; iter.advance( PRInt32(fragmentLength) ) )
const char* c = iter.get();
const char* end = done_reading.get();
while ( c < end )
{
fragmentLength = PRUint32(iter.size_forward());
const char* c = iter.get();
const char* fragmentEnd = c + fragmentLength;
// for each character in this chunk...
while ( c < fragmentEnd )
if ( *c++ & NOT_ASCII )
return PR_FALSE;
if ( *c++ & NOT_ASCII )
return PR_FALSE;
}
return PR_TRUE;
@ -563,85 +553,78 @@ IsUTF8( const nsACString& aString )
PRUint16 olupper = 0; // overlong byte upper bound.
PRUint16 slower = 0; // surrogate byte lower bound.
// for each chunk of |aString|...
PRUint32 fragmentLength = 0;
nsReadingIterator<char> iter;
aString.BeginReading(iter);
for ( aString.BeginReading(iter); iter != done_reading; iter.advance( PRInt32(fragmentLength) ) )
const char* ptr = iter.get();
const char* end = done_reading.get();
while ( ptr < end )
{
fragmentLength = PRUint32(iter.size_forward());
const char* ptr = iter.get();
const char* fragmentEnd = ptr + fragmentLength;
// for each character in this chunk...
while ( ptr < fragmentEnd )
PRUint8 c;
if (0 == state)
{
PRUint8 c;
if (0 == state)
c = *ptr++;
if ( UTF8traits::isASCII(c) )
continue;
if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong.
return PR_FALSE;
else if ( UTF8traits::is2byte(c) )
state = 1;
else if ( UTF8traits::is3byte(c) )
{
c = *ptr++;
if ( UTF8traits::isASCII(c) )
continue;
if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong.
return PR_FALSE;
else if ( UTF8traits::is2byte(c) )
state = 1;
else if ( UTF8traits::is3byte(c) )
state = 2;
if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF]
{
state = 2;
if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF]
{
overlong = PR_TRUE;
olupper = 0x9F;
}
else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint
{
surrogate = PR_TRUE;
slower = 0xA0;
}
else if ( c == 0xEF ) // EF BF [BE-BF] : non-character
nonchar = PR_TRUE;
overlong = PR_TRUE;
olupper = 0x9F;
}
else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint
{
state = 3;
nonchar = PR_TRUE;
if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2}
{
overlong = PR_TRUE;
olupper = 0x8F;
}
else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF]
{
// actually not surrogates but codepoints beyond 0x10FFFF
surrogate = PR_TRUE;
slower = 0x90;
}
surrogate = PR_TRUE;
slower = 0xA0;
}
else
return PR_FALSE; // Not UTF-8 string
else if ( c == 0xEF ) // EF BF [BE-BF] : non-character
nonchar = PR_TRUE;
}
while (ptr < fragmentEnd && state)
{
c = *ptr++;
--state;
else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
{
state = 3;
nonchar = PR_TRUE;
if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2}
{
overlong = PR_TRUE;
olupper = 0x8F;
}
else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF]
{
// actually not surrogates but codepoints beyond 0x10FFFF
surrogate = PR_TRUE;
slower = 0x90;
}
}
else
return PR_FALSE; // Not UTF-8 string
}
while ( ptr < end && state )
{
c = *ptr++;
--state;
// non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
if ( nonchar && ( !state && c < 0xBE ||
state == 1 && c != 0xBF ||
state == 2 && 0x0F != (0x0F & c) ))
nonchar = PR_FALSE;
// non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
if ( nonchar && ( !state && c < 0xBE ||
state == 1 && c != 0xBF ||
state == 2 && 0x0F != (0x0F & c) ))
nonchar = PR_FALSE;
if ( !UTF8traits::isInSeq(c) || overlong && c <= olupper ||
surrogate && slower <= c || nonchar && !state )
return PR_FALSE; // Not UTF-8 string
overlong = surrogate = PR_FALSE;
}
}
if ( !UTF8traits::isInSeq(c) || overlong && c <= olupper ||
surrogate && slower <= c || nonchar && !state )
return PR_FALSE; // Not UTF-8 string
overlong = surrogate = PR_FALSE;
}
}
return !state; // state != 0 at the end indicates an invalid UTF-8 seq.
}