Mozilla/mozilla/js/tamarin/core/StringObject.h
wsharp%adobe.com 039e5a464d bug 375561. stejohns+ review. Various fixes from Flash player codebase
git-svn-id: svn://10.0.0.236/trunk@222472 18797224-902f-48f8-a5cc-f745e15eee43
2007-03-27 18:37:45 +00:00

474 lines
13 KiB
C++

/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is [Open Source Virtual Machine.].
*
* The Initial Developer of the Original Code is
* Adobe System Incorporated.
* Portions created by the Initial Developer are Copyright (C) 2004-2006
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Adobe AS3 Team
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef __avmplus_String__
#define __avmplus_String__
namespace avmplus
{
#ifdef DEBUGGER
class GCHashtableScriptObject;
#endif
/**
* A string in UTF-8 encoding.
*
* UTF8String objects are immutable and garbage collected, which makes
* them easy to pass around.
*/
class UTF8String : public MMgc::GCObject
{
public:
    /**
     * Records the byte length of the string. The character buffer is
     * not touched here. Not meant to be invoked directly; obtain
     * instances through String::toUTF8String.
     *
     * @param _length length in bytes, excluding the null terminator
     */
    UTF8String(int _length) : m_length(_length) {}

    /**
     * Implicit conversion to a C string; yields the same pointer
     * as c_str().
     */
    operator const char* () const { return c_str(); }

    /**
     * Read-only access to the internal character buffer, which is
     * expected to hold a null-terminated string.
     */
    const char *c_str() const { return &m_buffer[0]; }

    /**
     * Length of the string in bytes, as given at construction.
     * The null terminator is not counted.
     */
    int length() const { return m_length; }

    /**
     * Advanced: writable access to the internal buffer. Intended
     * only for a string that is known to have no other references.
     * Pair every call with unlockBuffer().
     */
    char *lockBuffer() { return &m_buffer[0]; }

    /**
     * Counterpart of lockBuffer(). Does nothing at present, but
     * callers should still invoke it in case that changes.
     */
    void unlockBuffer() {}

private:
    // Byte count, excluding the terminator.
    int m_length;

    // First element of the character storage.
    // NOTE(review): presumably the object is over-allocated so that
    // the characters extend past this one declared element -- confirm
    // against the allocation site.
    char m_buffer[1];
};
/**
* A string in UTF-16 encoding. This is the basic string
* class used by AVM+ code.
*/
class String : public AvmPlusScriptableObject
{
public:
    // Constructors (defined out of line). The trailing notes are the
    // original one-word summaries of what each overload builds.
    String(const wchar *str, int len);                  // wchar[] -> string
    String(const char *str, int utf8len, int utf16len); // utf8 -> string
    String(Stringp s1, Stringp s0);                     // concat
    String(Stringp s, int pos, int len);                // substr
    String(int len);                                    // preallocated empty

    /**
     * When built with MMGC_DRC, drops the buffer reference and clears
     * the prefix/offset/number word and the length. Without MMGC_DRC
     * the destructor body is empty.
     */
    ~String()
    {
#ifdef MMGC_DRC
        setBuf(NULL);
        setPrefixOrOffsetOrNumber(0);
        m_length = 0;
#endif
    }

    /**
     * Converts this string to a UTF-8 string. Allocates
     * a new UTF8 string object containing the result,
     * and returns it.
     */
    UTF8String* toUTF8String();

    /**
     * Returns the Atom equivalent of this String. This is
     * done by or'ing the string type tag into the pointer.
     */
    Atom atom() const { return AtomConstants::kStringType | (Atom)this; }

    /**
     * Virtual version of atom().
     */
    virtual Atom toAtom() const { return atom(); }

    /**
     * Returns the length of the string in characters.
     * The null terminator is not included. The top bit of
     * m_length is the interned flag and is masked off here.
     */
    int length() const
    {
        return m_length & 0x7FFFFFFF;
    }

    /**
     * Operator overload; returns a pointer to the
     * null-terminated string. Identical to c_str().
     */
    operator const wchar* ()
    {
        return c_str();
    }

    /**
     * Returns a pointer to the null-terminated string.
     * Prefix and offset strings are normalized first (offset strings
     * too, because the result has to be null terminated).
     */
    const wchar* c_str()
    {
        if (needsNormalization()) normalize();
        return getData();
    }

    /**
     * Returns the index'th character of the string.
     * @param index zero-based index into the string
     */
    wchar operator[] (int index);

    /**
     * This is an advanced method which returns a non-const
     * pointer to the String's internal buffer. This
     * can be used to mutate a string that is known to not
     * have any other references. Use with caution.
     * Prefix and offset strings are normalized first.
     */
    wchar* lockBuffer()
    {
        if (needsNormalization()) normalize();
        return getData();
    }

    /**
     * Unlocks the buffer previously returned by lockBuffer and
     * stores newLen as the string's length.
     * Must call after using lockBuffer to mutate the buffer.
     * Asserts that the string is not interned; the assignment
     * overwrites the whole of m_length, including the interned bit.
     */
    void unlockBuffer(int newLen)
    {
        AvmAssert(!isInterned());
        m_length = newLen;
    }

    /**
     * Unlocks the buffer without changing the length. No-op.
     */
    void unlockBuffer() {}

    /**
     * Returns a new string object which is a copy of this
     * string object, with all characters in the string
     * converted to uppercase.
     *
     * Unicode character classes for uppercase and lowercase
     * are used. The conversion behavior is compliant with
     * the String.toUpperCase method.
     */
    Stringp toUpperCase();

    /**
     * Returns a new string object which is a copy of this
     * string object, with all characters in the string
     * converted to lowercase.
     *
     * Unicode character classes for uppercase and lowercase
     * are used. The conversion behavior is compliant with
     * the String.toLowerCase method.
     */
    Stringp toLowerCase();

    /**
     * Compare the String with toCompare. Either operand is normalized
     * first if it is stored as a prefix string; offsets are applied
     * to the data pointers directly.
     * @return = 0 if the strings are identical.
     *         < 0 if this string is less than toCompare
     *         > 0 if this string is greater than toCompare
     * NOTE(review): the relational operators defined after this class
     * evaluate "s1 < s2" as s2.Compare(s1) < 0, which implies the
     * opposite sign convention to the one documented here -- confirm
     * against the static Compare implementation which one is accurate.
     */
    int Compare(String& toCompare)
    {
        if (hasPrefix()) normalize();
        if (toCompare.hasPrefix()) toCompare.normalize();
        return String::Compare(getData() + getOffset(), length(),
                               toCompare.getData() + toCompare.getOffset(), toCompare.length());
    }

    /**
     * Does the String contain the character c?
     * Defined out of line.
     * @return presumably true when c occurs in this string,
     *         false otherwise -- confirm against the implementation.
     */
    bool Contains(wchar c);

    /**
     * Compares this string to (toCompare, len).
     * toCompare must be null terminated at toCompare[len] (asserted).
     * @return true when the lengths match and the static Compare
     *         reports 0.
     */
    bool Equals(const wchar *toCompare, int len)
    {
        AvmAssert(toCompare[len]==0);
        int sLen = length();
        if (len != sLen) return false;
        if (hasPrefix()) normalize();
        return String::Compare(getData() + getOffset(), sLen, toCompare, len)==0;
    }

    /**
     * Character-by-character equality test against (toCompare, len).
     * toCompare is not necessarily zero-terminated at toCompare[len].
     * Only for intern strings, which are never offset or prefix
     * strings (asserted).
     */
    bool FastEquals(const wchar *toCompare, int len)
    {
        int sLen = length();
        if (len != sLen) return false;

        // This is only for intern strings which are never offset or prefix
        AvmAssert(needsNormalization() == false);

        const wchar *src = getData();
        // !! could we compare two WORDS at a time? Our toCompare
        // string is not necessarily DWORD aligned. (Offset strings, etc.)
        // Walk from the last character back to the first.
        while (sLen)
        {
            sLen--;
            if (src[sLen] != toCompare[sLen])
                return false;
        }
        AvmAssert(sLen == 0);
        return true;
    }

    /**
     * Compares this string to a null-terminated 8-bit string.
     * @return true when the static wchar/char Compare reports 0
     *         over length() characters.
     */
    bool Equals(const char *other8)
    {
        if (hasPrefix()) normalize();
        return !Compare(getData() + getOffset(), other8, length());
    }

    /*@{*/
    /**
     * Compares s1 and s2.
     * @return = 0 if the strings are identical.
     *         < 0 if s1 is less than s2
     *         > 0 if s1 is greater than s2
     */
    static int Compare(const wchar *s1,
                       int len1,
                       const wchar *s2,
                       int len2);
    static int Compare(const wchar *s1,
                       const char *s2,
                       int len);
    /*@}*/

    /*@{*/
    /**
     * Returns the length of str, in # of characters.
     */
    static int Length(const wchar *str);
    static int Length(const char *str);
    /*@}*/

    /**
     * Marks this string as interned by setting the top bit of
     * m_length, then computes its integer equivalent (if any).
     */
    void setInterned(AvmCore *core)
    {
        m_length |= 0x80000000;
        generateIntegerEquivalent (core);
    }

    /**
     * True for space, tab, line feed and carriage return.
     */
    static bool isSpace(wchar ch)
    {
        return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
    }

    bool isWhitespace();

    /**
     * Non-zero when the interned bit (top bit of m_length) is set.
     */
    int isInterned() const
    {
        return m_length & 0x80000000;
    }

    /**
     * Converts the string to a number.
     * Handles hex, octal, base 10 integer, float, and
     * "Infinity"/"-Infinity".
     */
    double toNumber()
    {
        // For offset too since convertStringToNumber expects a null terminated string
        if (needsNormalization()) normalize();
        return MathUtils::convertStringToNumber(getData() + getOffset(), length());
    }

    // native functions
    int indexOf(Stringp s, int i=0);
    int indexOfDouble(Stringp s, double i=0);
    int lastIndexOf(Stringp s, int i=0x7fffffff);
    int lastIndexOfDouble(Stringp s, double i=0x7fffffff);
    Stringp charAt(int i=0);
    Stringp charAtDouble(double i=0);
    double charCodeAt(int i); // returns NaN for out-of-bounds
    double charCodeAtDouble(double i); // returns NaN for out-of-bounds
    int localeCompare(Stringp other, Atom *argv, int argc);
    Stringp substring(int i_start, int i_end);
    Stringp substringDouble(double d_start, double d_end);
    Stringp slice(int dStart, int dEnd);
    Stringp sliceDouble(double dStart, double dEnd);
    Stringp substr(int dStart, int dEnd);
    Stringp substrDouble(double dStart, double dEnd);

    // Useful utilities used by the core code.
    static wchar wCharToUpper (wchar ch);
    static wchar wCharToLower (wchar ch);

#ifdef DEBUGGER
    uint32 size() const;
    Stringp getTypeName() const;
#endif

private:
    int m_length; // { interned: 1, length:31 }

    /**
     * Reference-counted character storage for a String.
     */
    class StringBuf : public MMgc::RCObject
    {
    public:
        wchar m_buf[1];
#ifdef MMGC_DRC
        // Zeroes the character storage on destruction. The byte count
        // is the GC allocation size of this object minus the size of
        // the RCObject base.
        ~StringBuf()
        {
            memset(m_buf, 0, MMgc::GC::Size(this)-sizeof(MMgc::RCObject));
        }
#endif
    };

    // no WB b/c manual WB is in setBuf, faster that way
    StringBuf* m_buf;

    // The low two bits control what type of value is stored in m_prefixOrOffsetOrNumber
    // 0x00 nothing is stored (rest of value is 0)
    // 0x01 the 29-bit numeric equivalent of this string is stored (same as kIntegerAtom format)
    // 0x02 a prefix string is stored
    // 0x03 a 30-bit offset is stored
    // manual WB when needed
    uintptr m_prefixOrOffsetOrNumber;

#define STRINGFLAGS 0x03
#define NUMBERFLAG 0x01
#define PREFIXFLAG 0x02
#define OFFSETFLAG 0x03

    // Returns the stored prefix string, or 0 when the tag bits do not
    // say PREFIXFLAG.
    Stringp getPrefix() const
    {
        if ((m_prefixOrOffsetOrNumber & STRINGFLAGS) == PREFIXFLAG)
            return Stringp(m_prefixOrOffsetOrNumber & ~STRINGFLAGS);
        else
            return 0;
    }

    // Returns the stored offset (the word with its tag bits cleared,
    // shifted right by two), or 0 when the tag bits do not say OFFSETFLAG.
    uint32 getOffset() const
    {
        if ((m_prefixOrOffsetOrNumber & STRINGFLAGS) == OFFSETFLAG)
            return urshift(m_prefixOrOffsetOrNumber & ~STRINGFLAGS, 2);
        else
            return 0;
    }

    bool hasPrefix() const { return ((m_prefixOrOffsetOrNumber & STRINGFLAGS) == PREFIXFLAG); }
    bool hasOffset() const { return ((m_prefixOrOffsetOrNumber & STRINGFLAGS) == OFFSETFLAG); }

    // True for both prefix (0x2) and offset (0x3) strings.
    bool needsNormalization() const { return ((m_prefixOrOffsetOrNumber & STRINGFLAGS) >= 0x2); }

    void normalize();

    // If our string is a valid positive integer that fits in a kIntegerAtom, this
    // will set our m_prefixOrOffsetOrNumber value to the int atom representation or'ed
    // with NUMBERFLAG. This is only valid for non-prefix, non-offset interned strings.
    void generateIntegerEquivalent(AvmCore *core);

    void setPrefixOrOffsetOrNumber(uintptr value);

    static const wchar lowerCaseBase[];
    static const wchar upperCaseBase[];
    static const wchar lowerCaseConversion[];
    static const wchar upperCaseConversion[];
    static const unsigned char tolower_map[];
    static const unsigned char toupper_map[];

public:
    // This returns a kIntegerAtom Atom
    // for use in our ScriptObject HashTable implementation. If we have a valid
    // integer equivalent, it will never be zero since kIntegerType tag != 0.
    // Returns 0 when no integer equivalent is stored.
    Atom getIntAtom() const
    {
        if ((m_prefixOrOffsetOrNumber & STRINGFLAGS) == NUMBERFLAG)
            // Strip the two tag bits, then apply the integer atom tag.
            return (m_prefixOrOffsetOrNumber & ~STRINGFLAGS) | kIntegerType;
        else
            return 0;
    }

    StringBuf* allocBuf(int numChars);

    // Raw pointer to the first character of the buffer; no offset is
    // applied and no normalization is performed.
    wchar *getData() const { return m_buf->m_buf; }

    // Stores buf into m_buf through the WBRC write-barrier macro.
    void setBuf(StringBuf *buf) { WBRC(MMgc::GC::GetGC(this), this, &m_buf, buf); }
};
// Compare helpers
/**
 * Equality: the lengths must match and Compare must report 0.
 * Compare is only invoked when the lengths are equal.
 */
inline bool operator==(String& s1, String& s2)
{
    if (s1.length() != s2.length())
        return false;
    return s1.Compare(s2) == 0;
}
/**
 * Inequality: true when the lengths differ, or when Compare reports
 * a non-zero result. Compare is only invoked when the lengths are equal.
 */
inline bool operator!=(String& s1, String& s2)
{
    if (s1.length() != s2.length())
        return true;
    return s1.Compare(s2) != 0;
}
/**
 * True when s2.Compare(s1) is negative.
 * NOTE(review): the operands are deliberately passed in swapped order;
 * whether that matches "s1 is less than s2" depends on the sign
 * convention of String::Compare -- confirm against its implementation.
 */
inline bool operator<(String& s1, String& s2)
{
    const int order = s2.Compare(s1);
    return order < 0;
}
/**
 * True when s2.Compare(s1) is positive.
 * NOTE(review): the operands are deliberately passed in swapped order;
 * whether that matches "s1 is greater than s2" depends on the sign
 * convention of String::Compare -- confirm against its implementation.
 */
inline bool operator>(String& s1, String& s2)
{
    const int order = s2.Compare(s1);
    return order > 0;
}
/**
 * True when s2.Compare(s1) is zero or negative.
 * NOTE(review): the operands are deliberately passed in swapped order;
 * whether that matches "s1 is less than or equal to s2" depends on the
 * sign convention of String::Compare -- confirm against its implementation.
 */
inline bool operator<=(String& s1, String& s2)
{
    const int order = s2.Compare(s1);
    return order <= 0;
}
/**
 * True when s2.Compare(s1) is zero or positive.
 * NOTE(review): the operands are deliberately passed in swapped order;
 * whether that matches "s1 is greater than or equal to s2" depends on the
 * sign convention of String::Compare -- confirm against its implementation.
 */
inline bool operator>=(String& s1, String& s2)
{
    const int order = s2.Compare(s1);
    return order >= 0;
}
}
#endif /* __avmplus_String__ */