474 lines
13 KiB
C++
474 lines
13 KiB
C++
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is [Open Source Virtual Machine.].
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Adobe System Incorporated.
|
|
* Portions created by the Initial Developer are Copyright (C) 2004-2006
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Adobe AS3 Team
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
#ifndef __avmplus_String__
|
|
#define __avmplus_String__
|
|
|
|
|
|
namespace avmplus
|
|
{
|
|
#ifdef DEBUGGER
|
|
class GCHashtableScriptObject;
|
|
#endif
|
|
/**
|
|
* A string in UTF-8 encoding.
|
|
*
|
|
* UTF8String's are immutable and garbage collected, which makes
|
|
* it easy to pass them around.
|
|
*/
|
|
class UTF8String : public MMgc::GCObject
|
|
{
|
|
public:
|
|
/**
|
|
* Constructs a UTF8String. This should not be called
|
|
* directly; use the toUTF8String method of String.
|
|
*/
|
|
UTF8String(int _length) { m_length = _length; }
|
|
|
|
/**
|
|
* Operator overload; returns a pointer to the
|
|
* null-terminated string.
|
|
*/
|
|
operator const char* () const { return m_buffer; }
|
|
|
|
/**
|
|
* Returns a pointer to the null-terminated string.
|
|
*/
|
|
const char *c_str() const { return m_buffer; }
|
|
|
|
/**
|
|
* Returns the length of the string in bytes,
|
|
* excluding the null terminator.
|
|
*/
|
|
int length() const { return m_length; }
|
|
|
|
/**
|
|
* This is an advanced method which returns a non-const
|
|
* pointer to the UTF8String's internal buffer. This
|
|
* can be used to mutate a string that is known to not
|
|
* have any other references. Use with caution.
|
|
*/
|
|
char *lockBuffer() { return m_buffer; }
|
|
|
|
/**
|
|
* Unlocks the buffer previously returned by lockBuffer.
|
|
* Currently a no-op, but may change in the future,
|
|
* so call after using lockBuffer.
|
|
*/
|
|
void unlockBuffer() {}
|
|
|
|
private:
|
|
int m_length;
|
|
char m_buffer[1];
|
|
};
|
|
|
|
/**
|
|
* A string in UTF-16 encoding. This is the basic string
|
|
* class used by AVM+ code.
|
|
*/
|
|
class String : public AvmPlusScriptableObject
|
|
{
|
|
public:
|
|
String(const wchar *str, int len); // wchar[] -> string
|
|
String(const char *str, int utf8len, int utf16len); // utf8->string
|
|
String(Stringp s1, Stringp s0); // concat
|
|
String(Stringp s, int pos, int len);// substr
|
|
String(int len); // preallocated empty
|
|
|
|
~String()
|
|
{
|
|
#ifdef MMGC_DRC
|
|
setBuf(NULL);
|
|
setPrefixOrOffsetOrNumber(0);
|
|
m_length = 0;
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* Converts this string to a UTF-8 string. Allocates
|
|
* a new UTF8 string object containing the result,
|
|
* and returns it.
|
|
*/
|
|
UTF8String* toUTF8String();
|
|
|
|
/**
|
|
* Returns the Atom equivalent of this String. This is
|
|
* done by or'ing the proper type bits into the pointer.
|
|
*/
|
|
Atom atom() const { return AtomConstants::kStringType | (Atom)this; }
|
|
|
|
/**
|
|
* virtual version of atom():
|
|
*/
|
|
virtual Atom toAtom() const { return atom(); }
|
|
|
|
/**
|
|
* Returns the length of the string in characters.
|
|
* The null terminator is not included.
|
|
*/
|
|
int length() const {
|
|
return m_length & 0x7FFFFFFF;
|
|
}
|
|
|
|
/**
|
|
* Operator overload; returns a pointer to the
|
|
* null-terminated string.
|
|
*/
|
|
operator const wchar* ()
|
|
{
|
|
// For offset too since our string needs to be null terminated
|
|
if (needsNormalization()) normalize();
|
|
return getData();
|
|
}
|
|
|
|
/**
|
|
* Returns a pointer to the null-terminated string.
|
|
*/
|
|
const wchar* c_str()
|
|
{
|
|
// For offset too since our string needs to be null terminated
|
|
if (needsNormalization()) normalize();
|
|
return getData();
|
|
}
|
|
|
|
/**
|
|
* Returns the index'th character of the string.
|
|
* @param index zero-based index into the string
|
|
*/
|
|
wchar operator[] (int index);
|
|
|
|
/**
|
|
* This is an advanced method which returns a non-const
|
|
* pointer to the String's internal buffer. This
|
|
* can be used to mutate a string that is known to not
|
|
* have any other references. Use with caution.
|
|
*/
|
|
wchar* lockBuffer()
|
|
{
|
|
// For offset too since our string needs to be null terminated
|
|
if (needsNormalization()) normalize();
|
|
return (wchar*) getData();
|
|
}
|
|
|
|
/**
|
|
* Unlocks the buffer previously returned by lockBuffer.
|
|
* Must call after using lockBuffer to mutate the buffer.
|
|
*/
|
|
void unlockBuffer(int newLen)
|
|
{
|
|
AvmAssert(!isInterned());
|
|
m_length = newLen;
|
|
}
|
|
void unlockBuffer() {}
|
|
|
|
/**
|
|
* Returns a new string object which is a copy of this
|
|
* string object, with all characters in the string
|
|
* converted to uppercase.
|
|
*
|
|
* Unicode character classes for uppercase and lowercase
|
|
* are used. The conversion behavior is compliant with
|
|
* the String.toUpperCase method.
|
|
*/
|
|
Stringp toUpperCase();
|
|
|
|
/**
|
|
* Returns a new string object which is a copy of this
|
|
* string object, with all characters in the string
|
|
* converted to lowercase.
|
|
*
|
|
* Unicode character classes for uppercase and lowercase
|
|
* are used. The conversion behavior is compliant with
|
|
* the String.toLowerCase method.
|
|
*/
|
|
Stringp toLowerCase();
|
|
|
|
/*@{*/
|
|
/**
|
|
* Compare the String with toCompare.
|
|
* @return = 0 if the strings are identical.
|
|
* < 0 if this string is less than toCompare
|
|
* > 0 if this string is greater than toCompare
|
|
*/
|
|
int Compare(String& toCompare)
|
|
{
|
|
if (hasPrefix()) normalize();
|
|
if (toCompare.hasPrefix()) toCompare.normalize();
|
|
|
|
return String::Compare(getData() + getOffset(), length(), toCompare.getData() + toCompare.getOffset(), toCompare.length());
|
|
}
|
|
|
|
/*@{*/
|
|
/**
|
|
* Does String contain wchar?
|
|
* @return = 0 if the strings are identical.
|
|
* < 0 if this string is less than toCompare
|
|
* > 0 if this string is greater than toCompare
|
|
*/
|
|
bool Contains(wchar c);
|
|
|
|
// compare this string to (other,len)
|
|
bool Equals(const wchar *toCompare, int len)
|
|
{
|
|
AvmAssert(toCompare[len]==0);
|
|
int sLen = length();
|
|
if (len != sLen) return false;
|
|
if (hasPrefix()) normalize();
|
|
return String::Compare(getData() + getOffset(), sLen, toCompare, len)==0;
|
|
}
|
|
|
|
// toCompare is not necessarily zero-terminated at toCompare[len]
|
|
bool FastEquals(const wchar *toCompare, int len)
|
|
{
|
|
int sLen = length();
|
|
if (len != sLen) return false;
|
|
// This is only for intern strings which are never offset or prefix
|
|
AvmAssert(needsNormalization() == false);
|
|
const wchar *src = getData();
|
|
|
|
// !! could we compare two WORDS at a time? Our toCompare
|
|
// string is not necessarily DWORD aligned. (Offset strings, etc.)
|
|
|
|
while (sLen)
|
|
{
|
|
sLen--;
|
|
if (src[sLen] != toCompare[sLen])
|
|
return false;
|
|
}
|
|
|
|
AvmAssert(sLen == 0);
|
|
return true;
|
|
}
|
|
|
|
// compare this string to null-terminated 8bit string
|
|
bool Equals(const char *other8)
|
|
{
|
|
if (hasPrefix()) normalize();
|
|
return !Compare(getData() + getOffset(), other8, length());
|
|
}
|
|
|
|
/*@{*/
|
|
/**
|
|
* Compares s1 and s2.
|
|
* @return = 0 if the strings are identical.
|
|
* < 0 if s1 is less than s2
|
|
* > 0 if s1 is greater than s2
|
|
*/
|
|
static int Compare(const wchar *s1,
|
|
int len1,
|
|
const wchar *s2,
|
|
int len2);
|
|
static int Compare(const wchar *s1,
|
|
const char *s2,
|
|
int len);
|
|
/*@}*/
|
|
|
|
/*@{*/
|
|
/**
|
|
* Returns the length of str, in # of characters.
|
|
*/
|
|
static int Length(const wchar *str);
|
|
static int Length(const char *str);
|
|
/*@}*/
|
|
|
|
void setInterned(AvmCore *core)
|
|
{
|
|
m_length |= 0x80000000;
|
|
generateIntegerEquivalent (core);
|
|
}
|
|
|
|
static bool isSpace(wchar ch)
|
|
{
|
|
return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
|
|
}
|
|
|
|
bool isWhitespace();
|
|
|
|
int isInterned() const
|
|
{
|
|
return m_length & 0x80000000;
|
|
}
|
|
|
|
// handles hex, octal, base 10 integer, float, and "Infinity"/"-Infinity"
|
|
double toNumber()
|
|
{
|
|
// For offset too since convertStringToNumber expects a null terminated string
|
|
if (needsNormalization()) normalize();
|
|
return MathUtils::convertStringToNumber(getData() + getOffset(), length());
|
|
}
|
|
|
|
// native functions
|
|
int indexOf(Stringp s, int i=0);
|
|
int indexOfDouble(Stringp s, double i=0);
|
|
int lastIndexOf(Stringp s, int i=0x7fffffff);
|
|
int lastIndexOfDouble(Stringp s, double i=0x7fffffff);
|
|
Stringp charAt(int i=0);
|
|
Stringp charAtDouble(double i=0);
|
|
double charCodeAt(int i); // returns NaN for out-of-bounds
|
|
double charCodeAtDouble(double i); // returns NaN for out-of-bounds
|
|
int localeCompare(Stringp other, Atom *argv, int argc);
|
|
|
|
Stringp substring(int i_start, int i_end);
|
|
Stringp substringDouble(double d_start, double d_end);
|
|
|
|
Stringp slice(int dStart, int dEnd);
|
|
Stringp sliceDouble(double dStart, double dEnd);
|
|
|
|
Stringp substr(int dStart, int dEnd);
|
|
Stringp substrDouble(double dStart, double dEnd);
|
|
|
|
// Useful utilities used by the core code.
|
|
static wchar wCharToUpper (wchar ch);
|
|
static wchar wCharToLower (wchar ch);
|
|
|
|
#ifdef DEBUGGER
|
|
uint32 size() const;
|
|
Stringp getTypeName() const;
|
|
#endif
|
|
|
|
private:
|
|
int m_length; // { interned: 1, length:31 }
|
|
class StringBuf : public MMgc::RCObject
|
|
{
|
|
public:
|
|
wchar m_buf[1];
|
|
#ifdef MMGC_DRC
|
|
~StringBuf()
|
|
{
|
|
memset(m_buf, 0, MMgc::GC::Size(this)-sizeof(MMgc::RCObject));
|
|
}
|
|
#endif
|
|
};
|
|
// no WB b/c manual WB is in setBuf, faster that way
|
|
StringBuf* m_buf;
|
|
|
|
// The low two bits control what type of value is stored in m_prefixOrOffsetOrNumber
|
|
// 0x00 nothing is stored (rest of value is 0)
|
|
// 0x01 the 29-bit numeric equivalent of this string is stored (same as kIntegerAtom format)
|
|
// 0x02 a prefix string is stored
|
|
// 0x03 a 30-bit offset is stored
|
|
// manual WB when needed
|
|
uintptr m_prefixOrOffsetOrNumber;
|
|
#define STRINGFLAGS 0x03
|
|
#define NUMBERFLAG 0x01
|
|
#define PREFIXFLAG 0x02
|
|
#define OFFSETFLAG 0x03
|
|
|
|
Stringp getPrefix() const
|
|
{
|
|
if ((m_prefixOrOffsetOrNumber & STRINGFLAGS) == PREFIXFLAG)
|
|
return Stringp(m_prefixOrOffsetOrNumber & ~STRINGFLAGS);
|
|
else
|
|
return 0;
|
|
};
|
|
|
|
uint32 getOffset() const
|
|
{
|
|
if ((m_prefixOrOffsetOrNumber & STRINGFLAGS) == OFFSETFLAG)
|
|
return urshift(m_prefixOrOffsetOrNumber & ~STRINGFLAGS, 2);
|
|
else
|
|
return 0;
|
|
};
|
|
|
|
bool hasPrefix() const { return ((m_prefixOrOffsetOrNumber & STRINGFLAGS) == PREFIXFLAG); };
|
|
bool hasOffset() const { return ((m_prefixOrOffsetOrNumber & STRINGFLAGS) == OFFSETFLAG); };
|
|
|
|
bool needsNormalization() const { return ((m_prefixOrOffsetOrNumber & STRINGFLAGS) >= 0x2); };
|
|
|
|
void normalize();
|
|
|
|
// If our string is a valid positive integer that fits in a kIntegerAtom, this
|
|
// will set our m_prefixOrOffsetOrNumber value to the int atom representation or'ed
|
|
// with NUMBERTYPE. This is only valid for non-prefix, non-offset interned strings.
|
|
void generateIntegerEquivalent(AvmCore *core);
|
|
|
|
void setPrefixOrOffsetOrNumber(uintptr value);
|
|
|
|
static const wchar lowerCaseBase[];
|
|
static const wchar upperCaseBase[];
|
|
static const wchar lowerCaseConversion[];
|
|
static const wchar upperCaseConversion[];
|
|
static const unsigned char tolower_map[];
|
|
static const unsigned char toupper_map[];
|
|
public:
|
|
// This returns a kIntegerAtom Atom
|
|
// for use in our ScriptObject HashTable implementation. If we have a valid
|
|
// integer equivalent, it will never be zero since kIntegerType tag != 0
|
|
Atom getIntAtom() const
|
|
{
|
|
if ((m_prefixOrOffsetOrNumber & STRINGFLAGS) == NUMBERFLAG)
|
|
return m_prefixOrOffsetOrNumber & ~STRINGFLAGS | kIntegerType;
|
|
else
|
|
return 0;
|
|
};
|
|
|
|
StringBuf* allocBuf(int numChars);
|
|
wchar *getData() const { return m_buf->m_buf; }
|
|
void setBuf(StringBuf *buf) { WBRC(MMgc::GC::GetGC(this), this, &m_buf, buf); }
|
|
|
|
|
|
};
|
|
|
|
// Compare helpers
|
|
inline bool operator==(String& s1, String& s2)
|
|
{
|
|
return s1.length() == s2.length() && s1.Compare(s2) == 0;
|
|
}
|
|
inline bool operator!=(String& s1, String& s2)
|
|
{
|
|
return s1.length() != s2.length() || s1.Compare(s2) != 0;
|
|
}
|
|
inline bool operator<(String& s1, String& s2)
|
|
{
|
|
return s2.Compare(s1) < 0;
|
|
}
|
|
inline bool operator>(String& s1, String& s2)
|
|
{
|
|
return s2.Compare(s1) > 0;
|
|
}
|
|
inline bool operator<=(String& s1, String& s2)
|
|
{
|
|
return s2.Compare(s1) <= 0;
|
|
}
|
|
inline bool operator>=(String& s1, String& s2)
|
|
{
|
|
return s2.Compare(s1) >= 0;
|
|
}
|
|
}
|
|
|
|
#endif /* __avmplus_String__ */
|