Mozilla/mozilla/js/tamarin/core/XMLParser16.cpp
gerv%gerv.net 0a3a70857a Bug 236613: fix formatting of MPL/LGPL/GPL tri-license.
git-svn-id: svn://10.0.0.236/trunk@220064 18797224-902f-48f8-a5cc-f745e15eee43
2007-02-13 18:05:02 +00:00

511 lines
13 KiB
C++

/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is [Open Source Virtual Machine.].
*
* The Initial Developer of the Original Code is
* Adobe System Incorporated.
* Portions created by the Initial Developer are Copyright (C) 1993-2006
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Adobe AS3 Team
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "avmplus.h"
namespace avmplus
{
wchar *stripPrefix(const wchar *str, const char *pre)
// If str begins with pre, return the first char after in str
{
if ((!str) || (!pre))
return 0;
for (;;) {
// Map to uppercase
wchar s = *str;
if ( s >= 'a' && s <= 'z' )
s -= 'a' - 'A';
unsigned char p = *pre;
if ( p >= 'a' && p <= 'z' )
p -= 'a' - 'A';
// See if the characters are not equal or we hit the end of the strings
if ( s != p || !s || !p )
break;
*pre++; *str++;
}
return *pre == 0 ? const_cast<wchar*>(str) : 0;
}
//
// XMLParser
//
// !!@ I'm not sure what this was supposed to do originally but I've rewritten it
// to remove the leading and trailing white space for text elements.
// " 5 4 3 " becomes "5 4 3"
// This is to simulate the E4X XML parser
void XMLParser::condenseWhitespace(Stringp text)
{
AvmAssert (!text->isInterned());
wchar *str = text->lockBuffer();
int len = text->length();
wchar *dst = str;
wchar *src = str;
bool leadingWhite = true;
wchar *lastChar = 0;
while (len--) {
if (String::isSpace(*src)) {
if (!leadingWhite) {
*dst++ = *src;
}
src++;
} else {
leadingWhite = false; // first non-space char, no more
lastChar = dst;
*dst++ = *src++;
}
}
if (lastChar)
lastChar[1] = 0;
*dst = 0;
text->unlockBuffer((lastChar ? (lastChar + 1) : dst)-str);
}
int XMLParser::getNext(XMLTag& tag)
{
tag.reset();
// If there's nothing left, exit.
if (!m_ptr || !*m_ptr) {
return XMLParser::kEndOfDocument;
}
// R41
// If the ignore whitespace flag is on, don't produce
// all-whitespace text nodes.
if (m_ignoreWhite) {
const wchar *ptr = m_ptr;
while (String::isSpace(*ptr)) {
ptr++;
}
if (*ptr == '<' || !*ptr) {
// If we reached the end of the document,
// or we reached a tag, skip all the
// whitesapce, because it would turn into
// an empty text node.
m_ptr = ptr;
}
// If there's nothing left, exit.
// But only do it for Flash 6 because we want
// to exactly preserve Flash 5 behavior.
if (!*m_ptr) {
return XMLParser::kEndOfDocument;
}
}
// end R41
// If it starts with <, it's an XML element.
// If it doesn't, it must be a text element.
if (*m_ptr != '<') {
// Treat it as text. Scan up to the next < or until EOF.
const wchar *start = m_ptr;
while (*m_ptr && *m_ptr != '<') {
m_ptr++;
}
tag.text = unescape(m_source, start, m_ptr-start, false);
// Condense whitespace if desired
if (m_ignoreWhite && m_condenseWhite) {
condenseWhitespace(tag.text);
}
tag.nodeType = XMLTag::kTextNodeType;
return XMLParser::kNoError;
}
// Is this a <?xml> declaration?
wchar *temp;
if ((temp = stripPrefix(m_ptr, "<?xml ")) != NULL) {
// Scan forward for "?>"
const wchar *start = m_ptr;
m_ptr = temp;
while (*m_ptr) {
if (m_ptr[0] == '?' && m_ptr[1] == '>')
{
// We have the end of the XML declaration
// !!@ changed to not return <?...?> parts
tag.text = new (core->GetGC()) String(start + 2, m_ptr - start - 2);
m_ptr += 2;
tag.nodeType = XMLTag::kXMLDeclaration;
return XMLParser::kNoError;
}
else
{
m_ptr++;
}
}
return XMLParser::kUnterminatedXMLDeclaration;
}
// Is this a <!DOCTYPE> declaration?
if ((temp = stripPrefix(m_ptr, "<!DOCTYPE")) != NULL) {
// Scan forward for '>'.
const wchar *start = m_ptr;
m_ptr = temp;
int depth = 0;
while (*m_ptr) {
if (*m_ptr == '<') {
depth++;
}
if (*m_ptr == '>') {
if (!depth) {
// We've reached the end of the DOCTYPE.
m_ptr++;
tag.text = new (core->GetGC()) String(start, m_ptr-start);
tag.nodeType = XMLTag::kDocTypeDeclaration;
return XMLParser::kNoError;
}
depth--;
}
m_ptr++;
}
return XMLParser::kUnterminatedDocTypeDeclaration;
}
// Is this a CDATA section?
wchar *cdata;
if ((cdata = stripPrefix(m_ptr, "<![CDATA[")) != NULL) {
// Scan forward for "]]>"
m_ptr = cdata;
while (*m_ptr) {
if (m_ptr[0] == ']' && m_ptr[1] == ']' && m_ptr[2] == '>') {
// We have the end of the CDATA section.
tag.text = new (core->GetGC()) String(cdata, m_ptr-cdata);
tag.nodeType = XMLTag::kCDataSection;
m_ptr += 3;
return XMLParser::kNoError;
}
m_ptr++;
}
return XMLParser::kUnterminatedCDataSection;
}
// Is this a processing instruction?
wchar *pi;
if ((pi = stripPrefix(m_ptr, "<?")) != NULL) {
// Scan forward for "?>"
m_ptr = pi;
while (*m_ptr) {
if (m_ptr[0] == '?' && m_ptr[1] == '>') {
// We have the end of the processing instruction.
tag.text = new (core->GetGC()) String(pi, m_ptr - pi);
tag.nodeType = XMLTag::kProcessingInstruction;
m_ptr += 2;
return XMLParser::kNoError;
}
m_ptr++;
}
return XMLParser::kUnterminatedProcessingInstruction;
}
// Advance past the "<"
m_ptr++;
// Is this a comment? Return a comment tag->
const wchar *comment;
if (m_ptr[0] == '!' && m_ptr[1] == '-' && m_ptr[2] == '-') {
// Skip up to '-->'.
m_ptr += 3;
comment = m_ptr;
while (*m_ptr) {
if (m_ptr[0] == '-' && m_ptr[1] == '-' && m_ptr[2] == '>')
{
tag.text = new (core->GetGC()) String(comment, m_ptr-comment);
tag.nodeType = XMLTag::kComment;
m_ptr += 3;
return XMLParser::kNoError;
}
m_ptr++;
}
// Got to the end of the buffer without finding a new tag->
return XMLParser::kUnterminatedComment;
}
// Extract the tag name. Scan up to ">" or whitespace.
const wchar *tagStart = m_ptr;
while (!String::isSpace(*m_ptr) && *m_ptr != '>') {
if (*m_ptr == '/' && *(m_ptr+1) == '>') {
// Found close of an empty element.
// Exit!
break;
}
if (!*m_ptr) {
// Premature end!
return XMLParser::kMalformedElement;
}
m_ptr++;
}
// Give up if tag name is empty
if (m_ptr == tagStart) {
return XMLParser::kMalformedElement;
}
tag.text = unescape(m_source, tagStart, m_ptr-tagStart, true);
tag.nodeType = XMLTag::kElementType;
// Extract attributes.
for (;;) {
if (!*m_ptr) {
// Premature end!
return XMLParser::kMalformedElement;
}
// Skip any whitespace.
while (String::isSpace(*m_ptr)) {
m_ptr++;
}
if (*m_ptr == '>') {
break;
}
if (*m_ptr == '/' && *(m_ptr+1) == '>') {
// Found close of an empty element.
// Exit!
tag.empty = true;
m_ptr++;
break;
}
// Extract the attribute name.
const wchar *nameStart = m_ptr;
while (!String::isSpace(*m_ptr) && *m_ptr != '=' && *m_ptr != '>') {
if (!*m_ptr) {
// Premature end!
return XMLParser::kMalformedElement;
}
m_ptr++;
}
if (m_ptr == nameStart) {
// Empty attribute name?
return XMLParser::kMalformedElement;
}
Stringp attributeName = unescape(m_source, nameStart, m_ptr-nameStart, true);
while (String::isSpace(*m_ptr)) {
m_ptr++;
}
if (*m_ptr != '=') {
// No '=' sign, no attribute value, error!
return XMLParser::kMalformedElement;
} else {
// Skip over whitespace.
while (String::isSpace(*++m_ptr))
;
const wchar *attrStart = m_ptr;
// Extract the attribute value.
if (*m_ptr != '"' && *m_ptr != '\'') {
// Error; no opening quote for attribute value.
return XMLParser::kMalformedElement;
}
wchar delimiter = *m_ptr;
// Extract up to the next quote.
attrStart++;
while (*++m_ptr != delimiter) {
if (*m_ptr == '<') {
// '<' is not permitted in an attribute value
// Changed this from kMalformedElement to kUnterminatedAttributeValue for bug 117058(105422)
return XMLParser::kUnterminatedAttributeValue;
}
if (!*m_ptr) {
// If at end of file,
// we have an unterminated attribute value on our hands.
return XMLParser::kUnterminatedAttributeValue;
}
}
const wchar *attrEnd = m_ptr;
m_ptr++;
Stringp attributeValue = unescape(m_source, attrStart, attrEnd-attrStart, false);
AvmAssert (attributeName->isInterned());
tag.attributes.add(attributeName);
tag.attributes.add(attributeValue);
}
}
// Advance past the end > of this element.
if (*m_ptr == '>') {
m_ptr++;
}
return XMLParser::kNoError;
}
Stringp XMLParser::unescape(Stringp text, const wchar *startChar, int len, bool bIntern)
{
bool bUseSubString = true;
for (int i = 0; i < len; i++)
{
if (startChar[i] == '&')
{
bUseSubString = false;
break;
}
}
if (bUseSubString)
{
if (bIntern)
{
return core->internAlloc (startChar, len);
}
else
{
MMgc::GC* gc = MMgc::GC::GetGC(text);
int start = startChar - text->c_str();
AvmAssert (start < text->length());
return new (gc) String (text, start, len);
}
}
MMgc::GC* gc = MMgc::GC::GetGC(text);
Stringp news = new (gc) String (startChar, len);
wchar *buffer = news->lockBuffer();
// Remove XML &#xx; escape entities, and &lt; &gt; &amp; &apos;
wchar *dst = buffer;
wchar *src = buffer;
while (*src) {
if (*src == '&') {
bool success = false;
// Scan forward to the ';'
wchar *endPtr = src;
while (*endPtr && *endPtr != ';') {
endPtr++;
}
if (*endPtr) {
*endPtr = 0;
int len = endPtr-src-1;
if (*(src+1) == '#') {
// Parse a &#xx; decimal sequence. Or a &#xDD hex sequence
double value = MathUtils::parseInt(src+2, len-1);
if (MathUtils::isNaN(value)) {
if (len > 2 && src[2] == 'x') {
// Handle xFF hex encoded tags, too
value = MathUtils::parseInt(src+3, len-2, 16);
}
}
if (!MathUtils::isNaN(value)) {
*dst++ = (wchar) (int) value;
success = true;
}
} else if (len <= 4) // Our xmlEntities are only 4 characters or less
{
Atom entityAtom = core->internAlloc(src+1, len)->atom();
Atom result = core->xmlEntities->get(entityAtom);
if (result != undefinedAtom) {
*dst++ = (wchar)(result>>3);
success = true;
}
}
*endPtr = ';';
}
if (success) {
// If successful, advance past the sequence
src = endPtr+1;
} else {
// Otherwise copy the sequence literally
*dst++ = *src++;
}
} else {
*dst++ = *src++;
}
}
*dst = 0;
news->unlockBuffer(dst-buffer);
return (bIntern) ? core->internString (news) : news;
}
XMLParser::XMLParser(AvmCore *core)
{
this->core = core;
if (!core->xmlEntities)
{
// Lazy creation of the XML entities table.
core->xmlEntities = new (core->GetGC()) Hashtable(core->GetGC());
const char *entities = "&amp\0\"quot\0'apos\0<lt\0>gt\0\xA0nbsp\0";
while (*entities)
{
core->xmlEntities->add(core->constant(entities+1),
(void*)core->intToAtom(*entities));
while (*entities++) {
// do nothing
}
}
}
}
void XMLParser::parse(Stringp source,
bool ignoreWhite /*=false*/ )
{
m_source = source;
m_ptr = m_source->c_str();
m_ignoreWhite = ignoreWhite;
}
bool XMLTag::nextAttribute(uint32& index,
Stringp& name,
Stringp& value)
{
if (index >= attributes.size()) {
return false;
}
name = attributes.get(index++);
value = attributes.get(index++);
return true;
}
} // namespace