490 lines
14 KiB
C++
490 lines
14 KiB
C++
/*
|
|
* The contents of this file are subject to the Mozilla Public
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code is XSL:P XSLT processor.
|
|
*
|
|
* The Initial Developer of the Original Code is Keith Visco.
|
|
* Portions created by Keith Visco (C) 1999 Keith Visco.
|
|
* All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Keith Visco, kvisco@ziplink.net
|
|
* -- original author
|
|
* Majkel Kretschmar
|
|
* -- UTF-8 changes
|
|
* Bob Miller, kbob@oblix.com
|
|
* -- plugged core leak.
|
|
*
|
|
*/
|
|
|
|
#include "printers.h"
|
|
|
|
//--------------------------------/
|
|
//- Implementation of XMLPrinter -/
|
|
//--------------------------------/
|
|
|
|
/**
|
|
* A class for printing XML nodes.
|
|
* This class was ported from XSL:P Java source
|
|
**/
|
|
|
|
/**
|
|
* The default indent size
|
|
**/
|
|
const int XMLPrinter::DEFAULT_INDENT = 2;
|
|
|
|
|
|
const String XMLPrinter::AMP_ENTITY = "&";
|
|
const String XMLPrinter::GT_ENTITY = ">";
|
|
const String XMLPrinter::LT_ENTITY = "<";
|
|
const String XMLPrinter::HEX_ENTITY = "&#";
|
|
|
|
const String XMLPrinter::CDATA_END = "]]>";
|
|
const String XMLPrinter::CDATA_START = "<![CDATA[";
|
|
const String XMLPrinter::COMMENT_START = "<!--";
|
|
const String XMLPrinter::COMMENT_END = "-->";
|
|
const String XMLPrinter::DOCTYPE_START = "<!DOCTYPE ";
|
|
const String XMLPrinter::DOCTYPE_END = ">";
|
|
const String XMLPrinter::DOUBLE_QUOTE = "\"";
|
|
const String XMLPrinter::EQUALS = "=";
|
|
const String XMLPrinter::FORWARD_SLASH = "/";
|
|
const String XMLPrinter::L_ANGLE_BRACKET = "<";
|
|
const String XMLPrinter::PI_START = "<?";
|
|
const String XMLPrinter::PI_END = "?>";
|
|
const String XMLPrinter::PUBLIC = "PUBLIC";
|
|
const String XMLPrinter::R_ANGLE_BRACKET = ">";
|
|
const String XMLPrinter::SEMICOLON = ";";
|
|
const String XMLPrinter::SPACE = " ";
|
|
const String XMLPrinter::SYSTEM = "SYSTEM";
|
|
const String XMLPrinter::XML_DECL = "xml version=";
|
|
|
|
// chars
|
|
const char XMLPrinter::AMPERSAND = '&';
|
|
const char XMLPrinter::GT = '>';
|
|
const char XMLPrinter::LT = '<';
|
|
const char XMLPrinter::DASH = '-';
|
|
const char XMLPrinter::TX_CR = '\r';
|
|
const char XMLPrinter::TX_LF = '\n';
|
|
|
|
|
|
//---------------/
|
|
//- Constructors -/
|
|
//---------------/
|
|
|
|
/**
|
|
* Default Constructor. Creates a new XMLPrinter using cout as the ostream.
|
|
**/
|
|
XMLPrinter::XMLPrinter() {
|
|
initialize(cout, DEFAULT_INDENT);
|
|
} //-- XMLPrinter
|
|
|
|
/**
|
|
* Creates a new XML Printer using the given ostream for output
|
|
* @param os the out stream to use for output
|
|
**/
|
|
XMLPrinter::XMLPrinter(ostream& os) {
|
|
initialize(os, DEFAULT_INDENT);
|
|
} //-- XMLPrinter
|
|
|
|
/**
|
|
* Creates a new XML Printer using the given ostream
|
|
* for output, and nodes are indenting using the specified
|
|
* indent size
|
|
* @param os the out stream to use for output
|
|
* @param indent the number of spaces to indent
|
|
**/
|
|
XMLPrinter::XMLPrinter (ostream& os, int indent) {
|
|
initialize(os, indent);
|
|
} //-- XMLPrinter
|
|
|
|
/**
 * Shared set-up used by every constructor: records the output stream
 * and resets all printer options to their defaults.
 * @param os the output stream to print to (only its address is stored)
 * @param indentSize the number of indent characters per nesting level
**/
void XMLPrinter::initialize(ostream& os, int indentSize)
{
    //-- output target and fixed strings
    ostreamPtr   = &os;
    version      = "1.0";
    entityTokens = "&<>";

    //-- option flags
    unescapeCDATA            = MB_FALSE;
    useEmptyElementShorthand = MB_TRUE;
    useFormat                = MB_FALSE;

    //-- indentChar must be assigned before setIndentSize, which uses it
    //-- to build the indent string
    indentChar = ' ';
    setIndentSize(indentSize);
} //-- initialize
|
|
|
|
// destructor is needed so that subclasses are destroyed.
|
|
|
|
XMLPrinter::~XMLPrinter()
|
|
{ }
|
|
|
|
/**
|
|
* Prints the given Node
|
|
* @param node the Node to print
|
|
**/
|
|
void XMLPrinter::print(Node* node) {
|
|
String currentIndent;
|
|
print(node,currentIndent);
|
|
*ostreamPtr<<flush;
|
|
} //-- print
|
|
|
|
/* -- add later
|
|
void XMLPrinter::printDoctype(DocumentType docType);
|
|
*/
|
|
|
|
/**
|
|
* Sets the indent size
|
|
* @param indent the number of spaces to indent
|
|
**/
|
|
void XMLPrinter::setIndentSize(int indentSize) {
|
|
this->indentSize = indentSize;
|
|
indent.clear();
|
|
for (int i = 0; i < indentSize; i++) {
|
|
indent.append(indentChar);
|
|
}
|
|
} //-- setIndentSize
|
|
|
|
/**
|
|
* Sets whether or not to "unwrap" CDATA Sections
|
|
* when printing. By Default CDATA Sections are left as is.
|
|
* @param unescape the boolean indicating whether or not
|
|
* to unescape CDATA Sections
|
|
**/
|
|
void XMLPrinter::setUnescapeCDATA(MBool unescape) {
|
|
unescapeCDATA = unescape;
|
|
} //-- setUnescapeCDATA
|
|
|
|
|
|
/**
 * Chooses how elements without children are printed: with the empty
 * element shorthand (the start tag is closed with "/>") when the flag
 * is set, or with separate start and end tags otherwise.
 * @param useShorthand the boolean indicating whether or not to use the
 * shorthand form
**/
void XMLPrinter::setUseEmptyElementShorthand(MBool useShorthand)
{
    this->useEmptyElementShorthand = useShorthand;
} //-- setUseEmptyElementShorthand
|
|
|
|
/**
|
|
* Sets whether or not this XMLPrinter should add whitespace
|
|
* to pretty print the XML tree
|
|
* @param useFormat a boolean to indicate whether to allow the
|
|
* XMLPrinter to add whitespace to the XML tree. (false by default)
|
|
**/
|
|
void XMLPrinter::setUseFormat(MBool useFormat) {
|
|
this->useFormat = useFormat;
|
|
} //-- setUseFormat
|
|
|
|
//---------------------/
|
|
//- Protected Methods -/
|
|
//---------------------/
|
|
|
|
/**
|
|
* prints the given node to this XMLPrinter's Writer. If the
|
|
* useFormat flag has been set, the node will be printed with
|
|
* indentation equal to currentIndent + indentSize
|
|
* @param node the Node to print
|
|
* @param currentIndent the current indent String
|
|
* @return true, if and only if a new line was printed at
|
|
* the end of printing the given node
|
|
**/
|
|
MBool XMLPrinter::print(Node* node, String& currentIndent) {
|
|
|
|
ostream& out = *this->ostreamPtr;
|
|
|
|
//-- if (node == null) return false;
|
|
|
|
switch(node->getNodeType()) {
|
|
|
|
//-- print Document Node
|
|
case Node::DOCUMENT_NODE:
|
|
{
|
|
Document* doc = (Document*)node;
|
|
out << PI_START << XML_DECL << DOUBLE_QUOTE;
|
|
out << version;
|
|
out << DOUBLE_QUOTE << PI_END << endl;
|
|
//-- printDoctype(doc.getDoctype());
|
|
Node *node = doc->getFirstChild();
|
|
while (node) {
|
|
print(node,currentIndent);
|
|
node = node->getNextSibling();
|
|
}
|
|
break;
|
|
}
|
|
//-- print Attribute Node
|
|
case Node::ATTRIBUTE_NODE:
|
|
{
|
|
Attr* attr = (Attr*)node;
|
|
//out << attr->getName();
|
|
out << attr->getNodeName();
|
|
const String& data = attr->getNodeValue();
|
|
if (&data != &NULL_STRING) {
|
|
out << EQUALS << DOUBLE_QUOTE;
|
|
out << data;
|
|
out << DOUBLE_QUOTE;
|
|
}
|
|
break;
|
|
}
|
|
//-- print Element
|
|
case Node::ELEMENT_NODE:
|
|
{
|
|
Element* element = (Element*)node;
|
|
out << L_ANGLE_BRACKET;
|
|
out << element->getNodeName();
|
|
|
|
NamedNodeMap* attList = element->getAttributes();
|
|
if (attList) {
|
|
//-- print attribute nodes
|
|
Attr* att;
|
|
for (int i = 0; i < attList->getLength(); i++) {
|
|
att = (Attr*)attList->item(i);
|
|
const String& data = att->getValue();
|
|
//out << SPACE << * (att->getName());
|
|
out << SPACE << att->getNodeName();
|
|
if (&data != &NULL_STRING) {
|
|
out << EQUALS << DOUBLE_QUOTE;
|
|
out << data;
|
|
out << DOUBLE_QUOTE;
|
|
}
|
|
}
|
|
}
|
|
|
|
Node* child = element->getFirstChild();
|
|
if (!child && (useEmptyElementShorthand))
|
|
{
|
|
out << FORWARD_SLASH << R_ANGLE_BRACKET;
|
|
if (useFormat) {
|
|
out << endl;
|
|
return MB_TRUE;
|
|
}
|
|
}
|
|
else {
|
|
// Either children, or no shorthand
|
|
MBool newLine = MB_FALSE;
|
|
out << R_ANGLE_BRACKET;
|
|
if (useFormat && child) {
|
|
// Fix formatting of PCDATA elements by Peter Marks and
|
|
// David King Lassman
|
|
// -- add if statement to check for text node before
|
|
// adding line break
|
|
if (child->getNodeType() != Node::TEXT_NODE) {
|
|
out << endl;
|
|
newLine = MB_TRUE;
|
|
}
|
|
}
|
|
|
|
String newIndent(indent);
|
|
newIndent.append(currentIndent);
|
|
Node *lastChild = child;
|
|
while (child) {
|
|
if (useFormat && newLine) {
|
|
out << newIndent;
|
|
}
|
|
newLine = print(child,newIndent);
|
|
lastChild = child;
|
|
child = child->getNextSibling();
|
|
}
|
|
if (useFormat) {
|
|
// Fix formatting of PCDATA elements by Peter Marks and
|
|
// David King Lassman
|
|
// -- add if statement to check for text node before
|
|
// adding line break
|
|
if (lastChild) {
|
|
if (lastChild->getNodeType() != Node::TEXT_NODE) {
|
|
out << currentIndent;
|
|
}
|
|
}
|
|
}
|
|
out << L_ANGLE_BRACKET << FORWARD_SLASH;
|
|
out << element->getNodeName();
|
|
out << R_ANGLE_BRACKET;
|
|
if (useFormat) {
|
|
Node* sibling = node->getNextSibling();
|
|
if (!sibling ||
|
|
sibling->getNodeType() != Node::TEXT_NODE)
|
|
{
|
|
out<<endl;
|
|
return MB_TRUE;
|
|
}
|
|
}
|
|
} //-- end if
|
|
break;
|
|
}
|
|
case Node::TEXT_NODE:
|
|
{
|
|
const String& data = ((Text*)node)->getData();
|
|
printWithXMLEntities(data);
|
|
break;
|
|
}
|
|
case Node::CDATA_SECTION_NODE:
|
|
if (unescapeCDATA)
|
|
printWithXMLEntities( ((Text*)node)->getData() );
|
|
else {
|
|
const String& data = ((Text*)node)->getData();
|
|
out << CDATA_START;
|
|
printUTF8Chars(data);
|
|
out << CDATA_END;
|
|
}
|
|
break;
|
|
case Node::COMMENT_NODE:
|
|
out << COMMENT_START;
|
|
printComment(((CharacterData*)node)->getData());
|
|
out << COMMENT_END;
|
|
if (useFormat) {
|
|
out <<endl;
|
|
return MB_TRUE;
|
|
}
|
|
break;
|
|
case Node::ENTITY_REFERENCE_NODE:
|
|
out << AMPERSAND << node->getNodeName() << SEMICOLON;
|
|
break;
|
|
case Node::PROCESSING_INSTRUCTION_NODE:
|
|
{
|
|
ProcessingInstruction* pi = (ProcessingInstruction*)node;
|
|
out << PI_START;
|
|
out << pi->getTarget();
|
|
out << SPACE;
|
|
out << pi->getData();
|
|
out << PI_END;
|
|
if (useFormat) {
|
|
out <<endl;
|
|
return MB_TRUE;
|
|
}
|
|
break;
|
|
}
|
|
case Node::DOCUMENT_TYPE_NODE:
|
|
//--printDoctype((DocumentType*)node);
|
|
break;
|
|
default:
|
|
break;
|
|
} //-- switch
|
|
|
|
//-- no new line, so return false;
|
|
return MB_FALSE;
|
|
} //-- print
|
|
|
|
/**
|
|
* Print the proper UTF8 characters (ISO 10646)
|
|
**/
|
|
void XMLPrinter::printUTF8Char(DOM_CHAR ch) const {
|
|
ostream& out = *this->ostreamPtr;
|
|
|
|
// DOM_CHAR is 16-bits so we only need to cover up to 0xFFFF
|
|
|
|
//-- 0x0000-0x007F
|
|
if (ch < 128)
|
|
out << (char)ch;
|
|
/*
|
|
else {
|
|
out << HEX_ENTITY;
|
|
out << ch;
|
|
out << SEMICOLON;
|
|
}
|
|
*/
|
|
//-- 0x0080-0x07FF
|
|
else if (ch < 2048) {
|
|
out << (char) (192+(ch/64)); // 0xC0 + x/64
|
|
out << (char) (128+(ch%64)); // 0x80 + x%64
|
|
}
|
|
//-- 0x800-0xFFFF
|
|
else {
|
|
out << (char) (224+(ch/4096)); // 0xE0 + x/64^2
|
|
out << (char) (128+((ch/64)%64)); // 0x80 + (x/64)%64
|
|
out << (char) (128+(ch%64)); // 0x80 + x%64
|
|
}
|
|
} //-- printUTF8Char
|
|
|
|
/**
|
|
* Print the proper UTF8 characters (ISO 10646)
|
|
* based on code submitted by Majkel Kretschmar
|
|
**/
|
|
void XMLPrinter::printUTF8Chars(const String& data) {
|
|
ostream& out = *this->ostreamPtr;
|
|
|
|
int i = 0;
|
|
while(i < data.length()) {
|
|
DOM_CHAR ch = data.charAt(i++);
|
|
// DOM_CHAR is 16-bits so we only need to cover up to 0xFFFF
|
|
|
|
//-- 0x0000-0x007F
|
|
if (ch < 128)
|
|
out << (char)ch;
|
|
/*
|
|
else {
|
|
out << HEX_ENTITY;
|
|
out << ch;
|
|
out << SEMICOLON;
|
|
}
|
|
*/
|
|
//-- 0x0080-0x07FF
|
|
else if (ch < 2048) {
|
|
out << (char)(192+(ch/64)); // 0xC0 + x/64
|
|
out << (char)(128+(ch%64)); // 0x80 + x%64
|
|
}
|
|
//-- 0x800-0xFFFF
|
|
else {
|
|
out << (char)(224+(ch/4096)); // 0xE0 + x/64^2
|
|
out << (char)(128+((ch/64)%64)); // 0x80 + (x/64)%64
|
|
out << (char)(128+(ch%64)); // 0x80 + x%64
|
|
}
|
|
}
|
|
|
|
} //-- printUTF8Chars
|
|
|
|
//-------------------/
|
|
//- Private Methods -/
|
|
//-------------------/
|
|
|
|
|
|
void XMLPrinter::printWithXMLEntities(const String& data) {
|
|
DOM_CHAR currChar;
|
|
|
|
if (&data == &NULL_STRING) return;
|
|
|
|
for (int i = 0; i < data.length(); i++) {
|
|
currChar = data.charAt(i);
|
|
switch (currChar) {
|
|
case AMPERSAND:
|
|
*ostreamPtr << AMP_ENTITY;
|
|
break;
|
|
case LT:
|
|
*ostreamPtr << LT_ENTITY;
|
|
break;
|
|
case GT:
|
|
*ostreamPtr << GT_ENTITY;
|
|
break;
|
|
default:
|
|
printUTF8Char(currChar);
|
|
break;
|
|
}
|
|
}
|
|
*ostreamPtr << flush;
|
|
} // -- printWithXMLEntities
|
|
|
|
/**
|
|
* Replaces any occurances of -- inside comment data with - -
|
|
* @param data the comment data (does not include start and end tags)
|
|
**/
|
|
void XMLPrinter::printComment(const String& data) {
|
|
DOM_CHAR prevChar;
|
|
DOM_CHAR currChar;
|
|
|
|
if (&data == &NULL_STRING) return;
|
|
|
|
//-- since comments will start with <!-- set prevChar to '-'
|
|
prevChar = DASH;
|
|
|
|
for (int i = 0; i < data.length(); i++) {
|
|
currChar = data.charAt(i);
|
|
|
|
if (currChar == DASH && prevChar == DASH)
|
|
*ostreamPtr << SPACE << DASH;
|
|
else
|
|
printUTF8Char(currChar);
|
|
|
|
prevChar = currChar;
|
|
}
|
|
|
|
//-- handle last char as a dash
|
|
if (prevChar == DASH) *ostreamPtr << SPACE;
|
|
|
|
} //-- printComment
|
|
|