/* * The contents of this file are subject to the Mozilla Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code is XSL:P XSLT processor. * * The Initial Developer of the Original Code is Keith Visco. * Portions created by Keith Visco (C) 1999 Keith Visco. * All Rights Reserved. * * Contributor(s): * Keith Visco, kvisco@ziplink.net * -- original author * Majkel Kretschmar * -- UTF-8 changes * Bob Miller, kbob@oblix.com * -- plugged core leak. * * $Id: XMLPrinter.cpp,v 1.8 2001-05-14 14:22:45 axel%pike.org Exp $ */ #include "printers.h" //--------------------------------/ //- Implementation of XMLPrinter -/ //--------------------------------/ /** * A class for printing XML nodes. * This class was ported from XSL:P Java source * @author Keith Visco * @version $Revision: 1.8 $ $Date: 2001-05-14 14:22:45 $ **/ /** * The default indent size **/ const int XMLPrinter::DEFAULT_INDENT = 2; const String XMLPrinter::AMP_ENTITY = "&"; const String XMLPrinter::GT_ENTITY = ">"; const String XMLPrinter::LT_ENTITY = "<"; const String XMLPrinter::HEX_ENTITY = "&#"; const String XMLPrinter::CDATA_END = "]]>"; const String XMLPrinter::CDATA_START = ""; const String XMLPrinter::DOCTYPE_START = ""; const String XMLPrinter::DOUBLE_QUOTE = "\""; const String XMLPrinter::EQUALS = "="; const String XMLPrinter::FORWARD_SLASH = "/"; const String XMLPrinter::L_ANGLE_BRACKET = "<"; const String XMLPrinter::PI_START = ""; const String XMLPrinter::PUBLIC = "PUBLIC"; const String XMLPrinter::R_ANGLE_BRACKET = ">"; const String XMLPrinter::SEMICOLON = ";"; const String XMLPrinter::SPACE = " "; const String XMLPrinter::SYSTEM = "SYSTEM"; const String XMLPrinter::XML_DECL = "xml version="; // chars const char XMLPrinter::AMPERSAND = '&'; const char XMLPrinter::GT = '>'; const char XMLPrinter::LT = '<'; const char XMLPrinter::DASH = '-'; const char XMLPrinter::TX_CR = '\r'; const char XMLPrinter::TX_LF = '\n'; //---------------/ //- Contructors -/ //---------------/ /** * Default Constructor. Creates a new XMLPrinter using cout as the ostream. **/ XMLPrinter::XMLPrinter() { initialize(cout, DEFAULT_INDENT); } //-- XMLPrinter /** * Creates a new XML Printer using the given ostream for output * @param os the out stream to use for output **/ XMLPrinter::XMLPrinter(ostream& os) { initialize(os, DEFAULT_INDENT); } //-- XMLPrinter /** * Creates a new XML Printer using the given ostream * for output, and nodes are indenting using the specified * indent size * @param os the out stream to use for output * @param indent the number of spaces to indent **/ XMLPrinter::XMLPrinter (ostream& os, int indent) { initialize(os, indent); } //-- XMLPrinter void XMLPrinter::initialize(ostream& os, int indentSize) { ostreamPtr = &os; indentChar = ' '; version = "1.0"; entityTokens = "&<>"; setIndentSize(indentSize); unescapeCDATA = MB_FALSE; useEmptyElementShorthand = MB_TRUE; useFormat = MB_FALSE; } //-- initialize // destructor is needed so that subclasses are destroyed. XMLPrinter::~XMLPrinter() { } /** * Prints the given Node * @param node the Node to print **/ void XMLPrinter::print(Node* node) { String currentIndent; print(node,currentIndent); *ostreamPtr<indentSize = indentSize; indent.clear(); for (int i = 0; i < indentSize; i++) { indent.append(indentChar); } } //-- setIndentSize /** * Sets whether or not to "unwrap" CDATA Sections * when printing. By Default CDATA Sections are left as is. * @param unescape the boolean indicating whether or not * to unescape CDATA Sections **/ void XMLPrinter::setUnescapeCDATA(MBool unescape) { unescapeCDATA = unescape; } //-- setUnescapeCDATA void XMLPrinter::setUseEmptyElementShorthand(MBool useShorthand) { useEmptyElementShorthand = useShorthand; } //-- setUseEmptyElementShorthand /** * Sets whether or not this XMLPrinter should add whitespace * to pretty print the XML tree * @param useFormat a boolean to indicate whether to allow the * XMLPrinter to add whitespace to the XML tree. (false by default) **/ void XMLPrinter::setUseFormat(MBool useFormat) { this->useFormat = useFormat; } //-- setUseFormat //---------------------/ //- Protected Methods -/ //---------------------/ /** * prints the given node to this XMLPrinter's Writer. If the * useFormat flag has been set, the node will be printed with * indentation equal to currentIndent + indentSize * @param node the Node to print * @param currentIndent the current indent String * @return true, if and only if a new line was printed at * the end of printing the given node **/ MBool XMLPrinter::print(Node* node, String& currentIndent) { ostream& out = *this->ostreamPtr; //-- if (node == null) return false; switch(node->getNodeType()) { //-- print Document Node case Node::DOCUMENT_NODE: { Document* doc = (Document*)node; out << PI_START << XML_DECL << DOUBLE_QUOTE; out << version; out << DOUBLE_QUOTE << PI_END << endl; //-- printDoctype(doc.getDoctype()); Node *node = doc->getFirstChild(); while (node) { print(node,currentIndent); node = node->getNextSibling(); } break; } //-- print Attribute Node case Node::ATTRIBUTE_NODE: { Attr* attr = (Attr*)node; //out << attr->getName(); out << attr->getNodeName(); const String& data = attr->getNodeValue(); if (&data != &NULL_STRING) { out << EQUALS << DOUBLE_QUOTE; out << data; out << DOUBLE_QUOTE; } break; } //-- print Element case Node::ELEMENT_NODE: { Element* element = (Element*)node; out << L_ANGLE_BRACKET; out << element->getNodeName(); NamedNodeMap* attList = element->getAttributes(); if (attList) { //-- print attribute nodes Attr* att; for (int i = 0; i < attList->getLength(); i++) { att = (Attr*)attList->item(i); const String& data = att->getValue(); //out << SPACE << * (att->getName()); out << SPACE << att->getNodeName(); if (&data != &NULL_STRING) { out << EQUALS << DOUBLE_QUOTE; out << data; out << DOUBLE_QUOTE; } } } Node* child = element->getFirstChild(); if (!child && (useEmptyElementShorthand)) { out << FORWARD_SLASH << R_ANGLE_BRACKET; if (useFormat) { out << endl; return MB_TRUE; } } else { // Either children, or no shorthand MBool newLine = MB_FALSE; out << R_ANGLE_BRACKET; if (useFormat && child) { // Fix formatting of PCDATA elements by Peter Marks and // David King Lassman // -- add if statement to check for text node before // adding line break if (child->getNodeType() != Node::TEXT_NODE) { out << endl; newLine = MB_TRUE; } } String newIndent(indent); newIndent.append(currentIndent); Node *lastChild = child; while (child) { if (useFormat && newLine) { out << newIndent; } newLine = print(child,newIndent); lastChild = child; child = child->getNextSibling(); } if (useFormat) { // Fix formatting of PCDATA elements by Peter Marks and // David King Lassman // -- add if statement to check for text node before // adding line break if (lastChild) { if (lastChild->getNodeType() != Node::TEXT_NODE) { out << currentIndent; } } } out << L_ANGLE_BRACKET << FORWARD_SLASH; out << element->getNodeName(); out << R_ANGLE_BRACKET; if (useFormat) { Node* sibling = node->getNextSibling(); if (!sibling || sibling->getNodeType() != Node::TEXT_NODE) { out<getData(); printWithXMLEntities(data); break; } case Node::CDATA_SECTION_NODE: if (unescapeCDATA) printWithXMLEntities( ((Text*)node)->getData() ); else { const String& data = ((Text*)node)->getData(); out << CDATA_START; printUTF8Chars(data); out << CDATA_END; } break; case Node::COMMENT_NODE: out << COMMENT_START; printComment(((CharacterData*)node)->getData()); out << COMMENT_END; if (useFormat) { out <getNodeName() << SEMICOLON; break; case Node::PROCESSING_INSTRUCTION_NODE: { ProcessingInstruction* pi = (ProcessingInstruction*)node; out << PI_START; out << pi->getTarget(); out << SPACE; out << pi->getData(); out << PI_END; if (useFormat) { out <ostreamPtr; // DOM_CHAR is 16-bits so we only need to cover up to 0xFFFF //-- 0x0000-0x007F if (ch < 128) out << (char)ch; /* else { out << HEX_ENTITY; out << ch; out << SEMICOLON; } */ //-- 0x0080-0x07FF else if (ch < 2048) { out << (char) (192+(ch/64)); // 0xC0 + x/64 out << (char) (128+(ch%64)); // 0x80 + x%64 } //-- 0x800-0xFFFF else { out << (char) (224+(ch/4096)); // 0xE0 + x/64^2 out << (char) (128+((ch/64)%64)); // 0x80 + (x/64)%64 out << (char) (128+(ch%64)); // 0x80 + x%64 } } //-- printUTF8Char /** * Print the proper UTF8 characters (ISO 10646) * based on code submitted by Majkel Kretschmar **/ void XMLPrinter::printUTF8Chars(const String& data) { ostream& out = *this->ostreamPtr; int i = 0; while(i < data.length()) { DOM_CHAR ch = data.charAt(i++); // DOM_CHAR is 16-bits so we only need to cover up to 0xFFFF //-- 0x0000-0x007F if (ch < 128) out << (char)ch; /* else { out << HEX_ENTITY; out << ch; out << SEMICOLON; } */ //-- 0x0080-0x07FF else if (ch < 2048) { out << (char)(192+(ch/64)); // 0xC0 + x/64 out << (char)(128+(ch%64)); // 0x80 + x%64 } //-- 0x800-0xFFFF else { out << (char)(224+(ch/4096)); // 0xE0 + x/64^2 out << (char)(128+((ch/64)%64)); // 0x80 + (x/64)%64 out << (char)(128+(ch%64)); // 0x80 + x%64 } } } //-- printUTF8Chars //-------------------/ //- Private Methods -/ //-------------------/ void XMLPrinter::printWithXMLEntities(const String& data) { DOM_CHAR currChar; if (&data == &NULL_STRING) return; for (int i = 0; i < data.length(); i++) { currChar = data.charAt(i); switch (currChar) { case AMPERSAND: *ostreamPtr << AMP_ENTITY; break; case LT: *ostreamPtr << LT_ENTITY; break; case GT: *ostreamPtr << GT_ENTITY; break; default: printUTF8Char(currChar); break; } } *ostreamPtr << flush; } // -- printWithXMLEntities /** * Replaces any occurances of -- inside comment data with - - * @param data the comment data (does not include start and end tags) **/ void XMLPrinter::printComment(const String& data) { DOM_CHAR prevChar; DOM_CHAR currChar; if (&data == &NULL_STRING) return; //-- since comments will start with