Add package.html, reformat code

git-svn-id: https://svn.apache.org/repos/asf/maven/maven-1/plugins/trunk@368518 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
ltheussl 2006-01-12 23:35:13 +00:00
parent 69a739d291
commit 1dc421af44
4 changed files with 363 additions and 225 deletions

View File

@ -16,7 +16,6 @@ package org.apache.maven.html2xdoc;
* limitations under the License.
* ====================================================================
*/
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@ -38,15 +37,15 @@ import org.dom4j.Node;
*
* @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
*/
public class Html2XdocBean {
public class Html2XdocBean
{
/** The Log to which logging calls will be made. */
private static final Log log = LogFactory.getLog(Html2XdocBean.class);
private static final Log log = LogFactory.getLog( Html2XdocBean.class );
/**
* Used to create the output document
*/
private DocumentFactory factory = new DocumentFactory();
private DocumentFactory factory = new DocumentFactory( );
/**
* The current node to attach the sub-nodes.
@ -79,22 +78,29 @@ public class Html2XdocBean {
* @param html the input html document
* @return Document
*/
public Document convert(Document html) {
Document doc = factory.createDocument();
Element root = doc.addElement("document");
Element properties = root.addElement("properties");
Element title = properties.addElement("title");
title.setText(html.valueOf("/html/head/title"));
public Document convert( Document html )
{
Document doc = factory.createDocument( );
Element root = doc.addElement( "document" );
Element properties = root.addElement( "properties" );
Element title = properties.addElement( "title" );
Element body = root.addElement("body");
title.setText( html.valueOf( "/html/head/title" ) );
Element htmlContent = (Element) html.selectSingleNode("/html/body");
if (htmlContent == null) {
log.info("No body element found for HTML document: "
+ html.asXML());
} else {
addSections(body, htmlContent);
Element body = root.addElement( "body" );
Element htmlContent = (Element) html.selectSingleNode( "/html/body" );
if ( htmlContent == null )
{
log.info( "No body element found for HTML document: "
+ html.asXML( ) );
}
else
{
addSections( body, htmlContent );
}
return doc;
}
@ -106,19 +112,24 @@ public class Html2XdocBean {
* @param output the output destination
* @param body the block of HTML markup to convert
*/
protected void addSections(Element output, Element body) {
List content = getBodyContent(body.content());
protected void addSections( Element output, Element body )
{
List content = getBodyContent( body.content( ) );
for (Iterator iter = content.iterator(); iter.hasNext();) {
Node node = (Node) iter.next();
if (isHeading(node)) {
makeSection(output, node);
} else {
guaranteeHasSection(output);
processNode(node);
for ( Iterator iter = content.iterator( ); iter.hasNext( ); )
{
Node node = (Node) iter.next( );
if ( isHeading( node ) )
{
makeSection( output, node );
}
else
{
guaranteeHasSection( output );
processNode( node );
}
}
}
/**
@ -127,13 +138,19 @@ public class Html2XdocBean {
*
* @param node the node to process
*/
private void processNode(Node node) {
if (isCharacterData(node)) {
addTextNode(node);
} else if (isTextFormatting(node)) {
addFormattingNode(node);
} else {
addNode(node);
private void processNode( Node node )
{
if ( isCharacterData( node ) )
{
addTextNode( node );
}
else if ( isTextFormatting( node ) )
{
addFormattingNode( node );
}
else
{
addNode( node );
}
}
@ -145,9 +162,10 @@ public class Html2XdocBean {
* @return true if the node is used to modify the formatting of the
* text; otherwise, false
*/
protected boolean isTextFormatting(Node node) {
protected boolean isTextFormatting( Node node )
{
// Ultimately this needs bold, italic, and so on
return node.getName() != null && node.getName().equals("a");
return ( node.getName( ) != null ) && node.getName( ).equals( "a" );
}
/**
@ -157,9 +175,10 @@ public class Html2XdocBean {
* @param node the node to check
* @return true if the node is a text node; otherwise, false.
*/
protected boolean isCharacterData(Node node) {
protected boolean isCharacterData( Node node )
{
return node instanceof CharacterData
&& (node instanceof Comment) == false;
&& ( ( node instanceof Comment ) == false );
}
/**
@ -169,9 +188,11 @@ public class Html2XdocBean {
* @return true if the given node is a heading element
* (h1, h2, h3 etc); otherwise, false
*/
protected boolean isHeading(Node node) {
String name = node.getName();
return name != null && name.startsWith("h");
protected boolean isHeading( Node node )
{
String name = node.getName( );
return ( name != null ) && name.startsWith( "h" );
}
/**
@ -180,11 +201,16 @@ public class Html2XdocBean {
* @param node the node to check
* @return the integer level of the heading
*/
protected int determineHeadingLevel(Node node) {
try {
String name = node.getName().substring(1);
return Integer.parseInt(name);
} catch (NumberFormatException nfe) {
protected int determineHeadingLevel( Node node )
{
try
{
String name = node.getName( ).substring( 1 );
return Integer.parseInt( name );
}
catch ( NumberFormatException nfe )
{
return 1;
}
}
@ -196,16 +222,22 @@ public class Html2XdocBean {
* @param output the output document to attach the section
* @param node the node to base making a section on
*/
protected void makeSection(Element output, Node node) {
int level = determineHeadingLevel(node);
if (needsNewSection(node)) {
currentNode = output.addElement("section");
protected void makeSection( Element output, Node node )
{
int level = determineHeadingLevel( node );
if ( needsNewSection( node ) )
{
currentNode = output.addElement( "section" );
currentSectionHeadingLevel = level;
currentSectionNode = currentNode;
} else {
currentNode = currentSectionNode.addElement("subsection");
}
currentNode.addAttribute("name", getSectionText(node));
else
{
currentNode = currentSectionNode.addElement( "subsection" );
}
currentNode.addAttribute( "name", getSectionText( node ) );
currentParaNode = null;
}
@ -214,18 +246,25 @@ public class Html2XdocBean {
* contains an embedded element (such as an &lt;a&gt; element)
* then return its text
*/
protected String getSectionText(Node node) {
String text = node.getText().trim();
if (text.length() <= 0 && node instanceof Element) {
protected String getSectionText( Node node )
{
String text = node.getText( ).trim( );
if ( ( text.length( ) <= 0 ) && node instanceof Element )
{
Element element = (Element) node;
// maybe we contain a hypertext link
List childElements = element.elements();
if (! childElements.isEmpty()) {
Node child = (Node) childElements.get(0);
return child.getText();
List childElements = element.elements( );
if ( !childElements.isEmpty( ) )
{
Node child = (Node) childElements.get( 0 );
return child.getText( );
}
}
return text;
}
@ -238,18 +277,22 @@ public class Html2XdocBean {
* @return true if the current node's information calls for a new
* section; otherwise, false
*/
protected boolean needsNewSection(Node node) {
int level = determineHeadingLevel(node);
return level <= currentSectionHeadingLevel
|| currentSectionNode == null;
protected boolean needsNewSection( Node node )
{
int level = determineHeadingLevel( node );
return ( level <= currentSectionHeadingLevel )
|| ( currentSectionNode == null );
}
/**
* Ensures a paragraph node exists, creating one under the current node if needed.
*/
private void guaranteeHasParaNode() {
if (currentParaNode == null) {
currentParaNode = currentNode.addElement("p");
private void guaranteeHasParaNode( )
{
if ( currentParaNode == null )
{
currentParaNode = currentNode.addElement( "p" );
}
}
@ -257,11 +300,13 @@ public class Html2XdocBean {
* Makes sure the current node is a section, creating one if necessary.
* @param output the output element to add the section to
*/
private void guaranteeHasSection(Element output) {
if (currentNode == null) {
private void guaranteeHasSection( Element output )
{
if ( currentNode == null )
{
// we have a section with no name
// should we default it to be the same as the document title?
currentNode = output.addElement("section");
currentNode = output.addElement( "section" );
}
}
@ -269,12 +314,15 @@ public class Html2XdocBean {
* Add the node to the current node.
* @param node the node to add
*/
private void addNode(Node node) {
if (currentParaNode != null && ! shouldBreakPara(node)) {
currentParaNode.add(cloneNode(node));
private void addNode( Node node )
{
if ( ( currentParaNode != null ) && !shouldBreakPara( node ) )
{
currentParaNode.add( cloneNode( node ) );
}
else {
currentNode.add(cloneNode(node));
else
{
currentNode.add( cloneNode( node ) );
currentParaNode = null;
}
}
@ -283,27 +331,31 @@ public class Html2XdocBean {
* @return true if the paragraph should be split, such as for a br or p
* tag
*/
protected boolean shouldBreakPara(Node node) {
String name = node.getName();
return "p".equals(name) || "br".equals(name);
protected boolean shouldBreakPara( Node node )
{
String name = node.getName( );
return "p".equals( name ) || "br".equals( name );
}
/**
* Adds the text of the node to the current paragraph.
* @param node the node to add
*/
private void addTextNode(Node node) {
guaranteeHasParaNode();
currentParaNode.addText(node.getText());
private void addTextNode( Node node )
{
guaranteeHasParaNode( );
currentParaNode.addText( node.getText( ) );
}
/**
* Adds the node to the current paragraph.
* @param node the node to add
*/
private void addFormattingNode(Node node) {
guaranteeHasParaNode();
currentParaNode.add(cloneNode(node));
private void addFormattingNode( Node node )
{
guaranteeHasParaNode( );
currentParaNode.add( cloneNode( node ) );
}
/**
@ -313,54 +365,84 @@ public class Html2XdocBean {
* @param content the content node list to obtain body content from
* @return List
*/
protected List getBodyContent(List content) {
protected List getBodyContent( List content )
{
// let's turn <pre> into <source> and concatenate consecutive entries
Element lastPre = null;
LinkedList list = new LinkedList();
LinkedList list = new LinkedList( );
boolean lastWasElement = true;
for (Iterator iter = content.iterator(); iter.hasNext();) {
Node node = (Node) iter.next();
if (isPre(node)) {
if (lastPre == null) {
lastPre = factory.createElement("source");
list.add(lastPre);
for ( Iterator iter = content.iterator( ); iter.hasNext( ); )
{
Node node = (Node) iter.next( );
if ( isPre( node ) )
{
if ( lastPre == null )
{
lastPre = factory.createElement( "source" );
list.add( lastPre );
}
lastPre.addText(node.getText());
} else {
if (isWhitespace(node) && lastWasElement) {
if (lastPre != null) {
lastPre.addText(node.getText());
lastPre.addText( node.getText( ) );
}
else
{
if ( isWhitespace( node ) && lastWasElement )
{
if ( lastPre != null )
{
lastPre.addText( node.getText( ) );
}
} else {
}
else
{
lastWasElement = node instanceof Element;
if (lastWasElement) {
if ( lastWasElement )
{
lastPre = null;
}
list.add(node);
list.add( node );
}
}
}
if (list.size() == 0) return list;
if ( list.size( ) == 0 )
{
return list;
}
// now let's remove any whitespace text nodes at the beginning and end
while (true) {
Node node = (Node) list.getFirst();
if (isWhitespace(node)) {
list.removeFirst();
while ( true )
{
Node node = (Node) list.getFirst( );
if ( isWhitespace( node ) )
{
list.removeFirst( );
continue;
}
break;
}
while (true) {
Node node = (Node) list.getLast();
if (isWhitespace(node)) {
list.removeLast();
while ( true )
{
Node node = (Node) list.getLast( );
if ( isWhitespace( node ) )
{
list.removeLast( );
continue;
}
break;
}
return list;
}
@ -368,11 +450,15 @@ public class Html2XdocBean {
* @param node the node to check
* @return true if the node is a pre tag; otherwise false.
*/
protected boolean isPre(Node node) {
if (node instanceof Element) {
protected boolean isPre( Node node )
{
if ( node instanceof Element )
{
Element element = (Element) node;
return element.getName().equals("pre");
return element.getName( ).equals( "pre" );
}
return false;
}
@ -380,21 +466,25 @@ public class Html2XdocBean {
* @param node the node to check
* @return true if the given node is a whitespace text node
*/
protected boolean isWhitespace(Node node) {
if (node instanceof CharacterData) {
String text = node.getText();
return text.trim().length() <= 0;
protected boolean isWhitespace( Node node )
{
if ( node instanceof CharacterData )
{
String text = node.getText( );
return text.trim( ).length( ) <= 0;
}
// if (node instanceof Element) {
// String name = node.getName();
// if (name.equals("p")) {
// String text = node.getText();
// return text.trim().length() <= 0;
// }
// if (name.equals("br")) {
// return true;
// }
// }
// if (node instanceof Element) {
// String name = node.getName();
// if (name.equals("p")) {
// String text = node.getText();
// return text.trim().length() <= 0;
// }
// if (name.equals("br")) {
// return true;
// }
// }
return false;
}
@ -404,13 +494,17 @@ public class Html2XdocBean {
* @param node the node to clone
* @return Node the cloned node
*/
protected Node cloneNode(Node node) {
Node answer = (Node) node.clone();
if (answer instanceof Element) {
protected Node cloneNode( Node node )
{
Node answer = (Node) node.clone( );
if ( answer instanceof Element )
{
Element element = (Element) answer;
element.normalize();
element.normalize( );
}
return answer;
}
}

View File

@ -0,0 +1,12 @@
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<html>
<head>
<title>org.apache.maven.html2xdoc</title>
</head>
<body>
<p>
Contains utility classes for converting an HTML document into an
XDoc-compliant XML document.
</p>
</body>
</html>

View File

@ -59,13 +59,8 @@
*
* TagXMLDoclet.java,v 1.1 2003/02/07 12:10:44 jstrachan Exp
*/
package org.apache.maven.html2xdoc;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URL;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
@ -77,62 +72,74 @@ import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URL;
/**
* A test harness for the HTML to XDOC converter
*
* @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
*/
public class TestHtml2Xdoc extends TestCase {
public class TestHtml2Xdoc extends TestCase
{
protected boolean verbose = false;
public static void main( String[] args ) {
TestRunner.run( suite() );
public TestHtml2Xdoc( String testName )
{
super( testName );
}
public static Test suite() {
public static void main( String[] args )
{
TestRunner.run( suite( ) );
}
public static Test suite( )
{
return new TestSuite( TestHtml2Xdoc.class );
}
public TestHtml2Xdoc(String testName) {
super(testName);
}
// Test cases
//-------------------------------------------------------------------------
public void testOne() throws Exception {
assertConversion("missingParaBug.html", "missingParaBug.xml");
assertConversion("linkInHeading.html", "linkInHeading.xml");
assertConversion("codeinpara.html", "codeinpara.xml");
assertConversion("input1.html", "output1.xml");
assertConversion("h1h2.html", "h1h2.xml");
assertConversion("h2h3.html", "h1h2.xml");
assertConversion("h2h4.html", "h1h2.xml");
assertConversion("h3h4.html", "h1h2.xml");
assertConversion("link.html", "link.xml");
assertConversion("comment.html", "comment.xml");
public void testOne( ) throws Exception
{
assertConversion( "missingParaBug.html", "missingParaBug.xml" );
assertConversion( "linkInHeading.html", "linkInHeading.xml" );
assertConversion( "codeinpara.html", "codeinpara.xml" );
assertConversion( "input1.html", "output1.xml" );
assertConversion( "h1h2.html", "h1h2.xml" );
assertConversion( "h2h3.html", "h1h2.xml" );
assertConversion( "h2h4.html", "h1h2.xml" );
assertConversion( "h3h4.html", "h1h2.xml" );
assertConversion( "link.html", "link.xml" );
assertConversion( "comment.html", "comment.xml" );
}
// Implementation methods
//-------------------------------------------------------------------------
protected void assertConversion(String input, String output) throws Exception {
Html2XdocBean converter = createConverter();
Document inputDoc = parseHtml(input);
protected void assertConversion( String input, String output )
throws Exception
{
Html2XdocBean converter = createConverter( );
Document inputDoc = parseHtml( input );
Document expectedDoc = parse( output );
Document expectedDoc = parse(output);
Document actualDoc = converter.convert( inputDoc );
Document actualDoc = converter.convert(inputDoc);
if (verbose) {
System.out.println("Comparing: " + input + " to: " + output);
System.out.println("Parsed: " + inputDoc.asXML());
System.out.println("Generated: " + actualDoc.asXML());
System.out.println();
System.out.println();
if ( verbose )
{
System.out.println( "Comparing: " + input + " to: " + output );
System.out.println( "Parsed: " + inputDoc.asXML( ) );
System.out.println( "Generated: " + actualDoc.asXML( ) );
System.out.println( );
System.out.println( );
}
assertEqual("Output for: " + input + " does not match: " + output, expectedDoc, actualDoc);
assertEqual( "Output for: " + input + " does not match: " + output,
expectedDoc, actualDoc );
}
/**
@ -142,32 +149,36 @@ public class TestHtml2Xdoc extends TestCase {
* @param expectedDoc
* @param actualDoc
*/
protected void assertEqual(
String message,
Document expectedDoc,
Document actualDoc) throws IOException {
protected void assertEqual( String message, Document expectedDoc,
Document actualDoc ) throws IOException
{
String expectedText = getPrettyPrintText( expectedDoc );
String actualText = getPrettyPrintText( actualDoc );
String expectedText = getPrettyPrintText(expectedDoc);
String actualText = getPrettyPrintText(actualDoc);
if ( !expectedText.equals( actualText ) )
{
System.out.println( "Expected: " + expectedText );
System.out.println( "Actual: " + actualText );
}
if (!expectedText.equals(actualText)) {
System.out.println("Expected: " + expectedText);
System.out.println("Actual: " + actualText);
}
assertEquals(message, expectedText, actualText);
assertEquals( message, expectedText, actualText );
}
/**
* @param doc the document to pretty-print
* @return the pretty-printed XML text of the document
*/
protected String getPrettyPrintText(Document doc) throws IOException {
OutputFormat format = OutputFormat.createPrettyPrint();
StringWriter buffer = new StringWriter();
XMLWriter writer = new XMLWriter(buffer, format);
writer.write(doc);
writer.close();
return buffer.toString();
protected String getPrettyPrintText( Document doc )
throws IOException
{
OutputFormat format = OutputFormat.createPrettyPrint( );
StringWriter buffer = new StringWriter( );
XMLWriter writer = new XMLWriter( buffer, format );
writer.write( doc );
writer.close( );
return buffer.toString( );
}
/**
@ -176,10 +187,13 @@ public class TestHtml2Xdoc extends TestCase {
* @param input
* @return Document
*/
protected Document parse(String input) throws Exception {
URL url = getClassURL(input);
SAXReader saxReader = new SAXReader();
return saxReader.read(url);
protected Document parse( String input )
throws Exception
{
URL url = getClassURL( input );
SAXReader saxReader = new SAXReader( );
return saxReader.read( url );
}
/**
@ -189,28 +203,35 @@ public class TestHtml2Xdoc extends TestCase {
* @param input
* @return Document
*/
protected Document parseHtml(String input) throws Exception {
URL url = getClassURL(input);
SAXParser htmlParser = new SAXParser();
htmlParser.setProperty(
"http://cyberneko.org/html/properties/names/elems",
"lower"
);
htmlParser.setProperty(
"http://cyberneko.org/html/properties/names/attrs",
"lower"
);
SAXReader saxReader = new SAXReader(htmlParser);
return saxReader.read(url);
protected Document parseHtml( String input )
throws Exception
{
URL url = getClassURL( input );
SAXParser htmlParser = new SAXParser( );
htmlParser.setProperty( "http://cyberneko.org/html/properties/names/elems",
"lower" );
htmlParser.setProperty( "http://cyberneko.org/html/properties/names/attrs",
"lower" );
SAXReader saxReader = new SAXReader( htmlParser );
return saxReader.read( url );
}
protected URL getClassURL(String input) throws Exception {
URL url = getClass().getResource(input);
assertTrue("Could not find resource on classpath for: " + input, url != null);
return url;
}
protected URL getClassURL( String input )
throws Exception
{
URL url = getClass( ).getResource( input );
protected Html2XdocBean createConverter() {
return new Html2XdocBean();
assertTrue( "Could not find resource on classpath for: " + input,
url != null );
return url;
}
protected Html2XdocBean createConverter( )
{
return new Html2XdocBean( );
}
}

View File

@ -0,0 +1,11 @@
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<html>
<head>
<title>org.apache.maven.html2xdoc</title>
</head>
<body>
<p>
Test classes for org.apache.maven.html2xdoc.
</p>
</body>
</html>