Add package.html, reformat code

git-svn-id: https://svn.apache.org/repos/asf/maven/maven-1/plugins/trunk@368518 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
ltheussl 2006-01-12 23:35:13 +00:00
parent 69a739d291
commit 1dc421af44
4 changed files with 363 additions and 225 deletions

View File

@ -16,7 +16,6 @@ package org.apache.maven.html2xdoc;
* limitations under the License. * limitations under the License.
* ==================================================================== * ====================================================================
*/ */
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
@ -38,15 +37,15 @@ import org.dom4j.Node;
* *
* @author <a href="mailto:jstrachan@apache.org">James Strachan</a> * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
*/ */
public class Html2XdocBean { public class Html2XdocBean
{
/** The Log to which logging calls will be made. */ /** The Log to which logging calls will be made. */
private static final Log log = LogFactory.getLog(Html2XdocBean.class); private static final Log log = LogFactory.getLog( Html2XdocBean.class );
/** /**
* Used to create the output document * Used to create the output document
*/ */
private DocumentFactory factory = new DocumentFactory(); private DocumentFactory factory = new DocumentFactory( );
/** /**
* The current node to attach the sub-nodes. * The current node to attach the sub-nodes.
@ -79,22 +78,29 @@ public class Html2XdocBean {
* @param html the input html document * @param html the input html document
* @return Document * @return Document
*/ */
public Document convert(Document html) { public Document convert( Document html )
Document doc = factory.createDocument(); {
Element root = doc.addElement("document"); Document doc = factory.createDocument( );
Element properties = root.addElement("properties"); Element root = doc.addElement( "document" );
Element title = properties.addElement("title"); Element properties = root.addElement( "properties" );
title.setText(html.valueOf("/html/head/title")); Element title = properties.addElement( "title" );
Element body = root.addElement("body"); title.setText( html.valueOf( "/html/head/title" ) );
Element htmlContent = (Element) html.selectSingleNode("/html/body"); Element body = root.addElement( "body" );
if (htmlContent == null) {
log.info("No body element found for HTML document: " Element htmlContent = (Element) html.selectSingleNode( "/html/body" );
+ html.asXML());
} else { if ( htmlContent == null )
addSections(body, htmlContent); {
log.info( "No body element found for HTML document: "
+ html.asXML( ) );
} }
else
{
addSections( body, htmlContent );
}
return doc; return doc;
} }
@ -106,19 +112,24 @@ public class Html2XdocBean {
* @param output the output destination * @param output the output destination
* @param body the block of HTML markup to convert * @param body the block of HTML markup to convert
*/ */
protected void addSections(Element output, Element body) { protected void addSections( Element output, Element body )
List content = getBodyContent(body.content()); {
List content = getBodyContent( body.content( ) );
for (Iterator iter = content.iterator(); iter.hasNext();) { for ( Iterator iter = content.iterator( ); iter.hasNext( ); )
Node node = (Node) iter.next(); {
if (isHeading(node)) { Node node = (Node) iter.next( );
makeSection(output, node);
} else { if ( isHeading( node ) )
guaranteeHasSection(output); {
processNode(node); makeSection( output, node );
}
else
{
guaranteeHasSection( output );
processNode( node );
} }
} }
} }
/** /**
@ -127,13 +138,19 @@ public class Html2XdocBean {
* *
* @param node the node to process * @param node the node to process
*/ */
private void processNode(Node node) { private void processNode( Node node )
if (isCharacterData(node)) { {
addTextNode(node); if ( isCharacterData( node ) )
} else if (isTextFormatting(node)) { {
addFormattingNode(node); addTextNode( node );
} else { }
addNode(node); else if ( isTextFormatting( node ) )
{
addFormattingNode( node );
}
else
{
addNode( node );
} }
} }
@ -145,9 +162,10 @@ public class Html2XdocBean {
* @return true if the node is used to modify the formatting of the * @return true if the node is used to modify the formatting of the
* text; otherwise, false * text; otherwise, false
*/ */
protected boolean isTextFormatting(Node node) { protected boolean isTextFormatting( Node node )
{
// Ultimately this needs bold, italic, and so on // Ultimately this needs bold, italic, and so on
return node.getName() != null && node.getName().equals("a"); return ( node.getName( ) != null ) && node.getName( ).equals( "a" );
} }
/** /**
@ -157,9 +175,10 @@ public class Html2XdocBean {
* @param node the node to check * @param node the node to check
* @return true if the node is a text node; otherwise, false. * @return true if the node is a text node; otherwise, false.
*/ */
protected boolean isCharacterData(Node node) { protected boolean isCharacterData( Node node )
{
return node instanceof CharacterData return node instanceof CharacterData
&& (node instanceof Comment) == false; && ( ( node instanceof Comment ) == false );
} }
/** /**
@ -169,9 +188,11 @@ public class Html2XdocBean {
* @return true if the given node is a heading element * @return true if the given node is a heading element
* (h1, h2, h3 etc); otherwise, false * (h1, h2, h3 etc); otherwise, false
*/ */
protected boolean isHeading(Node node) { protected boolean isHeading( Node node )
String name = node.getName(); {
return name != null && name.startsWith("h"); String name = node.getName( );
return ( name != null ) && name.startsWith( "h" );
} }
/** /**
@ -180,11 +201,16 @@ public class Html2XdocBean {
* @param node the node to check * @param node the node to check
* @return the integer level of the heading * @return the integer level of the heading
*/ */
protected int determineHeadingLevel(Node node) { protected int determineHeadingLevel( Node node )
try { {
String name = node.getName().substring(1); try
return Integer.parseInt(name); {
} catch (NumberFormatException nfe) { String name = node.getName( ).substring( 1 );
return Integer.parseInt( name );
}
catch ( NumberFormatException nfe )
{
return 1; return 1;
} }
} }
@ -196,16 +222,22 @@ public class Html2XdocBean {
* @param output the output document to attach the section * @param output the output document to attach the section
* @param node the node to base making a section on * @param node the node to base making a section on
*/ */
protected void makeSection(Element output, Node node) { protected void makeSection( Element output, Node node )
int level = determineHeadingLevel(node); {
if (needsNewSection(node)) { int level = determineHeadingLevel( node );
currentNode = output.addElement("section");
if ( needsNewSection( node ) )
{
currentNode = output.addElement( "section" );
currentSectionHeadingLevel = level; currentSectionHeadingLevel = level;
currentSectionNode = currentNode; currentSectionNode = currentNode;
} else {
currentNode = currentSectionNode.addElement("subsection");
} }
currentNode.addAttribute("name", getSectionText(node)); else
{
currentNode = currentSectionNode.addElement( "subsection" );
}
currentNode.addAttribute( "name", getSectionText( node ) );
currentParaNode = null; currentParaNode = null;
} }
@ -214,18 +246,25 @@ public class Html2XdocBean {
* contains an embedded element (such as an &lt;a&gt; element) * contains an embedded element (such as an &lt;a&gt; element)
* then return its text * then return its text
*/ */
protected String getSectionText(Node node) { protected String getSectionText( Node node )
String text = node.getText().trim(); {
if (text.length() <= 0 && node instanceof Element) { String text = node.getText( ).trim( );
if ( ( text.length( ) <= 0 ) && node instanceof Element )
{
Element element = (Element) node; Element element = (Element) node;
// maybe we contain a hypertext link // maybe we contain a hypertext link
List childElements = element.elements(); List childElements = element.elements( );
if (! childElements.isEmpty()) {
Node child = (Node) childElements.get(0); if ( !childElements.isEmpty( ) )
return child.getText(); {
Node child = (Node) childElements.get( 0 );
return child.getText( );
} }
} }
return text; return text;
} }
@ -238,18 +277,22 @@ public class Html2XdocBean {
* @return true if the current node's information means for a new * @return true if the current node's information means for a new
* section; otherwise, false * section; otherwise, false
*/ */
protected boolean needsNewSection(Node node) { protected boolean needsNewSection( Node node )
int level = determineHeadingLevel(node); {
return level <= currentSectionHeadingLevel int level = determineHeadingLevel( node );
|| currentSectionNode == null;
return ( level <= currentSectionHeadingLevel )
|| ( currentSectionNode == null );
} }
/** /**
* Ensures that a current paragraph node exists, creating one if needed. * Ensures that a current paragraph node exists, creating one if needed.
*/ */
private void guaranteeHasParaNode() { private void guaranteeHasParaNode( )
if (currentParaNode == null) { {
currentParaNode = currentNode.addElement("p"); if ( currentParaNode == null )
{
currentParaNode = currentNode.addElement( "p" );
} }
} }
@ -257,11 +300,13 @@ public class Html2XdocBean {
* Makes sure there is a current node, creating an unnamed section if necessary. * Makes sure there is a current node, creating an unnamed section if necessary.
* @param output the output element to add the section to * @param output the output element to add the section to
*/ */
private void guaranteeHasSection(Element output) { private void guaranteeHasSection( Element output )
if (currentNode == null) { {
if ( currentNode == null )
{
// we have a section with no name // we have a section with no name
// should we default it to be the same as the document title? // should we default it to be the same as the document title?
currentNode = output.addElement("section"); currentNode = output.addElement( "section" );
} }
} }
@ -269,12 +314,15 @@ public class Html2XdocBean {
* Add the node to the current node. * Add the node to the current node.
* @param node the node to add * @param node the node to add
*/ */
private void addNode(Node node) { private void addNode( Node node )
if (currentParaNode != null && ! shouldBreakPara(node)) { {
currentParaNode.add(cloneNode(node)); if ( ( currentParaNode != null ) && !shouldBreakPara( node ) )
{
currentParaNode.add( cloneNode( node ) );
} }
else { else
currentNode.add(cloneNode(node)); {
currentNode.add( cloneNode( node ) );
currentParaNode = null; currentParaNode = null;
} }
} }
@ -283,27 +331,31 @@ public class Html2XdocBean {
* @return true if the paragraph should be split, such as for a br or p * @return true if the paragraph should be split, such as for a br or p
* tag * tag
*/ */
protected boolean shouldBreakPara(Node node) { protected boolean shouldBreakPara( Node node )
String name = node.getName(); {
return "p".equals(name) || "br".equals(name); String name = node.getName( );
return "p".equals( name ) || "br".equals( name );
} }
/** /**
* Adds the text of the node to the current paragraph. * Adds the text of the node to the current paragraph.
* @param node the node to add * @param node the node to add
*/ */
private void addTextNode(Node node) { private void addTextNode( Node node )
guaranteeHasParaNode(); {
currentParaNode.addText(node.getText()); guaranteeHasParaNode( );
currentParaNode.addText( node.getText( ) );
} }
/** /**
* Adds the node to the current paragraph. * Adds the node to the current paragraph.
* @param node the node to add * @param node the node to add
*/ */
private void addFormattingNode(Node node) { private void addFormattingNode( Node node )
guaranteeHasParaNode(); {
currentParaNode.add(cloneNode(node)); guaranteeHasParaNode( );
currentParaNode.add( cloneNode( node ) );
} }
/** /**
@ -313,54 +365,84 @@ public class Html2XdocBean {
* @param content the content node list to obtain body content from * @param content the content node list to obtain body content from
* @return List * @return List
*/ */
protected List getBodyContent(List content) { protected List getBodyContent( List content )
{
// let's turn <pre> into <source> and concatenate consecutive entries // let's turn <pre> into <source> and concatenate consecutive entries
Element lastPre = null; Element lastPre = null;
LinkedList list = new LinkedList(); LinkedList list = new LinkedList( );
boolean lastWasElement = true; boolean lastWasElement = true;
for (Iterator iter = content.iterator(); iter.hasNext();) {
Node node = (Node) iter.next();
if (isPre(node)) { for ( Iterator iter = content.iterator( ); iter.hasNext( ); )
if (lastPre == null) { {
lastPre = factory.createElement("source"); Node node = (Node) iter.next( );
list.add(lastPre);
if ( isPre( node ) )
{
if ( lastPre == null )
{
lastPre = factory.createElement( "source" );
list.add( lastPre );
} }
lastPre.addText(node.getText());
} else { lastPre.addText( node.getText( ) );
if (isWhitespace(node) && lastWasElement) {
if (lastPre != null) {
lastPre.addText(node.getText());
} }
} else { else
{
if ( isWhitespace( node ) && lastWasElement )
{
if ( lastPre != null )
{
lastPre.addText( node.getText( ) );
}
}
else
{
lastWasElement = node instanceof Element; lastWasElement = node instanceof Element;
if (lastWasElement) {
if ( lastWasElement )
{
lastPre = null; lastPre = null;
} }
list.add(node);
list.add( node );
} }
} }
} }
if (list.size() == 0) return list; if ( list.size( ) == 0 )
{
return list;
}
// now lets remove any whitespace text nodes at the beginning and end // now lets remove any whitespace text nodes at the beginning and end
while (true) { while ( true )
Node node = (Node) list.getFirst(); {
if (isWhitespace(node)) { Node node = (Node) list.getFirst( );
list.removeFirst();
if ( isWhitespace( node ) )
{
list.removeFirst( );
continue; continue;
} }
break; break;
} }
while (true) {
Node node = (Node) list.getLast(); while ( true )
if (isWhitespace(node)) { {
list.removeLast(); Node node = (Node) list.getLast( );
if ( isWhitespace( node ) )
{
list.removeLast( );
continue; continue;
} }
break; break;
} }
return list; return list;
} }
@ -368,11 +450,15 @@ public class Html2XdocBean {
* @param node the node to check * @param node the node to check
* @return true if the node is a pre tag; otherwise false. * @return true if the node is a pre tag; otherwise false.
*/ */
protected boolean isPre(Node node) { protected boolean isPre( Node node )
if (node instanceof Element) { {
if ( node instanceof Element )
{
Element element = (Element) node; Element element = (Element) node;
return element.getName().equals("pre");
return element.getName( ).equals( "pre" );
} }
return false; return false;
} }
@ -380,21 +466,25 @@ public class Html2XdocBean {
* @param node the node to check * @param node the node to check
* @return true if the given node is a whitespace text node * @return true if the given node is a whitespace text node
*/ */
protected boolean isWhitespace(Node node) { protected boolean isWhitespace( Node node )
if (node instanceof CharacterData) { {
String text = node.getText(); if ( node instanceof CharacterData )
return text.trim().length() <= 0; {
String text = node.getText( );
return text.trim( ).length( ) <= 0;
} }
// if (node instanceof Element) {
// String name = node.getName(); // if (node instanceof Element) {
// if (name.equals("p")) { // String name = node.getName();
// String text = node.getText(); // if (name.equals("p")) {
// return text.trim().length() <= 0; // String text = node.getText();
// } // return text.trim().length() <= 0;
// if (name.equals("br")) { // }
// return true; // if (name.equals("br")) {
// } // return true;
// } // }
// }
return false; return false;
} }
@ -404,13 +494,17 @@ public class Html2XdocBean {
* @param node the node to clone * @param node the node to clone
* @return Node the cloned node * @return Node the cloned node
*/ */
protected Node cloneNode(Node node) { protected Node cloneNode( Node node )
Node answer = (Node) node.clone(); {
if (answer instanceof Element) { Node answer = (Node) node.clone( );
if ( answer instanceof Element )
{
Element element = (Element) answer; Element element = (Element) answer;
element.normalize();
} element.normalize( );
return answer;
} }
return answer;
}
} }

View File

@ -0,0 +1,12 @@
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<html>
<head>
<title>org.apache.maven.html2xdoc</title>
</head>
<body>
<p>
Contains utility classes for converting an HTML document into an
XDoc-compliant XML document.
</p>
</body>
</html>

View File

@ -59,13 +59,8 @@
* *
* TagXMLDoclet.java,v 1.1 2003/02/07 12:10:44 jstrachan Exp * TagXMLDoclet.java,v 1.1 2003/02/07 12:10:44 jstrachan Exp
*/ */
package org.apache.maven.html2xdoc; package org.apache.maven.html2xdoc;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URL;
import junit.framework.Test; import junit.framework.Test;
import junit.framework.TestCase; import junit.framework.TestCase;
import junit.framework.TestSuite; import junit.framework.TestSuite;
@ -77,62 +72,74 @@ import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter; import org.dom4j.io.XMLWriter;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URL;
/** /**
* A test harness for the HTML to XDOC converter * A test harness for the HTML to XDOC converter
* *
* @author <a href="mailto:jstrachan@apache.org">James Strachan</a> * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
*/ */
public class TestHtml2Xdoc extends TestCase { public class TestHtml2Xdoc extends TestCase
{
protected boolean verbose = false; protected boolean verbose = false;
public static void main( String[] args ) { public TestHtml2Xdoc( String testName )
TestRunner.run( suite() ); {
super( testName );
} }
public static Test suite() { public static void main( String[] args )
{
TestRunner.run( suite( ) );
}
public static Test suite( )
{
return new TestSuite( TestHtml2Xdoc.class ); return new TestSuite( TestHtml2Xdoc.class );
} }
public TestHtml2Xdoc(String testName) {
super(testName);
}
// Test cases // Test cases
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
public void testOne() throws Exception { public void testOne( ) throws Exception
assertConversion("missingParaBug.html", "missingParaBug.xml"); {
assertConversion("linkInHeading.html", "linkInHeading.xml"); assertConversion( "missingParaBug.html", "missingParaBug.xml" );
assertConversion("codeinpara.html", "codeinpara.xml"); assertConversion( "linkInHeading.html", "linkInHeading.xml" );
assertConversion("input1.html", "output1.xml"); assertConversion( "codeinpara.html", "codeinpara.xml" );
assertConversion("h1h2.html", "h1h2.xml"); assertConversion( "input1.html", "output1.xml" );
assertConversion("h2h3.html", "h1h2.xml"); assertConversion( "h1h2.html", "h1h2.xml" );
assertConversion("h2h4.html", "h1h2.xml"); assertConversion( "h2h3.html", "h1h2.xml" );
assertConversion("h3h4.html", "h1h2.xml"); assertConversion( "h2h4.html", "h1h2.xml" );
assertConversion("link.html", "link.xml"); assertConversion( "h3h4.html", "h1h2.xml" );
assertConversion("comment.html", "comment.xml"); assertConversion( "link.html", "link.xml" );
assertConversion( "comment.html", "comment.xml" );
} }
// Implementation methods // Implementation methods
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
protected void assertConversion(String input, String output) throws Exception { protected void assertConversion( String input, String output )
Html2XdocBean converter = createConverter(); throws Exception
Document inputDoc = parseHtml(input); {
Html2XdocBean converter = createConverter( );
Document inputDoc = parseHtml( input );
Document expectedDoc = parse( output );
Document expectedDoc = parse(output); Document actualDoc = converter.convert( inputDoc );
Document actualDoc = converter.convert(inputDoc); if ( verbose )
{
if (verbose) { System.out.println( "Comparing: " + input + " to: " + output );
System.out.println("Comparing: " + input + " to: " + output); System.out.println( "Parsed: " + inputDoc.asXML( ) );
System.out.println("Parsed: " + inputDoc.asXML()); System.out.println( "Generated: " + actualDoc.asXML( ) );
System.out.println("Generated: " + actualDoc.asXML()); System.out.println( );
System.out.println(); System.out.println( );
System.out.println();
} }
assertEqual("Output for: " + input + " does not match: " + output, expectedDoc, actualDoc); assertEqual( "Output for: " + input + " does not match: " + output,
expectedDoc, actualDoc );
} }
/** /**
@ -142,32 +149,36 @@ public class TestHtml2Xdoc extends TestCase {
* @param expectedDoc * @param expectedDoc
* @param actualDoc * @param actualDoc
*/ */
protected void assertEqual( protected void assertEqual( String message, Document expectedDoc,
String message, Document actualDoc ) throws IOException
Document expectedDoc, {
Document actualDoc) throws IOException { String expectedText = getPrettyPrintText( expectedDoc );
String actualText = getPrettyPrintText( actualDoc );
String expectedText = getPrettyPrintText(expectedDoc); if ( !expectedText.equals( actualText ) )
String actualText = getPrettyPrintText(actualDoc); {
System.out.println( "Expected: " + expectedText );
if (!expectedText.equals(actualText)) { System.out.println( "Actual: " + actualText );
System.out.println("Expected: " + expectedText);
System.out.println("Actual: " + actualText);
} }
assertEquals(message, expectedText, actualText);
assertEquals( message, expectedText, actualText );
} }
/** /**
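* @param doc the document to serialize * @param doc the document to serialize
* @return the pretty-printed XML text of the document * @return the pretty-printed XML text of the document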
*/ */
protected String getPrettyPrintText(Document doc) throws IOException { protected String getPrettyPrintText( Document doc )
OutputFormat format = OutputFormat.createPrettyPrint(); throws IOException
StringWriter buffer = new StringWriter(); {
XMLWriter writer = new XMLWriter(buffer, format); OutputFormat format = OutputFormat.createPrettyPrint( );
writer.write(doc); StringWriter buffer = new StringWriter( );
writer.close(); XMLWriter writer = new XMLWriter( buffer, format );
return buffer.toString();
writer.write( doc );
writer.close( );
return buffer.toString( );
} }
/** /**
@ -176,10 +187,13 @@ public class TestHtml2Xdoc extends TestCase {
* @param input * @param input
* @return Document * @return Document
*/ */
protected Document parse(String input) throws Exception { protected Document parse( String input )
URL url = getClassURL(input); throws Exception
SAXReader saxReader = new SAXReader(); {
return saxReader.read(url); URL url = getClassURL( input );
SAXReader saxReader = new SAXReader( );
return saxReader.read( url );
} }
/** /**
@ -189,28 +203,35 @@ public class TestHtml2Xdoc extends TestCase {
* @param input * @param input
* @return Document * @return Document
*/ */
protected Document parseHtml(String input) throws Exception { protected Document parseHtml( String input )
URL url = getClassURL(input); throws Exception
SAXParser htmlParser = new SAXParser(); {
htmlParser.setProperty( URL url = getClassURL( input );
"http://cyberneko.org/html/properties/names/elems", SAXParser htmlParser = new SAXParser( );
"lower"
); htmlParser.setProperty( "http://cyberneko.org/html/properties/names/elems",
htmlParser.setProperty( "lower" );
"http://cyberneko.org/html/properties/names/attrs", htmlParser.setProperty( "http://cyberneko.org/html/properties/names/attrs",
"lower" "lower" );
);
SAXReader saxReader = new SAXReader(htmlParser); SAXReader saxReader = new SAXReader( htmlParser );
return saxReader.read(url);
return saxReader.read( url );
} }
protected URL getClassURL(String input) throws Exception { protected URL getClassURL( String input )
URL url = getClass().getResource(input); throws Exception
assertTrue("Could not find resource on classpath for: " + input, url != null); {
URL url = getClass( ).getResource( input );
assertTrue( "Could not find resource on classpath for: " + input,
url != null );
return url; return url;
} }
protected Html2XdocBean createConverter() { protected Html2XdocBean createConverter( )
return new Html2XdocBean(); {
return new Html2XdocBean( );
} }
} }

View File

@ -0,0 +1,11 @@
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<html>
<head>
<title>org.apache.maven.html2xdoc</title>
</head>
<body>
<p>
Test classes for org.apache.maven.html2xdoc.
</p>
</body>
</html>