Add package.html, reformat code

git-svn-id: https://svn.apache.org/repos/asf/maven/maven-1/plugins/trunk@368518 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
ltheussl 2006-01-12 23:35:13 +00:00
parent 69a739d291
commit 1dc421af44
4 changed files with 363 additions and 225 deletions

View File

@ -16,7 +16,6 @@ package org.apache.maven.html2xdoc;
* limitations under the License. * limitations under the License.
* ==================================================================== * ====================================================================
*/ */
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
@ -38,8 +37,8 @@ import org.dom4j.Node;
* *
* @author <a href="mailto:jstrachan@apache.org">James Strachan</a> * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
*/ */
public class Html2XdocBean { public class Html2XdocBean
{
/** The Log to which logging calls will be made. */ /** The Log to which logging calls will be made. */
private static final Log log = LogFactory.getLog( Html2XdocBean.class ); private static final Log log = LogFactory.getLog( Html2XdocBean.class );
@ -79,22 +78,29 @@ public class Html2XdocBean {
* @param html the input html document * @param html the input html document
* @return Document * @return Document
*/ */
public Document convert(Document html) { public Document convert( Document html )
{
Document doc = factory.createDocument( ); Document doc = factory.createDocument( );
Element root = doc.addElement( "document" ); Element root = doc.addElement( "document" );
Element properties = root.addElement( "properties" ); Element properties = root.addElement( "properties" );
Element title = properties.addElement( "title" ); Element title = properties.addElement( "title" );
title.setText( html.valueOf( "/html/head/title" ) ); title.setText( html.valueOf( "/html/head/title" ) );
Element body = root.addElement( "body" ); Element body = root.addElement( "body" );
Element htmlContent = (Element) html.selectSingleNode( "/html/body" ); Element htmlContent = (Element) html.selectSingleNode( "/html/body" );
if (htmlContent == null) {
if ( htmlContent == null )
{
log.info( "No body element found for HTML document: " log.info( "No body element found for HTML document: "
+ html.asXML( ) ); + html.asXML( ) );
} else { }
else
{
addSections( body, htmlContent ); addSections( body, htmlContent );
} }
return doc; return doc;
} }
@ -106,19 +112,24 @@ public class Html2XdocBean {
* @param output the output destination * @param output the output destination
* @param body the block of HTML markup to convert * @param body the block of HTML markup to convert
*/ */
protected void addSections(Element output, Element body) { protected void addSections( Element output, Element body )
{
List content = getBodyContent( body.content( ) ); List content = getBodyContent( body.content( ) );
for (Iterator iter = content.iterator(); iter.hasNext();) { for ( Iterator iter = content.iterator( ); iter.hasNext( ); )
{
Node node = (Node) iter.next( ); Node node = (Node) iter.next( );
if (isHeading(node)) {
if ( isHeading( node ) )
{
makeSection( output, node ); makeSection( output, node );
} else { }
else
{
guaranteeHasSection( output ); guaranteeHasSection( output );
processNode( node ); processNode( node );
} }
} }
} }
/** /**
@ -127,12 +138,18 @@ public class Html2XdocBean {
* *
* @param node the node to process * @param node the node to process
*/ */
private void processNode(Node node) { private void processNode( Node node )
if (isCharacterData(node)) { {
if ( isCharacterData( node ) )
{
addTextNode( node ); addTextNode( node );
} else if (isTextFormatting(node)) { }
else if ( isTextFormatting( node ) )
{
addFormattingNode( node ); addFormattingNode( node );
} else { }
else
{
addNode( node ); addNode( node );
} }
} }
@ -145,9 +162,10 @@ public class Html2XdocBean {
* @return true if the node is used to modify the formatting of the * @return true if the node is used to modify the formatting of the
* text; otherwise, false * text; otherwise, false
*/ */
protected boolean isTextFormatting(Node node) { protected boolean isTextFormatting( Node node )
{
// Ultimately this needs bold, italic, and so on // Ultimately this needs bold, italic, and so on
return node.getName() != null && node.getName().equals("a"); return ( node.getName( ) != null ) && node.getName( ).equals( "a" );
} }
/** /**
@ -157,9 +175,10 @@ public class Html2XdocBean {
* @param node the node to check * @param node the node to check
* @return true if the node is a text node; otherwise, false. * @return true if the node is a text node; otherwise, false.
*/ */
protected boolean isCharacterData(Node node) { protected boolean isCharacterData( Node node )
{
return node instanceof CharacterData return node instanceof CharacterData
&& (node instanceof Comment) == false; && ( ( node instanceof Comment ) == false );
} }
/** /**
@ -169,9 +188,11 @@ public class Html2XdocBean {
* @return true if the given node is a heading element * @return true if the given node is a heading element
* (h1, h2, h3 etc); otherwise, false * (h1, h2, h3 etc); otherwise, false
*/ */
protected boolean isHeading(Node node) { protected boolean isHeading( Node node )
{
String name = node.getName( ); String name = node.getName( );
return name != null && name.startsWith("h");
return ( name != null ) && name.startsWith( "h" );
} }
/** /**
@ -180,11 +201,16 @@ public class Html2XdocBean {
* @param node the node to check * @param node the node to check
* @return the integer level of the heading * @return the integer level of the heading
*/ */
protected int determineHeadingLevel(Node node) { protected int determineHeadingLevel( Node node )
try { {
try
{
String name = node.getName( ).substring( 1 ); String name = node.getName( ).substring( 1 );
return Integer.parseInt( name ); return Integer.parseInt( name );
} catch (NumberFormatException nfe) { }
catch ( NumberFormatException nfe )
{
return 1; return 1;
} }
} }
@ -196,15 +222,21 @@ public class Html2XdocBean {
* @param output the output document to attach the section * @param output the output document to attach the section
* @param node the node to base making a section on * @param node the node to base making a section on
*/ */
protected void makeSection(Element output, Node node) { protected void makeSection( Element output, Node node )
{
int level = determineHeadingLevel( node ); int level = determineHeadingLevel( node );
if (needsNewSection(node)) {
if ( needsNewSection( node ) )
{
currentNode = output.addElement( "section" ); currentNode = output.addElement( "section" );
currentSectionHeadingLevel = level; currentSectionHeadingLevel = level;
currentSectionNode = currentNode; currentSectionNode = currentNode;
} else { }
else
{
currentNode = currentSectionNode.addElement( "subsection" ); currentNode = currentSectionNode.addElement( "subsection" );
} }
currentNode.addAttribute( "name", getSectionText( node ) ); currentNode.addAttribute( "name", getSectionText( node ) );
currentParaNode = null; currentParaNode = null;
} }
@ -214,18 +246,25 @@ public class Html2XdocBean {
* contains an embedded element (such as an &lt;a&gt; element) * contains an embedded element (such as an &lt;a&gt; element)
* then return its text * then return its text
*/ */
protected String getSectionText(Node node) { protected String getSectionText( Node node )
{
String text = node.getText( ).trim( ); String text = node.getText( ).trim( );
if (text.length() <= 0 && node instanceof Element) {
if ( ( text.length( ) <= 0 ) && node instanceof Element )
{
Element element = (Element) node; Element element = (Element) node;
// maybe we contain a hypertext link // maybe we contain a hypertext link
List childElements = element.elements( ); List childElements = element.elements( );
if (! childElements.isEmpty()) {
if ( !childElements.isEmpty( ) )
{
Node child = (Node) childElements.get( 0 ); Node child = (Node) childElements.get( 0 );
return child.getText( ); return child.getText( );
} }
} }
return text; return text;
} }
@ -238,17 +277,21 @@ public class Html2XdocBean {
* @return true if the current node's information means for a new * @return true if the current node's information means for a new
* section; otherwise, false * section; otherwise, false
*/ */
protected boolean needsNewSection(Node node) { protected boolean needsNewSection( Node node )
{
int level = determineHeadingLevel( node ); int level = determineHeadingLevel( node );
return level <= currentSectionHeadingLevel
|| currentSectionNode == null; return ( level <= currentSectionHeadingLevel )
|| ( currentSectionNode == null );
} }
/** /**
* Ensures that a current paragraph node exists, creating one if necessary. * Ensures that a current paragraph node exists, creating one if necessary.
*/ */
private void guaranteeHasParaNode() { private void guaranteeHasParaNode( )
if (currentParaNode == null) { {
if ( currentParaNode == null )
{
currentParaNode = currentNode.addElement( "p" ); currentParaNode = currentNode.addElement( "p" );
} }
} }
@ -257,8 +300,10 @@ public class Html2XdocBean {
* Makes sure the current node is a section, if necessary. * Makes sure the current node is a section, if necessary.
* @param output the output element to add the section to * @param output the output element to add the section to
*/ */
private void guaranteeHasSection(Element output) { private void guaranteeHasSection( Element output )
if (currentNode == null) { {
if ( currentNode == null )
{
// we have a section with no name // we have a section with no name
// should we default it to be the same as the document title? // should we default it to be the same as the document title?
currentNode = output.addElement( "section" ); currentNode = output.addElement( "section" );
@ -269,11 +314,14 @@ public class Html2XdocBean {
* Add the node to the current node. * Add the node to the current node.
* @param node the node to add * @param node the node to add
*/ */
private void addNode(Node node) { private void addNode( Node node )
if (currentParaNode != null && ! shouldBreakPara(node)) { {
if ( ( currentParaNode != null ) && !shouldBreakPara( node ) )
{
currentParaNode.add( cloneNode( node ) ); currentParaNode.add( cloneNode( node ) );
} }
else { else
{
currentNode.add( cloneNode( node ) ); currentNode.add( cloneNode( node ) );
currentParaNode = null; currentParaNode = null;
} }
@ -283,8 +331,10 @@ public class Html2XdocBean {
* @return true if the paragraph should be split, such as for a br or p * @return true if the paragraph should be split, such as for a br or p
* tag * tag
*/ */
protected boolean shouldBreakPara(Node node) { protected boolean shouldBreakPara( Node node )
{
String name = node.getName( ); String name = node.getName( );
return "p".equals( name ) || "br".equals( name ); return "p".equals( name ) || "br".equals( name );
} }
@ -292,7 +342,8 @@ public class Html2XdocBean {
* Adds the text of the node to the current paragraph. * Adds the text of the node to the current paragraph.
* @param node the node to add * @param node the node to add
*/ */
private void addTextNode(Node node) { private void addTextNode( Node node )
{
guaranteeHasParaNode( ); guaranteeHasParaNode( );
currentParaNode.addText( node.getText( ) ); currentParaNode.addText( node.getText( ) );
} }
@ -301,7 +352,8 @@ public class Html2XdocBean {
* Adds the node to the current paragraph. * Adds the node to the current paragraph.
* @param node the node to add * @param node the node to add
*/ */
private void addFormattingNode(Node node) { private void addFormattingNode( Node node )
{
guaranteeHasParaNode( ); guaranteeHasParaNode( );
currentParaNode.add( cloneNode( node ) ); currentParaNode.add( cloneNode( node ) );
} }
@ -313,54 +365,84 @@ public class Html2XdocBean {
* @param content the content node list to obtain body content from * @param content the content node list to obtain body content from
* @return List * @return List
*/ */
protected List getBodyContent(List content) { protected List getBodyContent( List content )
{
// let's turn <pre> into <source> and concatenate consecutive entries // let's turn <pre> into <source> and concatenate consecutive entries
Element lastPre = null; Element lastPre = null;
LinkedList list = new LinkedList( ); LinkedList list = new LinkedList( );
boolean lastWasElement = true; boolean lastWasElement = true;
for (Iterator iter = content.iterator(); iter.hasNext();) {
for ( Iterator iter = content.iterator( ); iter.hasNext( ); )
{
Node node = (Node) iter.next( ); Node node = (Node) iter.next( );
if (isPre(node)) { if ( isPre( node ) )
if (lastPre == null) { {
if ( lastPre == null )
{
lastPre = factory.createElement( "source" ); lastPre = factory.createElement( "source" );
list.add( lastPre ); list.add( lastPre );
} }
lastPre.addText(node.getText());
} else {
if (isWhitespace(node) && lastWasElement) {
if (lastPre != null) {
lastPre.addText( node.getText( ) ); lastPre.addText( node.getText( ) );
} }
} else { else
{
if ( isWhitespace( node ) && lastWasElement )
{
if ( lastPre != null )
{
lastPre.addText( node.getText( ) );
}
}
else
{
lastWasElement = node instanceof Element; lastWasElement = node instanceof Element;
if (lastWasElement) {
if ( lastWasElement )
{
lastPre = null; lastPre = null;
} }
list.add( node ); list.add( node );
} }
} }
} }
if (list.size() == 0) return list; if ( list.size( ) == 0 )
{
return list;
}
// now lets remove any whitespace text nodes at the beginning and end // now lets remove any whitespace text nodes at the beginning and end
while (true) { while ( true )
{
Node node = (Node) list.getFirst( ); Node node = (Node) list.getFirst( );
if (isWhitespace(node)) {
if ( isWhitespace( node ) )
{
list.removeFirst( ); list.removeFirst( );
continue; continue;
} }
break; break;
} }
while (true) {
while ( true )
{
Node node = (Node) list.getLast( ); Node node = (Node) list.getLast( );
if (isWhitespace(node)) {
if ( isWhitespace( node ) )
{
list.removeLast( ); list.removeLast( );
continue; continue;
} }
break; break;
} }
return list; return list;
} }
@ -368,11 +450,15 @@ public class Html2XdocBean {
* @param node the node to check * @param node the node to check
* @return true if the node is a pre tag; otherwise false. * @return true if the node is a pre tag; otherwise false.
*/ */
protected boolean isPre(Node node) { protected boolean isPre( Node node )
if (node instanceof Element) { {
if ( node instanceof Element )
{
Element element = (Element) node; Element element = (Element) node;
return element.getName( ).equals( "pre" ); return element.getName( ).equals( "pre" );
} }
return false; return false;
} }
@ -380,11 +466,15 @@ public class Html2XdocBean {
* @param node the node to check * @param node the node to check
* @return true if the given node is a whitespace text node * @return true if the given node is a whitespace text node
*/ */
protected boolean isWhitespace(Node node) { protected boolean isWhitespace( Node node )
if (node instanceof CharacterData) { {
if ( node instanceof CharacterData )
{
String text = node.getText( ); String text = node.getText( );
return text.trim( ).length( ) <= 0; return text.trim( ).length( ) <= 0;
} }
// if (node instanceof Element) { // if (node instanceof Element) {
// String name = node.getName(); // String name = node.getName();
// if (name.equals("p")) { // if (name.equals("p")) {
@ -404,13 +494,17 @@ public class Html2XdocBean {
* @param node the node to clone * @param node the node to clone
* @return Node the cloned node * @return Node the cloned node
*/ */
protected Node cloneNode(Node node) { protected Node cloneNode( Node node )
{
Node answer = (Node) node.clone( ); Node answer = (Node) node.clone( );
if (answer instanceof Element) {
if ( answer instanceof Element )
{
Element element = (Element) answer; Element element = (Element) answer;
element.normalize( ); element.normalize( );
} }
return answer; return answer;
} }
} }

View File

@ -0,0 +1,12 @@
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<html>
<head>
<title>org.apache.maven.html2xdoc</title>
</head>
<body>
<p>
Contains utility classes for converting an HTML document into an
XDoc-compliant XML document.
</p>
</body>
</html>

View File

@ -59,13 +59,8 @@
* *
* TagXMLDoclet.java,v 1.1 2003/02/07 12:10:44 jstrachan Exp * TagXMLDoclet.java,v 1.1 2003/02/07 12:10:44 jstrachan Exp
*/ */
package org.apache.maven.html2xdoc; package org.apache.maven.html2xdoc;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URL;
import junit.framework.Test; import junit.framework.Test;
import junit.framework.TestCase; import junit.framework.TestCase;
import junit.framework.TestSuite; import junit.framework.TestSuite;
@ -77,30 +72,39 @@ import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter; import org.dom4j.io.XMLWriter;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URL;
/** /**
* A test harness for the HTML to XDOC converter * A test harness for the HTML to XDOC converter
* *
* @author <a href="mailto:jstrachan@apache.org">James Strachan</a> * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
*/ */
public class TestHtml2Xdoc extends TestCase { public class TestHtml2Xdoc extends TestCase
{
protected boolean verbose = false; protected boolean verbose = false;
public static void main( String[] args ) { public TestHtml2Xdoc( String testName )
{
super( testName );
}
public static void main( String[] args )
{
TestRunner.run( suite( ) ); TestRunner.run( suite( ) );
} }
public static Test suite() { public static Test suite( )
{
return new TestSuite( TestHtml2Xdoc.class ); return new TestSuite( TestHtml2Xdoc.class );
} }
public TestHtml2Xdoc(String testName) {
super(testName);
}
// Test cases // Test cases
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
public void testOne() throws Exception { public void testOne( ) throws Exception
{
assertConversion( "missingParaBug.html", "missingParaBug.xml" ); assertConversion( "missingParaBug.html", "missingParaBug.xml" );
assertConversion( "linkInHeading.html", "linkInHeading.xml" ); assertConversion( "linkInHeading.html", "linkInHeading.xml" );
assertConversion( "codeinpara.html", "codeinpara.xml" ); assertConversion( "codeinpara.html", "codeinpara.xml" );
@ -115,16 +119,18 @@ public class TestHtml2Xdoc extends TestCase {
// Implementation methods // Implementation methods
//------------------------------------------------------------------------- //-------------------------------------------------------------------------
protected void assertConversion(String input, String output) throws Exception { protected void assertConversion( String input, String output )
throws Exception
{
Html2XdocBean converter = createConverter( ); Html2XdocBean converter = createConverter( );
Document inputDoc = parseHtml( input ); Document inputDoc = parseHtml( input );
Document expectedDoc = parse( output ); Document expectedDoc = parse( output );
Document actualDoc = converter.convert( inputDoc ); Document actualDoc = converter.convert( inputDoc );
if (verbose) { if ( verbose )
{
System.out.println( "Comparing: " + input + " to: " + output ); System.out.println( "Comparing: " + input + " to: " + output );
System.out.println( "Parsed: " + inputDoc.asXML( ) ); System.out.println( "Parsed: " + inputDoc.asXML( ) );
System.out.println( "Generated: " + actualDoc.asXML( ) ); System.out.println( "Generated: " + actualDoc.asXML( ) );
@ -132,7 +138,8 @@ public class TestHtml2Xdoc extends TestCase {
System.out.println( ); System.out.println( );
} }
assertEqual("Output for: " + input + " does not match: " + output, expectedDoc, actualDoc); assertEqual( "Output for: " + input + " does not match: " + output,
expectedDoc, actualDoc );
} }
/** /**
@ -142,18 +149,18 @@ public class TestHtml2Xdoc extends TestCase {
* @param expectedDoc * @param expectedDoc
* @param actualDoc * @param actualDoc
*/ */
protected void assertEqual( protected void assertEqual( String message, Document expectedDoc,
String message, Document actualDoc ) throws IOException
Document expectedDoc, {
Document actualDoc) throws IOException {
String expectedText = getPrettyPrintText( expectedDoc ); String expectedText = getPrettyPrintText( expectedDoc );
String actualText = getPrettyPrintText( actualDoc ); String actualText = getPrettyPrintText( actualDoc );
if (!expectedText.equals(actualText)) { if ( !expectedText.equals( actualText ) )
{
System.out.println( "Expected: " + expectedText ); System.out.println( "Expected: " + expectedText );
System.out.println( "Actual: " + actualText ); System.out.println( "Actual: " + actualText );
} }
assertEquals( message, expectedText, actualText ); assertEquals( message, expectedText, actualText );
} }
@ -161,12 +168,16 @@ public class TestHtml2Xdoc extends TestCase {
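* @param doc the document to pretty-print * @param doc the document to pretty-print
* @return the pretty-printed XML text of the document * @return the pretty-printed XML text of the document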
*/ */
protected String getPrettyPrintText(Document doc) throws IOException { protected String getPrettyPrintText( Document doc )
throws IOException
{
OutputFormat format = OutputFormat.createPrettyPrint( ); OutputFormat format = OutputFormat.createPrettyPrint( );
StringWriter buffer = new StringWriter( ); StringWriter buffer = new StringWriter( );
XMLWriter writer = new XMLWriter( buffer, format ); XMLWriter writer = new XMLWriter( buffer, format );
writer.write( doc ); writer.write( doc );
writer.close( ); writer.close( );
return buffer.toString( ); return buffer.toString( );
} }
@ -176,9 +187,12 @@ public class TestHtml2Xdoc extends TestCase {
* @param input * @param input
* @return Document * @return Document
*/ */
protected Document parse(String input) throws Exception { protected Document parse( String input )
throws Exception
{
URL url = getClassURL( input ); URL url = getClassURL( input );
SAXReader saxReader = new SAXReader( ); SAXReader saxReader = new SAXReader( );
return saxReader.read( url ); return saxReader.read( url );
} }
@ -189,28 +203,35 @@ public class TestHtml2Xdoc extends TestCase {
* @param input * @param input
* @return Document * @return Document
*/ */
protected Document parseHtml(String input) throws Exception { protected Document parseHtml( String input )
throws Exception
{
URL url = getClassURL( input ); URL url = getClassURL( input );
SAXParser htmlParser = new SAXParser( ); SAXParser htmlParser = new SAXParser( );
htmlParser.setProperty(
"http://cyberneko.org/html/properties/names/elems", htmlParser.setProperty( "http://cyberneko.org/html/properties/names/elems",
"lower" "lower" );
); htmlParser.setProperty( "http://cyberneko.org/html/properties/names/attrs",
htmlParser.setProperty( "lower" );
"http://cyberneko.org/html/properties/names/attrs",
"lower"
);
SAXReader saxReader = new SAXReader( htmlParser ); SAXReader saxReader = new SAXReader( htmlParser );
return saxReader.read( url ); return saxReader.read( url );
} }
protected URL getClassURL(String input) throws Exception { protected URL getClassURL( String input )
throws Exception
{
URL url = getClass( ).getResource( input ); URL url = getClass( ).getResource( input );
assertTrue("Could not find resource on classpath for: " + input, url != null);
assertTrue( "Could not find resource on classpath for: " + input,
url != null );
return url; return url;
} }
protected Html2XdocBean createConverter() { protected Html2XdocBean createConverter( )
{
return new Html2XdocBean( ); return new Html2XdocBean( );
} }
} }

View File

@ -0,0 +1,11 @@
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<html>
<head>
<title>org.apache.maven.html2xdoc</title>
</head>
<body>
<p>
Test classes for org.apache.maven.html2xdoc.
</p>
</body>
</html>