Add package.html, reformat code

git-svn-id: https://svn.apache.org/repos/asf/maven/maven-1/plugins/trunk@368518 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
ltheussl 2006-01-12 23:35:13 +00:00
parent 69a739d291
commit 1dc421af44
4 changed files with 363 additions and 225 deletions

View File

@ -16,7 +16,6 @@ package org.apache.maven.html2xdoc;
* limitations under the License.
* ====================================================================
*/
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@ -38,8 +37,8 @@ import org.dom4j.Node;
*
* @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
*/
public class Html2XdocBean {
public class Html2XdocBean
{
/** The Log to which logging calls will be made. */
private static final Log log = LogFactory.getLog( Html2XdocBean.class );
@ -79,22 +78,29 @@ public class Html2XdocBean {
* @param html the input html document
* @return Document
*/
public Document convert(Document html) {
public Document convert( Document html )
{
Document doc = factory.createDocument( );
Element root = doc.addElement( "document" );
Element properties = root.addElement( "properties" );
Element title = properties.addElement( "title" );
title.setText( html.valueOf( "/html/head/title" ) );
Element body = root.addElement( "body" );
Element htmlContent = (Element) html.selectSingleNode( "/html/body" );
if (htmlContent == null) {
if ( htmlContent == null )
{
log.info( "No body element found for HTML document: "
+ html.asXML( ) );
} else {
}
else
{
addSections( body, htmlContent );
}
return doc;
}
@ -106,19 +112,24 @@ public class Html2XdocBean {
* @param output the output destination
* @param body the block of HTML markup to convert
*/
protected void addSections(Element output, Element body) {
protected void addSections( Element output, Element body )
{
List content = getBodyContent( body.content( ) );
for (Iterator iter = content.iterator(); iter.hasNext();) {
for ( Iterator iter = content.iterator( ); iter.hasNext( ); )
{
Node node = (Node) iter.next( );
if (isHeading(node)) {
if ( isHeading( node ) )
{
makeSection( output, node );
} else {
}
else
{
guaranteeHasSection( output );
processNode( node );
}
}
}
/**
@ -127,12 +138,18 @@ public class Html2XdocBean {
*
* @param node the node to process
*/
private void processNode(Node node) {
if (isCharacterData(node)) {
private void processNode( Node node )
{
if ( isCharacterData( node ) )
{
addTextNode( node );
} else if (isTextFormatting(node)) {
}
else if ( isTextFormatting( node ) )
{
addFormattingNode( node );
} else {
}
else
{
addNode( node );
}
}
@ -145,9 +162,10 @@ public class Html2XdocBean {
* @return true if the node is used to modify the formatting of the
* text; otherwise, false
*/
protected boolean isTextFormatting(Node node) {
protected boolean isTextFormatting( Node node )
{
// Ultimately this needs bold, italic, and so on
return node.getName() != null && node.getName().equals("a");
return ( node.getName( ) != null ) && node.getName( ).equals( "a" );
}
/**
@ -157,9 +175,10 @@ public class Html2XdocBean {
* @param node the node to check
* @return true if the node is a text node; otherwise, false.
*/
protected boolean isCharacterData(Node node) {
protected boolean isCharacterData( Node node )
{
return node instanceof CharacterData
&& (node instanceof Comment) == false;
&& ( ( node instanceof Comment ) == false );
}
/**
@ -169,9 +188,11 @@ public class Html2XdocBean {
* @return true if the given node is a heading element
* (h1, h2, h3 etc); otherwise, false
*/
protected boolean isHeading(Node node) {
protected boolean isHeading( Node node )
{
String name = node.getName( );
return name != null && name.startsWith("h");
return ( name != null ) && name.startsWith( "h" );
}
/**
@ -180,11 +201,16 @@ public class Html2XdocBean {
* @param node the node to check
* @return the integer level of the heading
*/
protected int determineHeadingLevel(Node node) {
try {
protected int determineHeadingLevel( Node node )
{
try
{
String name = node.getName( ).substring( 1 );
return Integer.parseInt( name );
} catch (NumberFormatException nfe) {
}
catch ( NumberFormatException nfe )
{
return 1;
}
}
@ -196,15 +222,21 @@ public class Html2XdocBean {
* @param output the output document to attach the section
* @param node the node to base making a section on
*/
protected void makeSection(Element output, Node node) {
protected void makeSection( Element output, Node node )
{
int level = determineHeadingLevel( node );
if (needsNewSection(node)) {
if ( needsNewSection( node ) )
{
currentNode = output.addElement( "section" );
currentSectionHeadingLevel = level;
currentSectionNode = currentNode;
} else {
}
else
{
currentNode = currentSectionNode.addElement( "subsection" );
}
currentNode.addAttribute( "name", getSectionText( node ) );
currentParaNode = null;
}
@ -214,18 +246,25 @@ public class Html2XdocBean {
* contains an embedded element (such as an &lt;a&gt; element)
* then return its text
*/
protected String getSectionText(Node node) {
protected String getSectionText( Node node )
{
String text = node.getText( ).trim( );
if (text.length() <= 0 && node instanceof Element) {
if ( ( text.length( ) <= 0 ) && node instanceof Element )
{
Element element = (Element) node;
// maybe we contain a hypertext link
List childElements = element.elements( );
if (! childElements.isEmpty()) {
if ( !childElements.isEmpty( ) )
{
Node child = (Node) childElements.get( 0 );
return child.getText( );
}
}
return text;
}
@ -238,17 +277,21 @@ public class Html2XdocBean {
* @return true if the node's heading level calls for a new
* section (or no section exists yet); otherwise, false
*/
protected boolean needsNewSection(Node node) {
protected boolean needsNewSection( Node node )
{
int level = determineHeadingLevel( node );
return level <= currentSectionHeadingLevel
|| currentSectionNode == null;
return ( level <= currentSectionHeadingLevel )
|| ( currentSectionNode == null );
}
/**
* Determines if a paragraph node is needed.
*/
private void guaranteeHasParaNode() {
if (currentParaNode == null) {
private void guaranteeHasParaNode( )
{
if ( currentParaNode == null )
{
currentParaNode = currentNode.addElement( "p" );
}
}
@ -257,8 +300,10 @@ public class Html2XdocBean {
* Makes sure the current node is a section, if necessary.
* @param output the output element to add the section to
*/
private void guaranteeHasSection(Element output) {
if (currentNode == null) {
private void guaranteeHasSection( Element output )
{
if ( currentNode == null )
{
// we have a section with no name
// should we default it to be the same as the document title?
currentNode = output.addElement( "section" );
@ -269,11 +314,14 @@ public class Html2XdocBean {
* Add the node to the current node.
* @param node the node to add
*/
private void addNode(Node node) {
if (currentParaNode != null && ! shouldBreakPara(node)) {
private void addNode( Node node )
{
if ( ( currentParaNode != null ) && !shouldBreakPara( node ) )
{
currentParaNode.add( cloneNode( node ) );
}
else {
else
{
currentNode.add( cloneNode( node ) );
currentParaNode = null;
}
@ -283,8 +331,10 @@ public class Html2XdocBean {
* @return true if the paragraph should be split, such as for a br or p
* tag
*/
protected boolean shouldBreakPara(Node node) {
protected boolean shouldBreakPara( Node node )
{
String name = node.getName( );
return "p".equals( name ) || "br".equals( name );
}
@ -292,7 +342,8 @@ public class Html2XdocBean {
* Adds the text of the node to the current paragraph.
* @param node the node to add
*/
private void addTextNode(Node node) {
private void addTextNode( Node node )
{
guaranteeHasParaNode( );
currentParaNode.addText( node.getText( ) );
}
@ -301,7 +352,8 @@ public class Html2XdocBean {
* Adds the node to the current paragraph.
* @param node the node to add
*/
private void addFormattingNode(Node node) {
private void addFormattingNode( Node node )
{
guaranteeHasParaNode( );
currentParaNode.add( cloneNode( node ) );
}
@ -313,54 +365,84 @@ public class Html2XdocBean {
* @param content the content node list to obtain body content from
* @return List
*/
protected List getBodyContent(List content) {
protected List getBodyContent( List content )
{
// let's turn <pre> into <source> and concatenate consecutive entries
Element lastPre = null;
LinkedList list = new LinkedList( );
boolean lastWasElement = true;
for (Iterator iter = content.iterator(); iter.hasNext();) {
for ( Iterator iter = content.iterator( ); iter.hasNext( ); )
{
Node node = (Node) iter.next( );
if (isPre(node)) {
if (lastPre == null) {
if ( isPre( node ) )
{
if ( lastPre == null )
{
lastPre = factory.createElement( "source" );
list.add( lastPre );
}
lastPre.addText(node.getText());
} else {
if (isWhitespace(node) && lastWasElement) {
if (lastPre != null) {
lastPre.addText( node.getText( ) );
}
} else {
else
{
if ( isWhitespace( node ) && lastWasElement )
{
if ( lastPre != null )
{
lastPre.addText( node.getText( ) );
}
}
else
{
lastWasElement = node instanceof Element;
if (lastWasElement) {
if ( lastWasElement )
{
lastPre = null;
}
list.add( node );
}
}
}
if (list.size() == 0) return list;
if ( list.size( ) == 0 )
{
return list;
}
// now let's remove any whitespace text nodes at the beginning and end
while (true) {
while ( true )
{
Node node = (Node) list.getFirst( );
if (isWhitespace(node)) {
if ( isWhitespace( node ) )
{
list.removeFirst( );
continue;
}
break;
}
while (true) {
while ( true )
{
Node node = (Node) list.getLast( );
if (isWhitespace(node)) {
if ( isWhitespace( node ) )
{
list.removeLast( );
continue;
}
break;
}
return list;
}
@ -368,11 +450,15 @@ public class Html2XdocBean {
* @param node the node to check
* @return true if the node is a pre tag; otherwise false.
*/
protected boolean isPre(Node node) {
if (node instanceof Element) {
protected boolean isPre( Node node )
{
if ( node instanceof Element )
{
Element element = (Element) node;
return element.getName( ).equals( "pre" );
}
return false;
}
@ -380,11 +466,15 @@ public class Html2XdocBean {
* @param node the node to check
* @return true if the given node is a whitespace text node
*/
protected boolean isWhitespace(Node node) {
if (node instanceof CharacterData) {
protected boolean isWhitespace( Node node )
{
if ( node instanceof CharacterData )
{
String text = node.getText( );
return text.trim( ).length( ) <= 0;
}
// if (node instanceof Element) {
// String name = node.getName();
// if (name.equals("p")) {
@ -404,13 +494,17 @@ public class Html2XdocBean {
* @param node the node to clone
* @return Node the cloned node
*/
protected Node cloneNode(Node node) {
protected Node cloneNode( Node node )
{
Node answer = (Node) node.clone( );
if (answer instanceof Element) {
if ( answer instanceof Element )
{
Element element = (Element) answer;
element.normalize( );
}
return answer;
}
}

View File

@ -0,0 +1,12 @@
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<html>
<head>
<title>org.apache.maven.html2xdoc</title>
</head>
<body>
<p>
Contains utility classes for converting an HTML document into an
XDoc-compliant XML document.
</p>
</body>
</html>

View File

@ -59,13 +59,8 @@
*
* TagXMLDoclet.java,v 1.1 2003/02/07 12:10:44 jstrachan Exp
*/
package org.apache.maven.html2xdoc;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URL;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
@ -77,30 +72,39 @@ import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URL;
/**
* A test harness for the HTML to XDOC converter
*
* @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
*/
public class TestHtml2Xdoc extends TestCase {
public class TestHtml2Xdoc extends TestCase
{
protected boolean verbose = false;
public static void main( String[] args ) {
public TestHtml2Xdoc( String testName )
{
super( testName );
}
public static void main( String[] args )
{
TestRunner.run( suite( ) );
}
public static Test suite() {
public static Test suite( )
{
return new TestSuite( TestHtml2Xdoc.class );
}
public TestHtml2Xdoc(String testName) {
super(testName);
}
// Test cases
//-------------------------------------------------------------------------
public void testOne() throws Exception {
public void testOne( ) throws Exception
{
assertConversion( "missingParaBug.html", "missingParaBug.xml" );
assertConversion( "linkInHeading.html", "linkInHeading.xml" );
assertConversion( "codeinpara.html", "codeinpara.xml" );
@ -115,16 +119,18 @@ public class TestHtml2Xdoc extends TestCase {
// Implementation methods
//-------------------------------------------------------------------------
protected void assertConversion(String input, String output) throws Exception {
protected void assertConversion( String input, String output )
throws Exception
{
Html2XdocBean converter = createConverter( );
Document inputDoc = parseHtml( input );
Document expectedDoc = parse( output );
Document actualDoc = converter.convert( inputDoc );
if (verbose) {
if ( verbose )
{
System.out.println( "Comparing: " + input + " to: " + output );
System.out.println( "Parsed: " + inputDoc.asXML( ) );
System.out.println( "Generated: " + actualDoc.asXML( ) );
@ -132,7 +138,8 @@ public class TestHtml2Xdoc extends TestCase {
System.out.println( );
}
assertEqual("Output for: " + input + " does not match: " + output, expectedDoc, actualDoc);
assertEqual( "Output for: " + input + " does not match: " + output,
expectedDoc, actualDoc );
}
/**
@ -142,18 +149,18 @@ public class TestHtml2Xdoc extends TestCase {
* @param expectedDoc
* @param actualDoc
*/
protected void assertEqual(
String message,
Document expectedDoc,
Document actualDoc) throws IOException {
protected void assertEqual( String message, Document expectedDoc,
Document actualDoc ) throws IOException
{
String expectedText = getPrettyPrintText( expectedDoc );
String actualText = getPrettyPrintText( actualDoc );
if (!expectedText.equals(actualText)) {
if ( !expectedText.equals( actualText ) )
{
System.out.println( "Expected: " + expectedText );
System.out.println( "Actual: " + actualText );
}
assertEquals( message, expectedText, actualText );
}
@ -161,12 +168,16 @@ public class TestHtml2Xdoc extends TestCase {
* @param doc the document to pretty-print
* @return String the pretty-printed XML text of the document
*/
protected String getPrettyPrintText(Document doc) throws IOException {
protected String getPrettyPrintText( Document doc )
throws IOException
{
OutputFormat format = OutputFormat.createPrettyPrint( );
StringWriter buffer = new StringWriter( );
XMLWriter writer = new XMLWriter( buffer, format );
writer.write( doc );
writer.close( );
return buffer.toString( );
}
@ -176,9 +187,12 @@ public class TestHtml2Xdoc extends TestCase {
* @param input
* @return Document
*/
protected Document parse(String input) throws Exception {
protected Document parse( String input )
throws Exception
{
URL url = getClassURL( input );
SAXReader saxReader = new SAXReader( );
return saxReader.read( url );
}
@ -189,28 +203,35 @@ public class TestHtml2Xdoc extends TestCase {
* @param input
* @return Document
*/
protected Document parseHtml(String input) throws Exception {
protected Document parseHtml( String input )
throws Exception
{
URL url = getClassURL( input );
SAXParser htmlParser = new SAXParser( );
htmlParser.setProperty(
"http://cyberneko.org/html/properties/names/elems",
"lower"
);
htmlParser.setProperty(
"http://cyberneko.org/html/properties/names/attrs",
"lower"
);
htmlParser.setProperty( "http://cyberneko.org/html/properties/names/elems",
"lower" );
htmlParser.setProperty( "http://cyberneko.org/html/properties/names/attrs",
"lower" );
SAXReader saxReader = new SAXReader( htmlParser );
return saxReader.read( url );
}
protected URL getClassURL(String input) throws Exception {
protected URL getClassURL( String input )
throws Exception
{
URL url = getClass( ).getResource( input );
assertTrue("Could not find resource on classpath for: " + input, url != null);
assertTrue( "Could not find resource on classpath for: " + input,
url != null );
return url;
}
protected Html2XdocBean createConverter() {
protected Html2XdocBean createConverter( )
{
return new Html2XdocBean( );
}
}

View File

@ -0,0 +1,11 @@
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<html>
<head>
<title>org.apache.maven.html2xdoc</title>
</head>
<body>
<p>
Test classes for org.apache.maven.html2xdoc.
</p>
</body>
</html>