diff --git a/linkcheck/src/main/org/apache/maven/linkcheck/FileToCheck.java b/linkcheck/src/main/org/apache/maven/linkcheck/FileToCheck.java index 7eb65567..c00139b1 100644 --- a/linkcheck/src/main/org/apache/maven/linkcheck/FileToCheck.java +++ b/linkcheck/src/main/org/apache/maven/linkcheck/FileToCheck.java @@ -59,12 +59,15 @@ package org.apache.maven.linkcheck; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.PrintWriter; import java.util.ArrayList; -import java.util.HashMap; import java.util.Iterator; import java.util.List; -import java.util.Map; +import java.util.Set; +import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -78,8 +81,7 @@ import org.w3c.tidy.Tidy; /** * @author Ben Walding - * @version $Id: FileToCheck.java,v 1.13 2003/10/26 22:49:40 dion Exp $ - * + * @version $Id: FileToCheck.java,v 1.14 2003/12/06 23:17:44 bwalding Exp $ */ public class FileToCheck { @@ -88,24 +90,29 @@ public class FileToCheck */ private static Log LOG = LogFactory.getLog(FileToCheck.class); - private File base; + private String base; private File fileToCheck; private String status = STATUS_OK; private String message = ""; private int successful; private int unsuccessful; + private List links = new ArrayList(); public static final String STATUS_UNKNOWN = null; public static final String STATUS_JTIDY_FAILURE = "Unable to tidy source"; public static final String STATUS_OK = "OK"; - public FileToCheck(File base, File fileToCheck) + public FileToCheck(File baseFile, File fileToCheck) { - this.base = base; + this.base = baseFile.getAbsolutePath(); this.fileToCheck = fileToCheck; + } - private List links = new ArrayList(); + private void addResult(LinkCheckResult lcr) + { + this.links.add(lcr); + } public void check(LinkValidatorManager lvm) throws Exception { @@ -116,34 +123,10 @@ public class FileToCheck try { - Tidy tidy = new Tidy(); - Document doc = null; - + final Set hrefs; try { - FileInputStream in = new FileInputStream(fileToCheck); - tidy.setMakeClean(true); - tidy.setXmlTags(true); - tidy.setXmlOut(true); - tidy.setQuiet(true); - tidy.setShowWarnings(false); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - PrintWriter errOut = new PrintWriter(baos); - tidy.setErrout(errOut); - LOG.debug("Processing:" + fileToCheck); - tidy.setXHTML(true); - org.w3c.dom.Document domDocument = tidy.parseDOM(in, null); - - // now read a dom4j document from - // JTidy's W3C DOM object - - DOMReader domReader = new DOMReader(); - doc = domReader.read(domDocument); - - if (LOG.isDebugEnabled()) - { - LOG.debug(baos.toString()); - } + hrefs = getLinks(); } catch (Throwable e) { @@ -154,30 +137,11 @@ public class FileToCheck LinkCheckResult lcr = new LinkCheckResult(); lcr.setStatus("PARSE FAILURE"); lcr.setTarget("N/A"); - this.links.add(lcr); + addResult(lcr); return; } - List xpathResults = new ArrayList(); - - xpathResults.addAll(doc.selectNodes("//a/@href")); - xpathResults.addAll(doc.selectNodes("//img/@src")); - // - xpathResults.addAll(doc.selectNodes("//link/@href")); - //