Split things out into marginally better methods. Resolved at least one resource leak.
PR: MPLINKCHECK-6


git-svn-id: https://svn.apache.org/repos/asf/maven/maven-1/plugins/trunk@114456 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
bwalding 2003-12-06 23:17:44 +00:00
parent 08d0ac83e8
commit d7c8559949
3 changed files with 159 additions and 73 deletions

View File

@ -59,12 +59,15 @@ package org.apache.maven.linkcheck;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -78,8 +81,7 @@ import org.w3c.tidy.Tidy;
/**
* @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
* @version $Id: FileToCheck.java,v 1.13 2003/10/26 22:49:40 dion Exp $
*
* @version $Id: FileToCheck.java,v 1.14 2003/12/06 23:17:44 bwalding Exp $
*/
public class FileToCheck
{
@ -88,24 +90,29 @@ public class FileToCheck
*/
private static Log LOG = LogFactory.getLog(FileToCheck.class);
private File base;
private String base;
private File fileToCheck;
private String status = STATUS_OK;
private String message = "";
private int successful;
private int unsuccessful;
private List links = new ArrayList();
public static final String STATUS_UNKNOWN = null;
public static final String STATUS_JTIDY_FAILURE = "Unable to tidy source";
public static final String STATUS_OK = "OK";
public FileToCheck(File base, File fileToCheck)
public FileToCheck(File baseFile, File fileToCheck)
{
this.base = base;
this.base = baseFile.getAbsolutePath();
this.fileToCheck = fileToCheck;
}
private List links = new ArrayList();
/**
 * Records one link check result for this file.
 *
 * @param lcr the result to append to the list exposed by {@link #getResults()}
 */
private void addResult(LinkCheckResult lcr)
{
    links.add(lcr);
}
public void check(LinkValidatorManager lvm) throws Exception
{
@ -116,34 +123,10 @@ public class FileToCheck
try
{
Tidy tidy = new Tidy();
Document doc = null;
final Set hrefs;
try
{
FileInputStream in = new FileInputStream(fileToCheck);
tidy.setMakeClean(true);
tidy.setXmlTags(true);
tidy.setXmlOut(true);
tidy.setQuiet(true);
tidy.setShowWarnings(false);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PrintWriter errOut = new PrintWriter(baos);
tidy.setErrout(errOut);
LOG.debug("Processing:" + fileToCheck);
tidy.setXHTML(true);
org.w3c.dom.Document domDocument = tidy.parseDOM(in, null);
// now read a dom4j document from
// JTidy's W3C DOM object
DOMReader domReader = new DOMReader();
doc = domReader.read(domDocument);
if (LOG.isDebugEnabled())
{
LOG.debug(baos.toString());
}
hrefs = getLinks();
}
catch (Throwable e)
{
@ -154,30 +137,11 @@ public class FileToCheck
LinkCheckResult lcr = new LinkCheckResult();
lcr.setStatus("PARSE FAILURE");
lcr.setTarget("N/A");
this.links.add(lcr);
addResult(lcr);
return;
}
List xpathResults = new ArrayList();
xpathResults.addAll(doc.selectNodes("//a/@href"));
xpathResults.addAll(doc.selectNodes("//img/@src"));
//<link rel="stylesheet" href="...">
xpathResults.addAll(doc.selectNodes("//link/@href"));
//<script src="http://ar.atwola.com/file/adsWrapper.js">
xpathResults.addAll(doc.selectNodes("//script/@src"));
Map uniqueLinks = new HashMap();
Iterator linkIter = xpathResults.iterator();
while (linkIter.hasNext())
{
Node node = (Node) linkIter.next();
String href = node.getText();
uniqueLinks.put(href, href);
}
Iterator iter = uniqueLinks.keySet().iterator();
while (iter.hasNext())
for (Iterator iter = hrefs.iterator(); iter.hasNext(); )
{
String href = (String) iter.next();
@ -194,21 +158,20 @@ public class FileToCheck
case LinkValidationResult.VALID :
successful++;
lcr.setStatus("OK");
this.links.add(lcr); //At some point we won't want to store valid links. The tests require that we do at present
addResult(lcr); //At some point we won't want to store valid links. The tests require that we do at present
break;
case LinkValidationResult.UNKNOWN :
unsuccessful++;
lcr.setStatus("UNKNOWN REF");
this.links.add(lcr);
addResult(lcr);
break;
case LinkValidationResult.INVALID :
unsuccessful++;
lcr.setStatus("NOT FOUND");
this.links.add(lcr);
addResult(lcr);
break;
}
}
}
catch (Exception e)
@ -218,6 +181,95 @@ public class FileToCheck
}
}
/**
 * Parses {@link #fileToCheck} with JTidy, converts the resulting W3C DOM
 * into a dom4j document and returns the unique link targets found in it.
 *
 * JTidy's error output is captured in memory and only written to the log
 * when debug logging is enabled.
 *
 * @return the set of unique link targets, as produced by findUniqueLinks
 * @throws FileNotFoundException if the file to check cannot be opened
 */
private Set getLinks() throws FileNotFoundException
{
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintWriter errOut = new PrintWriter(baos);
    FileInputStream in = new FileInputStream(fileToCheck);
    try
    {
        Tidy tidy = getTidy();
        tidy.setErrout(errOut);
        LOG.debug("Processing:" + fileToCheck);

        org.w3c.dom.Document domDocument = tidy.parseDOM(in, null);

        // now read a dom4j document from
        // JTidy's W3C DOM object
        final DOMReader domReader = new DOMReader();
        final Document doc = domReader.read(domDocument);

        if (LOG.isDebugEnabled())
        {
            // PrintWriter buffers its output and was created without
            // auto-flush, so push everything into baos before reading it;
            // otherwise the logged diagnostics may be empty or truncated.
            errOut.flush();
            LOG.debug(baos.toString());
        }

        return findUniqueLinks(doc);
    }
    finally
    {
        // Always release the file handle, even when parsing fails.
        close(in);
        close(baos);
    }
}
/**
 * Closes the given input stream on a best-effort basis.
 * Nothing is reported to the caller: a null stream is a no-op and any
 * exception raised while closing is discarded.
 *
 * @param is the stream to close
 */
private void close(InputStream is)
{
    if (is == null)
    {
        return;
    }

    try
    {
        is.close();
    }
    catch (Exception ignored)
    {
        //Don't really care.
    }
}
/**
 * Closes the given output stream on a best-effort basis.
 * Nothing is reported to the caller: a null stream is a no-op and any
 * exception raised while closing is discarded.
 *
 * @param os the stream to close
 */
private void close(OutputStream os)
{
    if (os == null)
    {
        return;
    }

    try
    {
        os.close();
    }
    catch (Exception ignored)
    {
        //Don't really care.
    }
}
/**
 * Gathers the text of every link-carrying attribute in the document
 * (anchor and stylesheet hrefs, image and script sources) and returns
 * the distinct values.
 *
 * @param doc the dom4j document to search
 * @return a sorted set holding each attribute value once
 */
private Set findUniqueLinks(Document doc)
{
    final String[] linkXPaths = {
        "//a/@href",
        "//img/@src",
        //<link rel="stylesheet" href="...">
        "//link/@href",
        //<script src="http://ar.atwola.com/file/adsWrapper.js">
        "//script/@src"
    };

    // Run every query first, then reduce the matches to unique strings.
    List attributeNodes = new ArrayList();
    for (int i = 0; i < linkXPaths.length; i++)
    {
        attributeNodes.addAll(doc.selectNodes(linkXPaths[i]));
    }

    Set uniqueTargets = new TreeSet();
    for (Iterator it = attributeNodes.iterator(); it.hasNext();)
    {
        Node attribute = (Node) it.next();
        uniqueTargets.add(attribute.getText());
    }
    return uniqueTargets;
}
/**
 * Builds a fresh JTidy instance with the fixed option set used by this
 * class: make-clean, XML tags, XML output and XHTML switched on, quiet
 * mode enabled and warnings not shown.
 *
 * @return a newly created, configured Tidy
 */
private Tidy getTidy()
{
    final Tidy configured = new Tidy();

    // Parsing / output options.
    configured.setMakeClean(true);
    configured.setXmlTags(true);
    configured.setXmlOut(true);
    configured.setXHTML(true);

    // Reporting options.
    configured.setQuiet(true);
    configured.setShowWarnings(false);

    return configured;
}
/**
* Returns the message.
* @return String
@ -245,15 +297,6 @@ public class FileToCheck
this.message = message;
}
/**
* Sets the status.
* @param status The status to set
*/
public void setStatus(String status)
{
this.status = status;
}
public List getResults()
{
return links;
@ -279,10 +322,10 @@ public class FileToCheck
public String getName()
{
String baseName = base.getAbsolutePath();
String fileName = fileToCheck.getAbsolutePath();
if (fileName.startsWith(baseName))
fileName = fileName.substring(baseName.length() + 1);
if (fileName.startsWith(base)) {
fileName = fileName.substring(base.length() + 1);
}
fileName = fileName.replace('\\', '/');
return fileName;
@ -309,4 +352,4 @@ public class FileToCheck
return buf.toString();
}
}
}

View File

@ -79,7 +79,7 @@ import org.apache.maven.project.Project;
* their links checked.
*
* @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
* @version $Id: LinkCheck.java,v 1.10 2003/09/13 21:48:08 bwalding Exp $
* @version $Id: LinkCheck.java,v 1.11 2003/12/06 23:17:44 bwalding Exp $
*/
public class LinkCheck
{
@ -205,6 +205,13 @@ public class LinkCheck
}
else
{
if (allFiles.size() % 1000 == 0) {
LOG.info("Found " + allFiles.size() + " files so far.");
final long MEG = 1024 * 1024;
Runtime r = Runtime.getRuntime();
LOG.info( " Memory: " + ((r.totalMemory() - r.freeMemory()) / MEG) + "M/" + (r.totalMemory() / MEG) + "M");
}
//LOG.info(" File - " + file);
allFiles.add(new FileToCheck(baseDir, file));
}
}

View File

@ -0,0 +1,36 @@
package org.apache.maven.linkcheck;
import java.io.File;
import org.apache.maven.jelly.MavenJellyContext;
import org.apache.maven.project.Project;
/**
 * Command line driver that runs a {@link LinkCheck} against a directory
 * using a hand-built {@link Project} and {@link MavenJellyContext}.
 *
 * Usage: <code>LinkCheckCli [basedir]</code>. When no argument is given
 * the historical default directory is used.
 *
 * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
 * @version $Id: LinkCheckCli.java,v 1.1 2003/12/06 23:17:44 bwalding Exp $
 */
public class LinkCheckCli
{
    /** Directory that is checked when no base directory argument is supplied. */
    private static final String DEFAULT_BASEDIR = "d:/data";

    /**
     * Encoding handed to the link checker for its output.
     * "ISO-8859" on its own is not a valid charset name; the Latin-1
     * charset is spelled "ISO-8859-1".
     */
    private static final String OUTPUT_ENCODING = "ISO-8859-1";

    /**
     * Program entry point.
     *
     * @param args optional; <code>args[0]</code> is the base directory to check
     * @throws Exception if the link check fails
     */
    public static void main(String[] args) throws Exception
    {
        LinkCheckCli lcc = new LinkCheckCli();
        lcc.doMain(args);
    }

    /**
     * Configures a LinkCheck and executes it.
     *
     * @param args optional; <code>args[0]</code> is the base directory to check
     * @throws Exception if the link check fails
     */
    private void doMain(String[] args) throws Exception
    {
        // Fall back to the previous hard-coded directory so existing
        // argument-less invocations keep working.
        String basedir = DEFAULT_BASEDIR;
        if (args != null && args.length > 0)
        {
            basedir = args[0];
        }

        Project p = new Project();
        MavenJellyContext ctx = new MavenJellyContext();
        // NOTE(review): presumably a null proxy host means "no proxy" - confirm.
        ctx.setProxyHost(null);
        p.setContext(ctx);

        LinkCheck lc = new LinkCheck();
        lc.setBasedir(new File(basedir));
        lc.setOutput(new File("target/linkcheck.xml"));
        lc.setCache("target/linkcheck.cache");
        lc.setOutputEncoding(OUTPUT_ENCODING);
        lc.setExclude("");
        lc.setProject(p);
        lc.doExecute();
    }
}