Split things out into marginally better methods. Resolved at least one
resource leak. PR: MPLINKCHECK-6 git-svn-id: https://svn.apache.org/repos/asf/maven/maven-1/plugins/trunk@114456 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
08d0ac83e8
commit
d7c8559949
@ -59,12 +59,15 @@ package org.apache.maven.linkcheck;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
@ -78,8 +81,7 @@ import org.w3c.tidy.Tidy;
|
||||
|
||||
/**
|
||||
* @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
|
||||
* @version $Id: FileToCheck.java,v 1.13 2003/10/26 22:49:40 dion Exp $
|
||||
*
|
||||
* @version $Id: FileToCheck.java,v 1.14 2003/12/06 23:17:44 bwalding Exp $
|
||||
*/
|
||||
public class FileToCheck
|
||||
{
|
||||
@ -88,24 +90,29 @@ public class FileToCheck
|
||||
*/
|
||||
private static Log LOG = LogFactory.getLog(FileToCheck.class);
|
||||
|
||||
private File base;
|
||||
private String base;
|
||||
private File fileToCheck;
|
||||
private String status = STATUS_OK;
|
||||
private String message = "";
|
||||
private int successful;
|
||||
private int unsuccessful;
|
||||
private List links = new ArrayList();
|
||||
|
||||
public static final String STATUS_UNKNOWN = null;
|
||||
public static final String STATUS_JTIDY_FAILURE = "Unable to tidy source";
|
||||
public static final String STATUS_OK = "OK";
|
||||
|
||||
public FileToCheck(File base, File fileToCheck)
|
||||
public FileToCheck(File baseFile, File fileToCheck)
|
||||
{
|
||||
this.base = base;
|
||||
this.base = baseFile.getAbsolutePath();
|
||||
this.fileToCheck = fileToCheck;
|
||||
|
||||
}
|
||||
|
||||
private List links = new ArrayList();
|
||||
private void addResult(LinkCheckResult lcr)
|
||||
{
|
||||
this.links.add(lcr);
|
||||
}
|
||||
|
||||
public void check(LinkValidatorManager lvm) throws Exception
|
||||
{
|
||||
@ -116,34 +123,10 @@ public class FileToCheck
|
||||
|
||||
try
|
||||
{
|
||||
Tidy tidy = new Tidy();
|
||||
Document doc = null;
|
||||
|
||||
final Set hrefs;
|
||||
try
|
||||
{
|
||||
FileInputStream in = new FileInputStream(fileToCheck);
|
||||
tidy.setMakeClean(true);
|
||||
tidy.setXmlTags(true);
|
||||
tidy.setXmlOut(true);
|
||||
tidy.setQuiet(true);
|
||||
tidy.setShowWarnings(false);
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
PrintWriter errOut = new PrintWriter(baos);
|
||||
tidy.setErrout(errOut);
|
||||
LOG.debug("Processing:" + fileToCheck);
|
||||
tidy.setXHTML(true);
|
||||
org.w3c.dom.Document domDocument = tidy.parseDOM(in, null);
|
||||
|
||||
// now read a dom4j document from
|
||||
// JTidy's W3C DOM object
|
||||
|
||||
DOMReader domReader = new DOMReader();
|
||||
doc = domReader.read(domDocument);
|
||||
|
||||
if (LOG.isDebugEnabled())
|
||||
{
|
||||
LOG.debug(baos.toString());
|
||||
}
|
||||
hrefs = getLinks();
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
@ -154,30 +137,11 @@ public class FileToCheck
|
||||
LinkCheckResult lcr = new LinkCheckResult();
|
||||
lcr.setStatus("PARSE FAILURE");
|
||||
lcr.setTarget("N/A");
|
||||
this.links.add(lcr);
|
||||
addResult(lcr);
|
||||
return;
|
||||
}
|
||||
|
||||
List xpathResults = new ArrayList();
|
||||
|
||||
xpathResults.addAll(doc.selectNodes("//a/@href"));
|
||||
xpathResults.addAll(doc.selectNodes("//img/@src"));
|
||||
//<link rel="stylesheet" href="...">
|
||||
xpathResults.addAll(doc.selectNodes("//link/@href"));
|
||||
//<script src="http://ar.atwola.com/file/adsWrapper.js">
|
||||
xpathResults.addAll(doc.selectNodes("//script/@src"));
|
||||
|
||||
Map uniqueLinks = new HashMap();
|
||||
Iterator linkIter = xpathResults.iterator();
|
||||
while (linkIter.hasNext())
|
||||
{
|
||||
Node node = (Node) linkIter.next();
|
||||
String href = node.getText();
|
||||
uniqueLinks.put(href, href);
|
||||
}
|
||||
|
||||
Iterator iter = uniqueLinks.keySet().iterator();
|
||||
while (iter.hasNext())
|
||||
for (Iterator iter = hrefs.iterator(); iter.hasNext(); )
|
||||
{
|
||||
String href = (String) iter.next();
|
||||
|
||||
@ -194,21 +158,20 @@ public class FileToCheck
|
||||
case LinkValidationResult.VALID :
|
||||
successful++;
|
||||
lcr.setStatus("OK");
|
||||
this.links.add(lcr); //At some point we won't want to store valid links. The tests require that we do at present
|
||||
addResult(lcr); //At some point we won't want to store valid links. The tests require that we do at present
|
||||
break;
|
||||
case LinkValidationResult.UNKNOWN :
|
||||
unsuccessful++;
|
||||
lcr.setStatus("UNKNOWN REF");
|
||||
this.links.add(lcr);
|
||||
addResult(lcr);
|
||||
break;
|
||||
case LinkValidationResult.INVALID :
|
||||
unsuccessful++;
|
||||
lcr.setStatus("NOT FOUND");
|
||||
this.links.add(lcr);
|
||||
addResult(lcr);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
catch (Exception e)
|
||||
@ -218,6 +181,95 @@ public class FileToCheck
|
||||
}
|
||||
}
|
||||
|
||||
private Set getLinks() throws FileNotFoundException
|
||||
{
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
PrintWriter errOut = new PrintWriter(baos);
|
||||
FileInputStream in = new FileInputStream(fileToCheck);
|
||||
try
|
||||
{
|
||||
Tidy tidy = getTidy();
|
||||
tidy.setErrout(errOut);
|
||||
LOG.debug("Processing:" + fileToCheck);
|
||||
org.w3c.dom.Document domDocument = tidy.parseDOM(in, null);
|
||||
|
||||
// now read a dom4j document from
|
||||
// JTidy's W3C DOM object
|
||||
final DOMReader domReader = new DOMReader();
|
||||
final Document doc = domReader.read(domDocument);
|
||||
|
||||
LOG.debug(baos.toString());
|
||||
|
||||
return findUniqueLinks(doc);
|
||||
}
|
||||
finally
|
||||
{
|
||||
close(in);
|
||||
close(baos);
|
||||
}
|
||||
}
|
||||
|
||||
private void close(InputStream is)
|
||||
{
|
||||
try
|
||||
{
|
||||
is.close();
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
//Don't really care.
|
||||
}
|
||||
}
|
||||
|
||||
private void close(OutputStream os)
|
||||
{
|
||||
try
|
||||
{
|
||||
os.close();
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
//Don't really care.
|
||||
}
|
||||
}
|
||||
|
||||
private Set findUniqueLinks(Document doc)
|
||||
{
|
||||
List xpathResults = new ArrayList();
|
||||
|
||||
xpathResults.addAll(doc.selectNodes("//a/@href"));
|
||||
xpathResults.addAll(doc.selectNodes("//img/@src"));
|
||||
|
||||
//<link rel="stylesheet" href="...">
|
||||
xpathResults.addAll(doc.selectNodes("//link/@href"));
|
||||
|
||||
//<script src="http://ar.atwola.com/file/adsWrapper.js">
|
||||
xpathResults.addAll(doc.selectNodes("//script/@src"));
|
||||
|
||||
Set results = new TreeSet();
|
||||
Iterator linkIter = xpathResults.iterator();
|
||||
while (linkIter.hasNext())
|
||||
{
|
||||
Node node = (Node) linkIter.next();
|
||||
String href = node.getText();
|
||||
results.add(href);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private Tidy getTidy()
|
||||
{
|
||||
Tidy tidy = new Tidy();
|
||||
tidy.setMakeClean(true);
|
||||
tidy.setXmlTags(true);
|
||||
tidy.setXmlOut(true);
|
||||
tidy.setXHTML(true);
|
||||
tidy.setQuiet(true);
|
||||
tidy.setShowWarnings(false);
|
||||
return tidy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the message.
|
||||
* @return String
|
||||
@ -245,15 +297,6 @@ public class FileToCheck
|
||||
this.message = message;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the status.
|
||||
* @param status The status to set
|
||||
*/
|
||||
public void setStatus(String status)
|
||||
{
|
||||
this.status = status;
|
||||
}
|
||||
|
||||
public List getResults()
|
||||
{
|
||||
return links;
|
||||
@ -279,10 +322,10 @@ public class FileToCheck
|
||||
|
||||
public String getName()
|
||||
{
|
||||
String baseName = base.getAbsolutePath();
|
||||
String fileName = fileToCheck.getAbsolutePath();
|
||||
if (fileName.startsWith(baseName))
|
||||
fileName = fileName.substring(baseName.length() + 1);
|
||||
if (fileName.startsWith(base)) {
|
||||
fileName = fileName.substring(base.length() + 1);
|
||||
}
|
||||
|
||||
fileName = fileName.replace('\\', '/');
|
||||
return fileName;
|
||||
@ -309,4 +352,4 @@ public class FileToCheck
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@ -79,7 +79,7 @@ import org.apache.maven.project.Project;
|
||||
* their links checked.
|
||||
*
|
||||
* @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
|
||||
* @version $Id: LinkCheck.java,v 1.10 2003/09/13 21:48:08 bwalding Exp $
|
||||
* @version $Id: LinkCheck.java,v 1.11 2003/12/06 23:17:44 bwalding Exp $
|
||||
*/
|
||||
public class LinkCheck
|
||||
{
|
||||
@ -205,6 +205,13 @@ public class LinkCheck
|
||||
}
|
||||
else
|
||||
{
|
||||
if (allFiles.size() % 1000 == 0) {
|
||||
LOG.info("Found " + allFiles.size() + " files so far.");
|
||||
final long MEG = 1024 * 1024;
|
||||
Runtime r = Runtime.getRuntime();
|
||||
LOG.info( " Memory: " + ((r.totalMemory() - r.freeMemory()) / MEG) + "M/" + (r.totalMemory() / MEG) + "M");
|
||||
}
|
||||
//LOG.info(" File - " + file);
|
||||
allFiles.add(new FileToCheck(baseDir, file));
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,36 @@
|
||||
package org.apache.maven.linkcheck;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.maven.jelly.MavenJellyContext;
|
||||
import org.apache.maven.project.Project;
|
||||
|
||||
/**
|
||||
* @author <a href="bwalding@apache.org">Ben Walding</a>
|
||||
* @version $Id: LinkCheckCli.java,v 1.1 2003/12/06 23:17:44 bwalding Exp $
|
||||
*/
|
||||
public class LinkCheckCli
|
||||
{
|
||||
public static void main(String args[]) throws Exception
|
||||
{
|
||||
LinkCheckCli lcc = new LinkCheckCli();
|
||||
lcc.doMain(args);
|
||||
}
|
||||
|
||||
private void doMain(String args[]) throws Exception
|
||||
{
|
||||
Project p = new Project();
|
||||
MavenJellyContext ctx = new MavenJellyContext();
|
||||
ctx.setProxyHost(null);
|
||||
p.setContext(ctx);
|
||||
|
||||
LinkCheck lc = new LinkCheck();
|
||||
lc.setBasedir(new File("d:/data"));
|
||||
lc.setOutput(new File("target/linkcheck.xml"));
|
||||
lc.setCache("target/linkcheck.cache");
|
||||
lc.setOutputEncoding("ISO-8859");
|
||||
lc.setExclude("");
|
||||
lc.setProject(p);
|
||||
lc.doExecute();
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user