Mozilla/mozilla/webshell/tests/viewer/nsWebCrawler.cpp
rods%netscape.com 20b4083174 1) Removes the arg of nsIDOMWindow in the nsIWebBrowserPrint calls
2) Adds a couple of new methods and an attr to nsIWebBrowserPrint (navigate, is frameset, & exitPP)
3) Removes all but one method from nsIContentViewerFile.idl/h the remaining call is for print regression testing
4) Removes the "static" implementation of nsIContentViewerFile.h
5) Fixed up nsIContentViewerFile.idl and turned it back on so it is now generating the header file
6) Removed all uses of nsIContentViewerFile.h except for the WebCrawler (uses it for Printing Regression testing)
7) nsDocumentViewer.cpp now implements nsIWebBrowserPrint.idl this makes it easier to add new print functionality in one place
9) You can now ask an instance of the ContentViewer for a nsIWebBrowserPrint to do printing (it returns the nsIWebBrowserPrint interface implemented by the nsDocumentViewer)
9) Anybody who was using nsIContentViewerFile to print will now use nsIWebBrowserPrint
10) You can now do a "GetInterface()" on a GlobalWindow for a nsIWebBrowserPrint
11) The browser UI now uses the GetInterface on the GlobalWindow to get a nsIWebBrowserPrint object to do printing and this can be used for all printing functionality
Bug 120622 r=dcone sr=waterson


git-svn-id: svn://10.0.0.236/trunk@113419 18797224-902f-48f8-a5cc-f745e15eee43
2002-02-01 14:52:11 +00:00

1141 lines
30 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is Netscape Communications
* Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All
* Rights Reserved.
*
* Contributor(s):
* Pierre Phaneuf <pp@ludusdesign.com>
* This Original Code has been modified by IBM Corporation. Modifications made by IBM
* described herein are Copyright (c) International Business Machines Corporation, 2000.
* Modifications to Mozilla code or documentation identified per MPL Section 3.3
*
* Date Modified by Description of modification
* 04/20/2000 IBM Corp. OS/2 VisualAge build.
*/
#include "nscore.h"
#include "nsCOMPtr.h"
#include "nsWebCrawler.h"
#include "nsViewerApp.h"
#include "nsIWebShell.h"
#include "nsIContentViewer.h"
#include "nsIDocumentViewer.h"
#include "nsIDocument.h"
#include "nsIContent.h"
#include "nsIPresShell.h"
#include "nsIPresContext.h"
#include "nsIViewManager.h"
#include "nsIFrame.h"
#include "nsIFrameDebug.h"
#include "nsIURL.h"
#include "nsNetUtil.h"
#include "nsITimer.h"
#include "nsIAtom.h"
#include "nsIFrameUtil.h"
#include "nsIComponentManager.h"
#include "nsLayoutCID.h"
#include "nsRect.h"
#include "plhash.h"
#include "nsINameSpaceManager.h"
#include "nsXPIDLString.h"
#include "nsReadableUtils.h"
#include "nsIServiceManager.h"
#include "nsIEventQueueService.h"
#include "nsIEventQueue.h"
#include "prprf.h"
#include "nsIContentViewer.h"
#include "nsIContentViewerFile.h"
#include "nsIDocShell.h"
#include "nsIWebNavigation.h"
#include "nsIWebProgress.h"
// Class and interface IDs for the frame-utility component used by
// PerformRegressionTest to compare regression-data dumps.
static NS_DEFINE_IID(kFrameUtilCID, NS_FRAME_UTIL_CID);
static NS_DEFINE_IID(kIFrameUtilIID, NS_IFRAME_UTIL_IID);
// NOTE(review): kIXMLContentIID appears unused in this file -- confirm
// before removing.
static NS_DEFINE_IID(kIXMLContentIID, NS_IXMLCONTENT_IID);
// Hash an atom by pointer identity: atoms are interned, so the pointer
// value itself uniquely identifies the atom.
static PLHashNumber
HashKey(nsIAtom* key)
{
  PLHashNumber hash = NS_PTR_TO_INT32(key);
  return hash;
}
// Key comparator for the atom hash table: atoms are interned, so two
// keys are equal exactly when the pointers are equal.
static PRIntn
CompareKeys(nsIAtom* key1, nsIAtom* key2)
{
return key1 == key2;
}
// Simple pointer-keyed hash table mapping nsIAtom* -> opaque value.
// The table addrefs each atom key on insert and releases them all in
// the destructor; stored values are not owned.
class AtomHashTable {
public:
AtomHashTable();
~AtomHashTable();
// Return the value stored for aKey, or nsnull if absent.
const void* Get(nsIAtom* aKey);
// Store aValue under aKey; returns the displaced value (or nsnull).
const void* Put(nsIAtom* aKey, const void* aValue);
// Remove aKey's entry; returns the removed value (or nsnull).
const void* Remove(nsIAtom* aKey);
protected:
PLHashTable* mTable;
};
// Create the underlying PLHashTable (8 initial buckets); keys hash by
// pointer and compare by identity. No value comparator or private
// allocator is supplied.
AtomHashTable::AtomHashTable()
{
mTable = PL_NewHashTable(8, (PLHashFunction) HashKey,
(PLHashComparator) CompareKeys,
(PLHashComparator) nsnull,
nsnull, nsnull);
}
// Enumeration callback: drop the reference the table holds on each
// atom key. Values are opaque and not owned, so only the key is
// released.
static PRIntn PR_CALLBACK
DestroyEntry(PLHashEntry *he, PRIntn i, void *arg)
{
((nsIAtom*)he->key)->Release();
return HT_ENUMERATE_NEXT;
}
// Release every atom key, then destroy the table itself.
AtomHashTable::~AtomHashTable()
{
PL_HashTableEnumerateEntries(mTable, DestroyEntry, 0);
PL_HashTableDestroy(mTable);
}
/**
* Get the data associated with a Atom.
*/
// Look up aKey; returns the stored value, or nsnull when the atom has
// no entry.
const void*
AtomHashTable::Get(nsIAtom* aKey)
{
  PRInt32 keyHash = NS_PTR_TO_INT32(aKey);
  PLHashEntry* entry = *PL_HashTableRawLookup(mTable, keyHash, aKey);
  return entry ? entry->value : nsnull;
}
/**
* Create an association between a Atom and some data. This call
* returns an old association if there was one (or nsnull if there
* wasn't).
*/
// Associate aData with aKey. If the atom already has an entry its
// value is replaced in place and the previous value is returned;
// otherwise a new entry is added (the table addrefs the key) and
// nsnull is returned.
const void*
AtomHashTable::Put(nsIAtom* aKey, const void* aData)
{
  PRInt32 keyHash = NS_PTR_TO_INT32(aKey);
  PLHashEntry** hep = PL_HashTableRawLookup(mTable, keyHash, aKey);
  PLHashEntry* existing = *hep;
  if (existing) {
    // Replace in place and hand back the displaced value.
    const void* previous = existing->value;
    existing->value = NS_CONST_CAST(void*, aData);
    return previous;
  }
  // New entry: the table keeps a reference to the atom key.
  NS_ADDREF(aKey);
  PL_HashTableRawAdd(mTable, hep, keyHash, aKey, NS_CONST_CAST(void*, aData));
  return nsnull;
}
/**
* Remove an association between a Atom and it's data. This returns
* the old associated data.
*/
// Remove aKey's entry and return the value it held (nsnull when the
// atom had no entry).
// NOTE(review): the reference taken on the key in Put() is not
// released here, only at table destruction -- confirm intended.
const void*
AtomHashTable::Remove(nsIAtom* aKey)
{
  PRInt32 keyHash = NS_PTR_TO_INT32(aKey);
  PLHashEntry** hep = PL_HashTableRawLookup(mTable, keyHash, aKey);
  PLHashEntry* entry = *hep;
  if (!entry) {
    return nsnull;
  }
  void* removedValue = entry->value;
  PL_HashTableRawRemove(mTable, hep, entry);
  return removedValue;
}
//----------------------------------------------------------------------
// Construct the crawler in its idle state: no browser window attached,
// crawling/jiggle/exit-on-done disabled, and the tag/attribute atoms
// used for link discovery pre-interned.
nsWebCrawler::nsWebCrawler(nsViewerApp* aViewer)
: mHaveURLList(PR_FALSE),
mQueuedLoadURLs(0)
{
NS_INIT_REFCNT();
mBrowser = nsnull;
mViewer = aViewer;
mCrawl = PR_FALSE;
mJiggleLayout = PR_FALSE;
mPostExit = PR_FALSE;
mDelay = 200 /*msec*/; // XXXwaterson straigt outta my arse
mMaxPages = -1; // negative means no page limit
mRecord = nsnull;
// Atoms consulted by FindURLsIn when harvesting links from content
mLinkTag = getter_AddRefs(NS_NewAtom("a"));
mFrameTag = getter_AddRefs(NS_NewAtom("frame"));
mIFrameTag = getter_AddRefs(NS_NewAtom("iframe"));
mHrefAttr = getter_AddRefs(NS_NewAtom("href"));
mSrcAttr = getter_AddRefs(NS_NewAtom("src"));
mBaseHrefAttr = getter_AddRefs(NS_NewAtom("_base_href"));
mVisited = new AtomHashTable();
mVerbose = nsnull;
LL_I2L(mStartLoad, 0);
mRegressing = PR_FALSE;
mPrinterTestType = 0; // 0 = printer regression testing disabled
mRegressionOutputLevel = 0; // full output
mIncludeStyleInfo = PR_TRUE;
}
// Delete every nsString element owned by aArray, then empty the array.
static void FreeStrings(nsVoidArray& aArray)
{
  PRInt32 count = aArray.Count();
  for (PRInt32 index = 0; index < count; index++) {
    delete (nsString*) aArray.ElementAt(index);
  }
  aArray.Clear();
}
// Tear down: free the owned domain-filter strings, drop the browser
// window reference, destroy the visited-URL table.
// NOTE(review): mRecord is closed in LoadNextURL when the pending list
// empties, not here -- confirm no FILE* leak on other exit paths.
nsWebCrawler::~nsWebCrawler()
{
FreeStrings(mSafeDomains);
FreeStrings(mAvoidDomains);
NS_IF_RELEASE(mBrowser);
delete mVisited;
}
// XPCOM boilerplate: the crawler exposes nsIWebProgressListener (to
// observe page loads) and supports weak references.
NS_IMPL_ISUPPORTS2(nsWebCrawler,
nsIWebProgressListener,
nsISupportsWeakReference)
// Dump regression data for the most recently loaded page. In printer
// test mode (mPrinterTestType > 0) this drives the content viewer's
// print path (to a file, or to a real printer with/without a dialog);
// otherwise it dumps the frame tree's regression data. Output goes to
// a ".rgd" file under mOutputDir when one is configured, and a
// baseline comparison runs when mRegressing is set. Debug builds only
// (compiled out without NS_DEBUG).
void
nsWebCrawler::DumpRegressionData()
{
#ifdef NS_DEBUG
  nsCOMPtr<nsIWebShell> webshell;
  mBrowser->GetWebShell(*getter_AddRefs(webshell));
  if (! webshell)
    return;
  if (mOutputDir.Length() > 0) {
    nsIPresShell* shell = GetPresShell(webshell);
    if (!shell) return;
    if ( mPrinterTestType > 0 ) {
      nsCOMPtr <nsIContentViewer> viewer;
      nsCOMPtr<nsIDocShell> docShell(do_QueryInterface(webshell));
      docShell->GetContentViewer(getter_AddRefs(viewer));
      if (viewer){
        nsCOMPtr<nsIContentViewerFile> viewerFile = do_QueryInterface(viewer);
        if (viewerFile) {
          nsAutoString regressionFileName;
          FILE *fp = GetOutputFile(mLastURL, regressionFileName);
          switch (mPrinterTestType) {
            case 1:
              // dump print data to a file for regression testing
              viewerFile->Print(PR_TRUE, fp, nsnull);
              break;
            case 2:
              // visual printing tests, all go to the printer, no printer dialog
              viewerFile->Print(PR_TRUE, nsnull, nsnull);
              break;
            case 3:
              // visual printing tests, all go to the printer, with a printer dialog
              viewerFile->Print(PR_FALSE, nsnull, nsnull);
              break;
            default:
              break;
          }
          // BUGFIX: GetOutputFile may return nsnull (e.g. unwritable
          // output directory); fclose(NULL) is undefined behavior. The
          // frame-dump path below already guarded fp -- do it here too.
          if (fp) {
            fclose(fp);
          }
          if( mPrinterTestType == 1) {
            if (mRegressing) {
              PerformRegressionTest(regressionFileName);
            }
            else {
              fputs(NS_LossyConvertUCS2toASCII(regressionFileName).get(),
                    stdout);
              printf(" - being written\n");
            }
          }
        }
      }
    }
    else {
      nsIFrame* root;
      shell->GetRootFrame(&root);
      if (nsnull != root) {
        nsCOMPtr<nsIPresContext> presContext;
        shell->GetPresContext(getter_AddRefs(presContext));
        // NOTE(review): mOutputDir is already known non-empty here, so
        // this inner test always takes the first branch.
        if (mOutputDir.Length() > 0) {
          nsAutoString regressionFileName;
          FILE *fp = GetOutputFile(mLastURL, regressionFileName);
          if (fp) {
            nsIFrameDebug* fdbg;
            if (NS_SUCCEEDED(root->QueryInterface(NS_GET_IID(nsIFrameDebug), (void**) &fdbg))) {
              fdbg->DumpRegressionData(presContext, fp, 0, mIncludeStyleInfo);
            }
            fclose(fp);
            if (mRegressing) {
              PerformRegressionTest(regressionFileName);
            }
            else {
              fputs(NS_LossyConvertUCS2toASCII(regressionFileName).get(),
                    stdout);
              printf(" - being written\n");
            }
          }
          else {
            char* file;
            (void)mLastURL->GetPath(&file);
            printf("could not open output file for %s\n", file);
            nsCRT::free(file);
          }
        }
        else {
          nsIFrameDebug* fdbg;
          if (NS_SUCCEEDED(root->QueryInterface(NS_GET_IID(nsIFrameDebug), (void**) &fdbg))) {
            fdbg->DumpRegressionData(presContext, stdout, 0, mIncludeStyleInfo);
          }
        }
      }
    }
    NS_RELEASE(shell);
  }
#endif
}
// Timer callback that advances the crawl. In printer-test mode we
// first poll the content viewer's printable state; if a previous print
// job is still in progress we re-arm the timer and try again later
// instead of starting the next load.
void
nsWebCrawler::LoadNextURLCallback(nsITimer *aTimer, void *aClosure)
{
nsWebCrawler* self = (nsWebCrawler*) aClosure;
// if we are doing printing regression tests, check to see
// if we can print (a previous job is not printing)
if (self->mPrinterTestType > 0) {
nsCOMPtr<nsIWebShell> webshell;
self->mBrowser->GetWebShell(*getter_AddRefs(webshell));
if (webshell){
nsCOMPtr <nsIContentViewer> viewer;
nsCOMPtr<nsIDocShell> docShell(do_QueryInterface(webshell));
docShell->GetContentViewer(getter_AddRefs(viewer));
if (viewer){
nsCOMPtr<nsIContentViewerFile> viewerFile = do_QueryInterface(viewer);
if (viewerFile) {
PRBool printable;
viewerFile->GetPrintable(&printable);
if (PR_TRUE !=printable){
// Still printing: re-arm the timer and bail out without loading
self->mTimer = do_CreateInstance("@mozilla.org/timer;1");
self->mTimer->Init(LoadNextURLCallback, self, self->mDelay);
return;
}
}
}
}
}
// Previous page is fully processed: dump its data and move on
self->DumpRegressionData();
self->LoadNextURL(PR_FALSE);
}
// Timer callback fired after the final page settles: dump its
// regression data, then post an exit event to the app's event queue.
void
nsWebCrawler::QueueExitCallback(nsITimer *aTimer, void *aClosure)
{
  nsWebCrawler* crawler = (nsWebCrawler*) aClosure;
  crawler->DumpRegressionData();
  crawler->QueueExit();
}
// nsIWebProgressListener implementation
// nsIWebProgressListener: watch document loads in our browser's shell.
// On STATE_START we remember the URL being loaded; on a successful
// STATE_STOP we report load timing, flush layout, optionally jiggle
// the window width to exercise reflow, harvest more URLs, and arm a
// timer for the next load (or for the exit event).
NS_IMETHODIMP
nsWebCrawler::OnStateChange(nsIWebProgress* aWebProgress,
                            nsIRequest* aRequest,
                            PRInt32 progressStateFlags,
                            nsresult aStatus)
{
  // Make sure that we're being notified for _our_ shell, and not some
  // subshell that's been created e.g. for an IFRAME.
  nsCOMPtr<nsIWebShell> shell;
  mBrowser->GetWebShell(*getter_AddRefs(shell));
  nsCOMPtr<nsIDocShell> docShell = do_QueryInterface(shell);
  if (docShell) {
    nsCOMPtr<nsIWebProgress> progress = do_GetInterface(docShell);
    if (aWebProgress != progress)
      return NS_OK;
  }
  // Make sure that we're being notified for the whole document, not a
  // sub-load.
  if (! (progressStateFlags & nsIWebProgressListener::STATE_IS_DOCUMENT))
    return NS_OK;
  if (progressStateFlags & nsIWebProgressListener::STATE_START) {
    // If the document load is starting, remember its URL as the last
    // URL we've loaded.
    nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest));
    if (! channel) {
      NS_ERROR("no channel avail");
      return NS_ERROR_FAILURE;
    }
    nsCOMPtr<nsIURI> uri;
    channel->GetURI(getter_AddRefs(uri));
    mLastURL = uri;
  }
  //XXXwaterson are these really _not_ mutually exclusive?
  // else
  if ((progressStateFlags & nsIWebProgressListener::STATE_STOP) && (aStatus == NS_OK)) {
    // If the document load is finishing, then wrap up and maybe load
    // some more URLs.
    nsresult rv;
    PRTime endLoadTime = PR_Now();
    nsCOMPtr<nsIURI> uri;
    nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
    // BUGFIX: guard the QI result before dereferencing -- the
    // STATE_START path above already does; this path crashed on a
    // request with no nsIChannel.
    if (! channel) {
      NS_ERROR("no channel avail");
      return NS_ERROR_FAILURE;
    }
    rv = channel->GetURI(getter_AddRefs(uri));
    if (NS_FAILED(rv)) return rv;
    // Ignore this notification unless its for the current url. That way
    // we skip over embedded webshell notifications (e.g. frame cells,
    // iframes, etc.)
    nsXPIDLCString spec;
    uri->GetSpec(getter_Copies(spec));
    PRTime delta, cvt, rounder;
    LL_I2L(cvt, 1000);
    LL_I2L(rounder, 499);
    LL_SUB(delta, endLoadTime, mStartLoad);
    LL_ADD(delta, delta, rounder);
    LL_DIV(delta, delta, cvt);
    printf("+++ %s: done loading (%lld msec)\n", spec.get(), delta);
    // Make sure the document bits make it to the screen at least once
    nsCOMPtr<nsIPresShell> shell = dont_AddRef(GetPresShell());
    if (shell) {
      // Force the presentation shell to flush any pending reflows
      shell->FlushPendingNotifications(PR_FALSE);
      // Force the view manager to update itself
      nsCOMPtr<nsIViewManager> vm;
      shell->GetViewManager(getter_AddRefs(vm));
      if (vm) {
        nsIView* rootView;
        vm->GetRootView(rootView);
        vm->UpdateView(rootView, NS_VMREFRESH_IMMEDIATE);
      }
      if (mJiggleLayout) {
        // Shrink then restore the window width to exercise reflow
        nsRect r;
        mBrowser->GetContentBounds(r);
        nscoord oldWidth = r.width;
        while (r.width > 100) {
          r.width -= 10;
          mBrowser->SizeWindowTo(r.width, r.height, PR_FALSE, PR_FALSE);
        }
        while (r.width < oldWidth) {
          r.width += 10;
          mBrowser->SizeWindowTo(r.width, r.height, PR_FALSE, PR_FALSE);
        }
      }
    }
    if (mCrawl) {
      FindMoreURLs();
    }
    mTimer = do_CreateInstance("@mozilla.org/timer;1");
    if(mPrinterTestType>0){
      mDelay = 5000; // printing needs more time to load, so give it plenty
    } else {
      mDelay = 200;
    }
    if ((0 < mQueuedLoadURLs) || (0 < mPendingURLs.Count())) {
      mTimer->Init(LoadNextURLCallback, this, mDelay);
    }
    else if (mPostExit) {
      mTimer->Init(QueueExitCallback, this, mDelay);
    }
  }
  return NS_OK;
}
// nsIWebProgressListener: per-request progress is not used by the
// crawler.
NS_IMETHODIMP
nsWebCrawler::OnProgressChange(nsIWebProgress *aWebProgress,
nsIRequest *aRequest,
PRInt32 aCurSelfProgress,
PRInt32 aMaxSelfProgress,
PRInt32 aCurTotalProgress,
PRInt32 aMaxTotalProgress) {
return NS_ERROR_NOT_IMPLEMENTED;
}
// nsIWebProgressListener: location changes are not used by the
// crawler.
NS_IMETHODIMP
nsWebCrawler::OnLocationChange(nsIWebProgress* aWebProgress,
nsIRequest* aRequest,
nsIURI *location)
{
return NS_ERROR_NOT_IMPLEMENTED;
}
// nsIWebProgressListener: status text changes are not used by the
// crawler.
NS_IMETHODIMP
nsWebCrawler::OnStatusChange(nsIWebProgress* aWebProgress,
nsIRequest* aRequest,
nsresult aStatus,
const PRUnichar* aMessage)
{
return NS_ERROR_NOT_IMPLEMENTED;
}
// nsIWebProgressListener: security state changes are not used by the
// crawler.
NS_IMETHODIMP
nsWebCrawler::OnSecurityChange(nsIWebProgress *aWebProgress,
nsIRequest *aRequest,
PRInt32 state)
{
return NS_ERROR_NOT_IMPLEMENTED;
}
// Open (mode "wt") the regression-data output file for aURL inside
// mOutputDir; returns the FILE* or nsnull on failure. The file name is
// the URL's leaf name truncated at its first '.' with ".rgd"
// ("regression data") appended; aOutputName receives that leaf name.
FILE*
nsWebCrawler::GetOutputFile(nsIURI *aURL, nsString& aOutputName)
{
  static const char kDefaultOutputFileName[] = "test.txt"; // the default
  FILE *result = nsnull;
  if (nsnull != aURL)
  {
    char *inputFileName;
    char* file;
    (void)aURL->GetPath(&file);
    nsAutoString inputFileFullPath; inputFileFullPath.AssignWithConversion(file);
    nsCRT::free(file);
    PRInt32 fileNameOffset = inputFileFullPath.RFindChar('/');
    if (-1 == fileNameOffset)
    {
      // No '/' in the path: fall back to the default output name.
      inputFileName = new char[strlen(kDefaultOutputFileName) + 1];
      strcpy (inputFileName, kDefaultOutputFileName);
      // BUGFIX: this branch used to leave aOutputName untouched, so the
      // caller saw a stale (or empty) regression file name even though
      // data was written to the default file.
      aOutputName.Truncate();
      aOutputName.AppendWithConversion(inputFileName);
    }
    else
    {
      // Copy the leaf name, stopping at the first '.' so the old
      // extension is dropped, then tack on ".rgd".
      PRInt32 len = inputFileFullPath.Length() - fileNameOffset;
      inputFileName = new char[len + 1 + 20];
      char *c = inputFileName;
      for (PRInt32 i = fileNameOffset + 1; i < fileNameOffset + len; i++)
      {
        char ch = (char) inputFileFullPath.CharAt(i);
        if (ch == '.') {
          // Stop on dot so that we don't keep the old extension
          break;
        }
        *c++ = ch;
      }
      // Tack on ".rgd" extension for "regression data"
      *c++ = '.';
      *c++ = 'r';
      *c++ = 'g';
      *c++ = 'd';
      *c++ = '\0';
      aOutputName.Truncate();
      aOutputName.AppendWithConversion(inputFileName);
    }
    // Build "<mOutputDir><leafname>" and open it for writing
    nsAutoString outputFileName(mOutputDir);
    outputFileName.AppendWithConversion(inputFileName);
    PRInt32 bufLen = outputFileName.Length() + 1;
    char *buf = new char[bufLen + 1];
    outputFileName.ToCString(buf, bufLen);
    result = fopen(buf, "wt");
    delete [] buf;
    delete [] inputFileName;
  }
  return result;
}
// Append a copy of aURL to the pending-URL queue (ownership of the
// copy passes to mPendingURLs; entries are freed in LoadNextURL).
void
nsWebCrawler::AddURL(const nsString& aURL)
{
nsString* url = new nsString(aURL);
mPendingURLs.AppendElement(url);
if (mVerbose) {
printf("WebCrawler: adding '");
fputs(NS_LossyConvertUCS2toASCII(aURL).get(), stdout);
printf("'\n");
}
}
// Register a domain the crawler is allowed to stay within; the list is
// consulted by OkToLoad. The copy is owned by mSafeDomains.
void
nsWebCrawler::AddSafeDomain(const nsString& aDomain)
{
  mSafeDomains.AppendElement(new nsString(aDomain));
}
// Register a domain the crawler must never visit; the list is
// consulted by OkToLoad. The copy is owned by mAvoidDomains.
void
nsWebCrawler::AddAvoidDomain(const nsString& aDomain)
{
  mAvoidDomains.AppendElement(new nsString(aDomain));
}
// Set the directory regression-data (".rgd") files are written into.
void
nsWebCrawler::SetOutputDir(const nsString& aOutputDir)
{
mOutputDir = aOutputDir;
}
// Set the directory containing baseline data used by
// PerformRegressionTest for comparison.
void
nsWebCrawler::SetRegressionDir(const nsString& aDir)
{
mRegressionDir = aDir;
}
// Begin crawling: register this object as a web-progress listener on
// the browser's docshell, then kick off the first URL load.
void
nsWebCrawler::Start()
{
// Enable observing each URL load...
nsCOMPtr<nsIWebShell> shell;
mBrowser->GetWebShell(*getter_AddRefs(shell));
nsCOMPtr<nsIDocShell> docShell(do_QueryInterface(shell));
if (docShell) {
nsCOMPtr<nsIWebProgress> progress(do_GetInterface(docShell));
if (progress) {
progress->AddProgressListener(this);
LoadNextURL(PR_FALSE);
}
}
}
// Turn on crawl mode: after each page loads, its subtree is scanned
// for further URLs to visit (see FindMoreURLs).
void
nsWebCrawler::EnableCrawler()
{
mCrawl = PR_TRUE;
}
// Byte lower-casing table: identity everywhere except that the ASCII
// uppercase letters (0x41-0x5A) map to their lowercase forms. Used for
// case-insensitive byte comparison in EndsWith/StartsWith below.
static const unsigned char kLowerLookup[256] = {
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,
97,98,99,100,101,102,103,104,105,106,107,108,109,
110,111,112,113,114,115,116,117,118,119,120,121,122,
91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
// Case-insensitive (ASCII) test of whether aDomain ends with the
// aHostLen-byte string aHost. Only the low byte of each PRUnichar is
// compared, so characters above 0xFF are folded to their low byte.
static PRBool
EndsWith(const nsString& aDomain, const char* aHost, PRInt32 aHostLen)
{
  PRInt32 domainLen = aDomain.Length();
  if (domainLen < aHostLen) {
    return PR_FALSE;
  }
  // Walk the tail of the domain alongside the host string.
  const PRUnichar* tail = aDomain.get() + (domainLen - aHostLen);
  for (PRInt32 i = 0; i < aHostLen; i++) {
    unsigned char dch = (unsigned char) (tail[i] & 0xff);
    unsigned char hch = (unsigned char) (aHost[i] & 0xff);
    if (kLowerLookup[dch] != kLowerLookup[hch]) {
      return PR_FALSE;
    }
  }
  return PR_TRUE;
}
// Case-insensitive (ASCII) test of whether s1 begins with the
// NUL-terminated string s2. Only the low byte of each PRUnichar is
// compared, so characters above 0xFF are folded to their low byte.
static PRBool
StartsWith(const nsString& s1, const char* s2)
{
  PRInt32 prefixLen = strlen(s2);
  PRInt32 sourceLen = s1.Length();
  if (sourceLen < prefixLen) {
    return PR_FALSE;
  }
  const PRUnichar* source = s1.get();
  for (PRInt32 i = 0; i < prefixLen; i++) {
    unsigned char sch = (unsigned char) (source[i] & 0xff);
    unsigned char pch = (unsigned char) (s2[i] & 0xff);
    if (kLowerLookup[sch] != kLowerLookup[pch]) {
      return PR_FALSE;
    }
  }
  return PR_TRUE;
}
// Decide whether aURLSpec may be crawled: only http/ftp/file/resource
// schemes are eligible, the host must not match any "avoid" domain,
// and when a "safe" domain list exists the host must match one of its
// entries.
PRBool
nsWebCrawler::OkToLoad(const nsString& aURLSpec)
{
  if (!StartsWith(aURLSpec, "http:") && !StartsWith(aURLSpec, "ftp:") &&
      !StartsWith(aURLSpec, "file:") &&
      !StartsWith(aURLSpec, "resource:")) {
    return PR_FALSE;
  }
  PRBool ok = PR_TRUE;
  // BUGFIX: hold the URI in an nsCOMPtr so it is released on the early
  // returns below -- the raw-pointer version leaked a reference on
  // every avoid-domain hit and on both safe-domain returns.
  nsCOMPtr<nsIURI> url;
  nsresult rv = NS_NewURI(getter_AddRefs(url), aURLSpec);
  if (NS_OK == rv) {
    nsXPIDLCString host;
    rv = url->GetHost(getter_Copies(host));
    if (rv == NS_OK) {
      PRInt32 hostlen = PL_strlen(host);
      // Check domains to avoid
      PRInt32 i, n = mAvoidDomains.Count();
      for (i = 0; i < n; i++) {
        nsString* s = (nsString*) mAvoidDomains.ElementAt(i);
        if (s && EndsWith(*s, host, hostlen)) {
          printf("Avoiding '");
          fputs(NS_LossyConvertUCS2toASCII(aURLSpec).get(), stdout);
          printf("'\n");
          return PR_FALSE;
        }
      }
      // Check domains to stay within
      n = mSafeDomains.Count();
      if (n == 0) {
        // If we don't care then all the domains that we aren't
        // avoiding are OK
        return PR_TRUE;
      }
      for (i = 0; i < n; i++) {
        nsString* s = (nsString*) mSafeDomains.ElementAt(i);
        if (s && EndsWith(*s, host, hostlen)) {
          return PR_TRUE;
        }
      }
      ok = PR_FALSE;
    }
  }
  return ok;
}
// If a record file is open, append aURL (one URL per line) and flush
// immediately so the record survives a crash.
void
nsWebCrawler::RecordLoadedURL(const nsString& aURL)
{
  if (nsnull == mRecord) {
    return;
  }
  fputs(NS_LossyConvertUCS2toASCII(aURL).get(), mRecord);
  fputs("\n", mRecord);
  fflush(mRecord);
}
// Recursively scan aNode's subtree for <a href>, <frame src> and
// <iframe src> references, resolve each against the document's URL,
// and append unseen URLs that pass OkToLoad to the pending list.
// Already-seen URLs are tracked in mVisited, keyed by interned URL
// atom.
void
nsWebCrawler::FindURLsIn(nsIDocument* aDocument, nsIContent* aNode)
{
nsCOMPtr<nsIAtom> atom;
aNode->GetTag(*getter_AddRefs(atom));
if ((atom == mLinkTag) || (atom == mFrameTag) || (atom == mIFrameTag)) {
// Get absolute url that tag targets
nsAutoString base, src, absURLSpec;
if (atom == mLinkTag) {
aNode->GetAttr(kNameSpaceID_HTML, mHrefAttr, src);
}
else {
aNode->GetAttr(kNameSpaceID_HTML, mSrcAttr, src);
}
nsCOMPtr<nsIURI> docURL;
aDocument->GetDocumentURL(getter_AddRefs(docURL));
nsresult rv;
rv = NS_MakeAbsoluteURI(absURLSpec, src, docURL);
if (NS_OK == rv) {
// Intern the absolute spec so the visited table can key on it
nsCOMPtr<nsIAtom> urlAtom = getter_AddRefs(NS_NewAtom(absURLSpec));
if (0 == mVisited->Get(urlAtom)) {
// Remember the URL as visited so that we don't go there again
mVisited->Put(urlAtom, "visited");
if (OkToLoad(absURLSpec)) {
mPendingURLs.AppendElement(new nsString(absURLSpec));
if (mVerbose) {
printf("Adding '");
fputs(NS_LossyConvertUCS2toASCII(absURLSpec).get(), stdout);
printf("'\n");
}
}
else {
if (mVerbose) {
printf("Skipping '");
fputs(NS_LossyConvertUCS2toASCII(absURLSpec).get(), stdout);
printf("'\n");
}
}
}
else {
if (mVerbose) {
printf("Already visited '");
fputs(NS_LossyConvertUCS2toASCII(absURLSpec).get(), stdout);
printf("'\n");
}
}
}
}
// Recurse into child content (manual refcounting: ChildAt addrefs)
PRBool canHaveKids;
aNode->CanContainChildren(canHaveKids);
if (canHaveKids) {
PRInt32 i, n;
aNode->ChildCount(n);
for (i = 0; i < n; i++) {
nsIContent* kid;
aNode->ChildAt(i, kid);
if (nsnull != kid) {
FindURLsIn(aDocument, kid);
NS_RELEASE(kid);
}
}
}
}
// Harvest new URLs from the document currently loaded in the browser's
// webshell by walking its content tree with FindURLsIn.
void
nsWebCrawler::FindMoreURLs()
{
  nsCOMPtr<nsIWebShell> shell;
  mBrowser->GetWebShell(*getter_AddRefs(shell));
  nsCOMPtr<nsIDocShell> docShell(do_QueryInterface(shell));
  if (!docShell) {
    return;
  }
  nsCOMPtr<nsIContentViewer> cv;
  docShell->GetContentViewer(getter_AddRefs(cv));
  nsCOMPtr<nsIDocumentViewer> docv(do_QueryInterface(cv));
  if (!docv) {
    return;
  }
  nsCOMPtr<nsIDocument> doc;
  docv->GetDocument(*getter_AddRefs(doc));
  if (!doc) {
    return;
  }
  nsCOMPtr<nsIContent> root;
  doc->GetRootContent(getter_AddRefs(root));
  if (root) {
    FindURLsIn(doc, root);
  }
}
// Attach the browser window the crawler drives, replacing any previous
// one. BUGFIX: addref the incoming pointer *before* releasing the old
// one -- the old release-first order destroys the window mid-assignment
// when the same window (at refcount 1) is assigned twice.
void
nsWebCrawler::SetBrowserWindow(nsBrowserWindow* aWindow)
{
  NS_IF_ADDREF(aWindow);
  NS_IF_RELEASE(mBrowser);
  mBrowser = aWindow;
}
// Return the attached browser window (addref'd), or nsnull when none
// is attached.
void
nsWebCrawler::GetBrowserWindow(nsBrowserWindow** aWindow)
{
  *aWindow = mBrowser;
  NS_IF_ADDREF(*aWindow);
}
// Pop URLs off the pending list until one passes OkToLoad, then load
// it -- directly, or via the event queue when aQueueLoad is set --
// decrementing mMaxPages when a page budget is in force. When nothing
// is left to load, close the record file.
void
nsWebCrawler::LoadNextURL(PRBool aQueueLoad)
{
if ((mMaxPages < 0) || (mMaxPages > 0)) { // mMaxPages == 0: budget spent
while (0 != mPendingURLs.Count()) {
nsString* url = NS_REINTERPRET_CAST(nsString*, mPendingURLs.ElementAt(0));
mPendingURLs.RemoveElementAt(0);
if (nsnull != url) {
if (OkToLoad(*url)) {
RecordLoadedURL(*url);
// NOTE(review): webShell is not null-checked before the QI and
// NS_RELEASE below -- confirm GetWebShell cannot hand back nsnull.
nsIWebShell* webShell;
mBrowser->GetWebShell(webShell);
if (aQueueLoad) {
// Call stop to cancel any pending URL Refreshes...
/// webShell->Stop();
QueueLoadURL(*url);
}
else {
mCurrentURL = *url;
mStartLoad = PR_Now();
nsCOMPtr<nsIWebNavigation> webNav(do_QueryInterface(webShell));
webNav->LoadURI(url->get(), nsIWebNavigation::LOAD_FLAGS_NONE, nsnull, nsnull, nsnull);
}
NS_RELEASE(webShell);
if (mMaxPages > 0) {
--mMaxPages;
}
delete url;
return;
}
// URL rejected by OkToLoad: discard it and try the next one
delete url;
}
}
}
// Nothing left to load: finish the record file, if any
if (nsnull != mRecord) {
fclose(mRecord);
mRecord = nsnull;
}
}
// Return (addref'd) the pres shell for aWebShell -- or for the
// browser's webshell when aWebShell is nsnull. Returns nsnull when any
// link in the chain (docshell, content viewer, document viewer, pres
// context) is missing.
nsIPresShell*
nsWebCrawler::GetPresShell(nsIWebShell* aWebShell)
{
  nsIWebShell* webShell = aWebShell;
  if (webShell) {
    NS_ADDREF(webShell);
  }
  else {
    mBrowser->GetWebShell(webShell);
  }
  nsIPresShell* shell = nsnull;
  nsCOMPtr<nsIDocShell> docShell(do_QueryInterface(webShell));
  // BUGFIX: test the pointer we actually dereference. The old code
  // checked webShell but then called through docShell, which is nsnull
  // whenever the QI fails.
  if (docShell) {
    nsIContentViewer* cv = nsnull;
    docShell->GetContentViewer(&cv);
    if (nsnull != cv) {
      nsIDocumentViewer* docv = nsnull;
      cv->QueryInterface(NS_GET_IID(nsIDocumentViewer), (void**) &docv);
      if (nsnull != docv) {
        nsIPresContext* cx;
        docv->GetPresContext(cx);
        if (nsnull != cx) {
          cx->GetShell(&shell);
          NS_RELEASE(cx);
        }
        NS_RELEASE(docv);
      }
      NS_RELEASE(cv);
    }
  }
  NS_IF_RELEASE(webShell);
  return shell;
}
static FILE*
OpenRegressionFile(const nsString& aBaseName, const nsString& aOutputName)
{
nsAutoString a;
a.Append(aBaseName);
a.Append(NS_LITERAL_STRING("/"));
a.Append(aOutputName);
char* fn = ToNewCString(a);
FILE* fp = fopen(fn, "r");
if (!fp) {
printf("Unable to open regression data file %s\n", fn);
}
delete[] fn;
return fp;
}
#define BUF_SIZE 1024
// Load up both data files (original and the one we just output) into
// two independent xml content trees. Then compare them.
// Compare the freshly written regression dump (mOutputDir/aOutputName)
// against the baseline (mRegressionDir/aOutputName) by loading both
// through nsIFrameUtil, and report pass/fail on stdout.
void
nsWebCrawler::PerformRegressionTest(const nsString& aOutputName)
{
// First load the trees
nsIFrameUtil* fu;
nsresult rv = nsComponentManager::CreateInstance(kFrameUtilCID, nsnull,
kIFrameUtilIID, (void **)&fu);
if (NS_FAILED(rv)) {
printf("Can't find nsIFrameUtil implementation\n");
return;
}
FILE* f1 = OpenRegressionFile(mRegressionDir, aOutputName);
if (!f1) {
NS_RELEASE(fu);
return;
}
FILE* f2 = OpenRegressionFile(mOutputDir, aOutputName);
if (!f2) {
fclose(f1);
NS_RELEASE(fu);
return;
}
// CompareRegressionData consumes/closes both files
rv = fu->CompareRegressionData(f1, f2,mRegressionOutputLevel);
NS_RELEASE(fu);
char dirName[BUF_SIZE];
char fileName[BUF_SIZE];
mOutputDir.ToCString(dirName, BUF_SIZE-1);
aOutputName.ToCString(fileName, BUF_SIZE-1);
printf("regression test %s%s %s\n", dirName, fileName, NS_SUCCEEDED(rv) ? "passed" : "failed");
}
//----------------------------------------------------------------------
// Event-queue service IDs used by QueueEvent below.
static NS_DEFINE_IID(kEventQueueServiceCID, NS_EVENTQUEUESERVICE_CID);
static NS_DEFINE_IID(kIEventQueueServiceIID, NS_IEVENTQUEUESERVICE_IID);
// Post aEvent to the current thread's event queue via the event queue
// service. Returns the service-acquisition error if the service cannot
// be obtained, otherwise the result of looking up the thread queue.
static nsresult
QueueEvent(PLEvent* aEvent)
{
  nsISupports* serviceSupports;
  nsresult rv = nsServiceManager::GetService(kEventQueueServiceCID,
                                             kIEventQueueServiceIID,
                                             &serviceSupports,
                                             nsnull);
  if (NS_FAILED(rv)) {
    return rv;
  }
  nsCOMPtr<nsIEventQueueService> eqService = do_QueryInterface(serviceSupports);
  if (eqService) {
    nsCOMPtr<nsIEventQueue> queue;
    rv = eqService->GetThreadEventQueue(NS_CURRENT_THREAD, getter_AddRefs(queue));
    if (queue) {
      queue->PostEvent(aEvent);
    }
  }
  nsServiceManager::ReleaseService(kEventQueueServiceCID, serviceSupports, nsnull);
  return rv;
}
//----------------------------------------------------------------------
// PLEvent that, when dispatched, tells the crawler to exit the viewer
// application. Holds a strong reference to the crawler for its
// lifetime.
struct ExitEvent : public PLEvent {
ExitEvent(nsWebCrawler* aCrawler);
~ExitEvent();
// Invoked by HandleMe when the event is dispatched
void DoIt() {
crawler->Exit();
}
nsWebCrawler* crawler; // strong reference (addref'd in ctor)
static void PR_CALLBACK HandleMe(ExitEvent* e);
static void PR_CALLBACK DeleteMe(ExitEvent* e);
};
// Initialize the PLEvent with our handler/destructor callbacks and
// take a strong reference on the crawler (paired in the dtor).
ExitEvent::ExitEvent(nsWebCrawler* aCrawler)
: crawler(aCrawler)
{
PL_InitEvent(this, crawler, (PLHandleEventProc) HandleMe,
(PLDestroyEventProc) DeleteMe);
NS_ADDREF(aCrawler);
}
// Release the crawler reference taken in the constructor.
ExitEvent::~ExitEvent()
{
NS_RELEASE(crawler);
}
// PLEvent dispatch callback: run the event's action.
void
ExitEvent::HandleMe(ExitEvent* e)
{
e->DoIt();
}
// PLEvent destroy callback: free the event after dispatch.
void
ExitEvent::DeleteMe(ExitEvent* e)
{
delete e;
}
// Schedule application exit by posting an ExitEvent; the actual exit
// happens when the event is dispatched on the event queue.
// NOTE(review): if QueueEvent fails to post, the event is never
// destroyed -- confirm whether the leak matters on this shutdown path.
void
nsWebCrawler::QueueExit()
{
ExitEvent* event = new ExitEvent(this);
QueueEvent(event);
}
// Terminate the viewer application (invoked from ExitEvent::DoIt).
void
nsWebCrawler::Exit()
{
mViewer->Exit();
}
//----------------------------------------------------------------------
// PLEvent that, when dispatched, starts loading the captured URL in
// the crawler's browser. Holds a strong reference to the crawler.
struct LoadEvent : public PLEvent {
LoadEvent(nsWebCrawler* aCrawler, const nsString& aURL);
~LoadEvent();
// Invoked by HandleMe when the event is dispatched
void DoIt() {
crawler->GoToQueuedURL(url);
}
nsString url; // URL to load, copied at queue time
nsWebCrawler* crawler; // strong reference (addref'd in ctor)
static void PR_CALLBACK HandleMe(LoadEvent* e);
static void PR_CALLBACK DeleteMe(LoadEvent* e);
};
// Copy the URL, initialize the PLEvent callbacks, and take a strong
// reference on the crawler (paired in the dtor).
LoadEvent::LoadEvent(nsWebCrawler* aCrawler, const nsString& aURL)
: url(aURL),
crawler(aCrawler)
{
PL_InitEvent(this, crawler, (PLHandleEventProc) HandleMe,
(PLDestroyEventProc) DeleteMe);
NS_ADDREF(aCrawler);
}
// Release the crawler reference taken in the constructor.
LoadEvent::~LoadEvent()
{
NS_RELEASE(crawler);
}
// PLEvent dispatch callback: run the event's action.
void
LoadEvent::HandleMe(LoadEvent* e)
{
e->DoIt();
}
// PLEvent destroy callback: free the event after dispatch.
void
LoadEvent::DeleteMe(LoadEvent* e)
{
delete e;
}
void
nsWebCrawler::GoToQueuedURL(const nsString& aURL)
{
nsIWebShell* webShell;
mBrowser->GetWebShell(webShell);
nsCOMPtr<nsIWebNavigation> webNav(do_QueryInterface(webShell));
if (webNav) {
mCurrentURL = aURL;
mStartLoad = PR_Now();
webNav->LoadURI(aURL.get(), nsIWebNavigation::LOAD_FLAGS_NONE, nsnull, nsnull, nsnull);
NS_RELEASE(webShell);
}
mQueuedLoadURLs--;
}
// Queue an asynchronous load of aURL by posting a LoadEvent; on
// success the outstanding-load counter is bumped (decremented again in
// GoToQueuedURL when the event fires).
nsresult
nsWebCrawler::QueueLoadURL(const nsString& aURL)
{
  nsresult rv = QueueEvent(new LoadEvent(this, aURL));
  if (NS_SUCCEEDED(rv)) {
    mQueuedLoadURLs++;
  }
  return rv;
}