Mirror of https://github.com/mozilla/pjs.git
Fixed up the web-crawler to only trigger crawls on a document being loaded, not on one of its images; support -o even w/o a filter; use the regression dump for -o instead of list
This commit is contained in:
Parent: c0c1a822e2
Commit: b86fb23719
@@ -160,6 +160,7 @@ nsWebCrawler::nsWebCrawler(nsViewerApp* aViewer)
   mFrameTag = NS_NewAtom("FRAME");
   mIFrameTag = NS_NewAtom("IFRAME");
   mVisited = new AtomHashTable();
+  mVerbose = nsnull;
 }
 
 static void FreeStrings(nsVoidArray& aArray)
@@ -194,6 +195,13 @@ NS_IMPL_ISUPPORTS(nsWebCrawler, kISupportsIID)
 NS_IMETHODIMP
 nsWebCrawler::OnStartBinding(nsIURL* aURL, const char *aContentType)
 {
+  if (mVerbose) {
+    printf("Crawler: starting ");
+    nsAutoString tmp;
+    aURL->ToString(tmp);
+    fputs(tmp, stdout);
+    printf("\n");
+  }
   return NS_OK;
 }
 
@@ -212,8 +220,24 @@ nsWebCrawler::OnStatus(nsIURL* aURL, const nsString& aMsg)
 NS_IMETHODIMP
 nsWebCrawler::OnStopBinding(nsIURL* aURL, PRInt32 status, const nsString& aMsg)
 {
-  if (nsnull!=mFilter)
-  {
+  if (mVerbose) {
+    printf("Crawler: stopping ");
+    nsAutoString tmp;
+    aURL->ToString(tmp);
+    fputs(tmp, stdout);
+    printf("\n");
+  }
+
+  if (nsnull == aURL) {
+    return NS_OK;
+  }
+
+  // Skip url post-processing for non-document urls
+  if (!mCurrentURL.Equals(aURL->GetSpec())) {
+    return NS_OK;
+  }
+
+  if ((nsnull != mFilter) || (nsnull != mOutputDir)) {
     nsIPresShell* shell = GetPresShell();
     if (nsnull != shell) {
       nsIFrame* root = shell->GetRootFrame();
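The hunk above carries two of the three fixes named in the commit message: OnStopBinding now returns early unless the URL that finished loading is the document the crawler itself requested, so an image or other subresource finishing no longer triggers post-processing; and the dump branch now runs when either a filter or an output directory (-o) is set, not only when a filter is. A minimal self-contained sketch of the document-vs-subresource guard follows; all names in it are illustrative, not the tree's own types.

// Sketch of the guard added above: remember which document URL was
// requested, then ignore completion callbacks for anything else (for
// example, an image the page references). Illustrative names only.
#include <cstdio>
#include <string>

class CrawlGuardSketch {
public:
  void LoadDocument(const std::string& aSpec) {
    mCurrentURL = aSpec;            // recorded before the load starts,
                                    // as LoadNextURL does further down
    std::printf("loading %s\n", aSpec.c_str());
  }

  void OnStopBinding(const std::string& aFinishedSpec) {
    if (aFinishedSpec != mCurrentURL) {
      return;                       // a subresource finished, not the document
    }
    std::printf("document done: %s\n", aFinishedSpec.c_str());
    // ... dump regression data, queue the next URL ...
  }

private:
  std::string mCurrentURL;          // document URL currently being crawled
};

int main() {
  CrawlGuardSketch crawler;
  crawler.LoadDocument("http://example.com/index.html");
  crawler.OnStopBinding("http://example.com/logo.gif");   // ignored
  crawler.OnStopBinding("http://example.com/index.html"); // processed
  return 0;
}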
@@ -224,14 +248,14 @@ nsWebCrawler::OnStopBinding(nsIURL* aURL, PRInt32 status, const nsString& aMsg)
         FILE *fp = GetOutputFile(aURL);
         if (nsnull!=fp)
         {
-          root->List(fp, 0, filter);
+          root->DumpRegressionData(fp, 0);
           fclose(fp);
         }
         else
           printf("could not open output file for %s\n", aURL->GetFile());
       }
       else
-        root->List(stdout, 0, filter);
+        root->DumpRegressionData(stdout, 0);
     }
     NS_RELEASE(shell);
   }
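Both output paths (a file via -o, or stdout) now emit the frame tree's DumpRegressionData output instead of the human-oriented List dump, which is what makes the -o files usable as machine-comparable baselines. As a hedged illustration of that workflow, two dumps could be checked line by line; the comparison tool below is hypothetical and not part of this commit.

// Hypothetical baseline check for two -o regression dumps; a sketch of
// the intended workflow, not code from the tree.
#include <fstream>
#include <iostream>
#include <string>

static bool DumpsMatch(const char* aBaseline, const char* aCurrent) {
  std::ifstream base(aBaseline);
  std::ifstream cur(aCurrent);
  std::string b, c;
  for (;;) {
    bool bOk = static_cast<bool>(std::getline(base, b));
    bool cOk = static_cast<bool>(std::getline(cur, c));
    if (!bOk || !cOk) {
      return bOk == cOk;            // match only if both files end together
    }
    if (b != c) {
      return false;                 // first differing line => layout change
    }
  }
}

int main(int argc, char** argv) {
  if (argc != 3) {
    std::cerr << "usage: cmpdump <baseline> <current>\n";
    return 2;
  }
  return DumpsMatch(argv[1], argv[2]) ? 0 : 1;
}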
@@ -286,7 +310,7 @@ FILE * nsWebCrawler::GetOutputFile(nsIURL *aURL)
   char *c = inputFileName;
   for (PRInt32 i=fileNameOffset+1; i<fileNameOffset+len; i++)
   {
-    *c = inputFileFullPath[i];
+    *c = (char) inputFileFullPath[i];
     c++;
   }
   inputFileName[len-1]=nsnull;
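The one-line change in GetOutputFile only makes an existing narrowing explicit: the URL path is stored in 16-bit units and each unit is truncated into a char of the output file name, which is lossless only for ASCII paths. A standalone sketch of that narrowing copy, with illustrative names:

// Sketch: narrowing a 16-bit string into an 8-bit file name, as the
// loop above does. Safe only when the path is plain ASCII.
#include <cstdio>

static void NarrowCopy(const char16_t* aWide, char* aOut, size_t aOutLen) {
  size_t i = 0;
  for (; aWide[i] != 0 && i + 1 < aOutLen; ++i) {
    aOut[i] = (char) aWide[i];   // explicit truncation, as in the diff
  }
  aOut[i] = '\0';                // terminate, mirroring inputFileName[len-1]=nsnull
}

int main() {
  const char16_t wide[] = u"index.html";
  char narrow[32];
  NarrowCopy(wide, narrow, sizeof narrow);
  std::printf("%s\n", narrow);   // prints: index.html
  return 0;
}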
@@ -613,6 +637,7 @@ nsWebCrawler::LoadNextURL()
   }
   nsIWebShell* webShell;
   mBrowser->GetWebShell(webShell);
+  mCurrentURL = *url;
   webShell->LoadURL(*url);
   NS_RELEASE(webShell);
 
@@ -104,6 +104,10 @@ public:
 
   void LoadNextURL();
 
+  void SetVerbose(PRBool aSetting) {
+    mVerbose = aSetting;
+  }
+
 protected:
   virtual ~nsWebCrawler();
 
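SetVerbose is the public switch for the progress printfs added to OnStartBinding and OnStopBinding; nothing in this diff calls it, so a caller has to opt in. A self-contained sketch of the pattern follows; the class and the NSPR stand-ins are illustrative, and only the shape of SetVerbose(PRBool) mirrors the header.

// Self-contained sketch of the verbose toggle added to nsWebCrawler.
#include <cstdio>

typedef int PRBool;               // stand-ins for the NSPR types used above
#define PR_TRUE 1
#define PR_FALSE 0

class CrawlerVerbositySketch {
public:
  CrawlerVerbositySketch() : mVerbose(PR_FALSE) {}  // cf. mVerbose = nsnull;

  void SetVerbose(PRBool aSetting) { mVerbose = aSetting; }

  void OnStartBinding(const char* aSpec) {
    if (mVerbose) {
      std::printf("Crawler: starting %s\n", aSpec);
    }
  }

private:
  PRBool mVerbose;
};

int main() {
  CrawlerVerbositySketch crawler;
  crawler.SetVerbose(PR_TRUE);
  crawler.OnStartBinding("http://example.com/index.html");
  return 0;
}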
@@ -139,6 +143,9 @@ protected:
   PRInt32 mHeight;
   PRInt32 mMaxPages;
 
+  nsString mCurrentURL;
+  PRBool mVerbose;
+
   nsVoidArray mPendingURLs;
   nsVoidArray mSafeDomains;
   nsVoidArray mAvoidDomains;