gecko-dev/webshell/tests/viewer/nsWebCrawler.h

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The contents of this file are subject to the Netscape Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is Mozilla Communicator client code.
 *
 * The Initial Developer of the Original Code is Netscape Communications
 * Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All
 * Rights Reserved.
 *
 * Contributor(s):
 */
#ifndef nsWebCrawler_h___
#define nsWebCrawler_h___

#include "nsCOMPtr.h"
#include "nsBrowserWindow.h"
#include "nsIWebProgressListener.h"
#include "nsVoidArray.h"
#include "nsString.h"
#include "nsIAtom.h"
#include "nsWeakReference.h"

class nsIContent;
class nsIDocument;
class nsITimer;
class nsIURI;
class nsIPresShell;
class nsViewerApp;
class AtomHashTable;

class nsWebCrawler : public nsIWebProgressListener,
                     public nsSupportsWeakReference {
public:
  // Make a new web-crawler for the given viewer. Note: the web
  // crawler does not addref the viewer.
  nsWebCrawler(nsViewerApp* aViewer);

  // nsISupports
  NS_DECL_ISUPPORTS

  // nsIWebProgressListener
  NS_DECL_NSIWEBPROGRESSLISTENER
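
  // A minimal usage sketch (illustrative only: "theViewerApp" stands in
  // for the driver's nsViewerApp instance, and the URL string setup is
  // elided):
  //
  //   nsWebCrawler* crawler = new nsWebCrawler(theViewerApp);
  //   NS_ADDREF(crawler);          // refcounted via nsISupports
  //   nsAutoString url;            // fill with e.g. "http://www.mozilla.org/"
  //   crawler->AddURL(url);
  //   crawler->SetExitOnDone(PR_TRUE);
  //   crawler->Start();
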
  // Add a URL to load
  void AddURL(const nsString& aURL);

  // Add a domain that is safe to load URLs from
  void AddSafeDomain(const nsString& aDomain);

  // Add a domain that must be avoided
  void AddAvoidDomain(const nsString& aDomain);

  void SetBrowserWindow(nsBrowserWindow* aWindow);
  void GetBrowserWindow(nsBrowserWindow** aWindow);

  // Set the delay between page loads (by default, one second)
  void SetDelay(PRInt32 aSeconds) {
    mDelay = aSeconds;
  }
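
  // For example, to keep a crawl inside a single site (sketch; the domain
  // strings are illustrative, not required values):
  //
  //   crawler->AddSafeDomain(safeDomain);   // e.g. "www.mozilla.org"
  //   crawler->AddAvoidDomain(avoidDomain); // e.g. "bugzilla.mozilla.org"
  //   crawler->SetDelay(2);                 // wait two seconds between loads
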
  void SetPrintTest(PRInt32 aTestType) { mPrinterTestType = aTestType; }

  void RegressionOutput(PRInt32 aRegressionOutputLevel) {
    mRegressionOutputLevel = aRegressionOutputLevel;
  }

  void EnableJiggleLayout() {
    mJiggleLayout = PR_TRUE;
  }

  // If set to TRUE, the crawler will post an exit message when it finishes
  void SetExitOnDone(PRBool aPostExit) {
    mPostExit = aPostExit;
  }

  // Start loading documents
  void Start();

  // Enable the crawler; when a document contains links to other
  // documents the crawler will go to them, subject to the limit on
  // the total crawl count and the domain name checks.
  void EnableCrawler();

  void SetRecordFile(FILE* aFile) {
    mRecord = aFile;
  }
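
  // A crawl-mode sketch (illustrative; the log file name is an assumption,
  // and SetMaxPages is declared just below):
  //
  //   crawler->EnableCrawler();
  //   crawler->SetMaxPages(100);                        // stop after 100 pages
  //   crawler->SetRecordFile(fopen("crawl.log", "w"));  // record visited URLs
  //   crawler->SetExitOnDone(PR_TRUE);
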
  void SetMaxPages(PRInt32 aMax) {
    mMaxPages = aMax;
  }

  void SetOutputDir(const nsString& aOutputDir);

  void DumpRegressionData(nsIWebShell* aWebShell, nsIURI* aURL);

  void SetRegressionDir(const nsString& aOutputDir);
  void SetEnableRegression(PRBool aSetting) {
    mRegressing = aSetting;
  }

  void LoadNextURL(PRBool aQueueLoad);
  nsresult QueueLoadURL(const nsString& aURL);
  void GoToQueuedURL(const nsString& aURL);
  void QueueExit();
  void Exit();
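
  // A plausible regression-test flow, assuming SetOutputDir names the
  // directory where dumps are written and SetRegressionDir names the
  // baseline to compare against (directory values are illustrative):
  //
  //   // baseline run: write reference dumps
  //   crawler->SetOutputDir(baselineDir);
  //
  //   // verify run: compare fresh output against the baseline
  //   crawler->SetRegressionDir(baselineDir);
  //   crawler->SetEnableRegression(PR_TRUE);
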
  void SetVerbose(PRBool aSetting) {
    mVerbose = aSetting;
  }

  PRBool Crawling() const {
    return mCrawl;
  }

  PRBool LoadingURLList() const {
    return mHaveURLList;
  }

  void IncludeStyleData(PRBool aIncludeStyle) {
    mIncludeStyleInfo = aIncludeStyle;
  }

protected:
  virtual ~nsWebCrawler();

  void FindURLsIn(nsIDocument* aDocument, nsIContent* aNode);
  void FindMoreURLs();

  PRBool OkToLoad(const nsString& aURLSpec);
  void RecordLoadedURL(const nsString& aURLSpec);

  /** Generate an output file name from a URL */
  FILE* GetOutputFile(nsIURI* aURL, nsString& aOutputName);

  nsIPresShell* GetPresShell(nsIWebShell* aWebShell = nsnull);
  void PerformRegressionTest(const nsString& aOutputName);

  nsBrowserWindow* mBrowser;
  nsViewerApp* mViewer;
  nsCOMPtr<nsITimer> mTimer;

  FILE* mRecord;
  nsCOMPtr<nsIAtom> mLinkTag;
  nsCOMPtr<nsIAtom> mFrameTag;
  nsCOMPtr<nsIAtom> mIFrameTag;
  nsCOMPtr<nsIAtom> mHrefAttr;
  nsCOMPtr<nsIAtom> mSrcAttr;
  nsCOMPtr<nsIAtom> mBaseHrefAttr;

  AtomHashTable* mVisited;
  nsString mOutputDir;

  PRBool mCrawl;
  PRBool mHaveURLList;
  PRBool mJiggleLayout;
  PRBool mPostExit;

  PRInt32 mDelay;     // first delay encountered from command line or delay:= in file
  PRInt32 mLastDelay; // last delay encountered from command line or delay:= in file
  PRInt32 mMaxPages;

  nsString mCurrentURL;
  nsCOMPtr<nsIURI> mLastURL;
  nsIWebShell* mLastWebShell;
  PRTime mStartLoad;

  PRBool mVerbose;
  PRBool mRegressing;
  PRInt32 mPrinterTestType;
  PRInt32 mRegressionOutputLevel;
  nsString mRegressionDir;
  PRBool mIncludeStyleInfo;

  nsVoidArray mPendingURLs;
  nsVoidArray mSafeDomains;
  nsVoidArray mAvoidDomains;
  PRInt32 mQueuedLoadURLs;
};

#endif /* nsWebCrawler_h___ */