From 3e8c1eb79821c5d6966d943f1d6e5c5359bb467a Mon Sep 17 00:00:00 2001 From: "dcamp@mozilla.com" Date: Tue, 29 Jan 2008 12:57:18 -0800 Subject: [PATCH] Bug 402611: Deal with changes to the safebrowsing v2 protocol. r=tony, a1.9=beltzner --- browser/app/profile/firefox.js | 1 + .../safebrowsing/content/globalstore.js | 12 +- .../safebrowsing/content/malware-warden.js | 12 +- .../safebrowsing/content/phishing-warden.js | 3 + .../safebrowsing/content/sb-loader.js | 4 +- toolkit/components/build/nsToolkitCompsCID.h | 8 + .../components/build/nsToolkitCompsModule.cpp | 6 + .../url-classifier/content/listmanager.js | 16 + .../url-classifier/public/Makefile.in | 1 + .../public/nsIUrlClassifierDBService.idl | 61 +- .../public/nsIUrlClassifierHashCompleter.idl | 99 + .../public/nsIUrlListManager.idl | 8 +- .../components/url-classifier/src/Makefile.in | 3 +- .../src/nsUrlClassifierDBService.cpp | 1652 +++++++++++------ .../src/nsUrlClassifierDBService.h | 25 +- .../src/nsUrlClassifierHashCompleter.cpp | 493 +++++ .../src/nsUrlClassifierHashCompleter.h | 129 ++ .../src/nsUrlClassifierStreamUpdater.cpp | 63 +- .../src/nsUrlClassifierStreamUpdater.h | 15 +- .../tests/unit/head_urlclassifier.js | 51 +- .../url-classifier/tests/unit/test_addsub.js | 41 +- .../tests/unit/test_dbservice.js | 22 +- .../url-classifier/tests/unit/test_partial.js | 403 ++++ .../tests/unit/test_streamupdater.js | 51 +- 24 files changed, 2475 insertions(+), 704 deletions(-) create mode 100644 toolkit/components/url-classifier/public/nsIUrlClassifierHashCompleter.idl create mode 100644 toolkit/components/url-classifier/src/nsUrlClassifierHashCompleter.cpp create mode 100644 toolkit/components/url-classifier/src/nsUrlClassifierHashCompleter.h create mode 100644 toolkit/components/url-classifier/tests/unit/test_partial.js diff --git a/browser/app/profile/firefox.js b/browser/app/profile/firefox.js index 78edfa1d5d23..849091d89f8e 100644 --- a/browser/app/profile/firefox.js +++ 
b/browser/app/profile/firefox.js @@ -559,6 +559,7 @@ pref("browser.safebrowsing.provider.0.name", "Google"); pref("browser.safebrowsing.provider.0.lookupURL", "http://sb.google.com/safebrowsing/lookup?sourceid=firefox-antiphish&features=TrustRank&client={moz:client}&appver={moz:version}&"); pref("browser.safebrowsing.provider.0.keyURL", "https://sb-ssl.google.com/safebrowsing/getkey?client={moz:client}&"); pref("browser.safebrowsing.provider.0.reportURL", "http://sb.google.com/safebrowsing/report?"); +pref("browser.safebrowsing.provider.0.gethashURL", "http://sb.google.com/safebrowsing/gethash?client={moz:client}&appver={moz:version}&pver=2.1"); // privacy policy -- Both url and fallbackurl must exist, although they may // point to the same file. fallbackurl must be a chrome url diff --git a/browser/components/safebrowsing/content/globalstore.js b/browser/components/safebrowsing/content/globalstore.js index 7376a9d61d42..0619f75886f3 100644 --- a/browser/components/safebrowsing/content/globalstore.js +++ b/browser/components/safebrowsing/content/globalstore.js @@ -47,6 +47,7 @@ // reportURL: When shown a warning bubble, we send back the user decision // (get me out of here/ignore warning) to this URL (strip cookies // first). This is optional. +// gethashURL: Url for requesting complete hashes from the provider. 
// reportGenericURL: HTML page for general user feedback // reportPhishURL: HTML page for notifying the provider of a new phishing page // reportErrorURL: HTML page for notifying the provider of a false positive @@ -110,6 +111,7 @@ PROT_DataProvider.prototype.loadDataProviderPrefs_ = function() { this.lookupURL_ = this.getUrlPref_(basePref + "lookupURL"); this.keyURL_ = this.getUrlPref_(basePref + "keyURL"); this.reportURL_ = this.getUrlPref_(basePref + "reportURL"); + this.gethashURL_ = this.getUrlPref_(basePref + "gethashURL"); // Urls to HTML report pages this.reportGenericURL_ = this.getUrlPref_(basePref + "reportGenericURL"); @@ -143,6 +145,8 @@ PROT_DataProvider.prototype.updateListManager_ = function() { // Clear the key to stop updates. listManager.setKeyUrl(""); } + + listManager.setGethashUrl(this.getGethashURL()); } /** @@ -206,11 +210,9 @@ PROT_DataProvider.prototype.getUpdateURL = function() { PROT_DataProvider.prototype.getLookupURL = function() { return this.lookupURL_; } -PROT_DataProvider.prototype.getKeyURL = function() { - return this.keyURL_; -} -PROT_DataProvider.prototype.getReportURL = function() { - return this.reportURL_; + +PROT_DataProvider.prototype.getGethashURL = function() { + return this.gethashURL_; } PROT_DataProvider.prototype.getReportGenericURL = function() { diff --git a/browser/components/safebrowsing/content/malware-warden.js b/browser/components/safebrowsing/content/malware-warden.js index 7590b93659fa..4ef1aa5bcd1e 100644 --- a/browser/components/safebrowsing/content/malware-warden.js +++ b/browser/components/safebrowsing/content/malware-warden.js @@ -61,16 +61,14 @@ function PROT_MalwareWarden() { var testUpdate = "n:1000\ni:test-malware-simple\nad:1\n" + - "a:1:" + testData.length + "\n" + - testData + - "\n"; + "a:1:32:" + testData.length + "\n" + + testData; testData = "mozilla.com/firefox/its-a-trap.html"; testUpdate += "n:1000\ni:test-phish-simple\nad:1\n" + - "a:1:" + testData.length + "\n" + - testData + - "\n"; + 
"a:1:32:" + testData.length + "\n" + + testData; var dbService_ = Cc["@mozilla.org/url-classifier/dbservice;1"] .getService(Ci.nsIUrlClassifierDBService); @@ -92,7 +90,7 @@ function PROT_MalwareWarden() { try { dbService_.beginUpdate(listener); - dbService_.beginStream(); + dbService_.beginStream(""); dbService_.updateStream(testUpdate); dbService_.finishStream(); dbService_.finishUpdate(); diff --git a/browser/components/safebrowsing/content/phishing-warden.js b/browser/components/safebrowsing/content/phishing-warden.js index c7c8c075afb6..dcfef25efa83 100644 --- a/browser/components/safebrowsing/content/phishing-warden.js +++ b/browser/components/safebrowsing/content/phishing-warden.js @@ -375,6 +375,9 @@ PROT_PhishingWarden.prototype.addWebProgressToAllTabs_ = function() { * @param url */ PROT_PhishingWarden.prototype.onDocNavStart = function(request, url) { + // XXX: most of this code is dead and needs to be removed. + return; + G_Debug(this, "checkRemote: " + (this.checkRemote_ ? "yes" : "no")); diff --git a/browser/components/safebrowsing/content/sb-loader.js b/browser/components/safebrowsing/content/sb-loader.js index e82cd022e23d..20e16503efbb 100644 --- a/browser/components/safebrowsing/content/sb-loader.js +++ b/browser/components/safebrowsing/content/sb-loader.js @@ -73,7 +73,7 @@ var safebrowsing = { safebrowsing.malwareWarden = malwareWarden; // Register tables - malwareWarden.registerBlackTable("goog-malware-sha128"); + malwareWarden.registerBlackTable("goog-malware-shavar"); malwareWarden.maybeToggleUpdateChecking(); @@ -87,7 +87,7 @@ var safebrowsing = { // Register tables // XXX: move table names to a pref that we originally will download // from the provider (need to workout protocol details) - phishWarden.registerBlackTable("goog-phish-sha128"); + phishWarden.registerBlackTable("goog-phish-shavar"); // Download/update lists if we're in non-enhanced mode phishWarden.maybeToggleUpdateChecking(); diff --git 
a/toolkit/components/build/nsToolkitCompsCID.h b/toolkit/components/build/nsToolkitCompsCID.h index e990dbcb65bc..d27e2471e1ae 100644 --- a/toolkit/components/build/nsToolkitCompsCID.h +++ b/toolkit/components/build/nsToolkitCompsCID.h @@ -83,6 +83,9 @@ #define NS_URLCLASSIFIERUTILS_CONTRACTID \ "@mozilla.org/url-classifier/utils;1" +#define NS_URLCLASSIFIERHASHCOMPLETER_CONTRACTID \ + "@mozilla.org/url-classifier/hashcompleter;1" + #define NS_SCRIPTABLEUNESCAPEHTML_CONTRACTID "@mozilla.org/feed-unescapehtml;1" #define NS_NAVHISTORYSERVICE_CONTRACTID \ @@ -156,6 +159,11 @@ #define NS_URLCLASSIFIERUTILS_CID \ { 0xb7b2ccec, 0x7912, 0x4ea6, { 0xa5, 0x48, 0xb0, 0x38, 0x44, 0x70, 0x04, 0xbd} } +// {786e0a0e-e035-4600-8ee0-365a63a80b80} +#define NS_URLCLASSIFIERHASHCOMPLETER_CID \ +{ 0x786e0a0e, 0xe035, 0x4600, \ + { 0x8e, 0xe0, 0x36, 0x5a, 0x63, 0xa8, 0x0b, 0x80 } } + // {10f2f5f0-f103-4901-980f-ba11bd70d60d} #define NS_SCRIPTABLEUNESCAPEHTML_CID \ { 0x10f2f5f0, 0xf103, 0x4901, { 0x98, 0x0f, 0xba, 0x11, 0xbd, 0x70, 0xd6, 0x0d} } diff --git a/toolkit/components/build/nsToolkitCompsModule.cpp b/toolkit/components/build/nsToolkitCompsModule.cpp index e561d8303877..a45a6e2f82c9 100644 --- a/toolkit/components/build/nsToolkitCompsModule.cpp +++ b/toolkit/components/build/nsToolkitCompsModule.cpp @@ -58,6 +58,7 @@ #include "nsUrlClassifierDBService.h" #include "nsUrlClassifierStreamUpdater.h" #include "nsUrlClassifierUtils.h" +#include "nsUrlClassifierHashCompleter.h" #include "nsDocShellCID.h" #endif @@ -88,6 +89,7 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsTypeAheadFind) #ifdef MOZ_URL_CLASSIFIER NS_GENERIC_FACTORY_CONSTRUCTOR(nsUrlClassifierStreamUpdater) NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsUrlClassifierUtils, Init) +NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsUrlClassifierHashCompleter, Init) static NS_IMETHODIMP nsUrlClassifierDBServiceConstructor(nsISupports *aOuter, REFNSIID aIID, @@ -167,6 +169,10 @@ static const nsModuleComponentInfo components[] = NS_URLCLASSIFIERUTILS_CID, 
NS_URLCLASSIFIERUTILS_CONTRACTID, nsUrlClassifierUtilsConstructor }, + { "Url Classifier Hash Completer", + NS_URLCLASSIFIERHASHCOMPLETER_CID, + NS_URLCLASSIFIERHASHCOMPLETER_CONTRACTID, + nsUrlClassifierHashCompleterConstructor }, #endif #ifdef MOZ_FEEDS { "Unescape HTML", diff --git a/toolkit/components/url-classifier/content/listmanager.js b/toolkit/components/url-classifier/content/listmanager.js index 1fa4a1bc9af3..315d164be83c 100644 --- a/toolkit/components/url-classifier/content/listmanager.js +++ b/toolkit/components/url-classifier/content/listmanager.js @@ -70,6 +70,7 @@ function PROT_ListManager() { this.prefs_ = new G_Preferences(); this.updateserverURL_ = null; + this.gethashURL_ = null; this.isTesting_ = false; @@ -90,6 +91,9 @@ function PROT_ListManager() { this.dbService_ = Cc["@mozilla.org/url-classifier/dbservice;1"] .getService(Ci.nsIUrlClassifierDBService); + + this.hashCompleter_ = Cc["@mozilla.org/url-classifier/hashcompleter;1"] + .createInstance(Ci.nsIUrlClassifierHashCompleter); } /** @@ -124,6 +128,17 @@ PROT_ListManager.prototype.setUpdateUrl = function(url) { } } +/** + * Set the gethash url. + */ +PROT_ListManager.prototype.setGethashUrl = function(url) { + G_Debug(this, "Set gethash url: " + url); + if (url != this.gethashURL_) { + this.gethashURL_ = url; + this.hashCompleter_.gethashUrl = url; + } +} + /** * Set the crypto key url. 
* @param url String @@ -146,6 +161,7 @@ PROT_ListManager.prototype.registerTable = function(tableName, opt_requireMac) { this.tablesData[tableName] = {}; this.tablesData[tableName].needsUpdate = false; + this.dbService_.setHashCompleter(tableName, this.hashCompleter_); return true; } diff --git a/toolkit/components/url-classifier/public/Makefile.in b/toolkit/components/url-classifier/public/Makefile.in index f12d14423400..a1a6c03b7ac5 100644 --- a/toolkit/components/url-classifier/public/Makefile.in +++ b/toolkit/components/url-classifier/public/Makefile.in @@ -11,6 +11,7 @@ XPIDL_MODULE = url-classifier XPIDLSRCS = nsIUrlClassifierDBService.idl \ nsIUrlClassifierStreamUpdater.idl \ nsIUrlClassifierUtils.idl \ + nsIUrlClassifierHashCompleter.idl \ nsIUrlListManager.idl \ $(NULL) diff --git a/toolkit/components/url-classifier/public/nsIUrlClassifierDBService.idl b/toolkit/components/url-classifier/public/nsIUrlClassifierDBService.idl index 8e648e151d98..73ee6131806d 100644 --- a/toolkit/components/url-classifier/public/nsIUrlClassifierDBService.idl +++ b/toolkit/components/url-classifier/public/nsIUrlClassifierDBService.idl @@ -38,6 +38,14 @@ #include "nsISupports.idl" +%{C++ +#include "nsTArray.h" +class nsUrlClassifierLookupResult; +%} +[ptr] native ResultArray(nsTArray); + +interface nsIUrlClassifierHashCompleter; + // Interface for JS function callbacks [scriptable, function, uuid(4ca27b6b-a674-4b3d-ab30-d21e2da2dffb)] interface nsIUrlClassifierCallback : nsISupports { @@ -49,13 +57,17 @@ interface nsIUrlClassifierCallback : nsISupports { * clients streaming updates to the url-classifier (usually * nsUrlClassifierStreamUpdater. 
*/ -[scriptable, uuid(113671b8-c5cc-47d9-bc57-269568c7ce29)] +[scriptable, uuid(bb0528b3-71e2-4795-8732-d60a4476e6df)] interface nsIUrlClassifierUpdateObserver : nsISupports { /** * The update requested a new URL whose contents should be downloaded - * and sent to the classifier as a new stream + * and sent to the classifier as a new stream. + * + * @param url The url that was requested. + * @param table The table name that this URL's contents will be associated + * with. */ - void updateUrlRequested(in ACString url); + void updateUrlRequested(in ACString url, in ACString table); /* A stream update has completed */ void streamFinished(); @@ -77,7 +89,7 @@ interface nsIUrlClassifierUpdateObserver : nsISupports { * It provides async methods for querying and updating the database. As the * methods complete, they call the callback function. */ -[scriptable, uuid(dc3b958e-b345-458d-83f7-77e82b42a514)] +[scriptable, uuid(bcc32b18-78be-49f6-a895-a1a341a9e94b)] interface nsIUrlClassifierDBService : nsISupports { /** @@ -87,12 +99,9 @@ interface nsIUrlClassifierDBService : nsISupports * by the service. * @param c: The callback will be called with a comma-separated list * of tables to which the key belongs. - * @param needsProxy: Should be true if the callback needs to be called - * in the main thread, false if the callback is threadsafe. */ void lookup(in ACString spec, - in nsIUrlClassifierCallback c, - in boolean needsProxy); + in nsIUrlClassifierCallback c); /** * Lists the tables along with which chunks are available in each table. @@ -106,6 +115,14 @@ interface nsIUrlClassifierDBService : nsISupports */ void getTables(in nsIUrlClassifierCallback c); + /** + * Set the nsIUrlClassifierCompleter object for a given table. This + * object will be used to request complete versions of partial + * hashes. 
+ */ + void setHashCompleter(in ACString tableName, + in nsIUrlClassifierHashCompleter completer); + //////////////////////////////////////////////////////////////////////////// // Incremental update methods. // @@ -135,8 +152,11 @@ interface nsIUrlClassifierDBService : nsISupports /** * Begin a stream update. This should be called once per url being * fetched. + * + * @param table The table the contents of this stream will be associated + * with, or empty for the initial stream. */ - void beginStream(); + void beginStream(in ACString table); /** * Update the table incrementally. @@ -184,9 +204,30 @@ interface nsIUrlClassifierDBService : nsISupports * Interface for the actual worker thread. Implementations of this need not * be thread aware and just work on the database. */ -[scriptable, uuid(3ed0c8f9-a5d8-4186-beb1-5d828e95ea90)] +[scriptable, uuid(76d923e5-bbde-4292-ae35-16a67d04d524)] interface nsIUrlClassifierDBServiceWorker : nsIUrlClassifierDBService { // Provide a way to forcibly close the db connection. void closeDb(); }; + +/** + * This is an internal helper interface for communication between the + * main thread and the dbservice worker thread. It is called for each + * lookup to provide a set of possible results, which the main thread + * may need to expand using an nsIUrlClassifierCompleter. + */ +[uuid(f1dc83c6-ad43-4f0f-a809-fd43de7de8a4)] +interface nsIUrlClassifierLookupCallback : nsISupports +{ + /** + * The lookup process is complete. + * + * @param results + * If this parameter is null, there were no results found. + * If not, it contains an array of nsUrlClassifierEntry objects + * with possible matches. The callee is responsible for freeing + * this array. 
+ */ + void lookupComplete(in ResultArray results); +}; diff --git a/toolkit/components/url-classifier/public/nsIUrlClassifierHashCompleter.idl b/toolkit/components/url-classifier/public/nsIUrlClassifierHashCompleter.idl new file mode 100644 index 000000000000..f6ad65cb45c8 --- /dev/null +++ b/toolkit/components/url-classifier/public/nsIUrlClassifierHashCompleter.idl @@ -0,0 +1,99 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is mozilla.org code. + * + * The Initial Developer of the Original Code is + * Mozilla Corporation + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Dave Camp + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. 
If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "nsISupports.idl" + +/** + * This interface is implemented by nsIUrlClassifierHashCompleter clients. + */ +[scriptable, uuid(bbd6c954-7cb4-4447-bc55-8cefd1ceed89)] +interface nsIUrlClassifierHashCompleterCallback : nsISupports +{ + /** + * A complete hash has been found that matches the partial hash. + * This method may be called 0-n times for a given + * nsIUrlClassifierCompleter::complete() call. + * + * @param hash + * The 128-bit hash that was discovered. + * @param table + * The name of the table that this hash belongs to. + * @param chunkId + * The database chunk that this hash belongs to. + */ + void completion(in ACString hash, + in ACString table, + in PRUint32 chunkId); + + /** + * The completion is complete. This method is called once per + * nsIUrlClassifierCompleter::complete() call, after all completion() + * calls are finished. + * + * @param status + * NS_OK if the request completed successfully, or an error code. + */ + void completionFinished(in nsresult status); +}; + +/** + * Clients updating the url-classifier database have the option of sending + * partial (32-bit) hashes of URL fragments to be blacklisted. If the + * url-classifier encounters one of these truncated hashes, it will ask + * an nsIUrlClassifierCompleter instance to asynchronously provide the + * complete hash, along with some associated metadata. + */ +[scriptable, uuid(1a3c19d9-ccd6-4d1a-a48a-1ab662e56e60)] +interface nsIUrlClassifierHashCompleter : nsISupports +{ + /** + * Request a completed hash. + * + * @param partialHash + * The 32-bit hash encountered by the url-classifier. + * @param callback + * An nsIUrlClassifierCompleterCallback instance. 
+ */ + void complete(in ACString partialHash, + in nsIUrlClassifierHashCompleterCallback callback); + + /** + * The URL for the gethash request + */ + attribute ACString gethashUrl; +}; + diff --git a/toolkit/components/url-classifier/public/nsIUrlListManager.idl b/toolkit/components/url-classifier/public/nsIUrlListManager.idl index 8da4912e64e5..d1592080b3bb 100644 --- a/toolkit/components/url-classifier/public/nsIUrlListManager.idl +++ b/toolkit/components/url-classifier/public/nsIUrlListManager.idl @@ -49,7 +49,7 @@ interface nsIUrlListManagerCallback : nsISupports { }; -[scriptable, uuid(874d6c95-fb8b-4f89-b36d-85fe267ab356)] +[scriptable, uuid(9c5598ec-9986-40cf-af40-b5e0d817a3a0)] interface nsIUrlListManager : nsISupports { /** @@ -63,6 +63,12 @@ interface nsIUrlListManager : nsISupports */ void setKeyUrl(in ACString url); + /** + * Set the URL that we will query for complete hashes after a partial + * hash match. + */ + void setGethashUrl(in ACString url); + /** * Add a table to the list of tables we are managing. The name is a * string of the format provider_name-semantic_type-table_type. For diff --git a/toolkit/components/url-classifier/src/Makefile.in b/toolkit/components/url-classifier/src/Makefile.in index c6303b6596db..b342853e937c 100644 --- a/toolkit/components/url-classifier/src/Makefile.in +++ b/toolkit/components/url-classifier/src/Makefile.in @@ -25,6 +25,7 @@ CPPSRCS = \ nsUrlClassifierDBService.cpp \ nsUrlClassifierStreamUpdater.cpp \ nsUrlClassifierUtils.cpp \ + nsUrlClassifierHashCompleter.cpp \ $(NULL) LOCAL_INCLUDES = \ @@ -39,5 +40,3 @@ EXTRA_PP_COMPONENTS = nsUrlClassifierLib.js \ include $(topsrcdir)/config/rules.mk -export:: $(topsrcdir)/security/nss/lib/freebl/sha512.c - $(INSTALL) $^ . 
diff --git a/toolkit/components/url-classifier/src/nsUrlClassifierDBService.cpp b/toolkit/components/url-classifier/src/nsUrlClassifierDBService.cpp index e01f00cf3d2c..f0b8131d43ef 100644 --- a/toolkit/components/url-classifier/src/nsUrlClassifierDBService.cpp +++ b/toolkit/components/url-classifier/src/nsUrlClassifierDBService.cpp @@ -70,6 +70,7 @@ #include "prlog.h" #include "prlock.h" #include "prprf.h" +#include "prnetdb.h" #include "zlib.h" /** @@ -86,21 +87,17 @@ * section of the protocol document at * http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec * - * A set of fragments is associated with a domain. The domain for a given - * fragment is the three-host-component domain of the fragment (two host - * components for URLs with only two components) with a trailing slash. - * So for the fragments listed above, the domains are example.com/, - * www.example.com/ and mail.example.com/. A collection of fragments for - * a given domain is referred to in this code as an Entry. - * - * Entries are associated with the table from which its fragments came. - * - * Fragments are added to the database in chunks. Each fragment in an entry - * keeps track of which chunk it came from, and as a chunk is added it keeps - * track of which entries contain its fragments. + * A fragment is associated with a domain. The domain for a given + * fragment is the three-host-component domain of the fragment (two + * host components for URLs with only two components) with a trailing + * slash. So for the fragments listed above, the domains are + * example.com/, www.example.com/ and mail.example.com/. * * Fragments and domains are hashed in the database. The hash is described * in the protocol document, but it's basically a truncated SHA256 hash. + * + * A (table, chunk id, domain key, fragment) tuple is referred to as + * an Entry. 
*/ // NSPR_LOG_MODULES=UrlClassifierDbService:5 @@ -127,7 +124,7 @@ static const PRLogModuleInfo *gUrlClassifierDbServiceLog = nsnull; // want to change schema, or to recover from updating bugs. When an // implementation version change is detected, the database is scrapped // and we start over. -#define IMPLEMENTATION_VERSION 1 +#define IMPLEMENTATION_VERSION 2 #define MAX_HOST_COMPONENTS 5 #define MAX_PATH_COMPONENTS 4 @@ -135,8 +132,6 @@ static const PRLogModuleInfo *gUrlClassifierDbServiceLog = nsnull; // Updates will fail if fed chunks larger than this #define MAX_CHUNK_SIZE (1024 * 1024) -#define KEY_LENGTH 16 - // Prefs for implementing nsIURIClassifier to block page loads #define CHECK_MALWARE_PREF "browser.safebrowsing.malware.enabled" #define CHECK_MALWARE_DEFAULT PR_FALSE @@ -159,249 +154,162 @@ static PRBool gShuttingDownThread = PR_FALSE; // ------------------------------------------------------------------------- // Hash class implementation -// A convenience wrapper around the 16-byte hash for a domain or fragment. +// A convenience wrapper around the potentially-truncated hash for a +// domain or fragment. +template struct nsUrlClassifierHash { - PRUint8 buf[KEY_LENGTH]; + static const PRUint32 sHashSize = S; + typedef nsUrlClassifierHash self_type; + PRUint8 buf[S]; - nsresult FromPlaintext(const nsACString& plainText, nsICryptoHash *hash); - void Assign(const nsACString& str); + nsresult FromPlaintext(const nsACString& plainText, nsICryptoHash *hash) { + // From the protocol doc: + // Each entry in the chunk is composed of the 128 most significant bits + // of the SHA 256 hash of a suffix/prefix expression. 
- const PRBool operator==(const nsUrlClassifierHash& hash) const { + nsresult rv = hash->Init(nsICryptoHash::SHA256); + NS_ENSURE_SUCCESS(rv, rv); + + rv = hash->Update + (reinterpret_cast(plainText.BeginReading()), + plainText.Length()); + NS_ENSURE_SUCCESS(rv, rv); + + nsCAutoString hashed; + rv = hash->Finish(PR_FALSE, hashed); + NS_ENSURE_SUCCESS(rv, rv); + + NS_ASSERTION(hashed.Length() >= sHashSize, + "not enough characters in the hash"); + + memcpy(buf, hashed.BeginReading(), sHashSize); + + return NS_OK; + } + + void Assign(const nsACString& str) { + NS_ASSERTION(str.Length() >= sHashSize, + "string must be at least sHashSize characters long"); + memcpy(buf, str.BeginReading(), sHashSize); + } + + const PRBool operator==(const self_type& hash) const { return (memcmp(buf, hash.buf, sizeof(buf)) == 0); } - const PRBool operator!=(const nsUrlClassifierHash& hash) const { + const PRBool operator!=(const self_type& hash) const { return !(*this == hash); } + const PRBool operator<(const self_type& hash) const { + return memcmp(buf, hash.buf, sizeof(self_type)) < 0; + } + const PRBool StartsWith(const nsUrlClassifierHash& hash) const { + NS_ASSERTION(sHashSize >= PARTIAL_LENGTH, "nsUrlClassifierHash must be at least PARTIAL_LENGTH bytes long"); + return memcmp(buf, hash.buf, PARTIAL_LENGTH) == 0; + } }; -nsresult -nsUrlClassifierHash::FromPlaintext(const nsACString& plainText, - nsICryptoHash *hash) -{ - // From the protocol doc: - // Each entry in the chunk is composed of the 128 most significant bits - // of the SHA 256 hash of a suffix/prefix expression. 
+typedef nsUrlClassifierHash nsUrlClassifierDomainHash; +typedef nsUrlClassifierHash nsUrlClassifierPartialHash; +typedef nsUrlClassifierHash nsUrlClassifierCompleteHash; - nsresult rv = hash->Init(nsICryptoHash::SHA256); - NS_ENSURE_SUCCESS(rv, rv); - - rv = hash->Update - (reinterpret_cast(plainText.BeginReading()), - plainText.Length()); - NS_ENSURE_SUCCESS(rv, rv); - - nsCAutoString hashed; - rv = hash->Finish(PR_FALSE, hashed); - NS_ENSURE_SUCCESS(rv, rv); - - NS_ASSERTION(hashed.Length() >= KEY_LENGTH, - "not enough characters in the hash"); - - memcpy(buf, hashed.BeginReading(), KEY_LENGTH); - - return NS_OK; -} - -void -nsUrlClassifierHash::Assign(const nsACString& str) -{ - NS_ASSERTION(str.Length() >= KEY_LENGTH, - "string must be at least KEY_LENGTH characters long"); - memcpy(buf, str.BeginReading(), KEY_LENGTH); -} // ------------------------------------------------------------------------- // Entry class implementation -// This class represents one entry in the classifier database. It is a list -// of fragments and their associated chunks for a given key/table pair. +// This class represents one entry in the classifier database. It consists +// of a table id, a chunk id, a domain hash, and a partial or complete hash. class nsUrlClassifierEntry { public: - nsUrlClassifierEntry() : mId(0) {} + nsUrlClassifierEntry() + : mId(0) + , mHavePartial(PR_FALSE) + , mHaveComplete(PR_FALSE) + , mTableId(0) + , mChunkId(0) + , mAddChunkId(0) + {} ~nsUrlClassifierEntry() {} - // Read an entry from a database statement - PRBool ReadStatement(mozIStorageStatement* statement); + // Check that this entry could potentially match the complete hash. 
+ PRBool Match(const nsUrlClassifierCompleteHash &hash); - // Prepare a statement to write this entry to the database - nsresult BindStatement(mozIStorageStatement* statement); - - // Add a single fragment associated with a given chunk - PRBool AddFragment(const nsUrlClassifierHash& hash, PRUint32 chunkNum); - - // Add all the fragments in a given entry to this entry - PRBool Merge(const nsUrlClassifierEntry& entry); - - // Remove all fragments in a given entry from this entry. Fragments that - // are found in this entry will be removed from the argument's entry, - // fragments that are not found will be left in the argument's entry. - // Will return TRUE if any fragments were subtracted. - PRBool SubtractFragments(nsUrlClassifierEntry& entry); - - // Remove all fragments associated with a given chunk - PRBool SubtractChunk(PRUint32 chunkNum); - - // Check if there is a fragment with this hash in the entry - PRBool HasFragment(const nsUrlClassifierHash& hash); + // Check that the sub entry should apply to this entry. + PRBool SubMatch(const nsUrlClassifierEntry& sub); // Clear out the entry structure void Clear(); - PRBool IsEmpty() { return mFragments.Length() == 0; } + // Set the partial hash for this domain. + void SetHash(const nsUrlClassifierPartialHash &partialHash) { + mPartialHash = partialHash; + mHavePartial = PR_TRUE; + } + + // Set the complete hash for this domain. + void SetHash(const nsUrlClassifierCompleteHash &completeHash) { + mCompleteHash = completeHash; + mHaveComplete = PR_TRUE; + } + + PRBool operator== (const nsUrlClassifierEntry& entry) const { + return ! 
(mTableId != entry.mTableId || + mChunkId != entry.mChunkId || + mHavePartial != entry.mHavePartial || + (mHavePartial && mPartialHash != entry.mPartialHash) || + mHaveComplete != entry.mHaveComplete || + (mHaveComplete && mCompleteHash != entry.mCompleteHash)); + } + + PRBool operator< (const nsUrlClassifierEntry& entry) const { + return (mTableId < entry.mTableId || + mChunkId < entry.mChunkId || + mHavePartial && !entry.mHavePartial || + (mHavePartial && mPartialHash < entry.mPartialHash) || + mHaveComplete && !entry.mHaveComplete || + (mHaveComplete && mCompleteHash < entry.mCompleteHash)); + } - nsUrlClassifierHash mKey; PRUint32 mId; + + nsUrlClassifierDomainHash mKey; + + PRBool mHavePartial; + nsUrlClassifierPartialHash mPartialHash; + + PRBool mHaveComplete; + nsUrlClassifierCompleteHash mCompleteHash; + PRUint32 mTableId; - -private: - // Add all the fragments from a database blob - PRBool AddFragments(const PRUint8* blob, PRUint32 blobLength); - - // One hash/chunkID pair in the fragment - struct Fragment { - nsUrlClassifierHash hash; - PRUint32 chunkNum; - - PRInt32 Diff(const Fragment& fragment) const { - PRInt32 cmp = memcmp(hash.buf, fragment.hash.buf, sizeof(hash.buf)); - if (cmp != 0) return cmp; - return chunkNum - fragment.chunkNum; - } - - PRBool operator==(const Fragment& fragment) const { - return (Diff(fragment) == 0); - } - - PRBool operator<(const Fragment& fragment) const { - return (Diff(fragment) < 0); - } - }; - - nsTArray mFragments; + PRUint32 mChunkId; + PRUint32 mAddChunkId; }; PRBool -nsUrlClassifierEntry::ReadStatement(mozIStorageStatement* statement) +nsUrlClassifierEntry::Match(const nsUrlClassifierCompleteHash &hash) { - mId = statement->AsInt32(0); + if (mHaveComplete) + return mCompleteHash == hash; - PRUint32 size; - const PRUint8* blob = statement->AsSharedBlob(1, &size); - if (!blob || (size != KEY_LENGTH)) - return PR_FALSE; - memcpy(mKey.buf, blob, KEY_LENGTH); + if (mHavePartial) + return hash.StartsWith(mPartialHash); 
- blob = statement->AsSharedBlob(2, &size); - if (!AddFragments(blob, size)) + return PR_FALSE; +} + +PRBool +nsUrlClassifierEntry::SubMatch(const nsUrlClassifierEntry &subEntry) +{ + if ((mTableId != subEntry.mTableId) || (mChunkId != subEntry.mAddChunkId)) return PR_FALSE; - mTableId = statement->AsInt32(3); + if (subEntry.mHaveComplete) + return mHaveComplete && mCompleteHash == subEntry.mCompleteHash; - return PR_TRUE; -} - -nsresult -nsUrlClassifierEntry::BindStatement(mozIStorageStatement* statement) -{ - nsresult rv; - - if (mId == 0) - rv = statement->BindNullParameter(0); - else - rv = statement->BindInt32Parameter(0, mId); - NS_ENSURE_SUCCESS(rv, rv); - - rv = statement->BindBlobParameter(1, mKey.buf, KEY_LENGTH); - NS_ENSURE_SUCCESS(rv, rv); - - // Store the entries as one big blob. - // This results in a database that isn't portable between machines. - rv = statement->BindBlobParameter - (2, reinterpret_cast(mFragments.Elements()), - mFragments.Length() * sizeof(Fragment)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = statement->BindInt32Parameter(3, mTableId); - NS_ENSURE_SUCCESS(rv, rv); - - return PR_TRUE; -} - -PRBool -nsUrlClassifierEntry::AddFragment(const nsUrlClassifierHash& hash, - PRUint32 chunkNum) -{ - Fragment* fragment = mFragments.AppendElement(); - if (!fragment) - return PR_FALSE; - - fragment->hash = hash; - fragment->chunkNum = chunkNum; - - return PR_TRUE; -} - -PRBool -nsUrlClassifierEntry::AddFragments(const PRUint8* blob, PRUint32 blobLength) -{ - NS_ASSERTION(blobLength % sizeof(Fragment) == 0, - "Fragment blob not the right length"); - Fragment* fragment = mFragments.AppendElements - (reinterpret_cast(blob), blobLength / sizeof(Fragment)); - return (fragment != nsnull); -} - -PRBool -nsUrlClassifierEntry::Merge(const nsUrlClassifierEntry& entry) -{ - Fragment* fragment = mFragments.AppendElements(entry.mFragments); - return (fragment != nsnull); -} - -PRBool -nsUrlClassifierEntry::SubtractFragments(nsUrlClassifierEntry& entry) -{ - 
PRBool foundFragments = PR_FALSE; - - for (PRUint32 i = 0; i < entry.mFragments.Length(); i++) { - for (PRUint32 j = 0; j < mFragments.Length(); j++) { - if (mFragments[j].hash == entry.mFragments[i].hash) { - mFragments.RemoveElementAt(j); - entry.mFragments.RemoveElementAt(i--); - foundFragments = PR_TRUE; - break; - } - } - } - - return foundFragments; -} - -PRBool -nsUrlClassifierEntry::SubtractChunk(PRUint32 chunkNum) -{ - PRUint32 i = 0; - while (i < mFragments.Length()) { - if (mFragments[i].chunkNum == chunkNum) - mFragments.RemoveElementAt(i); - else - i++; - } - - return PR_TRUE; -} - -PRBool -nsUrlClassifierEntry::HasFragment(const nsUrlClassifierHash& hash) -{ - for (PRUint32 i = 0; i < mFragments.Length(); i++) { - const Fragment& fragment = mFragments[i]; - if (fragment.hash == hash) - return PR_TRUE; - } + if (subEntry.mHavePartial) + return mHavePartial && mPartialHash == subEntry.mPartialHash; return PR_FALSE; } @@ -410,9 +318,48 @@ void nsUrlClassifierEntry::Clear() { mId = 0; - mFragments.Clear(); + mHavePartial = PR_FALSE; + mHaveComplete = PR_FALSE; } +// ------------------------------------------------------------------------- +// Lookup result class implementation + +// This helper class wraps a nsUrlClassifierEntry found during a lookup. +class nsUrlClassifierLookupResult +{ +public: + nsUrlClassifierLookupResult() : mConfirmed(PR_FALSE) {} + ~nsUrlClassifierLookupResult() {} + + PRBool operator==(const nsUrlClassifierLookupResult &result) const { + // Don't need to compare table name, it's contained by id in the entry. + return (mLookupFragment == result.mLookupFragment && + mConfirmed == result.mConfirmed && + mEntry == result.mEntry); + } + + PRBool operator<(const nsUrlClassifierLookupResult &result) const { + // Don't need to compare table name, it's contained by id in the entry. 
+ return (mLookupFragment < result.mLookupFragment || + mConfirmed < result.mConfirmed || + mEntry < result.mEntry); + } + + // The hash that matched this entry. + nsUrlClassifierCompleteHash mLookupFragment; + + // The entry that was found during the lookup. + nsUrlClassifierEntry mEntry; + + // TRUE if the lookup matched a complete hash (not just a partial + // one). + PRPackedBool mConfirmed; + + // The table name associated with mEntry.mTableId. + nsCString mTableName; +}; + // ------------------------------------------------------------------------- // Store class implementation @@ -421,24 +368,36 @@ class nsUrlClassifierStore { public: nsUrlClassifierStore() {} - ~nsUrlClassifierStore() {} + virtual ~nsUrlClassifierStore() {} // Initialize the statements for the store. nsresult Init(nsUrlClassifierDBServiceWorker *worker, mozIStorageConnection *connection, - const nsACString& entriesTableName, - const nsACString& chunksTableName); - + const nsACString& entriesTableName); // Shut down the store. 
void Close(); - // Read the entry for a given key/table from the database - nsresult ReadEntry(const nsUrlClassifierHash& key, - PRUint32 tableId, - nsUrlClassifierEntry& entry); + // Read an entry from a database statement + virtual PRBool ReadStatement(mozIStorageStatement* statement, + nsUrlClassifierEntry& entry); + + // Prepare a statement to write this entry to the database + virtual nsresult BindStatement(const nsUrlClassifierEntry& entry, + mozIStorageStatement* statement); + + // Read the entries for a given key/table from the database + nsresult ReadEntries(const nsUrlClassifierDomainHash& key, + PRUint32 tableId, + nsTArray& entry); + + // Read the entries for a given key/table/chunk from the database + nsresult ReadEntries(const nsUrlClassifierDomainHash& key, + PRUint32 tableId, + PRUint32 chunkId, + nsTArray& entry); // Read the entry with a given ID from the database - nsresult ReadEntry(PRUint32 id, nsUrlClassifierEntry& entry); + nsresult ReadEntry(PRUint32 id, nsUrlClassifierEntry& entry, PRBool *exists); // Remove an entry from the database nsresult DeleteEntry(nsUrlClassifierEntry& entry); @@ -446,12 +405,6 @@ public: // Write an entry to the database nsresult WriteEntry(nsUrlClassifierEntry& entry); - // Associate a list of entries in the database with a given table and - // chunk. - nsresult SetChunkEntries(PRUint32 tableId, - PRUint32 chunkNum, - nsTArray &entryIds); - // Remove all entries for a given table/chunk pair from the database. nsresult Expire(PRUint32 tableId, PRUint32 chunkNum); @@ -459,27 +412,26 @@ public: // Retrieve the lookup statement for this table. 
mozIStorageStatement *LookupStatement() { return mLookupStatement; } -private: +protected: + nsresult ReadEntries(mozIStorageStatement *statement, + nsTArray& entries); nsUrlClassifierDBServiceWorker *mWorker; nsCOMPtr mConnection; nsCOMPtr mLookupStatement; nsCOMPtr mLookupWithTableStatement; + nsCOMPtr mLookupWithChunkStatement; nsCOMPtr mLookupWithIDStatement; nsCOMPtr mUpdateStatement; nsCOMPtr mDeleteStatement; - - nsCOMPtr mAddChunkEntriesStatement; - nsCOMPtr mGetChunkEntriesStatement; - nsCOMPtr mDeleteChunkEntriesStatement; + nsCOMPtr mExpireStatement; }; nsresult nsUrlClassifierStore::Init(nsUrlClassifierDBServiceWorker *worker, mozIStorageConnection *connection, - const nsACString& entriesName, - const nsACString& chunksName) + const nsACString& entriesName) { mWorker = worker; mConnection = connection; @@ -503,9 +455,9 @@ nsUrlClassifierStore::Init(nsUrlClassifierDBServiceWorker *worker, NS_ENSURE_SUCCESS(rv, rv); rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("INSERT OR REPLACE INTO ") + entriesName + - NS_LITERAL_CSTRING(" VALUES (?1, ?2, ?3, ?4)"), - getter_AddRefs(mUpdateStatement)); + (NS_LITERAL_CSTRING("SELECT * FROM ") + entriesName + + NS_LITERAL_CSTRING(" WHERE domain=?1 AND table_id=?2 AND chunk_id=?3"), + getter_AddRefs(mLookupWithChunkStatement)); NS_ENSURE_SUCCESS(rv, rv); rv = mConnection->CreateStatement @@ -515,22 +467,9 @@ nsUrlClassifierStore::Init(nsUrlClassifierDBServiceWorker *worker, NS_ENSURE_SUCCESS(rv, rv); rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("INSERT OR REPLACE INTO ") + chunksName + - NS_LITERAL_CSTRING(" VALUES (?1, ?2, ?3)"), - getter_AddRefs(mAddChunkEntriesStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("SELECT entries FROM ") + chunksName + - NS_LITERAL_CSTRING(" WHERE chunk_id = ?1 AND table_id = ?2"), - getter_AddRefs(mGetChunkEntriesStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - 
(NS_LITERAL_CSTRING("DELETE FROM ") + chunksName + + (NS_LITERAL_CSTRING("DELETE FROM ") + entriesName + NS_LITERAL_CSTRING(" WHERE table_id=?1 AND chunk_id=?2"), - getter_AddRefs(mDeleteChunkEntriesStatement)); - NS_ENSURE_SUCCESS(rv, rv); + getter_AddRefs(mExpireStatement)); return NS_OK; } @@ -541,40 +480,323 @@ nsUrlClassifierStore::Close() mLookupStatement = nsnull; mLookupWithTableStatement = nsnull; mLookupWithIDStatement = nsnull; + mLookupWithChunkStatement = nsnull; mUpdateStatement = nsnull; mDeleteStatement = nsnull; - - mAddChunkEntriesStatement = nsnull; - mGetChunkEntriesStatement = nsnull; - mDeleteChunkEntriesStatement = nsnull; + mExpireStatement = nsnull; mConnection = nsnull; } -nsresult -nsUrlClassifierStore::SetChunkEntries(PRUint32 tableId, - PRUint32 chunkNum, - nsTArray &entryIDs) + +PRBool +nsUrlClassifierStore::ReadStatement(mozIStorageStatement* statement, + nsUrlClassifierEntry& entry) { - mozStorageStatementScoper scoper(mAddChunkEntriesStatement); - nsresult rv = mAddChunkEntriesStatement->BindInt32Parameter(0, chunkNum); + entry.mId = statement->AsInt32(0); + + PRUint32 size; + const PRUint8* blob = statement->AsSharedBlob(1, &size); + if (!blob || (size != DOMAIN_LENGTH)) + return PR_FALSE; + memcpy(entry.mKey.buf, blob, DOMAIN_LENGTH); + + blob = statement->AsSharedBlob(2, &size); + if (!blob || size == 0) { + entry.mHavePartial = PR_FALSE; + } else { + if (size != PARTIAL_LENGTH) + return PR_FALSE; + entry.mHavePartial = PR_TRUE; + memcpy(entry.mPartialHash.buf, blob, PARTIAL_LENGTH); + } + + blob = statement->AsSharedBlob(3, &size); + if (!blob || size == 0) { + entry.mHaveComplete = PR_FALSE; + } else { + if (size != COMPLETE_LENGTH) + return PR_FALSE; + entry.mHaveComplete = PR_TRUE; + memcpy(entry.mCompleteHash.buf, blob, COMPLETE_LENGTH); + } + + // If we only have a partial entry, and that partial entry matches the + // domain, we don't save the extra copy to the database. 
+ if (!(entry.mHavePartial || entry.mHaveComplete)) { + entry.SetHash(entry.mKey); + } + + entry.mChunkId = statement->AsInt32(4); + entry.mTableId = statement->AsInt32(5); + + return PR_TRUE; +} + +nsresult +nsUrlClassifierStore::BindStatement(const nsUrlClassifierEntry &entry, + mozIStorageStatement* statement) +{ + nsresult rv; + + if (entry.mId == 0) + rv = statement->BindNullParameter(0); + else + rv = statement->BindInt32Parameter(0, entry.mId); NS_ENSURE_SUCCESS(rv, rv); - mAddChunkEntriesStatement->BindInt32Parameter(1, tableId); + rv = statement->BindBlobParameter(1, entry.mKey.buf, DOMAIN_LENGTH); NS_ENSURE_SUCCESS(rv, rv); - mAddChunkEntriesStatement->BindBlobParameter - (2, - reinterpret_cast(entryIDs.Elements()), - entryIDs.Length() * sizeof(PRUint32)); + if (entry.mHavePartial) { + // If we only have a partial entry and that entry matches the domain, + // we'll save some space by only storing the domain hash. + if (!entry.mHaveComplete && entry.mKey == entry.mPartialHash) { + rv = statement->BindNullParameter(2); + } else { + rv = statement->BindBlobParameter(2, entry.mPartialHash.buf, + PARTIAL_LENGTH); + } + } else { + rv = statement->BindNullParameter(2); + } + NS_ENSURE_SUCCESS(rv, rv); - rv = mAddChunkEntriesStatement->Execute(); + if (entry.mHaveComplete) { + rv = statement->BindBlobParameter(3, entry.mCompleteHash.buf, COMPLETE_LENGTH); + } else { + rv = statement->BindNullParameter(3); + } + NS_ENSURE_SUCCESS(rv, rv); + + rv = statement->BindInt32Parameter(4, entry.mChunkId); + NS_ENSURE_SUCCESS(rv, rv); + + rv = statement->BindInt32Parameter(5, entry.mTableId); + NS_ENSURE_SUCCESS(rv, rv); + + return PR_TRUE; +} + +nsresult +nsUrlClassifierStore::ReadEntries(mozIStorageStatement *statement, + nsTArray& entries) +{ + PRBool exists; + nsresult rv = statement->ExecuteStep(&exists); + NS_ENSURE_SUCCESS(rv, rv); + + while (exists) { + nsUrlClassifierEntry *entry = entries.AppendElement(); + if (!entry) { + return NS_ERROR_OUT_OF_MEMORY; + } + + if 
(!ReadStatement(statement, *entry)) + return NS_ERROR_FAILURE; + + statement->ExecuteStep(&exists); + } + + return NS_OK; +} + +nsresult +nsUrlClassifierStore::ReadEntries(const nsUrlClassifierDomainHash& hash, + PRUint32 tableId, + nsTArray& entries) +{ + mozStorageStatementScoper scoper(mLookupWithTableStatement); + + nsresult rv = mLookupWithTableStatement->BindBlobParameter + (0, hash.buf, DOMAIN_LENGTH); + NS_ENSURE_SUCCESS(rv, rv); + rv = mLookupWithTableStatement->BindInt32Parameter(1, tableId); + NS_ENSURE_SUCCESS(rv, rv); + + return ReadEntries(mLookupWithTableStatement, entries); +} + +nsresult +nsUrlClassifierStore::ReadEntries(const nsUrlClassifierDomainHash& hash, + PRUint32 tableId, + PRUint32 chunkId, + nsTArray& entries) +{ + mozStorageStatementScoper scoper(mLookupWithChunkStatement); + + nsresult rv = mLookupWithChunkStatement->BindBlobParameter + (0, hash.buf, DOMAIN_LENGTH); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mLookupWithChunkStatement->BindInt32Parameter(1, tableId); + NS_ENSURE_SUCCESS(rv, rv); + rv = mLookupWithChunkStatement->BindInt32Parameter(2, chunkId); + NS_ENSURE_SUCCESS(rv, rv); + + return ReadEntries(mLookupWithChunkStatement, entries); +} + +nsresult +nsUrlClassifierStore::ReadEntry(PRUint32 id, + nsUrlClassifierEntry& entry, + PRBool *exists) +{ + entry.Clear(); + + mozStorageStatementScoper scoper(mLookupWithIDStatement); + + nsresult rv = mLookupWithIDStatement->BindInt32Parameter(0, id); + NS_ENSURE_SUCCESS(rv, rv); + rv = mLookupWithIDStatement->BindInt32Parameter(0, id); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mLookupWithIDStatement->ExecuteStep(exists); + NS_ENSURE_SUCCESS(rv, rv); + + if (*exists) { + if (ReadStatement(mLookupWithIDStatement, entry)) + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +// ------------------------------------------------------------------------- +// nsUrlClassifierAddStore class implementation + +// This class accesses the moz_classifier table. 
+class nsUrlClassifierAddStore: public nsUrlClassifierStore +{ +public: + nsUrlClassifierAddStore() {}; + virtual ~nsUrlClassifierAddStore() {}; + + nsresult Init(nsUrlClassifierDBServiceWorker *worker, + mozIStorageConnection *connection, + const nsACString& entriesTableName); +}; + +nsresult +nsUrlClassifierAddStore::Init(nsUrlClassifierDBServiceWorker *worker, + mozIStorageConnection *connection, + const nsACString &entriesTableName) +{ + nsresult rv = nsUrlClassifierStore::Init(worker, connection, + entriesTableName); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mConnection->CreateStatement + (NS_LITERAL_CSTRING("INSERT OR REPLACE INTO ") + entriesTableName + + NS_LITERAL_CSTRING(" VALUES (?1, ?2, ?3, ?4, ?5, ?6)"), + getter_AddRefs(mUpdateStatement)); NS_ENSURE_SUCCESS(rv, rv); return NS_OK; } +// ------------------------------------------------------------------------- +// nsUrlClassifierSubStore class implementation + +// This class accesses the moz_subs table. +class nsUrlClassifierSubStore : public nsUrlClassifierStore +{ +public: + nsUrlClassifierSubStore() {}; + virtual ~nsUrlClassifierSubStore() {}; + + nsresult Init(nsUrlClassifierDBServiceWorker *worker, + mozIStorageConnection *connection, + const nsACString& entriesTableName); + + void Close(); + + // Read an entry from a database statement + virtual PRBool ReadStatement(mozIStorageStatement* statement, + nsUrlClassifierEntry& entry); + + // Prepare a statement to write this entry to the database + virtual nsresult BindStatement(const nsUrlClassifierEntry& entry, + mozIStorageStatement* statement); + + // Read a sub entry that would apply to the given add + nsresult ReadSubEntries(const nsUrlClassifierEntry &addEntry, + nsTArray &subEntry); + +protected: + nsCOMPtr mLookupWithAddChunkStatement; +}; + +nsresult +nsUrlClassifierSubStore::Init(nsUrlClassifierDBServiceWorker *worker, + mozIStorageConnection *connection, + const nsACString &entriesTableName) +{ + nsresult rv = nsUrlClassifierStore::Init(worker, 
connection, + entriesTableName); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mConnection->CreateStatement + (NS_LITERAL_CSTRING("INSERT OR REPLACE INTO ") + entriesTableName + + NS_LITERAL_CSTRING(" VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"), + getter_AddRefs(mUpdateStatement)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mConnection->CreateStatement + (NS_LITERAL_CSTRING("SELECT * FROM ") + entriesTableName + + NS_LITERAL_CSTRING(" WHERE domain=?1 AND table_id=?2 AND add_chunk_id=?3"), + getter_AddRefs(mLookupWithAddChunkStatement)); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +PRBool +nsUrlClassifierSubStore::ReadStatement(mozIStorageStatement* statement, + nsUrlClassifierEntry& entry) +{ + if (!nsUrlClassifierStore::ReadStatement(statement, entry)) + return PR_FALSE; + + entry.mAddChunkId = statement->AsInt32(6); + return PR_TRUE; +} + +nsresult +nsUrlClassifierSubStore::BindStatement(const nsUrlClassifierEntry& entry, + mozIStorageStatement* statement) +{ + nsresult rv = nsUrlClassifierStore::BindStatement(entry, statement); + NS_ENSURE_SUCCESS(rv, rv); + + return statement->BindInt32Parameter(6, entry.mAddChunkId); +} + +nsresult +nsUrlClassifierSubStore::ReadSubEntries(const nsUrlClassifierEntry &addEntry, + nsTArray& entries) +{ + mozStorageStatementScoper scoper(mLookupWithAddChunkStatement); + + nsresult rv = mLookupWithAddChunkStatement->BindBlobParameter + (0, addEntry.mKey.buf, DOMAIN_LENGTH); + NS_ENSURE_SUCCESS(rv, rv); + rv = mLookupWithAddChunkStatement->BindInt32Parameter(1, addEntry.mTableId); + NS_ENSURE_SUCCESS(rv, rv); + rv = mLookupWithAddChunkStatement->BindInt32Parameter(2, addEntry.mChunkId); + NS_ENSURE_SUCCESS(rv, rv); + + return ReadEntries(mLookupWithAddChunkStatement, entries); +} + +void +nsUrlClassifierSubStore::Close() +{ + nsUrlClassifierStore::Close(); + mLookupWithAddChunkStatement = nsnull; +} + // ------------------------------------------------------------------------- // Actual worker implemenatation class 
nsUrlClassifierDBServiceWorker : public nsIUrlClassifierDBServiceWorker @@ -591,7 +813,7 @@ public: // Queue a lookup for the worker to perform, called in the main thread. nsresult QueueLookup(const nsACString& lookupKey, - nsIUrlClassifierCallback* callback); + nsIUrlClassifierLookupCallback* callback); // Handle any queued-up lookups. We call this function during long-running // update operations to prevent lookups from blocking for too long. @@ -616,10 +838,21 @@ private: // Decompress a zlib'ed chunk (used for -exp tables) nsresult InflateChunk(nsACString& chunk); + // Expand shavar chunk into its individual entries + nsresult GetShaEntries(PRUint32 tableId, + PRUint32 chunkType, + PRUint32 chunkNum, + PRUint32 domainSize, + PRUint32 hashSize, + nsACString& chunk, + nsTArray& entries); + // Expand a chunk into its individual entries nsresult GetChunkEntries(const nsACString& table, PRUint32 tableId, + PRUint32 chunkType, PRUint32 chunkNum, + PRUint32 hashSize, nsACString& chunk, nsTArray& entries); @@ -650,6 +883,10 @@ private: nsresult CacheChunkLists(PRUint32 tableId, PRBool parseAdds, PRBool parseSubs); + + // Clear the cached list of add/subtract chunks. + void ClearCachedChunkLists(); + // Flush the cached add/subtract lists to the database. nsresult FlushChunkLists(); @@ -683,7 +920,7 @@ private: // expand it into the set of fragments that should be searched for in an // entry nsresult GetLookupFragments(const nsCSubstring& spec, - nsTArray& fragments); + nsTArray& fragments); // Check for a canonicalized IP address. 
PRBool IsCanonicalizedIP(const nsACString& host); @@ -693,16 +930,16 @@ private: // hostname.com/foo/bar -> hostname.com // mail.hostname.com/foo/bar -> mail.hostname.com // www.mail.hostname.com/foo/bar -> mail.hostname.com - nsresult GetKey(const nsACString& spec, nsUrlClassifierHash& hash); + nsresult GetKey(const nsACString& spec, nsUrlClassifierDomainHash& hash); // Look for a given lookup string (www.hostname.com/path/to/resource.html) - // in the entries at the given key. Return the tableids found. + // in the entries at the given key. Returns a list of entries that match. nsresult CheckKey(const nsCSubstring& spec, - const nsUrlClassifierHash& key, - nsTArray& tables); + const nsUrlClassifierDomainHash& key, + nsTArray& results); // Perform a classifier lookup for a given url. - nsresult DoLookup(const nsACString& spec, nsIUrlClassifierCallback* c); + nsresult DoLookup(const nsACString& spec, nsIUrlClassifierLookupCallback* c); nsCOMPtr mDBFile; @@ -715,10 +952,10 @@ private: // The main collection of entries. This is the store that will be checked // when classifying a URL. - nsUrlClassifierStore mMainStore; + nsUrlClassifierAddStore mMainStore; // The collection of subs waiting for their accompanying add. 
- nsUrlClassifierStore mPendingSubStore; + nsUrlClassifierSubStore mPendingSubStore; nsCOMPtr mGetChunkListsStatement; nsCOMPtr mSetChunkListsStatement; @@ -745,6 +982,7 @@ private: } mChunkType; PRUint32 mChunkNum; + PRUint32 mHashSize; PRUint32 mChunkLen; nsCString mUpdateTable; @@ -775,7 +1013,7 @@ private: class PendingLookup { public: nsCString mKey; - nsCOMPtr mCallback; + nsCOMPtr mCallback; }; // list of pending lookups @@ -786,7 +1024,14 @@ NS_IMPL_THREADSAFE_ISUPPORTS1(nsUrlClassifierDBServiceWorker, nsIUrlClassifierDBServiceWorker) nsUrlClassifierDBServiceWorker::nsUrlClassifierDBServiceWorker() - : mUpdateStatus(NS_OK) + : mUpdateWait(0) + , mState(STATE_LINE) + , mChunkType(CHUNK_ADD) + , mChunkNum(0) + , mHashSize(0) + , mChunkLen(0) + , mUpdateTableId(0) + , mUpdateStatus(NS_OK) , mInStream(PR_FALSE) , mPrimaryStream(PR_FALSE) , mHaveCachedLists(PR_FALSE) @@ -837,7 +1082,7 @@ nsUrlClassifierDBServiceWorker::Init() nsresult nsUrlClassifierDBServiceWorker::QueueLookup(const nsACString& spec, - nsIUrlClassifierCallback* callback) + nsIUrlClassifierLookupCallback* callback) { nsAutoLock lock(mPendingLookupLock); @@ -852,7 +1097,7 @@ nsUrlClassifierDBServiceWorker::QueueLookup(const nsACString& spec, nsresult nsUrlClassifierDBServiceWorker::GetLookupFragments(const nsACString& spec, - nsTArray& fragments) + nsTArray& fragments) { fragments.Clear(); @@ -940,7 +1185,7 @@ nsUrlClassifierDBServiceWorker::GetLookupFragments(const nsACString& spec, key.Append(*paths[pathIndex]); LOG(("Chking %s", key.get())); - nsUrlClassifierHash* hash = fragments.AppendElement(); + nsUrlClassifierCompleteHash* hash = fragments.AppendElement(); if (!hash) return NS_ERROR_OUT_OF_MEMORY; hash->FromPlaintext(key, mCryptoHash); } @@ -951,16 +1196,16 @@ nsUrlClassifierDBServiceWorker::GetLookupFragments(const nsACString& spec, nsresult nsUrlClassifierDBServiceWorker::CheckKey(const nsACString& spec, - const nsUrlClassifierHash& hash, - nsTArray& tables) + const 
nsUrlClassifierDomainHash& hash, + nsTArray& results) { mozStorageStatementScoper lookupScoper(mMainStore.LookupStatement()); nsresult rv = mMainStore.LookupStatement()->BindBlobParameter - (0, hash.buf, KEY_LENGTH); + (0, hash.buf, DOMAIN_LENGTH); NS_ENSURE_SUCCESS(rv, rv); - nsTArray fragments; + nsTArray fragments; PRBool haveFragments = PR_FALSE; PRBool exists; @@ -974,12 +1219,27 @@ nsUrlClassifierDBServiceWorker::CheckKey(const nsACString& spec, } nsUrlClassifierEntry entry; - if (!entry.ReadStatement(mMainStore.LookupStatement())) + if (!mMainStore.ReadStatement(mMainStore.LookupStatement(), entry)) return NS_ERROR_FAILURE; for (PRUint32 i = 0; i < fragments.Length(); i++) { - if (entry.HasFragment(fragments[i])) { - tables.AppendElement(entry.mTableId); + if (entry.Match(fragments[i])) { + // If the entry doesn't contain a complete hash, we need to + // save it here so that it can be compared against the + // complete hash. However, we don't set entry.mHaveComplete + // because it isn't a verified part of the entry yet. + nsUrlClassifierLookupResult *result = results.AppendElement(); + if (!result) + return NS_ERROR_OUT_OF_MEMORY; + + result->mLookupFragment = fragments[i]; + result->mEntry = entry; + // This is a confirmed result if we matched a complete + // fragment. + result->mConfirmed = entry.mHaveComplete; + + // Fill in the table name. 
+ GetTableName(entry.mTableId, result->mTableName); break; } } @@ -1004,16 +1264,16 @@ nsUrlClassifierDBServiceWorker::CheckKey(const nsACString& spec, */ nsresult nsUrlClassifierDBServiceWorker::DoLookup(const nsACString& spec, - nsIUrlClassifierCallback* c) + nsIUrlClassifierLookupCallback* c) { if (gShuttingDownThread) { - c->HandleEvent(EmptyCString()); + c->LookupComplete(nsnull); return NS_ERROR_NOT_INITIALIZED; } nsresult rv = OpenDb(); if (NS_FAILED(rv)) { - c->HandleEvent(EmptyCString()); + c->LookupComplete(nsnull); return NS_ERROR_FAILURE; } @@ -1035,20 +1295,29 @@ nsUrlClassifierDBServiceWorker::DoLookup(const nsACString& spec, const nsCSubstring& host = Substring(begin, iter++); - nsTArray resultTables; - nsUrlClassifierHash hash; + nsAutoPtr > results; + results = new nsTArray(); + if (!results) { + c->LookupComplete(nsnull); + return NS_ERROR_OUT_OF_MEMORY; + } + + nsUrlClassifierDomainHash hash; if (IsCanonicalizedIP(host)) { // Don't break up the host into components - hash.FromPlaintext(host, mCryptoHash); - CheckKey(spec, hash, resultTables); + nsCAutoString lookupHost; + lookupHost.Assign(host); + lookupHost.Append("/"); + hash.FromPlaintext(lookupHost, mCryptoHash); + CheckKey(spec, hash, *results); } else { nsCStringArray hostComponents; hostComponents.ParseString(PromiseFlatCString(host).get(), "."); if (hostComponents.Count() < 2) { // no host or toplevel host, this won't match anything in the db - c->HandleEvent(EmptyCString()); + c->LookupComplete(nsnull); return NS_OK; } @@ -1063,7 +1332,7 @@ nsUrlClassifierDBServiceWorker::DoLookup(const nsACString& spec, // we ignore failures from CheckKey because we'd rather try to find // more results than fail. 
- CheckKey(spec, hash, resultTables); + CheckKey(spec, hash, *results); // Now check with three domain components if (hostComponents.Count() > 2) { @@ -1073,23 +1342,10 @@ nsUrlClassifierDBServiceWorker::DoLookup(const nsACString& spec, lookupHost2.Append(lookupHost); hash.FromPlaintext(lookupHost2, mCryptoHash); - CheckKey(spec, hash, resultTables); + CheckKey(spec, hash, *results); } } - nsCAutoString result; - for (PRUint32 i = 0; i < resultTables.Length(); i++) { - nsCAutoString tableName; - GetTableName(resultTables[i], tableName); - - // ignore GetTableName failures - we want to try to get as many of the - // matched tables as possible - if (!result.IsEmpty()) { - result.Append(','); - } - result.Append(tableName); - } - #if defined(PR_LOGGING) if (LOG_ENABLED()) { PRIntervalTime clockEnd = PR_IntervalNow(); @@ -1098,7 +1354,8 @@ nsUrlClassifierDBServiceWorker::DoLookup(const nsACString& spec, } #endif - c->HandleEvent(result); + // At this point ownership of 'results' is handed to the callback. + c->LookupComplete(results.forget()); return NS_OK; } @@ -1123,8 +1380,7 @@ nsUrlClassifierDBServiceWorker::HandlePendingLookups() // Lookup a key in the db. 
NS_IMETHODIMP nsUrlClassifierDBServiceWorker::Lookup(const nsACString& spec, - nsIUrlClassifierCallback* c, - PRBool needsProxy) + nsIUrlClassifierCallback* c) { return HandlePendingLookups(); } @@ -1280,63 +1536,6 @@ nsUrlClassifierDBServiceWorker::InflateChunk(nsACString& chunk) return NS_OK; } -nsresult -nsUrlClassifierStore::ReadEntry(const nsUrlClassifierHash& hash, - PRUint32 tableId, - nsUrlClassifierEntry& entry) -{ - entry.Clear(); - - mozStorageStatementScoper scoper(mLookupWithTableStatement); - - nsresult rv = mLookupWithTableStatement->BindBlobParameter - (0, hash.buf, KEY_LENGTH); - NS_ENSURE_SUCCESS(rv, rv); - rv = mLookupWithTableStatement->BindInt32Parameter(1, tableId); - NS_ENSURE_SUCCESS(rv, rv); - - PRBool exists; - rv = mLookupWithTableStatement->ExecuteStep(&exists); - NS_ENSURE_SUCCESS(rv, rv); - - if (exists) { - if (!entry.ReadStatement(mLookupWithTableStatement)) - return NS_ERROR_FAILURE; - } else { - // New entry, initialize it - entry.mKey = hash; - entry.mTableId = tableId; - } - - return NS_OK; -} - -nsresult -nsUrlClassifierStore::ReadEntry(PRUint32 id, - nsUrlClassifierEntry& entry) -{ - entry.Clear(); - entry.mId = id; - - mozStorageStatementScoper scoper(mLookupWithIDStatement); - - nsresult rv = mLookupWithIDStatement->BindInt32Parameter(0, id); - NS_ENSURE_SUCCESS(rv, rv); - rv = mLookupWithIDStatement->BindInt32Parameter(0, id); - NS_ENSURE_SUCCESS(rv, rv); - - PRBool exists; - rv = mLookupWithIDStatement->ExecuteStep(&exists); - NS_ENSURE_SUCCESS(rv, rv); - - if (exists) { - if (!entry.ReadStatement(mLookupWithIDStatement)) - return NS_ERROR_FAILURE; - } - - return NS_OK; -} - nsresult nsUrlClassifierStore::DeleteEntry(nsUrlClassifierEntry& entry) { @@ -1359,13 +1558,9 @@ nsUrlClassifierStore::WriteEntry(nsUrlClassifierEntry& entry) { mozStorageStatementScoper scoper(mUpdateStatement); - if (entry.IsEmpty()) { - return DeleteEntry(entry); - } - PRBool newEntry = (entry.mId == 0); - nsresult rv = 
entry.BindStatement(mUpdateStatement); + nsresult rv = BindStatement(entry, mUpdateStatement); NS_ENSURE_SUCCESS(rv, rv); rv = mUpdateStatement->Execute(); @@ -1403,7 +1598,7 @@ nsUrlClassifierDBServiceWorker::IsCanonicalizedIP(const nsACString& host) nsresult nsUrlClassifierDBServiceWorker::GetKey(const nsACString& spec, - nsUrlClassifierHash& hash) + nsUrlClassifierDomainHash& hash) { nsACString::const_iterator begin, end, iter; spec.BeginReading(begin); @@ -1417,7 +1612,10 @@ nsUrlClassifierDBServiceWorker::GetKey(const nsACString& spec, const nsCSubstring& host = Substring(begin, iter); if (IsCanonicalizedIP(host)) { - return hash.FromPlaintext(host, mCryptoHash); + nsCAutoString key; + key.Assign(host); + key.Append("/"); + return hash.FromPlaintext(key, mCryptoHash); } nsCStringArray hostComponents; @@ -1442,10 +1640,113 @@ nsUrlClassifierDBServiceWorker::GetKey(const nsACString& spec, return hash.FromPlaintext(lookupHost, mCryptoHash); } +nsresult +nsUrlClassifierDBServiceWorker::GetShaEntries(PRUint32 tableId, + PRUint32 chunkType, + PRUint32 chunkNum, + PRUint32 domainSize, + PRUint32 fragmentSize, + nsACString& chunk, + nsTArray& entries) +{ + PRUint32 start = 0; + while (start + domainSize + 1 <= chunk.Length()) { + nsUrlClassifierDomainHash domain; + domain.Assign(Substring(chunk, start, DOMAIN_LENGTH)); + start += domainSize; + + // then there is a one-byte count of fragments + PRUint8 numEntries = static_cast(chunk[start]); + start++; + + if (numEntries == 0) { + // if there are no fragments, the domain itself is treated as a + // fragment. 
This will only work if domainHashSize == hashSize + if (domainSize != fragmentSize) { + NS_WARNING("Received 0-fragment entry where domainSize != fragmentSize"); + return NS_ERROR_FAILURE; + } + + nsUrlClassifierEntry* entry = entries.AppendElement(); + if (!entry) return NS_ERROR_OUT_OF_MEMORY; + + entry->mKey = domain; + entry->mTableId = tableId; + entry->mChunkId = chunkNum; + entry->SetHash(domain); + + if (chunkType == CHUNK_SUB) { + if (start + 4 > chunk.Length()) { + // there isn't as much data as there should be. + NS_WARNING("Received a zero-entry sub chunk without an associated add."); + return NS_ERROR_FAILURE; + } + const nsCSubstring& str = Substring(chunk, start, 4); + const PRUint32 *p = reinterpret_cast(str.BeginReading()); + entry->mAddChunkId = PR_ntohl(*p); + if (entry->mAddChunkId == 0) { + NS_WARNING("Received invalid chunk number."); + return NS_ERROR_FAILURE; + } + start += 4; + } + } else { + PRUint32 entrySize = fragmentSize; + if (chunkType == CHUNK_SUB) { + entrySize += 4; + } + if (start + (numEntries * entrySize) > chunk.Length()) { + // there isn't as much data as they said there would be. 
+ NS_WARNING("Received a chunk without enough data"); + return NS_ERROR_FAILURE; + } + + for (PRUint8 i = 0; i < numEntries; i++) { + nsUrlClassifierEntry* entry = entries.AppendElement(); + if (!entry) return NS_ERROR_OUT_OF_MEMORY; + + entry->mKey = domain; + entry->mTableId = tableId; + entry->mChunkId = chunkNum; + + if (chunkType == CHUNK_SUB) { + const nsCSubstring& str = Substring(chunk, start, 4); + const PRUint32 *p = reinterpret_cast(str.BeginReading()); + entry->mAddChunkId = PR_ntohl(*p); + if (entry->mAddChunkId == 0) { + NS_WARNING("Received invalid chunk number."); + return NS_ERROR_FAILURE; + } + start += 4; + } + + if (fragmentSize == PARTIAL_LENGTH) { + nsUrlClassifierPartialHash hash; + hash.Assign(Substring(chunk, start, PARTIAL_LENGTH)); + entry->SetHash(hash); + } else if (fragmentSize == COMPLETE_LENGTH) { + nsUrlClassifierCompleteHash hash; + hash.Assign(Substring(chunk, start, COMPLETE_LENGTH)); + entry->SetHash(hash); + } else { + NS_ASSERTION(PR_FALSE, "Invalid fragment size!"); + return NS_ERROR_FAILURE; + } + + start += fragmentSize; + } + } + } + + return NS_OK; +} + nsresult nsUrlClassifierDBServiceWorker::GetChunkEntries(const nsACString& table, PRUint32 tableId, + PRUint32 chunkType, PRUint32 chunkNum, + PRUint32 hashSize, nsACString& chunk, nsTArray& entries) { @@ -1456,59 +1757,54 @@ nsUrlClassifierDBServiceWorker::GetChunkEntries(const nsACString& table, NS_ENSURE_SUCCESS(rv, rv); } - if (StringEndsWith(table, NS_LITERAL_CSTRING("-sha128"))) { - PRUint32 start = 0; - while (start + KEY_LENGTH + 1 <= chunk.Length()) { - nsUrlClassifierEntry* entry = entries.AppendElement(); - if (!entry) return NS_ERROR_OUT_OF_MEMORY; - - // first 16 bytes are the domain/key - entry->mKey.Assign(Substring(chunk, start, KEY_LENGTH)); - - start += KEY_LENGTH; - // then there is a one-byte count of fragments - PRUint8 numEntries = static_cast(chunk[start]); - start++; - - if (numEntries == 0) { - // if there are no fragments, the domain itself is 
treated as a - // fragment - entry->AddFragment(entry->mKey, chunkNum); - } else { - if (start + (numEntries * KEY_LENGTH) > chunk.Length()) { - // there isn't as much data as they said there would be. - return NS_ERROR_FAILURE; - } - - for (PRUint8 i = 0; i < numEntries; i++) { - nsUrlClassifierHash hash; - hash.Assign(Substring(chunk, start, KEY_LENGTH)); - entry->AddFragment(hash, chunkNum); - start += KEY_LENGTH; - } - } - } + if (StringEndsWith(table, NS_LITERAL_CSTRING("-shavar"))) { + rv = GetShaEntries(tableId, chunkType, chunkNum, DOMAIN_LENGTH, hashSize, + chunk, entries); + NS_ENSURE_SUCCESS(rv, rv); } else { nsCStringArray lines; lines.ParseString(PromiseFlatCString(chunk).get(), "\n"); - nsUrlClassifierEntry* entry = nsnull; // non-hashed tables need to be hashed for (PRInt32 i = 0; i < lines.Count(); i++) { - nsUrlClassifierHash key; - rv = GetKey(*lines[i], key); - NS_ENSURE_SUCCESS(rv, rv); + nsUrlClassifierEntry *entry = entries.AppendElement(); + if (!entry) + return NS_ERROR_OUT_OF_MEMORY; - if (!entry || key != entry->mKey) { - entry = entries.AppendElement(); - if (!entry) return NS_ERROR_OUT_OF_MEMORY; - entry->mKey = key; + nsCAutoString entryStr; + if (chunkType == CHUNK_SUB) { + nsCString::const_iterator begin, iter, end; + lines[i]->BeginReading(begin); + lines[i]->EndReading(end); + iter = begin; + if (!FindCharInReadable(':', iter, end) || + PR_sscanf(lines[i]->get(), "%d:", &entry->mAddChunkId) != 1) { + NS_WARNING("Received sub chunk without associated add chunk."); + return NS_ERROR_FAILURE; + } + iter++; + entryStr = Substring(iter, end); + } else { + entryStr = *lines[i]; } + rv = GetKey(entryStr, entry->mKey); + NS_ENSURE_SUCCESS(rv, rv); + entry->mTableId = tableId; - nsUrlClassifierHash hash; - hash.FromPlaintext(*lines[i], mCryptoHash); - entry->AddFragment(hash, mChunkNum); + entry->mChunkId = chunkNum; + if (hashSize == PARTIAL_LENGTH) { + nsUrlClassifierPartialHash hash; + hash.FromPlaintext(entryStr, mCryptoHash); + 
entry->SetHash(hash); + } else if (hashSize == COMPLETE_LENGTH) { + nsUrlClassifierCompleteHash hash; + hash.FromPlaintext(entryStr, mCryptoHash); + entry->SetHash(hash); + } else { + NS_ASSERTION(PR_FALSE, "Invalid fragment size!"); + return NS_ERROR_FAILURE; + } } } @@ -1697,8 +1993,16 @@ nsUrlClassifierDBServiceWorker::FlushChunkLists() nsresult rv = SetChunkLists(mCachedListsTable, mCachedAddsStr, mCachedSubsStr); - // clear out the cache before checking/returning the error here. + // clear out the cache before checking/returning the error here. + ClearCachedChunkLists(); + + return rv; +} + +void +nsUrlClassifierDBServiceWorker::ClearCachedChunkLists() +{ mCachedAddsStr.Truncate(); mCachedSubsStr.Truncate(); mCachedListsTable = PR_UINT32_MAX; @@ -1709,8 +2013,6 @@ nsUrlClassifierDBServiceWorker::FlushChunkLists() mCachedSubChunks.Clear(); mHaveCachedSubChunks = PR_FALSE; - - return rv; } nsresult @@ -1733,50 +2035,46 @@ nsUrlClassifierDBServiceWorker::AddChunk(PRUint32 tableId, nsTArray entryIDs; + nsAutoTArray subEntries; + nsUrlClassifierDomainHash lastKey; + for (PRUint32 i = 0; i < entries.Length(); i++) { nsUrlClassifierEntry& thisEntry = entries[i]; HandlePendingLookups(); - nsUrlClassifierEntry existingEntry; - rv = mPendingSubStore.ReadEntry(thisEntry.mKey, tableId, existingEntry); - NS_ENSURE_SUCCESS(rv, rv); - - if (thisEntry.SubtractFragments(existingEntry)) { - // We've modified this pending subtraction, write it back to the - // pending subs store. - rv = mPendingSubStore.WriteEntry(existingEntry); + if (i == 0 || lastKey != thisEntry.mKey) { + subEntries.Clear(); + rv = mPendingSubStore.ReadSubEntries(thisEntry, subEntries); NS_ENSURE_SUCCESS(rv, rv); + lastKey = thisEntry.mKey; + } - if (thisEntry.IsEmpty()) { - // We removed all the adds from this entry, skip to the next one. 
- continue; + PRBool writeEntry = PR_TRUE; + for (PRUint32 j = 0; j < subEntries.Length(); j++) { + if (thisEntry.SubMatch(subEntries[j])) { + rv = mPendingSubStore.DeleteEntry(subEntries[j]); + NS_ENSURE_SUCCESS(rv, rv); + subEntries.RemoveElementAt(j); + + writeEntry = PR_FALSE; + break; } } - existingEntry.Clear(); - rv = mMainStore.ReadEntry(thisEntry.mKey, tableId, existingEntry); - NS_ENSURE_SUCCESS(rv, rv); - - if (!existingEntry.Merge(thisEntry)) - return NS_ERROR_FAILURE; - HandlePendingLookups(); - rv = mMainStore.WriteEntry(existingEntry); - NS_ENSURE_SUCCESS(rv, rv); - - entryIDs.AppendElement(existingEntry.mId); + if (writeEntry) { + rv = mMainStore.WriteEntry(thisEntry); + NS_ENSURE_SUCCESS(rv, rv); + } } - rv = mMainStore.SetChunkEntries(tableId, chunkNum, entryIDs); - NS_ENSURE_SUCCESS(rv, rv); - #if defined(PR_LOGGING) if (LOG_ENABLED()) { PRIntervalTime clockEnd = PR_IntervalNow(); - printf("adding chunk %d took %dms\n", chunkNum, - PR_IntervalToMilliseconds(clockEnd - clockStart)); + LOG(("adding chunk %d took %dms\n", chunkNum, + PR_IntervalToMilliseconds(clockEnd - clockStart))); } #endif @@ -1784,53 +2082,20 @@ nsUrlClassifierDBServiceWorker::AddChunk(PRUint32 tableId, } nsresult -nsUrlClassifierStore::Expire(PRUint32 tableId, - PRUint32 chunkNum) +nsUrlClassifierStore::Expire(PRUint32 tableId, PRUint32 chunkNum) { LOG(("Expiring chunk %d\n", chunkNum)); - mozStorageStatementScoper getChunkEntriesScoper(mGetChunkEntriesStatement); + mozStorageStatementScoper expireScoper(mExpireStatement); - nsresult rv = mGetChunkEntriesStatement->BindInt32Parameter(0, chunkNum); + nsresult rv = mExpireStatement->BindInt32Parameter(0, tableId); NS_ENSURE_SUCCESS(rv, rv); - rv = mGetChunkEntriesStatement->BindInt32Parameter(1, tableId); + rv = mExpireStatement->BindInt32Parameter(1, chunkNum); NS_ENSURE_SUCCESS(rv, rv); mWorker->HandlePendingLookups(); - PRBool exists; - rv = mGetChunkEntriesStatement->ExecuteStep(&exists); - NS_ENSURE_SUCCESS(rv, rv); - while 
(exists) { - PRUint32 size; - const PRUint8* blob = mGetChunkEntriesStatement->AsSharedBlob(0, &size); - if (blob) { - const PRUint32* entries = reinterpret_cast(blob); - for (PRUint32 i = 0; i < (size / sizeof(PRUint32)); i++) { - mWorker->HandlePendingLookups(); - - nsUrlClassifierEntry entry; - rv = ReadEntry(entries[i], entry); - NS_ENSURE_SUCCESS(rv, rv); - - entry.SubtractChunk(chunkNum); - - rv = WriteEntry(entry); - NS_ENSURE_SUCCESS(rv, rv); - } - } - - mWorker->HandlePendingLookups(); - rv = mGetChunkEntriesStatement->ExecuteStep(&exists); - NS_ENSURE_SUCCESS(rv, rv); - } - - mWorker->HandlePendingLookups(); - - mozStorageStatementScoper removeScoper(mDeleteChunkEntriesStatement); - mDeleteChunkEntriesStatement->BindInt32Parameter(0, tableId); - mDeleteChunkEntriesStatement->BindInt32Parameter(1, chunkNum); - rv = mDeleteChunkEntriesStatement->Execute(); + rv = mExpireStatement->Execute(); NS_ENSURE_SUCCESS(rv, rv); return NS_OK; @@ -1855,47 +2120,38 @@ nsUrlClassifierDBServiceWorker::SubChunk(PRUint32 tableId, nsresult rv = CacheChunkLists(tableId, PR_FALSE, PR_TRUE); mCachedSubChunks.AppendElement(chunkNum); - nsTArray entryIDs; + nsAutoTArray existingEntries; + nsUrlClassifierDomainHash lastKey; for (PRUint32 i = 0; i < entries.Length(); i++) { nsUrlClassifierEntry& thisEntry = entries[i]; HandlePendingLookups(); - nsUrlClassifierEntry existingEntry; - rv = mMainStore.ReadEntry(thisEntry.mKey, tableId, existingEntry); - NS_ENSURE_SUCCESS(rv, rv); - - if (existingEntry.SubtractFragments(thisEntry)) { - // We removed fragments, write the entry back. - rv = mMainStore.WriteEntry(existingEntry); + if (i == 0 || lastKey != thisEntry.mKey) { + existingEntries.Clear(); + rv = mMainStore.ReadEntries(thisEntry.mKey, thisEntry.mTableId, + thisEntry.mAddChunkId, existingEntries); NS_ENSURE_SUCCESS(rv, rv); + lastKey = thisEntry.mKey; } - HandlePendingLookups(); - - if (!thisEntry.IsEmpty()) { - // There are leftover subtracts in this entry. 
Save them in the - // pending subtraction store. - existingEntry.Clear(); - rv = mPendingSubStore.ReadEntry(thisEntry.mKey, tableId, existingEntry); - NS_ENSURE_SUCCESS(rv, rv); - - if (!existingEntry.Merge(thisEntry)) - return NS_ERROR_FAILURE; - - rv = mPendingSubStore.WriteEntry(existingEntry); - NS_ENSURE_SUCCESS(rv, rv); - - entryIDs.AppendElement(existingEntry.mId); + PRUint32 writeEntry = PR_TRUE; + for (PRUint32 j = 0; j < existingEntries.Length(); j++) { + if (existingEntries[j].SubMatch(thisEntry)) { + rv = mMainStore.DeleteEntry(existingEntries[j]); + NS_ENSURE_SUCCESS(rv, rv); + existingEntries.RemoveElementAt(j); + writeEntry = PR_FALSE; + break; + } } - } - HandlePendingLookups(); - - if (entryIDs.Length() > 0) { - rv = mPendingSubStore.SetChunkEntries(tableId, chunkNum, entryIDs); - NS_ENSURE_SUCCESS(rv, rv); + if (writeEntry) { + // Save this entry in the pending subtraction store. + rv = mPendingSubStore.WriteEntry(thisEntry); + NS_ENSURE_SUCCESS(rv, rv); + } } return NS_OK; @@ -1914,17 +2170,12 @@ nsUrlClassifierDBServiceWorker::ExpireSub(PRUint32 tableId, PRUint32 chunkNum) nsresult nsUrlClassifierDBServiceWorker::ProcessChunk(PRBool* done) { - // wait until the chunk plus terminating \n has been read - if (mPendingStreamUpdate.Length() <= static_cast(mChunkLen)) { + // wait until the chunk has been read + if (mPendingStreamUpdate.Length() < static_cast(mChunkLen)) { *done = PR_TRUE; return NS_OK; } - if (mPendingStreamUpdate[mChunkLen] != '\n') { - LOG(("Didn't get a terminating newline after the chunk, failing the update")); - return NS_ERROR_FAILURE; - } - nsCAutoString chunk; chunk.Assign(Substring(mPendingStreamUpdate, 0, mChunkLen)); mPendingStreamUpdate = Substring(mPendingStreamUpdate, mChunkLen); @@ -1932,7 +2183,8 @@ nsUrlClassifierDBServiceWorker::ProcessChunk(PRBool* done) LOG(("Handling a chunk sized %d", chunk.Length())); nsTArray entries; - nsresult rv = GetChunkEntries(mUpdateTable, mUpdateTableId, mChunkNum, chunk, entries); + 
nsresult rv = GetChunkEntries(mUpdateTable, mUpdateTableId, mChunkType, + mChunkNum, mHashSize, chunk, entries); NS_ENSURE_SUCCESS(rv, rv); if (mChunkType == CHUNK_ADD) { @@ -1941,9 +2193,6 @@ nsUrlClassifierDBServiceWorker::ProcessChunk(PRBool* done) rv = SubChunk(mUpdateTableId, mChunkNum, entries); } - // pop off the chunk and the trailing \n - mPendingStreamUpdate = Substring(mPendingStreamUpdate, 1); - mState = STATE_LINE; *done = PR_FALSE; @@ -1997,9 +2246,10 @@ nsUrlClassifierDBServiceWorker::ProcessResponseLines(PRBool* done) const nsCSubstring& data = Substring(line, 2); PRInt32 space; if ((space = data.FindChar(' ')) == kNotFound) { - mUpdateObserver->UpdateUrlRequested(data); + mUpdateObserver->UpdateUrlRequested(data, mUpdateTable); } else { - mUpdateObserver->UpdateUrlRequested(Substring(data, 0, space)); + mUpdateObserver->UpdateUrlRequested(Substring(data, 0, space), + mUpdateTable); // The rest is the mac, which we don't support for now } } else if (StringBeginsWith(line, NS_LITERAL_CSTRING("a:")) || @@ -2007,10 +2257,19 @@ nsUrlClassifierDBServiceWorker::ProcessResponseLines(PRBool* done) mState = STATE_CHUNK; char command; if (PR_sscanf(PromiseFlatCString(line).get(), - "%c:%d:%d", &command, &mChunkNum, &mChunkLen) != 3 || - mChunkLen > MAX_CHUNK_SIZE) { + "%c:%d:%d:%d", &command, &mChunkNum, &mHashSize, &mChunkLen) != 4) { return NS_ERROR_FAILURE; } + + if (mChunkLen > MAX_CHUNK_SIZE) { + return NS_ERROR_FAILURE; + } + + if (!(mHashSize == PARTIAL_LENGTH || mHashSize == COMPLETE_LENGTH)) { + NS_WARNING("Invalid hash size specified in update."); + return NS_ERROR_FAILURE; + } + mChunkType = (command == 'a') ? 
CHUNK_ADD : CHUNK_SUB; // Done parsing lines, move to chunk state now @@ -2053,6 +2312,7 @@ nsUrlClassifierDBServiceWorker::ResetStream() { mState = STATE_LINE; mChunkNum = 0; + mHashSize = 0; mChunkLen = 0; mInStream = PR_FALSE; mPrimaryStream = PR_FALSE; @@ -2068,6 +2328,13 @@ nsUrlClassifierDBServiceWorker::ResetUpdate() mUpdateObserver = nsnull; } +NS_IMETHODIMP +nsUrlClassifierDBServiceWorker::SetHashCompleter(const nsACString &tableName, + nsIUrlClassifierHashCompleter *completer) +{ + return NS_ERROR_NOT_IMPLEMENTED; +} + NS_IMETHODIMP nsUrlClassifierDBServiceWorker::BeginUpdate(nsIUrlClassifierUpdateObserver *observer) { @@ -2111,7 +2378,7 @@ nsUrlClassifierDBServiceWorker::BeginUpdate(nsIUrlClassifierUpdateObserver *obse } NS_IMETHODIMP -nsUrlClassifierDBServiceWorker::BeginStream() +nsUrlClassifierDBServiceWorker::BeginStream(const nsACString &table) { if (gShuttingDownThread) return NS_ERROR_NOT_INITIALIZED; @@ -2121,6 +2388,12 @@ nsUrlClassifierDBServiceWorker::BeginStream() mInStream = PR_TRUE; + if (!table.IsEmpty()) { + mUpdateTable = table; + GetTableId(mUpdateTable, &mUpdateTableId); + LOG(("update table: '%s' (%d)", mUpdateTable.get(), mUpdateTableId)); + } + return NS_OK; } @@ -2247,6 +2520,8 @@ nsUrlClassifierDBServiceWorker::FinishUpdate() NS_IMETHODIMP nsUrlClassifierDBServiceWorker::ResetDatabase() { + ClearCachedChunkLists(); + nsresult rv = CloseDb(); NS_ENSURE_SUCCESS(rv, rv); @@ -2263,6 +2538,7 @@ nsUrlClassifierDBServiceWorker::CancelUpdate() if (mUpdateObserver) { mUpdateStatus = NS_BINDING_ABORTED; + ClearCachedChunkLists(); mConnection->RollbackTransaction(); mUpdateObserver->UpdateError(mUpdateStatus); @@ -2376,13 +2652,11 @@ nsUrlClassifierDBServiceWorker::OpenDb() NS_ENSURE_SUCCESS(rv, rv); rv = mMainStore.Init(this, connection, - NS_LITERAL_CSTRING("moz_classifier"), - NS_LITERAL_CSTRING("moz_chunks")); + NS_LITERAL_CSTRING("moz_classifier")); NS_ENSURE_SUCCESS(rv, rv); rv = mPendingSubStore.Init(this, connection, - 
NS_LITERAL_CSTRING("moz_subs"), - NS_LITERAL_CSTRING("moz_sub_chunks")); + NS_LITERAL_CSTRING("moz_subs")); rv = connection->CreateStatement (NS_LITERAL_CSTRING("SELECT add_chunks, sub_chunks FROM moz_tables" @@ -2438,53 +2712,45 @@ nsUrlClassifierDBServiceWorker::MaybeCreateTables(mozIStorageConnection* connect NS_LITERAL_CSTRING("CREATE TABLE IF NOT EXISTS moz_classifier" " (id INTEGER PRIMARY KEY," " domain BLOB," - " data BLOB," + " partial_data BLOB," + " complete_data BLOB," + " chunk_id INTEGER," " table_id INTEGER)")); NS_ENSURE_SUCCESS(rv, rv); rv = connection->ExecuteSimpleSQL( - NS_LITERAL_CSTRING("CREATE UNIQUE INDEX IF NOT EXISTS" + NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS" " moz_classifier_domain_index" - " ON moz_classifier(domain, table_id)")); + " ON moz_classifier(domain)")); NS_ENSURE_SUCCESS(rv, rv); rv = connection->ExecuteSimpleSQL( - NS_LITERAL_CSTRING("CREATE TABLE IF NOT EXISTS moz_chunks" - " (chunk_id INTEGER," - " table_id INTEGER," - " entries BLOB)")); + NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS" + " moz_classifier_chunk_index" + " ON moz_classifier(chunk_id)")); NS_ENSURE_SUCCESS(rv, rv); - rv = connection->ExecuteSimpleSQL( - NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS moz_chunks_id" - " ON moz_chunks(chunk_id)")); - NS_ENSURE_SUCCESS(rv, rv); - - rv = connection->ExecuteSimpleSQL( NS_LITERAL_CSTRING("CREATE TABLE IF NOT EXISTS moz_subs" " (id INTEGER PRIMARY KEY," " domain BLOB," - " data BLOB," - " table_id INTEGER)")); - NS_ENSURE_SUCCESS(rv, rv); - - rv = connection->ExecuteSimpleSQL( - NS_LITERAL_CSTRING("CREATE UNIQUE INDEX IF NOT EXISTS" - " moz_subs_domain_index" - " ON moz_subs(domain, table_id)")); - NS_ENSURE_SUCCESS(rv, rv); - - rv = connection->ExecuteSimpleSQL( - NS_LITERAL_CSTRING("CREATE TABLE IF NOT EXISTS moz_sub_chunks" - " (chunk_id INTEGER," + " partial_data BLOB," + " complete_data BLOB," + " chunk_id INTEGER," " table_id INTEGER," - " entries BLOB)")); + " add_chunk_id INTEGER)")); 
NS_ENSURE_SUCCESS(rv, rv); rv = connection->ExecuteSimpleSQL( - NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS moz_sub_chunks_id" - " ON moz_sub_chunks(chunk_id)")); + NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS" + " moz_subs_domain_index" + " ON moz_subs(domain)")); + NS_ENSURE_SUCCESS(rv, rv); + + rv = connection->ExecuteSimpleSQL( + NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS" + " moz_subs_chunk_index" + " ON moz_subs(chunk_id)")); NS_ENSURE_SUCCESS(rv, rv); rv = connection->ExecuteSimpleSQL( @@ -2498,6 +2764,170 @@ nsUrlClassifierDBServiceWorker::MaybeCreateTables(mozIStorageConnection* connect return rv; } +// ------------------------------------------------------------------------- +// nsUrlClassifierLookupCallback +// +// This class takes the results of a lookup found on the worker thread +// and handles any necessary partial hash expansions before calling +// the client callback. + +class nsUrlClassifierLookupCallback : public nsIUrlClassifierLookupCallback + , public nsIUrlClassifierHashCompleterCallback +{ +public: + NS_DECL_ISUPPORTS + NS_DECL_NSIURLCLASSIFIERLOOKUPCALLBACK + NS_DECL_NSIURLCLASSIFIERHASHCOMPLETERCALLBACK + + nsUrlClassifierLookupCallback(nsUrlClassifierDBService *dbservice, + nsIUrlClassifierCallback *c) + : mDBService(dbservice) + , mResults(nsnull) + , mPendingCompletions(0) + , mCallback(c) + {} + +private: + nsresult HandleResults(); + + nsRefPtr mDBService; + nsAutoPtr > mResults; + PRUint32 mPendingCompletions; + nsCOMPtr mCallback; +}; + +NS_IMPL_THREADSAFE_ISUPPORTS2(nsUrlClassifierLookupCallback, + nsIUrlClassifierLookupCallback, + nsIUrlClassifierHashCompleterCallback) + +NS_IMETHODIMP +nsUrlClassifierLookupCallback::LookupComplete(nsTArray* results) +{ + NS_ASSERTION(mResults == nsnull, + "Should only get one set of results per nsUrlClassifierLookupCallback!"); + + if (!results) { + HandleResults(); + return NS_OK; + } + + mResults = results; + + // Check the results for partial matches. 
Partial matches will need to be + // completed. + for (PRUint32 i = 0; i < results->Length(); i++) { + nsUrlClassifierLookupResult& result = results->ElementAt(i); + if (!result.mConfirmed) { + nsCOMPtr completer; + if (mDBService->GetCompleter(result.mTableName, + getter_AddRefs(completer))) { + nsCAutoString partialHash; + partialHash.Assign(reinterpret_cast(result.mEntry.mPartialHash.buf), + PARTIAL_LENGTH); + + nsresult rv = completer->Complete(partialHash, this); + if (NS_SUCCEEDED(rv)) { + mPendingCompletions++; + } + } else { + NS_WARNING("Partial match in a table without a valid completer, ignoring partial match."); + } + } + } + + if (mPendingCompletions == 0) { + // All results were complete, we're ready! + HandleResults(); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsUrlClassifierLookupCallback::CompletionFinished(nsresult status) +{ + LOG(("nsUrlClassifierLookupCallback::CompletionFinished [%p, %08x]", + this, status)); + if (NS_FAILED(status)) { + NS_WARNING("gethash response failed."); + } + + mPendingCompletions--; + if (mPendingCompletions == 0) { + HandleResults(); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsUrlClassifierLookupCallback::Completion(const nsACString& completeHash, + const nsACString& tableName, + PRUint32 chunkId) +{ + LOG(("nsUrlClassifierLookupCallback::Completion [%p]", this)); + nsUrlClassifierCompleteHash hash; + hash.Assign(completeHash); + + for (PRUint32 i = 0; i < mResults->Length(); i++) { + nsUrlClassifierLookupResult& result = mResults->ElementAt(i); + + if (!result.mEntry.mHaveComplete && + hash.StartsWith(result.mEntry.mPartialHash) && + // XXX: We really want to be comparing the table name to make + // sure it matches. Due to a short-lived server bug, they + // won't just yet. This should be fixed as soon as the server is. +#if 0 + result.mTableName == tableName && +#endif + result.mEntry.mChunkId == chunkId) { + // We have a completion for this entry. Fill it in... + result.mEntry.SetHash(hash); + + // ... 
and make sure that it was the entry we were looking for. + if (result.mLookupFragment == hash) + result.mConfirmed = PR_TRUE; + + // XXX: Schedule this result for caching. + } + } + + return NS_OK; +} + +nsresult +nsUrlClassifierLookupCallback::HandleResults() +{ + if (!mResults) { + // No results, this URI is clean. + return mCallback->HandleEvent(NS_LITERAL_CSTRING("")); + } + + // Build a stringified list of result tables. + mResults->Sort(); + PRUint32 lastTableId = 0; + nsCAutoString tables; + for (PRUint32 i = 0; i < mResults->Length(); i++) { + nsUrlClassifierLookupResult& result = mResults->ElementAt(i); + // Leave out results that weren't confirmed, as their existence on + // the list can't be verified. + if (!result.mConfirmed) + continue; + + if (tables.Length() > 0) { + if (lastTableId == result.mEntry.mTableId) + continue; + tables.Append(","); + } + + tables.Append(result.mTableName); + lastTableId = result.mEntry.mTableId; + } + + return mCallback->HandleEvent(tables); +} + + // ------------------------------------------------------------------------- // Helper class for nsIURIClassifier implementation, translates table names // to nsIURIClassifier enums. 
@@ -2605,9 +3035,6 @@ nsUrlClassifierDBService::~nsUrlClassifierDBService() nsresult nsUrlClassifierDBService::Init() { - NS_ASSERTION(sizeof(nsUrlClassifierHash) == KEY_LENGTH, - "nsUrlClassifierHash must be KEY_LENGTH bytes long!"); - #if defined(PR_LOGGING) if (!gUrlClassifierDbServiceLog) gUrlClassifierDbServiceLog = PR_NewLogModule("UrlClassifierDbService"); @@ -2663,6 +3090,8 @@ nsUrlClassifierDBService::Init() getter_AddRefs(mWorkerProxy)); NS_ENSURE_SUCCESS(rv, rv); + mCompleters.Init(); + // Add an observer for shutdown nsCOMPtr observerService = do_GetService("@mozilla.org/observer-service;1"); @@ -2691,7 +3120,7 @@ nsUrlClassifierDBService::Classify(nsIURI *uri, new nsUrlClassifierClassifyCallback(c, mCheckMalware, mCheckPhishing); if (!callback) return NS_ERROR_OUT_OF_MEMORY; - nsresult rv = LookupURI(uri, callback, PR_TRUE); + nsresult rv = LookupURI(uri, callback); if (rv == NS_ERROR_MALFORMED_URI) { // The URI had no hostname, don't try to classify it. *result = PR_FALSE; @@ -2704,8 +3133,7 @@ nsUrlClassifierDBService::Classify(nsIURI *uri, NS_IMETHODIMP nsUrlClassifierDBService::Lookup(const nsACString& spec, - nsIUrlClassifierCallback* c, - PRBool needsProxy) + nsIUrlClassifierCallback* c) { NS_ENSURE_TRUE(gDbBackgroundThread, NS_ERROR_NOT_INITIALIZED); @@ -2719,13 +3147,12 @@ nsUrlClassifierDBService::Lookup(const nsACString& spec, return NS_ERROR_FAILURE; } - return LookupURI(uri, c, needsProxy); + return LookupURI(uri, c); } nsresult nsUrlClassifierDBService::LookupURI(nsIURI* uri, - nsIUrlClassifierCallback* c, - PRBool needsProxy) + nsIUrlClassifierCallback* c) { NS_ENSURE_TRUE(gDbBackgroundThread, NS_ERROR_NOT_INITIALIZED); @@ -2736,25 +3163,29 @@ nsUrlClassifierDBService::LookupURI(nsIURI* uri, nsresult rv = utilsService->GetKeyForURI(uri, key); NS_ENSURE_SUCCESS(rv, rv); - nsCOMPtr proxyCallback; - if (needsProxy) { - // The proxy callback uses the current thread. 
- rv = NS_GetProxyForObject(NS_PROXY_TO_CURRENT_THREAD, - NS_GET_IID(nsIUrlClassifierCallback), - c, - NS_PROXY_ASYNC, - getter_AddRefs(proxyCallback)); - NS_ENSURE_SUCCESS(rv, rv); - } else { - proxyCallback = c; - } + // Create an nsUrlClassifierLookupCallback object. This object will + // take care of confirming partial hash matches if necessary before + // calling the client's callback. + nsCOMPtr callback = + new nsUrlClassifierLookupCallback(this, c); + if (!callback) + return NS_ERROR_OUT_OF_MEMORY; + + nsCOMPtr proxyCallback; + // The proxy callback uses the current thread. + rv = NS_GetProxyForObject(NS_PROXY_TO_CURRENT_THREAD, + NS_GET_IID(nsIUrlClassifierLookupCallback), + callback, + NS_PROXY_ASYNC, + getter_AddRefs(proxyCallback)); + NS_ENSURE_SUCCESS(rv, rv); // Queue this lookup and call the lookup function to flush the queue if // necessary. rv = mWorker->QueueLookup(key, proxyCallback); NS_ENSURE_SUCCESS(rv, rv); - return mWorkerProxy->Lookup(EmptyCString(), nsnull, PR_FALSE); + return mWorkerProxy->Lookup(EmptyCString(), nsnull); } NS_IMETHODIMP @@ -2775,6 +3206,21 @@ nsUrlClassifierDBService::GetTables(nsIUrlClassifierCallback* c) return mWorkerProxy->GetTables(proxyCallback); } +NS_IMETHODIMP +nsUrlClassifierDBService::SetHashCompleter(const nsACString &tableName, + nsIUrlClassifierHashCompleter *completer) +{ + if (completer) { + if (!mCompleters.Put(tableName, completer)) { + return NS_ERROR_OUT_OF_MEMORY; + } + } else { + mCompleters.Remove(tableName); + } + + return NS_OK; +} + NS_IMETHODIMP nsUrlClassifierDBService::BeginUpdate(nsIUrlClassifierUpdateObserver *observer) { @@ -2800,11 +3246,11 @@ nsUrlClassifierDBService::BeginUpdate(nsIUrlClassifierUpdateObserver *observer) } NS_IMETHODIMP -nsUrlClassifierDBService::BeginStream() +nsUrlClassifierDBService::BeginStream(const nsACString &table) { NS_ENSURE_TRUE(gDbBackgroundThread, NS_ERROR_NOT_INITIALIZED); - return mWorkerProxy->BeginStream(); + return mWorkerProxy->BeginStream(table); } 
NS_IMETHODIMP @@ -2888,6 +3334,8 @@ nsUrlClassifierDBService::Shutdown() if (!gDbBackgroundThread) return NS_OK; + mCompleters.Clear(); + nsCOMPtr prefs = do_GetService(NS_PREFSERVICE_CONTRACTID); if (prefs) { prefs->RemoveObserver(CHECK_MALWARE_PREF, this); diff --git a/toolkit/components/url-classifier/src/nsUrlClassifierDBService.h b/toolkit/components/url-classifier/src/nsUrlClassifierDBService.h index af9a09896a5a..f4d829ccf561 100644 --- a/toolkit/components/url-classifier/src/nsUrlClassifierDBService.h +++ b/toolkit/components/url-classifier/src/nsUrlClassifierDBService.h @@ -43,9 +43,21 @@ #include #include "nsID.h" +#include "nsInterfaceHashtable.h" #include "nsIObserver.h" +#include "nsIUrlClassifierHashCompleter.h" #include "nsIUrlClassifierDBService.h" #include "nsIURIClassifier.h" +#include "nsToolkitCompsCID.h" + +// The hash length for a domain key. +#define DOMAIN_LENGTH 4 + +// The hash length of a partial hash entry. +#define PARTIAL_LENGTH 4 + +// The hash length of a complete hash entry. +#define COMPLETE_LENGTH 32 class nsUrlClassifierDBServiceWorker; @@ -74,17 +86,20 @@ public: NS_DECL_NSIURICLASSIFIER NS_DECL_NSIOBSERVER + PRBool GetCompleter(const nsACString& tableName, + nsIUrlClassifierHashCompleter** completer) { + return mCompleters.Get(tableName, completer); + } + private: // No subclassing ~nsUrlClassifierDBService(); - nsresult LookupURI(nsIURI* uri, - nsIUrlClassifierCallback* c, - PRBool needsProxy); - // Disallow copy constructor nsUrlClassifierDBService(nsUrlClassifierDBService&); + nsresult LookupURI(nsIURI* uri, nsIUrlClassifierCallback* c); + // Make sure the event queue is intialized before we use it. void EnsureThreadStarted(); @@ -94,6 +109,8 @@ private: nsCOMPtr mWorker; nsCOMPtr mWorkerProxy; + nsInterfaceHashtable mCompleters; + // TRUE if the nsURIClassifier implementation should check for malware // uris on document loads. 
PRBool mCheckMalware; diff --git a/toolkit/components/url-classifier/src/nsUrlClassifierHashCompleter.cpp b/toolkit/components/url-classifier/src/nsUrlClassifierHashCompleter.cpp new file mode 100644 index 000000000000..69576b5a8968 --- /dev/null +++ b/toolkit/components/url-classifier/src/nsUrlClassifierHashCompleter.cpp @@ -0,0 +1,493 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-/ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is mozilla.org code. + * + * The Initial Developer of the Original Code is + * Mozilla Corporation. + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Dave Camp + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. 
If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "nsUrlClassifierHashCompleter.h" +#include "nsIChannel.h" +#include "nsIHttpChannel.h" +#include "nsIObserverService.h" +#include "nsIUploadChannel.h" +#include "nsNetUtil.h" +#include "nsStreamUtils.h" +#include "nsStringStream.h" +#include "nsServiceManagerUtils.h" +#include "nsThreadUtils.h" +#include "nsUrlClassifierDBService.h" +#include "prlog.h" +#include "prprf.h" + +// NSPR_LOG_MODULES=UrlClassifierHashCompleter:5 +#if defined(PR_LOGGING) +static const PRLogModuleInfo *gUrlClassifierHashCompleterLog = nsnull; +#define LOG(args) PR_LOG(gUrlClassifierHashCompleterLog, PR_LOG_DEBUG, args) +#define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierHashCompleterLog, 4) +#else +#define LOG(args) +#define LOG_ENABLED() (PR_FALSE) +#endif + +NS_IMPL_ISUPPORTS3(nsUrlClassifierHashCompleterRequest, + nsIRequestObserver, + nsIStreamListener, + nsIObserver) + +nsresult +nsUrlClassifierHashCompleterRequest::Begin() +{ + LOG(("nsUrlClassifierHashCompleterRequest::Begin [%p]", this)); + nsCOMPtr observerService = + do_GetService("@mozilla.org/observer-service;1"); + if (observerService) + observerService->AddObserver(this, NS_XPCOM_SHUTDOWN_OBSERVER_ID, PR_FALSE); + + nsresult rv = OpenChannel(); + if (NS_FAILED(rv)) { + NotifyFailure(rv); + return rv; + } + + return NS_OK; +} + +nsresult +nsUrlClassifierHashCompleterRequest::Add(const nsACString& partialHash, + nsIUrlClassifierHashCompleterCallback *c) +{ + LOG(("nsUrlClassifierHashCompleterRequest::Add [%p]", this)); + Request *request = mRequests.AppendElement(); + if (!request) + return NS_ERROR_OUT_OF_MEMORY; + + request->partialHash = partialHash; + request->callback = c; + + return NS_OK; +} + + +nsresult +nsUrlClassifierHashCompleterRequest::OpenChannel() +{ + LOG(("nsUrlClassifierHashCompleterRequest::OpenChannel 
[%p]", this)); + nsresult rv; + + rv = NS_NewChannel(getter_AddRefs(mChannel), mURI); + NS_ENSURE_SUCCESS(rv, rv); + + nsCAutoString requestBody; + rv = BuildRequest(requestBody); + NS_ENSURE_SUCCESS(rv, rv); + + rv = AddRequestBody(requestBody); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mChannel->AsyncOpen(this, nsnull); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult +nsUrlClassifierHashCompleterRequest::BuildRequest(nsCAutoString &aRequestBody) +{ + LOG(("nsUrlClassifierHashCompleterRequest::BuildRequest [%p]", this)); + + nsCAutoString body; + for (PRUint32 i = 0; i < mRequests.Length(); i++) { + Request &request = mRequests[i]; + body.Append(request.partialHash); + } + + aRequestBody.AppendInt(PARTIAL_LENGTH); + aRequestBody.Append(':'); + aRequestBody.AppendInt(body.Length()); + aRequestBody.Append('\n'); + aRequestBody.Append(body); + + return NS_OK; +} + +nsresult +nsUrlClassifierHashCompleterRequest::AddRequestBody(const nsACString &aRequestBody) +{ + LOG(("nsUrlClassifierHashCompleterRequest::AddRequestBody [%p]", this)); + + nsresult rv; + nsCOMPtr strStream = + do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID, &rv); + NS_ENSURE_SUCCESS(rv, rv); + + rv = strStream->SetData(aRequestBody.BeginReading(), + aRequestBody.Length()); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr uploadChannel = do_QueryInterface(mChannel, &rv); + NS_ENSURE_SUCCESS(rv, rv); + + rv = uploadChannel->SetUploadStream(strStream, + NS_LITERAL_CSTRING("text/plain"), + -1); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr httpChannel = do_QueryInterface(mChannel, &rv); + NS_ENSURE_SUCCESS(rv, rv); + + rv = httpChannel->SetRequestMethod(NS_LITERAL_CSTRING("POST")); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult +nsUrlClassifierHashCompleterRequest::HandleItem(const nsACString& item, + const nsACString& tableName, + PRUint32 chunkId) +{ + // If this item matches any of the requested partial hashes, add them + // to the response. 
+ for (PRUint32 i = 0; i < mRequests.Length(); i++) { + Request &request = mRequests[i]; + if (StringBeginsWith(item, request.partialHash)) { + Response *response = request.responses.AppendElement(); + if (!response) + return NS_ERROR_OUT_OF_MEMORY; + response->completeHash = item; + response->tableName = tableName; + response->chunkId = chunkId; + } + } + + return NS_OK; +} + + +/** + * Reads one table of results from the response. Leaves begin pointing at the + * next table. + */ +nsresult +nsUrlClassifierHashCompleterRequest::HandleTable(const nsACString& response, + nsACString::const_iterator& begin) +{ + nsACString::const_iterator iter, end; + iter = begin; + response.EndReading(end); + if (!FindCharInReadable(':', iter, end)) { + // No table line. + NS_WARNING("Received badly-formatted gethash response."); + return NS_ERROR_FAILURE; + } + + const nsCSubstring& tableName = Substring(begin, iter); + iter++; + begin = iter; + + if (!FindCharInReadable('\n', iter, end)) { + // Unterminated header line. + NS_WARNING("Received badly-formatted gethash response."); + return NS_ERROR_FAILURE; + } + + const nsCSubstring& remaining = Substring(begin, iter); + iter++; + begin = iter; + + PRUint32 chunkId; + PRInt32 size; + if (PR_sscanf(PromiseFlatCString(remaining).get(), + "%u:%d", &chunkId, &size) != 2) { + NS_WARNING("Received badly-formatted gethash response."); + return NS_ERROR_FAILURE; + } + + if (size % COMPLETE_LENGTH != 0) { + NS_WARNING("Unexpected gethash response length"); + return NS_ERROR_FAILURE; + } + + // begin now refers to the hash data. + + if (begin.size_forward() < size) { + NS_WARNING("Response does not match the expected response length."); + return NS_ERROR_FAILURE; + } + + for (PRInt32 i = 0; i < (size / COMPLETE_LENGTH); i++) { + // Read the complete hash. 
+ iter.advance(COMPLETE_LENGTH); + + nsresult rv = HandleItem(Substring(begin, iter), tableName, chunkId); + NS_ENSURE_SUCCESS(rv, rv); + + begin = iter; + } + + // begin now points at the end of the hash data. + + return NS_OK; +} + +nsresult +nsUrlClassifierHashCompleterRequest::HandleResponse() +{ + if (mResponse.IsEmpty()) { + // Empty response, we're done. + return NS_OK; + } + + nsCString::const_iterator begin, end; + mResponse.BeginReading(begin); + mResponse.EndReading(end); + + while (begin != end) { + nsresult rv = HandleTable(mResponse, begin); + NS_ENSURE_SUCCESS(rv, rv); + } + + return NS_OK; +} + +void +nsUrlClassifierHashCompleterRequest::NotifySuccess() +{ + LOG(("nsUrlClassifierHashCompleterRequest::NotifySuccess [%p]", this)); + + for (PRUint32 i = 0; i < mRequests.Length(); i++) { + Request &request = mRequests[i]; + + for (PRUint32 j = 0; j < request.responses.Length(); j++) { + Response &response = request.responses[j]; + request.callback->Completion(response.completeHash, + response.tableName, + response.chunkId); + } + + request.callback->CompletionFinished(NS_OK); + } +} + +void +nsUrlClassifierHashCompleterRequest::NotifyFailure(nsresult status) +{ + LOG(("nsUrlClassifierHashCompleterRequest::NotifyFailure [%p]", this)); + + for (PRUint32 i = 0; i < mRequests.Length(); i++) { + Request &request = mRequests[i]; + request.callback->CompletionFinished(status); + } +} + +NS_IMETHODIMP +nsUrlClassifierHashCompleterRequest::OnStartRequest(nsIRequest *request, + nsISupports *context) +{ + LOG(("nsUrlClassifierHashCompleter::OnStartRequest [%p]", this)); + return NS_OK; +} + +NS_IMETHODIMP +nsUrlClassifierHashCompleterRequest::OnDataAvailable(nsIRequest *request, + nsISupports *context, + nsIInputStream *stream, + PRUint32 sourceOffset, + PRUint32 length) +{ + LOG(("nsUrlClassifierHashCompleter::OnDataAvailable [%p]", this)); + + if (mShuttingDown) + return NS_ERROR_ABORT; + + nsCAutoString piece; + nsresult rv = NS_ConsumeStream(stream, length, 
piece); + NS_ENSURE_SUCCESS(rv, rv); + + mResponse.Append(piece); + + return NS_OK; +} + +NS_IMETHODIMP +nsUrlClassifierHashCompleterRequest::OnStopRequest(nsIRequest *request, + nsISupports *context, + nsresult status) +{ + LOG(("nsUrlClassifierHashCompleter::OnStopRequest [%p, status=%d]", + this, status)); + + nsCOMPtr observerService = + do_GetService("@mozilla.org/observer-service;1"); + if (observerService) + observerService->RemoveObserver(this, NS_XPCOM_SHUTDOWN_OBSERVER_ID); + + if (mShuttingDown) + return NS_ERROR_ABORT; + + if (NS_SUCCEEDED(status)) { + nsCOMPtr channel = do_QueryInterface(request); + if (channel) { + PRBool success; + status = channel->GetRequestSucceeded(&success); + if (NS_SUCCEEDED(status) && !success) { + status = NS_ERROR_ABORT; + } + } + } + + if (NS_SUCCEEDED(status)) + status = HandleResponse(); + + if (NS_SUCCEEDED(status)) + NotifySuccess(); + else + NotifyFailure(status); + + mChannel = nsnull; + + return NS_OK; +} + + +NS_IMETHODIMP +nsUrlClassifierHashCompleterRequest::Observe(nsISupports *subject, + const char *topic, + const PRUnichar *data) +{ + if (!strcmp(topic, NS_XPCOM_SHUTDOWN_OBSERVER_ID)) { + mShuttingDown = PR_TRUE; + if (mChannel) + mChannel->Cancel(NS_ERROR_ABORT); + } + + return NS_OK; +} + +NS_IMPL_ISUPPORTS4(nsUrlClassifierHashCompleter, + nsIUrlClassifierHashCompleter, + nsIRunnable, + nsIObserver, + nsISupportsWeakReference) + +nsresult +nsUrlClassifierHashCompleter::Init() +{ +#if defined(PR_LOGGING) + if (!gUrlClassifierHashCompleterLog) + gUrlClassifierHashCompleterLog = PR_NewLogModule("UrlClassifierHashCompleter"); +#endif + + nsCOMPtr observerService = + do_GetService("@mozilla.org/observer-service;1"); + if (observerService) + observerService->AddObserver(this, NS_XPCOM_SHUTDOWN_OBSERVER_ID, PR_TRUE); + + return NS_OK; +} + +NS_IMETHODIMP +nsUrlClassifierHashCompleter::Complete(const nsACString &partialHash, + nsIUrlClassifierHashCompleterCallback *c) +{ + 
LOG(("nsUrlClassifierHashCompleter::Complete [%p]", this)); + + if (mShuttingDown) + return NS_ERROR_NOT_INITIALIZED; + + if (!mURI) { + NS_WARNING("Trying to use nsUrlClassifierHashCompleter without setting the gethash URI."); + return NS_ERROR_NOT_INITIALIZED; + } + + // We batch all of the requested completions in a single request until the + // next time we reach the main loop. + if (!mRequest) { + mRequest = new nsUrlClassifierHashCompleterRequest(mURI); + if (!mRequest) { + return NS_ERROR_OUT_OF_MEMORY; + } + + // Schedule ourselves to start this request on the main loop. + NS_DispatchToCurrentThread(this); + } + + return mRequest->Add(partialHash, c); +} + +NS_IMETHODIMP +nsUrlClassifierHashCompleter::SetGethashUrl(const nsACString &url) +{ + return NS_NewURI(getter_AddRefs(mURI), url); +} + +NS_IMETHODIMP +nsUrlClassifierHashCompleter::GetGethashUrl(nsACString &url) +{ + url.Truncate(); + if (mURI) { + return mURI->GetSpec(url); + } + return NS_OK; +} + +NS_IMETHODIMP +nsUrlClassifierHashCompleter::Run() +{ + LOG(("nsUrlClassifierHashCompleter::Run [%p]\n", this)); + + if (mShuttingDown) { + mRequest = nsnull; + return NS_ERROR_NOT_INITIALIZED; + } + + if (!mRequest) + return NS_OK; + + // Dispatch the http request. 
+ nsresult rv = mRequest->Begin(); + mRequest = nsnull; + return rv; +} + +NS_IMETHODIMP +nsUrlClassifierHashCompleter::Observe(nsISupports *subject, const char *topic, + const PRUnichar *data) +{ + if (!strcmp(topic, NS_XPCOM_SHUTDOWN_OBSERVER_ID)) { + mShuttingDown = PR_TRUE; + } + + return NS_OK; +} diff --git a/toolkit/components/url-classifier/src/nsUrlClassifierHashCompleter.h b/toolkit/components/url-classifier/src/nsUrlClassifierHashCompleter.h new file mode 100644 index 000000000000..4a193cdd46f6 --- /dev/null +++ b/toolkit/components/url-classifier/src/nsUrlClassifierHashCompleter.h @@ -0,0 +1,129 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-/ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is mozilla.org code. + * + * The Initial Developer of the Original Code is + * Mozilla Corporation. + * Portions created by the Initial Developer are Copyright (C) 2008 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Dave Camp + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef nsUrlClassifierHashCompleter_h_ +#define nsUrlClassifierHashCompleter_h_ + +#include "nsIUrlClassifierHashCompleter.h" + +#include "nsAutoPtr.h" +#include "nsCOMPtr.h" +#include "nsIChannel.h" +#include "nsIObserver.h" +#include "nsIRunnable.h" +#include "nsIStreamListener.h" +#include "nsIURI.h" +#include "nsTArray.h" +#include "nsString.h" +#include "nsWeakReference.h" + +class nsUrlClassifierHashCompleterRequest : public nsIStreamListener + , public nsIObserver +{ +public: + NS_DECL_ISUPPORTS + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSISTREAMLISTENER + NS_DECL_NSIOBSERVER + + nsUrlClassifierHashCompleterRequest(nsIURI *uri) + : mShuttingDown(PR_FALSE) + , mURI(uri) { } + ~nsUrlClassifierHashCompleterRequest() { } + + nsresult Begin(); + nsresult Add(const nsACString &partialHash, + nsIUrlClassifierHashCompleterCallback *c); + +private: + nsresult OpenChannel(); + nsresult BuildRequest(nsCAutoString &request); + nsresult AddRequestBody(const nsACString &requestBody); + nsresult HandleItem(const nsACString &item, + const nsACString &table, + PRUint32 chunkId); + nsresult HandleTable(const nsACString &response, + nsACString::const_iterator &begin); + nsresult HandleResponse(); + void NotifySuccess(); + void NotifyFailure(nsresult status); + + PRBool mShuttingDown; + nsCOMPtr mURI; + nsCOMPtr mChannel; + nsCString mResponse; + + struct Response { + nsCString completeHash; + nsCString tableName; + PRUint32 chunkId; + }; 
+ + struct Request { + nsCString partialHash; + nsTArray responses; + nsCOMPtr callback; + }; + + nsTArray mRequests; +}; + +class nsUrlClassifierHashCompleter : public nsIUrlClassifierHashCompleter + , public nsIRunnable + , public nsIObserver + , public nsSupportsWeakReference +{ +public: + NS_DECL_ISUPPORTS + NS_DECL_NSIURLCLASSIFIERHASHCOMPLETER + NS_DECL_NSIRUNNABLE + NS_DECL_NSIOBSERVER + + nsUrlClassifierHashCompleter() : mShuttingDown(PR_FALSE) {} + ~nsUrlClassifierHashCompleter() {} + + nsresult Init(); + +private: + nsRefPtr mRequest; + nsCOMPtr mURI; + PRBool mShuttingDown; + +}; + +#endif // nsUrlClassifierHashCompleter_h_ diff --git a/toolkit/components/url-classifier/src/nsUrlClassifierStreamUpdater.cpp b/toolkit/components/url-classifier/src/nsUrlClassifierStreamUpdater.cpp index 68b60c02b85b..4f4199d038ee 100644 --- a/toolkit/components/url-classifier/src/nsUrlClassifierStreamUpdater.cpp +++ b/toolkit/components/url-classifier/src/nsUrlClassifierStreamUpdater.cpp @@ -75,15 +75,15 @@ nsUrlClassifierStreamUpdater::nsUrlClassifierStreamUpdater() } -NS_IMPL_ISUPPORTS8(nsUrlClassifierStreamUpdater, - nsIUrlClassifierStreamUpdater, - nsIUrlClassifierUpdateObserver, - nsIRequestObserver, - nsIStreamListener, - nsIObserver, - nsIBadCertListener2, - nsISSLErrorListener, - nsIInterfaceRequestor) +NS_IMPL_THREADSAFE_ISUPPORTS8(nsUrlClassifierStreamUpdater, + nsIUrlClassifierStreamUpdater, + nsIUrlClassifierUpdateObserver, + nsIRequestObserver, + nsIStreamListener, + nsIObserver, + nsIBadCertListener2, + nsISSLErrorListener, + nsIInterfaceRequestor) /** * Clear out the update. 
@@ -94,7 +94,7 @@ nsUrlClassifierStreamUpdater::DownloadDone() LOG(("nsUrlClassifierStreamUpdater::DownloadDone [this=%p]", this)); mIsUpdating = PR_FALSE; - mPendingUpdateUrls.Clear(); + mPendingUpdates.Clear(); mSuccessCallback = nsnull; mUpdateErrorCallback = nsnull; mDownloadErrorCallback = nsnull; @@ -127,7 +127,8 @@ nsUrlClassifierStreamUpdater::SetUpdateUrl(const nsACString & aUpdateUrl) nsresult nsUrlClassifierStreamUpdater::FetchUpdate(nsIURI *aUpdateUrl, - const nsACString & aRequestBody) + const nsACString & aRequestBody, + const nsACString & aStreamTable) { nsresult rv; rv = NS_NewChannel(getter_AddRefs(mChannel), aUpdateUrl, nsnull, nsnull, this); @@ -142,18 +143,23 @@ nsUrlClassifierStreamUpdater::FetchUpdate(nsIURI *aUpdateUrl, rv = mChannel->AsyncOpen(this, nsnull); NS_ENSURE_SUCCESS(rv, rv); + mStreamTable = aStreamTable; + return NS_OK; } nsresult nsUrlClassifierStreamUpdater::FetchUpdate(const nsACString & aUpdateUrl, - const nsACString & aRequestBody) + const nsACString & aRequestBody, + const nsACString & aStreamTable) { nsCOMPtr uri; nsresult rv = NS_NewURI(getter_AddRefs(uri), aUpdateUrl); NS_ENSURE_SUCCESS(rv, rv); - return FetchUpdate(uri, aRequestBody); + LOG(("Fetching update from %s\n", PromiseFlatCString(aUpdateUrl).get())); + + return FetchUpdate(uri, aRequestBody, aStreamTable); } NS_IMETHODIMP @@ -215,23 +221,29 @@ nsUrlClassifierStreamUpdater::DownloadUpdates( *_retval = PR_TRUE; - return FetchUpdate(mUpdateUrl, aRequestBody); + return FetchUpdate(mUpdateUrl, aRequestBody, EmptyCString()); } /////////////////////////////////////////////////////////////////////////////// // nsIUrlClassifierUpdateObserver implementation NS_IMETHODIMP -nsUrlClassifierStreamUpdater::UpdateUrlRequested(const nsACString &aUrl) +nsUrlClassifierStreamUpdater::UpdateUrlRequested(const nsACString &aUrl, + const nsACString &aTable) { LOG(("Queuing requested update from %s\n", PromiseFlatCString(aUrl).get())); + PendingUpdate *update = 
mPendingUpdates.AppendElement(); + if (!update) + return NS_ERROR_OUT_OF_MEMORY; + // Allow data: urls for unit testing purposes, otherwise assume http if (StringBeginsWith(aUrl, NS_LITERAL_CSTRING("data:"))) { - mPendingUpdateUrls.AppendElement(aUrl); + update->mUrl = aUrl; } else { - mPendingUpdateUrls.AppendElement(NS_LITERAL_CSTRING("http://") + aUrl); + update->mUrl = NS_LITERAL_CSTRING("http://") + aUrl; } + update->mTable = aTable; return NS_OK; } @@ -242,15 +254,16 @@ nsUrlClassifierStreamUpdater::StreamFinished() nsresult rv; // Pop off a pending URL and update it. - if (mPendingUpdateUrls.Length() > 0) { - rv = FetchUpdate(mPendingUpdateUrls[0], NS_LITERAL_CSTRING("")); + if (mPendingUpdates.Length() > 0) { + PendingUpdate &update = mPendingUpdates[0]; + rv = FetchUpdate(update.mUrl, EmptyCString(), update.mTable); if (NS_FAILED(rv)) { - LOG(("Error fetching update url: %s\n", mPendingUpdateUrls[0].get())); + LOG(("Error fetching update url: %s\n", update.mUrl.get())); mDBService->CancelUpdate(); return rv; } - mPendingUpdateUrls.RemoveElementAt(0); + mPendingUpdates.RemoveElementAt(0); } else { mDBService->FinishUpdate(); } @@ -262,7 +275,7 @@ NS_IMETHODIMP nsUrlClassifierStreamUpdater::UpdateSuccess(PRUint32 requestedTimeout) { LOG(("nsUrlClassifierStreamUpdater::UpdateSuccess [this=%p]", this)); - NS_ASSERTION(mPendingUpdateUrls.Length() == 0, + NS_ASSERTION(mPendingUpdates.Length() == 0, "Didn't fetch all update URLs."); // DownloadDone() clears mSuccessCallback, so we save it off here. 
@@ -330,12 +343,14 @@ nsUrlClassifierStreamUpdater::AddRequestBody(const nsACString &aRequestBody) // nsIStreamListenerObserver implementation NS_IMETHODIMP -nsUrlClassifierStreamUpdater::OnStartRequest(nsIRequest *request, nsISupports* context) +nsUrlClassifierStreamUpdater::OnStartRequest(nsIRequest *request, + nsISupports* context) { nsresult rv; - rv = mDBService->BeginStream(); + rv = mDBService->BeginStream(mStreamTable); NS_ENSURE_SUCCESS(rv, rv); + mStreamTable.Truncate(); nsCOMPtr httpChannel = do_QueryInterface(request); if (httpChannel) { diff --git a/toolkit/components/url-classifier/src/nsUrlClassifierStreamUpdater.h b/toolkit/components/url-classifier/src/nsUrlClassifierStreamUpdater.h index 25bcb12a7ccf..b3b0ad656ea7 100644 --- a/toolkit/components/url-classifier/src/nsUrlClassifierStreamUpdater.h +++ b/toolkit/components/url-classifier/src/nsUrlClassifierStreamUpdater.h @@ -87,16 +87,25 @@ private: nsresult AddRequestBody(const nsACString &aRequestBody); - nsresult FetchUpdate(nsIURI *aURI, const nsACString &aRequestBody); - nsresult FetchUpdate(const nsACString &aURI, const nsACString &aRequestBody); + nsresult FetchUpdate(nsIURI *aURI, + const nsACString &aRequestBody, + const nsACString &aTable); + nsresult FetchUpdate(const nsACString &aURI, + const nsACString &aRequestBody, + const nsACString &aTable); PRBool mIsUpdating; PRBool mInitialized; nsCOMPtr mUpdateUrl; + nsCString mStreamTable; nsCOMPtr mChannel; nsCOMPtr mDBService; - nsTArray mPendingUpdateUrls; + struct PendingUpdate { + nsCString mUrl; + nsCString mTable; + }; + nsTArray mPendingUpdates; nsCOMPtr mSuccessCallback; nsCOMPtr mUpdateErrorCallback; diff --git a/toolkit/components/url-classifier/tests/unit/head_urlclassifier.js b/toolkit/components/url-classifier/tests/unit/head_urlclassifier.js index fb9cc9bb6cb2..b91a41b46d14 100644 --- a/toolkit/components/url-classifier/tests/unit/head_urlclassifier.js +++ b/toolkit/components/url-classifier/tests/unit/head_urlclassifier.js @@ 
-65,17 +65,21 @@ var streamUpdater = Cc["@mozilla.org/url-classifier/streamupdater;1"] * } */ -function buildUpdate(update) { +function buildUpdate(update, hashSize) { + if (!hashSize) { + hashSize = 32; + } var updateStr = "n:1000\n"; for (var tableName in update) { - updateStr += "i:" + tableName + "\n"; + if (tableName != "") + updateStr += "i:" + tableName + "\n"; var chunks = update[tableName]; for (var j = 0; j < chunks.length; j++) { var chunk = chunks[j]; var chunkType = chunk.chunkType ? chunk.chunkType : 'a'; var chunkNum = chunk.chunkNum ? chunk.chunkNum : j; - updateStr += chunkType + ':' + chunkNum; + updateStr += chunkType + ':' + chunkNum + ':' + hashSize; if (chunk.urls) { var chunkData = chunk.urls.join("\n"); @@ -89,8 +93,12 @@ function buildUpdate(update) { return updateStr; } -function buildPhishingUpdate(chunks) { - return buildUpdate({"test-phish-simple" : chunks}); +function buildPhishingUpdate(chunks, hashSize) { + return buildUpdate({"test-phish-simple" : chunks}, hashSize); +} + +function buildBareUpdate(chunks, hashSize) { + return buildUpdate({"" : chunks}, hashSize); } /** @@ -113,7 +121,7 @@ function doSimpleUpdate(updateText, success, failure) { }; dbservice.beginUpdate(listener); - dbservice.beginStream(); + dbservice.beginStream(""); dbservice.updateStream(updateText); dbservice.finishStream(); dbservice.finishUpdate(); @@ -152,6 +160,8 @@ tableData : function(expectedTables, cb) checkUrls: function(urls, expected, cb) { + // work with a copy of the list. 
+ urls = urls.slice(0); var doLookup = function() { if (urls.length > 0) { var fragment = urls.shift(); @@ -177,6 +187,11 @@ urlsExist: function(urls, cb) this.checkUrls(urls, 'test-phish-simple', cb); }, +malwareUrlsExist: function(urls, cb) +{ + this.checkUrls(urls, 'test-malware-simple', cb); +}, + subsDontExist: function(urls, cb) { // XXX: there's no interface for checking items in the subs table @@ -217,8 +232,6 @@ function updateError(arg) // Runs a set of updates, and then checks a set of assertions. function doUpdateTest(updates, assertions, successCallback, errorCallback) { - dbservice.resetDatabase(); - var runUpdate = function() { if (updates.length > 0) { var update = updates.shift(); @@ -241,6 +254,10 @@ function runNextTest() return; } + dbservice.resetDatabase(); + dbservice.setHashCompleter('test-phish-simple', null); + dumpn("running " + gTests[gNextTest]); + gTests[gNextTest++](); } @@ -250,4 +267,22 @@ function runTests(tests) runNextTest(); } +function Timer(delay, cb) { + this.cb = cb; + var timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer); + timer.initWithCallback(this, delay, timer.TYPE_ONE_SHOT); +} + +Timer.prototype = { +QueryInterface: function(iid) { + if (!iid.equals(Ci.nsISupports) && !iid.equals(Ci.nsITimerCallback)) { + throw Cr.NS_ERROR_NO_INTERFACE; + } + return this; + }, +notify: function(timer) { + this.cb(); + } +} + cleanUp(); diff --git a/toolkit/components/url-classifier/tests/unit/test_addsub.js b/toolkit/components/url-classifier/tests/unit/test_addsub.js index eead25b3536a..fa864212aab9 100644 --- a/toolkit/components/url-classifier/tests/unit/test_addsub.js +++ b/toolkit/components/url-classifier/tests/unit/test_addsub.js @@ -44,19 +44,19 @@ function testMultipleAdds() { function testSimpleSub() { var addUrls = ["foo.com/a", "bar.com/b"]; - var subUrls = ["foo.com/a"]; + var subUrls = ["1:foo.com/a"]; var addUpdate = buildPhishingUpdate( [{ "chunkNum" : 1, // adds and subtracts don't share a chunk 
numbering space "urls": addUrls }]); var subUpdate = buildPhishingUpdate( - [{ "chunkNum" : 1, + [{ "chunkNum" : 50, "chunkType" : "s", "urls": subUrls }]); var assertions = { - "tableData" : "test-phish-simple;a:1:s:1", + "tableData" : "test-phish-simple;a:1:s:50", "urlsExist" : [ "bar.com/b" ], "urlsDontExist": ["foo.com/a" ], "subsDontExist" : [ "foo.com/a" ] @@ -69,20 +69,20 @@ function testSimpleSub() // Same as testSimpleSub(), but the sub comes in before the add. function testSubEmptiesAdd() { - var subUrls = ["foo.com/a"]; + var subUrls = ["1:foo.com/a"]; var addUrls = ["foo.com/a", "bar.com/b"]; var subUpdate = buildPhishingUpdate( - [{ "chunkNum" : 1, + [{ "chunkNum" : 50, "chunkType" : "s", "urls": subUrls }]); var addUpdate = buildPhishingUpdate( - [{ "chunkNum" : 1, // adds and subtracts don't share a chunk numbering space + [{ "chunkNum" : 1, "urls": addUrls }]); var assertions = { - "tableData" : "test-phish-simple;a:1:s:1", + "tableData" : "test-phish-simple;a:1:s:50", "urlsExist" : [ "bar.com/b" ], "urlsDontExist": ["foo.com/a" ], "subsDontExist" : [ "foo.com/a" ] // this sub was found, it shouldn't exist anymore @@ -95,7 +95,7 @@ function testSubEmptiesAdd() // still have an item left over that needs to be synced. function testSubPartiallyEmptiesAdd() { - var subUrls = ["foo.com/a"]; + var subUrls = ["1:foo.com/a"]; var addUrls = ["foo.com/a", "foo.com/b", "bar.com/b"]; var subUpdate = buildPhishingUpdate( @@ -123,7 +123,7 @@ function testSubPartiallyEmptiesAdd() // then adding it twice should leave the url intact. function testPendingSubRemoved() { - var subUrls = ["foo.com/a", "foo.com/b"]; + var subUrls = ["1:foo.com/a", "2:foo.com/b"]; var addUrls = ["foo.com/a", "foo.com/b"]; var subUpdate = buildPhishingUpdate( @@ -151,7 +151,7 @@ function testPendingSubRemoved() // Make sure that a saved sub is removed when the sub chunk is expired. 
function testPendingSubExpire() { - var subUrls = ["foo.com/a", "foo.com/b"]; + var subUrls = ["1:foo.com/a", "1:foo.com/b"]; var addUrls = ["foo.com/a", "foo.com/b"]; var subUpdate = buildPhishingUpdate( @@ -176,7 +176,7 @@ function testPendingSubExpire() doTest([subUpdate, expireUpdate, addUpdate], assertions); } -// Two adds plus one sub of the same URL will leave one of the adds there +// Make sure that the sub url removes from only the chunk that it specifies function testDuplicateAdds() { var urls = ["foo.com/a"]; @@ -190,7 +190,7 @@ function testDuplicateAdds() var subUpdate = buildPhishingUpdate( [{ "chunkNum" : 3, "chunkType" : "s", - "urls": urls }]); + "urls": ["2:foo.com/a"]}]); var assertions = { "tableData" : "test-phish-simple;a:1-2:s:3", @@ -204,17 +204,17 @@ function testDuplicateAdds() // Tests a sub which matches some existing adds but leaves others. function testSubPartiallyMatches() { - var addUrls = ["foo.com/a"]; - var subUrls = ["foo.com/a", "foo.com/b"]; + var subUrls = ["foo.com/a"]; + var addUrls = ["1:foo.com/a", "2:foo.com/b"]; var addUpdate = buildPhishingUpdate( [{ "chunkNum" : 1, - "chunkType" : "s", "urls" : addUrls }]); var subUpdate = buildPhishingUpdate( [{ "chunkNum" : 1, - "urls" : subUrls }]); + "chunkType" : "s", + "urls" : addUrls }]); var assertions = { "tableData" : "test-phish-simple;a:1:s:1", @@ -232,7 +232,7 @@ function testSubPartiallyMatches() function testSubPartiallyMatches2() { var addUrls = ["foo.com/a"]; - var subUrls = ["foo.com/a", "foo.com/b"]; + var subUrls = ["1:foo.com/a", "2:foo.com/b"]; var addUrls2 = ["foo.com/b"]; var addUpdate = buildPhishingUpdate( @@ -258,11 +258,10 @@ function testSubPartiallyMatches2() } // Verify that two subs for the same domain but from different chunks -// match (tests that existing sub entries are properly updated, and -// helps exercise nsUrlClassifierEntry::RemoveFragments(). 
+// match (tests that existing sub entries are properly updated) function testSubsDifferentChunks() { - var subUrls1 = [ "foo.com/a" ]; - var subUrls2 = [ "foo.com/b" ]; + var subUrls1 = [ "3:foo.com/a" ]; + var subUrls2 = [ "3:foo.com/b" ]; var addUrls = [ "foo.com/a", "foo.com/b", "foo.com/c" ]; diff --git a/toolkit/components/url-classifier/tests/unit/test_dbservice.js b/toolkit/components/url-classifier/tests/unit/test_dbservice.js index a1cdfe0e3e4d..f155c704833c 100644 --- a/toolkit/components/url-classifier/tests/unit/test_dbservice.js +++ b/toolkit/components/url-classifier/tests/unit/test_dbservice.js @@ -23,6 +23,12 @@ var chunk3Urls = [ ]; var chunk3 = chunk3Urls.join("\n"); +var chunk3SubUrls = [ + "1:test.com/a", + "1:foo.bar.com/a", + "2:blah.com/a" ]; +var chunk3Sub = chunk3SubUrls.join("\n"); + var chunk4Urls = [ "a.com/b", "b.com/c", @@ -204,8 +210,8 @@ function do_subs() { var data = "n:1000\n" + "i:testing-phish-simple\n" + - "s:3:" + chunk3.length + "\n" + - chunk3 + "\n" + + "s:3:32:" + chunk3Sub.length + "\n" + + chunk3Sub + "\n" + "ad:1\n" + "ad:4-6\n"; @@ -226,18 +232,18 @@ function do_adds() { var data = "n:1000\n" + "i:testing-phish-simple\n" + - "a:1:" + chunk1.length + "\n" + + "a:1:32:" + chunk1.length + "\n" + chunk1 + "\n" + - "a:2:" + chunk2.length + "\n" + + "a:2:32:" + chunk2.length + "\n" + chunk2 + "\n" + - "a:4:" + chunk4.length + "\n" + + "a:4:32:" + chunk4.length + "\n" + chunk4 + "\n" + - "a:5:" + chunk5.length + "\n" + + "a:5:32:" + chunk5.length + "\n" + chunk5 + "\n" + - "a:6:" + chunk6.length + "\n" + + "a:6:32:" + chunk6.length + "\n" + chunk6 + "\n" + "i:testing-malware-simple\n" + - "a:1:" + chunk2.length + "\n" + + "a:1:32:" + chunk2.length + "\n" + chunk2 + "\n"; doSimpleUpdate(data, testAddSuccess, testFailure); diff --git a/toolkit/components/url-classifier/tests/unit/test_partial.js b/toolkit/components/url-classifier/tests/unit/test_partial.js new file mode 100644 index 000000000000..99b110bf6c2c --- /dev/null 
+++ b/toolkit/components/url-classifier/tests/unit/test_partial.js @@ -0,0 +1,403 @@ + +/** + * DummyCompleter() lets tests easily specify the results of a partial + * hash completion request. + */ +function DummyCompleter() { + this.fragments = {}; + this.queries = []; +} + +DummyCompleter.prototype = +{ +QueryInterface: function(iid) +{ + if (!iid.equals(Ci.nsISupports) && + !iid.equals(Ci.nsIUrlClassifierHashCompleter)) { + throw Cr.NS_ERROR_NO_INTERFACE; + } + return this; +}, + +complete: function(partialHash, cb) +{ + this.queries.push(partialHash); + var fragments = this.fragments; + var doCallback = function() { + if (this.alwaysFail) { + cb.completionFinished(1); + return; + } + var results; + if (fragments[partialHash]) { + for (var i = 0; i < fragments[partialHash].length; i++) { + var chunkId = fragments[partialHash][i][0]; + var hash = fragments[partialHash][i][1]; + cb.completion(hash, "test-phish-simple", chunkId); + } + } + cb.completionFinished(0); + } + var timer = new Timer(0, doCallback); +}, + +getHash: function(fragment) +{ + var converter = Cc["@mozilla.org/intl/scriptableunicodeconverter"]. + createInstance(Ci.nsIScriptableUnicodeConverter); + converter.charset = "UTF-8"; + var result = {}; + var data = converter.convertToByteArray(fragment, result); + var ch = Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash); + ch.init(ch.SHA256); + ch.update(data, data.length); + var hash = ch.finish(false); + return hash.slice(0, 32); +}, + +addFragment: function(chunkId, fragment) +{ + this.addHash(chunkId, this.getHash(fragment)); +}, + +// This method allows the caller to generate complete hashes that match the +// prefix of a real fragment, but have different complete hashes. 
+addConflict: function(chunkId, fragment) +{ + var realHash = this.getHash(fragment); + var invalidHash = this.getHash("blah blah blah blah blah"); + this.addHash(chunkId, realHash.slice(0, 4) + invalidHash.slice(4, 32)); +}, + +addHash: function(chunkId, hash) +{ + var partial = hash.slice(0, 4); + if (this.fragments[partial]) { + this.fragments[partial].push([chunkId, hash]); + } else { + this.fragments[partial] = [[chunkId, hash]]; + } +}, + +compareQueries: function(fragments) +{ + var expectedQueries = []; + for (var i = 0; i < fragments.length; i++) { + expectedQueries.push(this.getHash(fragments[i]).slice(0, 4)); + } + expectedQueries.sort(); + this.queries.sort(); + for (var i = 0; i < this.queries.length; i++) { + do_check_eq(this.queries[i], expectedQueries[i]); + } + do_check_eq(this.queries.length, expectedQueries.length); +} +}; + +function setupCompleter(table, hits, conflicts, alwaysFail) +{ + var completer = new DummyCompleter(); + for (var i = 0; i < hits.length; i++) { + var chunkId = hits[i][0]; + var fragments = hits[i][1]; + for (var j = 0; j < fragments.length; j++) { + completer.addFragment(chunkId, fragments[j]); + } + } + for (var i = 0; i < conflicts.length; i++) { + var chunkId = conflicts[i][0]; + var fragments = conflicts[i][1]; + for (var j = 0; j < fragments.length; j++) { + completer.addConflict(chunkId, fragments[j]); + } + } + + dbservice.setHashCompleter(table, completer); + + return completer; +} + +function installCompleter(table, fragments, conflictFragments) +{ + return setupCompleter(table, fragments, conflictFragments, false); +} + +function installFailingCompleter(table) { + return setupCompleter(table, [], [], true); +} + +// Helper assertion for checking dummy completer queries +gAssertions.completerQueried = function(data, cb) +{ + var completer = data[0]; + completer.compareQueries(data[1]); + cb(); +} + +function doTest(updates, assertions) +{ + doUpdateTest(updates, assertions, runNextTest, updateError); +} + +// Test 
an add of two partial urls to a fresh database +function testPartialAdds() { + var addUrls = [ "foo.com/a", "foo.com/b", "bar.com/c" ]; + var update = buildPhishingUpdate( + [ + { "chunkNum" : 1, + "urls" : addUrls + }], + 4); + + + var completer = installCompleter('test-phish-simple', [[1, addUrls]], []); + + var assertions = { + "tableData" : "test-phish-simple;a:1", + "urlsExist" : addUrls, + "completerQueried" : [completer, addUrls] + }; + + + doTest([update], assertions); +} + +function testPartialAddsWithConflicts() { + var addUrls = [ "foo.com/a", "foo.com/b", "bar.com/c" ]; + var update = buildPhishingUpdate( + [ + { "chunkNum" : 1, + "urls" : addUrls + }], + 4); + + // Each result will have both a real match and a conflict + var completer = installCompleter('test-phish-simple', + [[1, addUrls]], + [[1, addUrls]]); + + var assertions = { + "tableData" : "test-phish-simple;a:1", + "urlsExist" : addUrls, + "completerQueried" : [completer, addUrls] + }; + + doTest([update], assertions); +} + +function testFalsePositives() { + var addUrls = [ "foo.com/a", "foo.com/b", "bar.com/c" ]; + var update = buildPhishingUpdate( + [ + { "chunkNum" : 1, + "urls" : addUrls + }], + 4); + + // Each result will have no matching complete hashes and a non-matching + // conflict + var completer = installCompleter('test-phish-simple', [], [[1, addUrls]]); + + var assertions = { + "tableData" : "test-phish-simple;a:1", + "urlsDontExist" : addUrls, + "completerQueried" : [completer, addUrls] + }; + + doTest([update], assertions); +} + +function testEmptyCompleter() { + var addUrls = [ "foo.com/a", "foo.com/b", "bar.com/c" ]; + var update = buildPhishingUpdate( + [ + { "chunkNum" : 1, + "urls" : addUrls + }], + 4); + + // Completer will never return full hashes + var completer = installCompleter('test-phish-simple', [], []); + + var assertions = { + "tableData" : "test-phish-simple;a:1", + "urlsDontExist" : addUrls, + "completerQueried" : [completer, addUrls] + }; + + 
doTest([update], assertions);
+}
+
+function testCompleterFailure() {
+  var addUrls = [ "foo.com/a", "foo.com/b", "bar.com/c" ];
+  var update = buildPhishingUpdate(
+        [
+          { "chunkNum" : 1,
+            "urls" : addUrls
+          }],
+        4);
+
+  // Completer will fail each request rather than returning full hashes
+  var completer = installFailingCompleter('test-phish-simple');
+
+  var assertions = {
+    "tableData" : "test-phish-simple;a:1",
+    "urlsDontExist" : addUrls,
+    "completerQueried" : [completer, addUrls]
+  };
+
+  doTest([update], assertions);
+}
+
+function testMixedSizesSameDomain() {
+  var add1Urls = [ "foo.com/a" ];
+  var add2Urls = [ "foo.com/b" ];
+
+  var update1 = buildPhishingUpdate(
+        [
+          { "chunkNum" : 1,
+            "urls" : add1Urls }],
+        4);
+  var update2 = buildPhishingUpdate(
+        [
+          { "chunkNum" : 2,
+            "urls" : add2Urls }],
+        32);
+
+  // We should only need to complete the partial hashes
+  var completer = installCompleter('test-phish-simple', [[1, add1Urls]], []);
+
+  var assertions = {
+    "tableData" : "test-phish-simple;a:1-2",
+    // both urls should match...
+    "urlsExist" : add1Urls.concat(add2Urls),
+    // ... but the completer should only be queried for the partial entry
+    "completerQueried" : [completer, add1Urls]
+  };
+
+  doTest([update1, update2], assertions);
+}
+
+function testMixedSizesDifferentDomains() {
+  var add1Urls = [ "foo.com/a" ];
+  var add2Urls = [ "bar.com/b" ];
+
+  var update1 = buildPhishingUpdate(
+        [
+          { "chunkNum" : 1,
+            "urls" : add1Urls }],
+        4);
+  var update2 = buildPhishingUpdate(
+        [
+          { "chunkNum" : 2,
+            "urls" : add2Urls }],
+        32);
+
+  // We should only need to complete the partial hashes
+  var completer = installCompleter('test-phish-simple', [[1, add1Urls]], []);
+
+  var assertions = {
+    "tableData" : "test-phish-simple;a:1-2",
+    // both urls should match...
+    "urlsExist" : add1Urls.concat(add2Urls),
+    // ...
but the completer should only be queried for the partial entry
+    "completerQueried" : [completer, add1Urls]
+  };
+
+  doTest([update1, update2], assertions);
+}
+
+function testMixedSizesNoCompleter() {
+  var add1Urls = [ "foo.com/a" ];
+  var add2Urls = [ "foo.com/b" ];
+
+  var update1 = buildPhishingUpdate(
+        [
+          { "chunkNum" : 1,
+            "urls" : add1Urls }],
+        4);
+  var update2 = buildPhishingUpdate(
+        [
+          { "chunkNum" : 2,
+            "urls" : add2Urls }],
+        32);
+
+  var assertions = {
+    "tableData" : "test-phish-simple;a:1-2",
+    // add1Urls shouldn't work, because there is no completer.
+    "urlsDontExist" : add1Urls,
+    // but add2Urls were complete, they should work.
+    "urlsExist" : add2Urls
+  };
+
+  doTest([update1, update2], assertions);
+}
+
+function testInvalidHashSize()
+{
+  var addUrls = [ "foo.com/a", "foo.com/b", "bar.com/c" ];
+  var update = buildPhishingUpdate(
+        [
+          { "chunkNum" : 1,
+            "urls" : addUrls
+          }],
+        12); // only 4 and 32 are legal hash sizes
+
+  var completer = installCompleter('test-phish-simple', [[1, addUrls]], []);
+
+  var assertions = {
+    "tableData" : "",
+    "urlsDontExist" : addUrls
+  };
+
+  // A successful update will trigger an error
+  doUpdateTest([update], assertions, updateError, runNextTest);
+}
+
+function testWrongTable()
+{
+  var addUrls = [ "foo.com/a" ];
+  var update = buildPhishingUpdate(
+        [
+          { "chunkNum" : 1,
+            "urls" : addUrls
+          }],
+        32);
+  // installCompleter() takes (table, hits, conflicts); the conflicts
+  // argument was missing, making setupCompleter() throw on undefined.
+  var completer = installCompleter('test-malware-simple', // wrong table
+                                   [[1, addUrls]], []);
+
+  var assertions = {
+    "tableData" : "test-phish-simple;a:1",
+    // The adds were complete (32-byte) hashes, so lookups succeed
+    // without ever consulting the wrong-table completer.
+    "urlsExist" : addUrls,
+    "completerQueried" : [completer, []]
+  };
+
+  doTest([update], assertions);
+}
+
+function testWrongChunk()
+{
+  var addUrls = [ "foo.com/a" ];
+  var update = buildPhishingUpdate(
+        [
+          { "chunkNum" : 1,
+            "urls" : addUrls
+          }],
+        32);
+  // Pass the (previously missing) empty conflicts list.
+  var completer = installCompleter('test-phish-simple',
+                                   [[2, // Wrong chunk number
+                                     addUrls]], []);
+
+  var assertions = {
+    "tableData" : "test-phish-simple;a:1",
+    // Complete hashes never require completion, so the completer
+    // with the wrong chunk number is never queried.
+    "urlsExist" : addUrls,
+    "completerQueried" : [completer, []]
+  };
+
+  doTest([update], assertions);
+}
+
+function run_test()
+{
+  runTests([
+    testPartialAdds,
+    testPartialAddsWithConflicts,
+    testFalsePositives,
+    testEmptyCompleter,
+    testCompleterFailure,
testMixedSizesSameDomain, + testMixedSizesDifferentDomains, + testMixedSizesNoCompleter, + testInvalidHashSize + ]); +} + +do_test_pending(); diff --git a/toolkit/components/url-classifier/tests/unit/test_streamupdater.js b/toolkit/components/url-classifier/tests/unit/test_streamupdater.js index d81f74f97b0d..dd8d3fe46de5 100644 --- a/toolkit/components/url-classifier/tests/unit/test_streamupdater.js +++ b/toolkit/components/url-classifier/tests/unit/test_streamupdater.js @@ -13,18 +13,19 @@ function testSimpleForward() { var add3Urls = [ "bar.com/d" ]; var update = "n:1000\n"; + update += "i:test-phish-simple\n"; - var update1 = buildPhishingUpdate( + var update1 = buildBareUpdate( [{ "chunkNum" : 1, "urls" : add1Urls }]); update += "u:data:," + encodeURIComponent(update1) + "\n"; - var update2 = buildPhishingUpdate( + var update2 = buildBareUpdate( [{ "chunkNum" : 2, "urls" : add2Urls }]); update += "u:data:," + encodeURIComponent(update2) + "\n"; - var update3 = buildPhishingUpdate( + var update3 = buildBareUpdate( [{ "chunkNum" : 3, "urls" : add3Urls }]); update += "u:data:," + encodeURIComponent(update3) + "\n"; @@ -44,15 +45,16 @@ function testNestedForward() { var add2Urls = [ "foo.com/b" ]; var update = "n:1000\n"; + update += "i:test-phish-simple\n"; - var update1 = buildPhishingUpdate( + var update1 = buildBareUpdate( [{ "chunkNum" : 1, "urls" : add1Urls }]); update += "u:data:," + encodeURIComponent(update1) + "\n"; - var update2 = buildPhishingUpdate( + var update2 = buildBareUpdate( [{ "chunkNum" : 2 }]); - var update3 = buildPhishingUpdate( + var update3 = buildBareUpdate( [{ "chunkNum" : 3, "urls" : add1Urls }]); @@ -102,13 +104,48 @@ function testErrorUrlForward() { doTest([update], assertions, true); } +function testMultipleTables() { + var add1Urls = [ "foo.com/a", "bar.com/c" ]; + var add2Urls = [ "foo.com/b" ]; + var add3Urls = [ "bar.com/d" ]; + + var update = "n:1000\n"; + update += "i:test-phish-simple\n"; + + var update1 = buildBareUpdate( + 
[{ "chunkNum" : 1, + "urls" : add1Urls }]); + update += "u:data:," + encodeURIComponent(update1) + "\n"; + + var update2 = buildBareUpdate( + [{ "chunkNum" : 2, + "urls" : add2Urls }]); + update += "u:data:," + encodeURIComponent(update2) + "\n"; + + update += "i:test-malware-simple\n"; + + var update3 = buildBareUpdate( + [{ "chunkNum" : 3, + "urls" : add3Urls }]); + update += "u:data:," + encodeURIComponent(update3) + "\n"; + + var assertions = { + "tableData" : "test-malware-simple;a:3\ntest-phish-simple;a:1-2", + "urlsExist" : add1Urls.concat(add2Urls), + "malwareUrlsExist" : add3Urls + }; + + doTest([update], assertions, false); +} + function run_test() { runTests([ testSimpleForward, testNestedForward, testInvalidUrlForward, - testErrorUrlForward + testErrorUrlForward, + testMultipleTables ]); }