From 00125c4e7af86dad445f74dc9027eb8c8aaef0b4 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Tue, 6 Dec 2011 19:03:05 +0100 Subject: [PATCH] Bug 673470 - Replace the SQLite SafeBrowsing store with an optimized store. r=dcamp --- storage/src/TelemetryVFS.cpp | 1 - toolkit/components/build/nsToolkitCompsCID.h | 6 +- .../telemetry/TelemetryHistograms.h | 9 +- .../components/url-classifier/ChunkSet.cpp | 136 + toolkit/components/url-classifier/ChunkSet.h | 90 + .../components/url-classifier/Classifier.cpp | 653 +++ .../components/url-classifier/Classifier.h | 128 + toolkit/components/url-classifier/Entries.h | 335 ++ .../components/url-classifier/HashStore.cpp | 950 +++++ toolkit/components/url-classifier/HashStore.h | 213 + .../components/url-classifier/LookupCache.cpp | 776 ++++ .../components/url-classifier/LookupCache.h | 186 + toolkit/components/url-classifier/Makefile.in | 6 + .../url-classifier/ProtocolParser.cpp | 777 ++++ .../url-classifier/ProtocolParser.h | 151 + .../url-classifier/content/listmanager.js | 2 +- .../nsCheckSummedOutputStream.cpp | 92 + .../nsCheckSummedOutputStream.h | 86 + .../nsIUrlClassifierDBService.idl | 16 +- .../nsIUrlClassifierPrefixSet.idl | 14 +- .../nsUrlClassifierDBService.cpp | 3729 ++--------------- .../url-classifier/nsUrlClassifierDBService.h | 9 +- .../nsUrlClassifierPrefixSet.cpp | 157 +- .../url-classifier/nsUrlClassifierPrefixSet.h | 16 +- .../url-classifier/nsUrlClassifierProxies.cpp | 19 +- .../url-classifier/nsUrlClassifierProxies.h | 27 +- .../nsUrlClassifierStreamUpdater.cpp | 12 +- .../tests/unit/head_urlclassifier.js | 25 +- .../url-classifier/tests/unit/test_addsub.js | 9 +- .../tests/unit/test_cleankeycache.js | 195 - .../url-classifier/tests/unit/test_partial.js | 59 +- .../tests/unit/test_prefixset.js | 39 +- .../tests/unit/test_streamupdater.js | 16 +- .../url-classifier/tests/unit/xpcshell.ini | 1 - 34 files changed, 5149 insertions(+), 3791 deletions(-) create mode 100644 toolkit/components/url-classifier/ChunkSet.cpp create mode 100644 toolkit/components/url-classifier/ChunkSet.h create mode 100644 toolkit/components/url-classifier/Classifier.cpp create mode 100644 toolkit/components/url-classifier/Classifier.h create mode 100644 toolkit/components/url-classifier/Entries.h create mode 100644 toolkit/components/url-classifier/HashStore.cpp create mode 100644 toolkit/components/url-classifier/HashStore.h create mode 100644 toolkit/components/url-classifier/LookupCache.cpp create mode 100644 toolkit/components/url-classifier/LookupCache.h create mode 100644 toolkit/components/url-classifier/ProtocolParser.cpp create mode 100644 toolkit/components/url-classifier/ProtocolParser.h create mode 100644 toolkit/components/url-classifier/nsCheckSummedOutputStream.cpp create mode 100644 toolkit/components/url-classifier/nsCheckSummedOutputStream.h delete mode 100644 toolkit/components/url-classifier/tests/unit/test_cleankeycache.js diff --git a/storage/src/TelemetryVFS.cpp b/storage/src/TelemetryVFS.cpp index 3d74dea45d28..6120ed17bd43 100644 --- a/storage/src/TelemetryVFS.cpp +++ b/storage/src/TelemetryVFS.cpp @@ -67,7 +67,6 @@ struct Histograms { Histograms gHistograms[] = { SQLITE_TELEMETRY("places.sqlite", PLACES), - SQLITE_TELEMETRY("urlclassifier3.sqlite", URLCLASSIFIER), SQLITE_TELEMETRY("cookies.sqlite", COOKIES), SQLITE_TELEMETRY("webappsstore.sqlite", WEBAPPS), SQLITE_TELEMETRY(NULL, OTHER) diff --git a/toolkit/components/build/nsToolkitCompsCID.h b/toolkit/components/build/nsToolkitCompsCID.h index 
8f501ed13608..c24332cf7e23 100644 --- a/toolkit/components/build/nsToolkitCompsCID.h +++ b/toolkit/components/build/nsToolkitCompsCID.h @@ -163,9 +163,9 @@ #define NS_TYPEAHEADFIND_CID \ { 0xe7f70966, 0x9a37, 0x48d7, { 0x8a, 0xeb, 0x35, 0x99, 0x8f, 0x31, 0x09, 0x0e} } -// {15a892dd-cb0f-4a9f-a27f-8291d5e16653} -#define NS_URLCLASSIFIERPREFIXSET_CID \ -{ 0x15a892dd, 0xcb0f, 0x4a9f, { 0xa2, 0x7f, 0x82, 0x91, 0xd5, 0xe1, 0x66, 0x53} } +// {b21b0fa1-20d2-422a-b2cc-b289c9325811} + #define NS_URLCLASSIFIERPREFIXSET_CID \ +{ 0xb21b0fa1, 0x20d2, 0x422a, { 0xb2, 0xcc, 0xb2, 0x89, 0xc9, 0x32, 0x58, 0x11} } // {5eb7c3c1-ec1f-4007-87cc-eefb37d68ce6} #define NS_URLCLASSIFIERDBSERVICE_CID \ diff --git a/toolkit/components/telemetry/TelemetryHistograms.h b/toolkit/components/telemetry/TelemetryHistograms.h index be77bc7b6d94..063ca844c5d5 100644 --- a/toolkit/components/telemetry/TelemetryHistograms.h +++ b/toolkit/components/telemetry/TelemetryHistograms.h @@ -225,7 +225,6 @@ HISTOGRAM(CHECK_JAVA_ENABLED, 1, 3000, 10, EXPONENTIAL, "Time spent checking if SQLITE_TIME_SPENT(OTHER_ ## NAME, DESC) \ SQLITE_TIME_SPENT(PLACES_ ## NAME, DESC) \ SQLITE_TIME_SPENT(COOKIES_ ## NAME, DESC) \ - SQLITE_TIME_SPENT(URLCLASSIFIER_ ## NAME, DESC) \ SQLITE_TIME_SPENT(WEBAPPS_ ## NAME, DESC) SQLITE_TIME_SPENT(OPEN, "Time spent on SQLite open() (ms)") @@ -238,11 +237,9 @@ SQLITE_TIME_PER_FILE(SYNC, "Time spent on SQLite fsync() (ms)") HISTOGRAM(MOZ_SQLITE_OTHER_READ_B, 1, 32768, 3, LINEAR, "SQLite read() (bytes)") HISTOGRAM(MOZ_SQLITE_PLACES_READ_B, 1, 32768, 3, LINEAR, "SQLite read() (bytes)") HISTOGRAM(MOZ_SQLITE_COOKIES_READ_B, 1, 32768, 3, LINEAR, "SQLite read() (bytes)") -HISTOGRAM(MOZ_SQLITE_URLCLASSIFIER_READ_B, 1, 32768, 3, LINEAR, "SQLite read() (bytes)") HISTOGRAM(MOZ_SQLITE_WEBAPPS_READ_B, 1, 32768, 3, LINEAR, "SQLite read() (bytes)") HISTOGRAM(MOZ_SQLITE_PLACES_WRITE_B, 1, 32768, 3, LINEAR, "SQLite write (bytes)") HISTOGRAM(MOZ_SQLITE_COOKIES_WRITE_B, 1, 32768, 3, LINEAR, "SQLite write (bytes)") -HISTOGRAM(MOZ_SQLITE_URLCLASSIFIER_WRITE_B, 1, 32768, 3, LINEAR, "SQLite write (bytes)") HISTOGRAM(MOZ_SQLITE_WEBAPPS_WRITE_B, 1, 32768, 3, LINEAR, "SQLite write (bytes)") HISTOGRAM(MOZ_SQLITE_OTHER_WRITE_B, 1, 32768, 3, LINEAR, "SQLite write (bytes)") HISTOGRAM(MOZ_STORAGE_ASYNC_REQUESTS_MS, 1, 32768, 20, EXPONENTIAL, "mozStorage async requests completion (ms)") @@ -263,10 +260,14 @@ HISTOGRAM(NETWORK_DISK_CACHE_OUTPUT_STREAM_CLOSE_INTERNAL_MAIN_THREAD, 1, 10000, * Url-Classifier telemetry */ #ifdef MOZ_URL_CLASSIFIER +HISTOGRAM(URLCLASSIFIER_LOOKUP_TIME, 1, 500, 10, EXPONENTIAL, "Time spent per dbservice lookup (ms)") +HISTOGRAM(URLCLASSIFIER_CL_CHECK_TIME, 1, 500, 10, EXPONENTIAL, "Time spent per classifier lookup (ms)") +HISTOGRAM(URLCLASSIFIER_CL_UPDATE_TIME, 20, 15000, 15, EXPONENTIAL, "Time spent per classifier update (ms)") HISTOGRAM(URLCLASSIFIER_PS_FILELOAD_TIME, 1, 1000, 10, EXPONENTIAL, "Time spent loading PrefixSet from file (ms)") HISTOGRAM(URLCLASSIFIER_PS_FALLOCATE_TIME, 1, 1000, 10, EXPONENTIAL, "Time spent fallocating PrefixSet (ms)") HISTOGRAM(URLCLASSIFIER_PS_CONSTRUCT_TIME, 1, 5000, 15, EXPONENTIAL, "Time spent constructing PrefixSet from DB (ms)") -HISTOGRAM(URLCLASSIFIER_PS_LOOKUP_TIME, 1, 500, 10, EXPONENTIAL, "Time spent per PrefixSet lookup (ms)") +HISTOGRAM(URLCLASSIFIER_LC_PREFIXES, 1, 1500000, 15, LINEAR, "Size of the prefix cache in entries") +HISTOGRAM(URLCLASSIFIER_LC_COMPLETIONS, 1, 200, 10, EXPONENTIAL, "Size of the completion cache in entries") HISTOGRAM_BOOLEAN(URLCLASSIFIER_PS_OOM, 
"Did UrlClassifier run out of memory during PrefixSet construction?") #endif diff --git a/toolkit/components/url-classifier/ChunkSet.cpp b/toolkit/components/url-classifier/ChunkSet.cpp new file mode 100644 index 000000000000..43b1b2068ca0 --- /dev/null +++ b/toolkit/components/url-classifier/ChunkSet.cpp @@ -0,0 +1,136 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Url Classifier code + * + * The Initial Developer of the Original Code is + * the Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2011 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Dave Camp + * Gian-Carlo Pascutto + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +#include "ChunkSet.h" + +namespace mozilla { +namespace safebrowsing { + +nsresult +ChunkSet::Serialize(nsACString& aChunkStr) +{ + aChunkStr.Truncate(); + + PRUint32 i = 0; + while (i < mChunks.Length()) { + if (i != 0) { + aChunkStr.Append(','); + } + aChunkStr.AppendInt((PRInt32)mChunks[i]); + + PRUint32 first = i; + PRUint32 last = first; + i++; + while (i < mChunks.Length() && (mChunks[i] == mChunks[i - 1] + 1 || mChunks[i] == mChunks[i - 1])) { + last = i++; + } + + if (last != first) { + aChunkStr.Append('-'); + aChunkStr.AppendInt((PRInt32)mChunks[last]); + } + } + + return NS_OK; +} + +nsresult +ChunkSet::Set(PRUint32 aChunk) +{ + PRUint32 idx = mChunks.BinaryIndexOf(aChunk); + if (idx == nsTArray::NoIndex) { + mChunks.InsertElementSorted(aChunk); + } + return NS_OK; +} + +nsresult +ChunkSet::Unset(PRUint32 aChunk) +{ + mChunks.RemoveElementSorted(aChunk); + + return NS_OK; +} + +bool +ChunkSet::Has(PRUint32 aChunk) const +{ + return mChunks.BinaryIndexOf(aChunk) != nsTArray::NoIndex; +} + +nsresult +ChunkSet::Merge(const ChunkSet& aOther) +{ + const uint32 *dupIter = aOther.mChunks.Elements(); + const uint32 *end = aOther.mChunks.Elements() + aOther.mChunks.Length(); + + for (const uint32 *iter = dupIter; iter != end; iter++) { + nsresult rv = Set(*iter); + NS_ENSURE_SUCCESS(rv, rv); + } + + return NS_OK; +} + +nsresult +ChunkSet::Remove(const ChunkSet& aOther) +{ + uint32 *addIter = mChunks.Elements(); + uint32 *end = mChunks.Elements() + mChunks.Length(); + + for (uint32 *iter = addIter; iter != end; iter++) { + if (!aOther.Has(*iter)) { + *addIter = *iter; + addIter++; + } + } + + mChunks.SetLength(addIter - mChunks.Elements()); + + return NS_OK; +} + +void +ChunkSet::Clear() +{ + mChunks.Clear(); +} + +} +} diff --git a/toolkit/components/url-classifier/ChunkSet.h b/toolkit/components/url-classifier/ChunkSet.h new file mode 100644 index 000000000000..0a465060e508 --- /dev/null +++ b/toolkit/components/url-classifier/ChunkSet.h @@ -0,0 +1,90 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Url Classifier code + * + * The Initial Developer of the Original Code is + * the Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2011 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Dave Camp + * Gian-Carlo Pascutto + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
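ChunkSet::Serialize() above renders the sorted chunk list in the compact range syntax ("1-3,5,9-10") that the update protocol's table request expects, collapsing runs of consecutive (or duplicated) chunk numbers. A minimal standalone sketch of the same logic, using standard-library types in place of the XPCOM ones (the function name is illustrative, not part of the patch):

    #include <cstdint>
    #include <string>
    #include <vector>

    // Collapse a sorted chunk list into "first-last" ranges, tolerating
    // duplicate entries, mirroring ChunkSet::Serialize() above.
    std::string SerializeChunks(const std::vector<uint32_t>& chunks) {
      std::string out;
      size_t i = 0;
      while (i < chunks.size()) {
        if (i != 0) out += ',';
        out += std::to_string(chunks[i]);
        size_t first = i++;
        size_t last = first;
        // Extend the run while the next chunk is consecutive or repeated.
        while (i < chunks.size() &&
               (chunks[i] == chunks[i - 1] + 1 || chunks[i] == chunks[i - 1])) {
          last = i++;
        }
        if (last != first) {
          out += '-';
          out += std::to_string(chunks[last]);
        }
      }
      return out;  // {1,2,3,5,9,10} -> "1-3,5,9-10"
    }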
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef ChunkSet_h__ +#define ChunkSet_h__ + + +#include "Entries.h" +#include "nsString.h" +#include "nsTArray.h" + +namespace mozilla { +namespace safebrowsing { + +/** + * Store the chunks as an array of uint32. + * XXX: We should optimize this further to compress the + * many consecutive numbers. + */ +class ChunkSet { +public: + ChunkSet() {} + ~ChunkSet() {} + + nsresult Serialize(nsACString& aStr); + nsresult Set(PRUint32 aChunk); + nsresult Unset(PRUint32 aChunk); + void Clear(); + nsresult Merge(const ChunkSet& aOther); + nsresult Remove(const ChunkSet& aOther); + + bool Has(PRUint32 chunk) const; + + uint32 Length() const { return mChunks.Length(); } + + nsresult Write(nsIOutputStream* aOut) { + return WriteTArray(aOut, mChunks); + } + + nsresult Read(nsIInputStream* aIn, PRUint32 aNumElements) { + return ReadTArray(aIn, &mChunks, aNumElements); + } + + uint32 *Begin() { return mChunks.Elements(); } + uint32 *End() { return mChunks.Elements() + mChunks.Length(); } + +private: + nsTArray mChunks; +}; + +} +} + +#endif diff --git a/toolkit/components/url-classifier/Classifier.cpp b/toolkit/components/url-classifier/Classifier.cpp new file mode 100644 index 000000000000..0f613d307b80 --- /dev/null +++ b/toolkit/components/url-classifier/Classifier.cpp @@ -0,0 +1,653 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Url Classifier code + * + * The Initial Developer of the Original Code is + * the Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2011 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Dave Camp + * Gian-Carlo Pascutto + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. 
If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "Classifier.h" +#include "nsISimpleEnumerator.h" +#include "nsIRandomGenerator.h" +#include "nsIInputStream.h" +#include "nsISeekableStream.h" +#include "nsIFile.h" +#include "nsAutoPtr.h" +#include "mozilla/Telemetry.h" +#include "prlog.h" + +// NSPR_LOG_MODULES=UrlClassifierDbService:5 +extern PRLogModuleInfo *gUrlClassifierDbServiceLog; +#if defined(PR_LOGGING) +#define LOG(args) PR_LOG(gUrlClassifierDbServiceLog, PR_LOG_DEBUG, args) +#define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierDbServiceLog, 4) +#else +#define LOG(args) +#define LOG_ENABLED() (PR_FALSE) +#endif + +namespace mozilla { +namespace safebrowsing { + +Classifier::Classifier() + : mFreshTime(45 * 60) +{ +} + +Classifier::~Classifier() +{ + Close(); +} + +/* + * Generate a unique 32-bit key for this user, which we will + * use to rehash all prefixes. This ensures that different users + * will get hash collisions on different prefixes, which in turn + * avoids that "unlucky" URLs get mysterious slowdowns, and that + * the servers get spammed if any such URL should get slashdotted. + * https://bugzilla.mozilla.org/show_bug.cgi?id=669407#c10 + */ +nsresult +Classifier::InitKey() +{ + nsCOMPtr storeFile; + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = storeFile->AppendNative(NS_LITERAL_CSTRING("classifier.hashkey")); + NS_ENSURE_SUCCESS(rv, rv); + + bool exists; + rv = storeFile->Exists(&exists); + NS_ENSURE_SUCCESS(rv, rv); + + if (!exists) { + // generate and store key + nsCOMPtr rg = + do_GetService("@mozilla.org/security/random-generator;1"); + NS_ENSURE_STATE(rg); + + PRUint8 *temp; + nsresult rv = rg->GenerateRandomBytes(sizeof(mHashKey), &temp); + NS_ENSURE_SUCCESS(rv, rv); + memcpy(&mHashKey, temp, sizeof(mHashKey)); + NS_Free(temp); + + nsCOMPtr out; + rv = NS_NewSafeLocalFileOutputStream(getter_AddRefs(out), storeFile, + -1, -1, 0); + NS_ENSURE_SUCCESS(rv, rv); + + PRUint32 written; + rv = out->Write(reinterpret_cast(&mHashKey), sizeof(PRUint32), &written); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr safeOut = do_QueryInterface(out); + rv = safeOut->Finish(); + NS_ENSURE_SUCCESS(rv, rv); + + LOG(("Initialized classifier, key = %X", mHashKey)); + } else { + // read key + nsCOMPtr inputStream; + rv = NS_NewLocalFileInputStream(getter_AddRefs(inputStream), storeFile, + -1, -1, 0); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr seekable = do_QueryInterface(inputStream); + nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0); + NS_ENSURE_SUCCESS(rv, rv); + + void *buffer = &mHashKey; + rv = NS_ReadInputStreamToBuffer(inputStream, + &buffer, + sizeof(PRUint32)); + NS_ENSURE_SUCCESS(rv, rv); + + LOG(("Loaded classifier key = %X", mHashKey)); + } + + return NS_OK; +} + +nsresult +Classifier::Open(nsIFile& aCacheDirectory) +{ + nsresult rv; + + mCryptoHash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv); + NS_ENSURE_SUCCESS(rv, rv); + + // Ensure the safebrowsing directory exists. 
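Before Open() continues with the directory setup below, it is worth pausing on the per-profile key written by InitKey() above: because every profile rehashes prefixes with its own random 32-bit key, a hash collision on one machine is not a collision on another, which is the whole point of the bug 669407 comment linked in the patch. The gist, re-sketched with the standard library only (file name and error handling simplified; the patch itself uses nsIRandomGenerator and a safe output stream so the key file is written atomically):

    #include <cstdint>
    #include <fstream>
    #include <random>
    #include <string>

    // Load the per-profile 32-bit hash key, creating and persisting it on
    // first run, analogous to Classifier::InitKey() above.
    uint32_t LoadOrCreateHashKey(const std::string& path) {
      std::ifstream in(path, std::ios::binary);
      uint32_t key = 0;
      if (in.read(reinterpret_cast<char*>(&key), sizeof(key))) {
        return key;  // existing profile: reuse the stored key
      }
      std::random_device rd;  // assumes a non-deterministic source is available
      key = static_cast<uint32_t>(rd());
      std::ofstream out(path, std::ios::binary | std::ios::trunc);
      out.write(reinterpret_cast<const char*>(&key), sizeof(key));
      return key;
    }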
+ rv = aCacheDirectory.Clone(getter_AddRefs(mStoreDirectory)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mStoreDirectory->AppendNative(NS_LITERAL_CSTRING("safebrowsing")); + NS_ENSURE_SUCCESS(rv, rv); + + bool storeExists; + rv = mStoreDirectory->Exists(&storeExists); + NS_ENSURE_SUCCESS(rv, rv); + + if (!storeExists) { + rv = mStoreDirectory->Create(nsIFile::DIRECTORY_TYPE, 0755); + NS_ENSURE_SUCCESS(rv, rv); + } else { + bool storeIsDir; + rv = mStoreDirectory->IsDirectory(&storeIsDir); + NS_ENSURE_SUCCESS(rv, rv); + if (!storeIsDir) + return NS_ERROR_FILE_DESTINATION_NOT_DIR; + } + + rv = InitKey(); + if (NS_FAILED(rv)) { + // Without a usable key the database is useless + Reset(); + return NS_ERROR_FAILURE; + } + + if (!mTableFreshness.Init()) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult +Classifier::Close() +{ + DropStores(); + + return NS_OK; +} + +nsresult +Classifier::Reset() +{ + DropStores(); + + nsCOMPtr entries; + nsresult rv = mStoreDirectory->GetDirectoryEntries(getter_AddRefs(entries)); + NS_ENSURE_SUCCESS(rv, rv); + + bool hasMore; + while (NS_SUCCEEDED(rv = entries->HasMoreElements(&hasMore)) && hasMore) { + nsCOMPtr file; + rv = entries->GetNext(getter_AddRefs(file)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = file->Remove(PR_FALSE); + NS_ENSURE_SUCCESS(rv, rv); + } + NS_ENSURE_SUCCESS(rv, rv); + + mTableFreshness.Clear(); + + return NS_OK; +} + +void +Classifier::TableRequest(nsACString& aResult) +{ + nsTArray tables; + ActiveTables(tables); + for (uint32 i = 0; i < tables.Length(); i++) { + nsAutoPtr store(new HashStore(tables[i], mStoreDirectory)); + if (!store) + continue; + + nsresult rv = store->Open(); + if (NS_FAILED(rv)) + continue; + + aResult.Append(store->TableName()); + aResult.Append(";"); + + ChunkSet &adds = store->AddChunks(); + ChunkSet &subs = store->SubChunks(); + + if (adds.Length() > 0) { + aResult.Append("a:"); + nsCAutoString addList; + adds.Serialize(addList); + aResult.Append(addList); + } + + if (subs.Length() > 0) { + if (adds.Length() > 0) + aResult.Append(':'); + aResult.Append("s:"); + nsCAutoString subList; + subs.Serialize(subList); + aResult.Append(subList); + } + + aResult.Append('\n'); + } +} + +nsresult +Classifier::Check(const nsACString& aSpec, LookupResultArray& aResults) +{ + Telemetry::AutoTimer timer; + + // Get the set of fragments to look up. + nsTArray fragments; + nsresult rv = LookupCache::GetLookupFragments(aSpec, &fragments); + NS_ENSURE_SUCCESS(rv, rv); + + nsTArray activeTables; + ActiveTables(activeTables); + + nsTArray cacheArray; + for (PRUint32 i = 0; i < activeTables.Length(); i++) { + LookupCache *cache = GetLookupCache(activeTables[i]); + if (cache) { + cacheArray.AppendElement(cache); + } else { + return NS_ERROR_FAILURE; + } + } + + // Now check each lookup fragment against the entries in the DB. 
+ for (PRUint32 i = 0; i < fragments.Length(); i++) { + Completion lookupHash; + lookupHash.FromPlaintext(fragments[i], mCryptoHash); + + // Get list of host keys to look up + Completion hostKey; + rv = LookupCache::GetKey(fragments[i], &hostKey, mCryptoHash); + if (NS_FAILED(rv)) { + // Local host on the network + continue; + } + +#if DEBUG && defined(PR_LOGGING) + if (LOG_ENABLED()) { + nsCAutoString checking; + lookupHash.ToString(checking); + LOG(("Checking %s (%X)", checking.get(), lookupHash.ToUint32())); + } +#endif + for (PRUint32 i = 0; i < cacheArray.Length(); i++) { + LookupCache *cache = cacheArray[i]; + bool has, complete; + Prefix codedPrefix; + rv = cache->Has(lookupHash, hostKey, mHashKey, + &has, &complete, &codedPrefix); + NS_ENSURE_SUCCESS(rv, rv); + if (has) { + LookupResult *result = aResults.AppendElement(); + if (!result) + return NS_ERROR_OUT_OF_MEMORY; + + PRInt64 age; + bool found = mTableFreshness.Get(cache->TableName(), &age); + if (!found) { + age = 24 * 60 * 60; // just a large number + } else { + PRInt64 now = (PR_Now() / PR_USEC_PER_SEC); + age = now - age; + } + + LOG(("Found a result in %s: %s (Age: %Lds)", + cache->TableName().get(), + complete ? "complete." : "Not complete.", + age)); + + result->hash.complete = lookupHash; + result->mCodedPrefix = codedPrefix; + result->mComplete = complete; + result->mFresh = (age < mFreshTime); + result->mTableName.Assign(cache->TableName()); + } + } + + } + + return NS_OK; +} + +nsresult +Classifier::ApplyUpdates(nsTArray* aUpdates) +{ + Telemetry::AutoTimer timer; + +#if defined(PR_LOGGING) + PRIntervalTime clockStart = 0; + if (LOG_ENABLED() || true) { + clockStart = PR_IntervalNow(); + } +#endif + + LOG(("Applying table updates.")); + + nsresult rv; + + for (uint32 i = 0; i < aUpdates->Length(); i++) { + // Previous ApplyTableUpdates() may have consumed this update.. 
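The mFresh flag computed in the lookup loop above is what decides whether a prefix hit can stand on its own: a table that has not been updated within mFreshTime seconds (45 * 60 in the constructor) yields stale results that must be re-confirmed with the completion server before anything is blocked. The rule in isolation (names illustrative):

    #include <cstdint>

    // A match is "fresh" only if its table was updated inside the window;
    // tables with no recorded update time (spoiled or never updated) get
    // the 24 * 60 * 60 fallback age above and are therefore always stale.
    bool IsFresh(bool haveUpdateTime, int64_t lastUpdateSec,
                 int64_t nowSec, int64_t freshWindowSec) {
      int64_t age = haveUpdateTime ? nowSec - lastUpdateSec
                                   : 24 * 60 * 60;  // "just a large number"
      return age < freshWindowSec;
    }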
+ if ((*aUpdates)[i]) { + // Run all updates for one table + rv = ApplyTableUpdates(aUpdates, aUpdates->ElementAt(i)->TableName()); + if (NS_FAILED(rv)) { + Reset(); + return rv; + } + } + } + aUpdates->Clear(); + LOG(("Done applying updates.")); + +#if defined(PR_LOGGING) + if (LOG_ENABLED() || true) { + PRIntervalTime clockEnd = PR_IntervalNow(); + LOG(("update took %dms\n", + PR_IntervalToMilliseconds(clockEnd - clockStart))); + } +#endif + + return NS_OK; +} + +nsresult +Classifier::MarkSpoiled(nsTArray& aTables) +{ + for (uint32 i = 0; i < aTables.Length(); i++) { + LOG(("Spoiling table: %s", aTables[i].get())); + // Spoil this table by marking it as no known freshness + mTableFreshness.Remove(aTables[i]); + } + return NS_OK; +} + +void +Classifier::DropStores() +{ + for (uint32 i = 0; i < mHashStores.Length(); i++) { + delete mHashStores[i]; + } + mHashStores.Clear(); + for (uint32 i = 0; i < mLookupCaches.Length(); i++) { + delete mLookupCaches[i]; + } + mLookupCaches.Clear(); +} + +nsresult +Classifier::ScanStoreDir(nsTArray& aTables) +{ + nsCOMPtr entries; + nsresult rv = mStoreDirectory->GetDirectoryEntries(getter_AddRefs(entries)); + NS_ENSURE_SUCCESS(rv, rv); + + bool hasMore; + while (NS_SUCCEEDED(rv = entries->HasMoreElements(&hasMore)) && hasMore) { + nsCOMPtr file; + rv = entries->GetNext(getter_AddRefs(file)); + NS_ENSURE_SUCCESS(rv, rv); + + nsCString leafName; + rv = file->GetNativeLeafName(leafName); + NS_ENSURE_SUCCESS(rv, rv); + + nsCString suffix(NS_LITERAL_CSTRING(".sbstore")); + + PRInt32 dot = leafName.RFind(suffix, 0); + if (dot != -1) { + leafName.Cut(dot, suffix.Length()); + aTables.AppendElement(leafName); + } + } + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult +Classifier::ActiveTables(nsTArray& aTables) +{ + aTables.Clear(); + + nsTArray foundTables; + ScanStoreDir(foundTables); + + for (uint32 i = 0; i < foundTables.Length(); i++) { + nsAutoPtr store(new HashStore(nsCString(foundTables[i]), mStoreDirectory)); + if (!store) + return NS_ERROR_OUT_OF_MEMORY; + + nsresult rv = store->Open(); + if (NS_FAILED(rv)) + continue; + + LookupCache *lookupCache = GetLookupCache(store->TableName()); + if (!lookupCache) { + continue; + } + + const ChunkSet &adds = store->AddChunks(); + const ChunkSet &subs = store->SubChunks(); + + if (adds.Length() == 0 && subs.Length() == 0) + continue; + + LOG(("Active table: %s", store->TableName().get())); + aTables.AppendElement(store->TableName()); + } + + return NS_OK; +} + +/* + * This will consume+delete updates from the passed nsTArray. 
+*/ +nsresult +Classifier::ApplyTableUpdates(nsTArray* aUpdates, + const nsACString& aTable) +{ + LOG(("Classifier::ApplyTableUpdates(%s)", + PromiseFlatCString(aTable).get())); + + nsAutoPtr store(new HashStore(aTable, mStoreDirectory)); + + if (!store) + return NS_ERROR_FAILURE; + + // take the quick exit if there is no valid update for us + // (common case) + uint32 validupdates = 0; + + for (uint32 i = 0; i < aUpdates->Length(); i++) { + TableUpdate *update = aUpdates->ElementAt(i); + if (!update || !update->TableName().Equals(store->TableName())) + continue; + if (update->Empty()) { + aUpdates->ElementAt(i) = nsnull; + delete update; + continue; + } + validupdates++; + } + + if (!validupdates) { + return NS_OK; + } + + nsresult rv = store->Open(); + NS_ENSURE_SUCCESS(rv, rv); + rv = store->BeginUpdate(); + NS_ENSURE_SUCCESS(rv, rv); + + // Read the part of the store that is (only) in the cache + LookupCache *prefixSet = GetLookupCache(store->TableName()); + if (!prefixSet) { + return NS_ERROR_FAILURE; + } + nsTArray AddPrefixHashes; + rv = prefixSet->GetPrefixes(&AddPrefixHashes); + NS_ENSURE_SUCCESS(rv, rv); + rv = store->AugmentAdds(AddPrefixHashes); + NS_ENSURE_SUCCESS(rv, rv); + + uint32 applied = 0; + bool updateFreshness = false; + + for (uint32 i = 0; i < aUpdates->Length(); i++) { + TableUpdate *update = aUpdates->ElementAt(i); + if (!update || !update->TableName().Equals(store->TableName())) + continue; + + rv = store->ApplyUpdate(*update); + NS_ENSURE_SUCCESS(rv, rv); + + applied++; + + LOG(("Applied update to table %s:", PromiseFlatCString(store->TableName()).get())); + LOG((" %d add chunks", update->AddChunks().Length())); + LOG((" %d add prefixes", update->AddPrefixes().Length())); + LOG((" %d add completions", update->AddCompletes().Length())); + LOG((" %d sub chunks", update->SubChunks().Length())); + LOG((" %d sub prefixes", update->SubPrefixes().Length())); + LOG((" %d sub completions", update->SubCompletes().Length())); + LOG((" %d add expirations", update->AddExpirations().Length())); + LOG((" %d sub expirations", update->SubExpirations().Length())); + + if (!update->IsLocalUpdate()) { + updateFreshness = true; + LOG(("Remote update, updating freshness")); + } + + aUpdates->ElementAt(i) = nsnull; + delete update; + } + + LOG(("Applied %d update(s) to %s.", applied, PromiseFlatCString(store->TableName()).get())); + + rv = store->Rebuild(); + NS_ENSURE_SUCCESS(rv, rv); + + LOG(("Table %s now has:", PromiseFlatCString(store->TableName()).get())); + LOG((" %d add chunks", store->AddChunks().Length())); + LOG((" %d add prefixes", store->AddPrefixes().Length())); + LOG((" %d add completions", store->AddCompletes().Length())); + LOG((" %d sub chunks", store->SubChunks().Length())); + LOG((" %d sub prefixes", store->SubPrefixes().Length())); + LOG((" %d sub completions", store->SubCompletes().Length())); + + rv = store->WriteFile(); + NS_ENSURE_SUCCESS(rv, rv); + + // At this point the store is updated and written out to disk, but + // the data is still in memory. Build our quick-lookup table here. + rv = prefixSet->Build(store->AddPrefixes(), store->AddCompletes()); + NS_ENSURE_SUCCESS(rv, rv); +#if defined(DEBUG) && defined(PR_LOGGING) + prefixSet->Dump(); +#endif + prefixSet->WriteFile(); + + // This will drop all the temporary storage used during the update. 
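FinishUpdate(), called next, drops that temporary storage. The more interesting step is earlier in ApplyTableUpdates(): the .sbstore file deliberately does not keep the 32-bit add prefixes at all, only their chunk numbers, so before an update can be merged the prefixes are decoded back out of the LookupCache/PrefixSet via GetPrefixes() and stitched onto the chunk numbers with AugmentAdds(). AugmentAdds() itself lies outside this excerpt; a sketch of the idea, assuming both arrays are in the same prefix-sorted order:

    #include <cstdint>
    #include <vector>

    struct AddPrefixLite { uint32_t prefix; uint32_t addChunk; };

    // Rebuild full add-prefix records by zipping the chunk numbers read
    // from the .sbstore file with the prefixes decoded from the PrefixSet.
    bool AugmentAddsSketch(std::vector<AddPrefixLite>& adds,        // prefix still 0
                           const std::vector<uint32_t>& prefixes) { // from PrefixSet
      if (adds.size() != prefixes.size()) {
        return false;  // store and cache disagree: treat as corruption
      }
      for (size_t i = 0; i < adds.size(); i++) {
        adds[i].prefix = prefixes[i];
      }
      return true;
    }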
+ rv = store->FinishUpdate(); + NS_ENSURE_SUCCESS(rv, rv); + + if (updateFreshness) { + PRInt64 now = (PR_Now() / PR_USEC_PER_SEC); + LOG(("Successfully updated %s", PromiseFlatCString(store->TableName()).get())); + rv = (mTableFreshness.Put(store->TableName(), now) ? NS_OK : NS_ERROR_FAILURE); + } + + return rv; +} + +LookupCache * +Classifier::GetLookupCache(const nsACString& aTable) +{ + for (uint32 i = 0; i < mLookupCaches.Length(); i++) { + if (mLookupCaches[i]->TableName().Equals(aTable)) { + return mLookupCaches[i]; + } + } + + LookupCache *cache = new LookupCache(aTable, mStoreDirectory); + nsresult rv = cache->Init(); + if (NS_FAILED(rv)) { + return nsnull; + } + rv = cache->Open(); + if (NS_FAILED(rv)) { + if (rv == NS_ERROR_FILE_CORRUPTED) { + Reset(); + } + return nsnull; + } + mLookupCaches.AppendElement(cache); + return cache; +} + +nsresult +Classifier::ReadNoiseEntries(const Prefix& aPrefix, + const nsACString& aTableName, + PRInt32 aCount, + PrefixArray* aNoiseEntries) +{ + LookupCache *cache = GetLookupCache(aTableName); + if (!cache) { + return NS_ERROR_FAILURE; + } + + nsTArray prefixes; + nsresult rv = cache->GetPrefixes(&prefixes); + NS_ENSURE_SUCCESS(rv, rv); + + PRInt32 idx = prefixes.BinaryIndexOf(aPrefix.ToUint32()); + + if (idx == nsTArray::NoIndex) { + NS_WARNING("Could not find prefix in PrefixSet during noise lookup"); + return NS_ERROR_FAILURE; + } + + idx -= idx % aCount; + + for (PRInt32 i = 0; (i < aCount) && ((idx+i) < prefixes.Length()); i++) { + Prefix newPref; + newPref.FromUint32(prefixes[idx+i]); + aNoiseEntries->AppendElement(newPref); + } + + return NS_OK; +} + +} +} diff --git a/toolkit/components/url-classifier/Classifier.h b/toolkit/components/url-classifier/Classifier.h new file mode 100644 index 000000000000..b6eb00a73b67 --- /dev/null +++ b/toolkit/components/url-classifier/Classifier.h @@ -0,0 +1,128 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Url Classifier code + * + * The Initial Developer of the Original Code is + * the Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2011 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Dave Camp + * Gian-Carlo Pascutto + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. 
If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef Classifier_h__ +#define Classifier_h__ + +#include "Entries.h" +#include "HashStore.h" +#include "ProtocolParser.h" +#include "LookupCache.h" +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsIFile.h" +#include "nsICryptoHash.h" +#include "nsDataHashtable.h" + +namespace mozilla { +namespace safebrowsing { + +/** + * Maintains the stores and LookupCaches for the url classifier. + */ +class Classifier { +public: + Classifier(); + ~Classifier(); + + nsresult Open(nsIFile& aCacheDirectory); + nsresult Close(); + nsresult Reset(); + + /** + * Get the list of active tables and their chunks in a format + * suitable for an update request. + */ + void TableRequest(nsACString& aResult); + + /* + * Get all tables that we know about. + */ + nsresult ActiveTables(nsTArray& aTables); + + /** + * Check a URL against the database. + */ + nsresult Check(const nsACString& aSpec, LookupResultArray& aResults); + + /** + * Apply the table updates in the array. Takes ownership of + * the updates in the array and clears it. Wacky! + */ + nsresult ApplyUpdates(nsTArray* aUpdates); + /** + * Failed update. Spoil the entries so we don't block hosts + * unnecessarily + */ + nsresult MarkSpoiled(nsTArray& aTables); + nsresult CacheCompletions(const CacheResultArray& aResults); + PRUint32 GetHashKey(void) { return mHashKey; }; + void SetFreshTime(PRUint32 aTime) { mFreshTime = aTime; }; + /* + * Get a bunch of extra prefixes to query for completion + * and mask the real entry being requested + */ + nsresult ReadNoiseEntries(const Prefix& aPrefix, + const nsACString& aTableName, + PRInt32 aCount, + PrefixArray* aNoiseEntries); +private: + void DropStores(); + nsresult ScanStoreDir(nsTArray& aTables); + + nsresult ApplyTableUpdates(nsTArray* aUpdates, + const nsACString& aTable); + + LookupCache *GetLookupCache(const nsACString& aTable); + nsresult InitKey(); + + nsCOMPtr mCryptoHash; + nsCOMPtr mStoreDirectory; + nsTArray mHashStores; + nsTArray mLookupCaches; + PRUint32 mHashKey; + // Stores the last time a given table was updated (seconds). + nsDataHashtable mTableFreshness; + PRUint32 mFreshTime; +}; + +} +} + +#endif diff --git a/toolkit/components/url-classifier/Entries.h b/toolkit/components/url-classifier/Entries.h new file mode 100644 index 000000000000..357b0ccec2ce --- /dev/null +++ b/toolkit/components/url-classifier/Entries.h @@ -0,0 +1,335 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Url Classifier code + * + * The Initial Developer of the Original Code is + * the Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2011 + * the Initial Developer. All Rights Reserved. 
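ReadNoiseEntries(), declared in Classifier.h above and implemented earlier in Classifier.cpp, hides which prefix is really being completed: rather than asking the completion server about one prefix, the classifier sends an aligned block of neighbouring prefixes from the same table. A standalone sketch of that selection (std::vector in place of nsTArray):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Return the aligned block of `count` prefixes containing `target`,
    // mirroring the idx -= idx % aCount trick in ReadNoiseEntries().
    std::vector<uint32_t> NoiseBlock(const std::vector<uint32_t>& sorted,
                                     uint32_t target, size_t count) {
      auto it = std::lower_bound(sorted.begin(), sorted.end(), target);
      if (it == sorted.end() || *it != target) {
        return {};  // prefix not present: nothing to mask
      }
      size_t idx = static_cast<size_t>(it - sorted.begin());
      idx -= idx % count;  // snap down to a block boundary
      std::vector<uint32_t> block;
      for (size_t i = 0; i < count && idx + i < sorted.size(); i++) {
        block.push_back(sorted[idx + i]);
      }
      return block;
    }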
+ * + * Contributor(s): + * Dave Camp + * Gian-Carlo Pascutto + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +#ifndef SBEntries_h__ +#define SBEntries_h__ + +#include "nsTArray.h" +#include "nsString.h" +#include "nsICryptoHash.h" +#include "nsNetUtil.h" +#include "prlog.h" + +extern PRLogModuleInfo *gUrlClassifierDbServiceLog; +#if defined(PR_LOGGING) +#define LOG(args) PR_LOG(gUrlClassifierDbServiceLog, PR_LOG_DEBUG, args) +#define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierDbServiceLog, 4) +#else +#define LOG(args) +#define LOG_ENABLED() (PR_FALSE) +#endif + +#if DEBUG +#include "plbase64.h" +#endif + +namespace mozilla { +namespace safebrowsing { + +#define PREFIX_SIZE 4 +#define COMPLETE_SIZE 32 + +template +struct SafebrowsingHash +{ + static const uint32 sHashSize = S; + typedef SafebrowsingHash self_type; + uint8 buf[S]; + + nsresult FromPlaintext(const nsACString& aPlainText, nsICryptoHash* aHash) { + // From the protocol doc: + // Each entry in the chunk is composed + // of the SHA 256 hash of a suffix/prefix expression. 
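PREFIX_SIZE and COMPLETE_SIZE above encode the two granularities of the protocol: a Completion is the full 32-byte SHA-256 digest of a URL fragment, which FromPlaintext() (continued below) computes via nsICryptoHash, and a Prefix keeps only its first four bytes, moved in and out of a host-endian integer by ToUint32()/FromUint32() further down. Assuming a digest is already in hand, the reduction is only:

    #include <cstdint>
    #include <cstring>

    // Take the 4-byte prefix off the front of a 32-byte completion digest.
    uint32_t PrefixFromDigest(const uint8_t digest[32]) {
      uint32_t prefix;
      std::memcpy(&prefix, digest, sizeof(prefix));  // first PREFIX_SIZE bytes
      return prefix;
    }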
+ + nsresult rv = aHash->Init(nsICryptoHash::SHA256); + NS_ENSURE_SUCCESS(rv, rv); + + rv = aHash->Update + (reinterpret_cast(aPlainText.BeginReading()), + aPlainText.Length()); + NS_ENSURE_SUCCESS(rv, rv); + + nsCAutoString hashed; + rv = aHash->Finish(PR_FALSE, hashed); + NS_ENSURE_SUCCESS(rv, rv); + + NS_ASSERTION(hashed.Length() >= sHashSize, + "not enough characters in the hash"); + + memcpy(buf, hashed.BeginReading(), sHashSize); + + return NS_OK; + } + + void Assign(const nsACString& aStr) { + NS_ASSERTION(aStr.Length() >= sHashSize, + "string must be at least sHashSize characters long"); + memcpy(buf, aStr.BeginReading(), sHashSize); + } + + int Compare(const self_type& aOther) const { + return Comparator::Compare(buf, aOther.buf); + } + + bool operator==(const self_type& aOther) const { + return Comparator::Compare(buf, aOther.buf) == 0; + } + + bool operator!=(const self_type& aOther) const { + return Comparator::Compare(buf, aOther.buf) != 0; + } + + bool operator<(const self_type& aOther) const { + return Comparator::Compare(buf, aOther.buf) < 0; + } + +#ifdef DEBUG + void ToString(nsACString& aStr) const { + uint32 len = ((sHashSize + 2) / 3) * 4; + aStr.SetCapacity(len + 1); + PL_Base64Encode((char*)buf, sHashSize, aStr.BeginWriting()); + aStr.BeginWriting()[len] = '\0'; + } +#endif + PRUint32 ToUint32() const { + PRUint32 res = 0; + memcpy(&res, buf, NS_MIN(4, S)); + return res; + } + void FromUint32(PRUint32 aHash) { + memcpy(buf, &aHash, NS_MIN(4, S)); + } +}; + +class PrefixComparator { +public: + static int Compare(const PRUint8* a, const PRUint8* b) { + return *((uint32*)a) - *((uint32*)b); + } +}; +typedef SafebrowsingHash Prefix; +typedef nsTArray PrefixArray; + +class CompletionComparator { +public: + static int Compare(const PRUint8* a, const PRUint8* b) { + return memcmp(a, b, COMPLETE_SIZE); + } +}; +typedef SafebrowsingHash Completion; +typedef nsTArray CompletionArray; + +struct AddPrefix { + Prefix prefix; + uint32 addChunk; + + AddPrefix() : addChunk(0) {} + + uint32 Chunk() const { return addChunk; } + const Prefix &PrefixHash() const { return prefix; } + + template + int Compare(const T& other) const { + int cmp = prefix.Compare(other.PrefixHash()); + if (cmp != 0) { + return cmp; + } + return addChunk - other.addChunk; + } +}; + +struct AddComplete { + union { + Prefix prefix; + Completion complete; + } hash; + uint32 addChunk; + + AddComplete() : addChunk(0) {} + + uint32 Chunk() const { return addChunk; } + const Prefix &PrefixHash() const { return hash.prefix; } + const Completion &CompleteHash() const { return hash.complete; } + + template + int Compare(const T& other) const { + int cmp = hash.complete.Compare(other.CompleteHash()); + if (cmp != 0) { + return cmp; + } + return addChunk - other.addChunk; + } +}; + +struct SubPrefix { + Prefix prefix; + uint32 addChunk; + uint32 subChunk; + + SubPrefix(): addChunk(0), subChunk(0) {} + + uint32 Chunk() const { return subChunk; } + uint32 AddChunk() const { return addChunk; } + const Prefix &PrefixHash() const { return prefix; } + + template + int Compare(const T& aOther) const { + int cmp = prefix.Compare(aOther.PrefixHash()); + if (cmp != 0) + return cmp; + if (addChunk != aOther.addChunk) + return addChunk - aOther.addChunk; + return subChunk - aOther.subChunk; + } + + template + int CompareAlt(const T& aOther) const { + int cmp = prefix.Compare(aOther.PrefixHash()); + if (cmp != 0) + return cmp; + return addChunk - aOther.addChunk; + } +}; + +struct SubComplete { + union { + Prefix prefix; + Completion 
complete; + } hash; + uint32 addChunk; + uint32 subChunk; + + SubComplete() : addChunk(0), subChunk(0) {} + + uint32 Chunk() const { return subChunk; } + uint32 AddChunk() const { return addChunk; } + const Prefix &PrefixHash() const { return hash.prefix; } + const Completion &CompleteHash() const { return hash.complete; } + + int Compare(const SubComplete& aOther) const { + int cmp = hash.complete.Compare(aOther.hash.complete); + if (cmp != 0) + return cmp; + if (addChunk != aOther.addChunk) + return addChunk - aOther.addChunk; + return subChunk - aOther.subChunk; + } +}; + +typedef nsTArray AddPrefixArray; +typedef nsTArray AddCompleteArray; +typedef nsTArray SubPrefixArray; +typedef nsTArray SubCompleteArray; + +/** + * Compares chunks by their add chunk, then their prefix. + */ +template +class EntryCompare { +public: + typedef T elem_type; + static int Compare(const void* e1, const void* e2, void* data) { + const elem_type* a = static_cast(e1); + const elem_type* b = static_cast(e2); + return a->Compare(*b); + } +}; + +template<> +class EntryCompare { +public: + typedef SubPrefix elem_type; + static int Compare(const void* e1, const void* e2, void* data) { + const elem_type* a = static_cast(e1); + const elem_type* b = static_cast(e2); + return a->Compare(*b); + } +}; + +template<> +class EntryCompare { +public: + typedef SubComplete elem_type; + static int Compare(const void* e1, const void* e2, void* data) { + const elem_type *a = static_cast(e1); + const elem_type *b = static_cast(e2); + return a->Compare(*b); + } +}; + +/** + * Sort an array of store entries. nsTArray::Sort uses Equal/LessThan + * to sort, this does a single Compare so it's a bit quicker over the + * large sorts we do. + */ +template +void +EntrySort(nsTArray& aArray) +{ + NS_QuickSort(aArray.Elements(), aArray.Length(), sizeof(T), + EntryCompare::Compare, 0); +} + +template +nsresult +ReadTArray(nsIInputStream* aStream, nsTArray* aArray, PRUint32 aNumElements) +{ + if (!aArray->SetLength(aNumElements)) + return NS_ERROR_OUT_OF_MEMORY; + + void *buffer = aArray->Elements(); + nsresult rv = NS_ReadInputStreamToBuffer(aStream, &buffer, + (aNumElements * sizeof(T))); + NS_ENSURE_SUCCESS(rv, rv); + return NS_OK; +} + +template +nsresult +WriteTArray(nsIOutputStream* aStream, nsTArray& aArray) +{ + PRUint32 written; + return aStream->Write(reinterpret_cast(aArray.Elements()), + aArray.Length() * sizeof(T), + &written); +} + +} +} +#endif diff --git a/toolkit/components/url-classifier/HashStore.cpp b/toolkit/components/url-classifier/HashStore.cpp new file mode 100644 index 000000000000..234b4c12bc55 --- /dev/null +++ b/toolkit/components/url-classifier/HashStore.cpp @@ -0,0 +1,950 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +// Originally based on Chrome sources: +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#include "HashStore.h" +#include "nsAutoPtr.h" +#include "nsICryptoHash.h" +#include "nsISeekableStream.h" +#include "nsIStreamConverterService.h" +#include "nsNetUtil.h" +#include "nsCheckSummedOutputStream.h" +#include "prlog.h" +#include "zlib.h" + +// Main store for SafeBrowsing protocol data. We store +// known add/sub chunks, prefixes and completions in memory +// during an update, and serialize to disk. +// We do not store the add prefixes, those are retrieved by +// decompressing the PrefixSet cache whenever we need to apply +// an update. + +// Data format: +// uint32 magic +// uint32 version +// uint32 numAddChunks +// uint32 numSubChunks +// uint32 numAddPrefixes +// uint32 numSubPrefixes +// uint32 numAddCompletes +// uint32 numSubCompletes +// 0...numAddChunks uint32 addChunk +// 0...numSubChunks uint32 subChunk +// uint32 compressed-size +// compressed-size bytes zlib inflate data +// 0...numAddPrefixes uint32 addChunk +// uint32 compressed-size +// compressed-size bytes zlib inflate data +// 0...numSubPrefixes uint32 addChunk +// uint32 compressed-size +// compressed-size bytes zlib inflate data +// 0...numSubPrefixes uint32 subChunk +// 0...numSubPrefixes uint32 subPrefix +// 0...numAddCompletes 32-byte Completions +// 0...numSubCompletes 32-byte Completions +// 16-byte MD5 of all preceding data + +// NSPR_LOG_MODULES=UrlClassifierDbService:5 +extern PRLogModuleInfo *gUrlClassifierDbServiceLog; +#if defined(PR_LOGGING) +#define LOG(args) PR_LOG(gUrlClassifierDbServiceLog, PR_LOG_DEBUG, args) +#define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierDbServiceLog, 4) +#else +#define LOG(args) +#define LOG_ENABLED() (PR_FALSE) +#endif + +namespace mozilla { +namespace safebrowsing { + +const uint32 STORE_MAGIC = 0x1231af3b; +const uint32 CURRENT_VERSION = 1; + +void +TableUpdate::NewAddPrefix(PRUint32 aAddChunk, const Prefix& aHash) +{ + AddPrefix *add = mAddPrefixes.AppendElement(); + add->addChunk = aAddChunk; + add->prefix = aHash; +} + +void +TableUpdate::NewSubPrefix(PRUint32 aAddChunk, const Prefix& aHash, PRUint32 aSubChunk) +{ + SubPrefix *sub = mSubPrefixes.AppendElement(); + sub->addChunk = aAddChunk; + sub->prefix = aHash; + sub->subChunk = aSubChunk; +} + +void +TableUpdate::NewAddComplete(PRUint32 aAddChunk, const Completion& aHash) +{ + AddComplete *add = mAddCompletes.AppendElement(); + add->addChunk = aAddChunk; + add->hash.complete = aHash; +} + +void +TableUpdate::NewSubComplete(PRUint32 aAddChunk, const Completion& aHash, PRUint32 aSubChunk) +{ + SubComplete *sub = mSubCompletes.AppendElement(); + sub->addChunk =
aAddChunk; + sub->hash.complete = aHash; + sub->subChunk = aSubChunk; +} + + +HashStore::HashStore(const nsACString& aTableName, nsIFile* aStoreDir) + : mTableName(aTableName) + , mStoreDirectory(aStoreDir) + , mInUpdate(false) +{ +} + +HashStore::~HashStore() +{ +} + +nsresult +HashStore::Reset() +{ + LOG(("HashStore resetting")); + + nsCOMPtr storeFile; + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore")); + NS_ENSURE_SUCCESS(rv, rv); + + rv = storeFile->Remove(PR_FALSE); + NS_ENSURE_SUCCESS(rv, rv); + + Clear(); + + return NS_OK; +} + +nsresult +HashStore::CheckChecksum(nsIFile* aStoreFile) +{ + // Check for file corruption by + // comparing the stored checksum to the actual checksum of the data + nsCAutoString hash; + nsCAutoString compareHash; + char *data; + PRUint32 read; + + PRInt64 fileSize; + nsresult rv = aStoreFile->GetFileSize(&fileSize); + NS_ENSURE_SUCCESS(rv, rv); + + if (fileSize < 0) { + return NS_ERROR_FAILURE; + } + + rv = CalculateChecksum(hash, true); + NS_ENSURE_SUCCESS(rv, rv); + + compareHash.GetMutableData(&data, hash.Length()); + + nsCOMPtr seekIn = do_QueryInterface(mInputStream); + rv = seekIn->Seek(nsISeekableStream::NS_SEEK_SET, fileSize-hash.Length()); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mInputStream->Read(data, hash.Length(), &read); + NS_ENSURE_SUCCESS(rv, rv); + NS_ASSERTION(read == hash.Length(), "Could not read hash bytes"); + + if (!hash.Equals(compareHash)) { + NS_WARNING("Safebrowsing file failed checksum."); + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult +HashStore::Open() +{ + nsCOMPtr storeFile; + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore")); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr origStream; + rv = NS_NewLocalFileInputStream(getter_AddRefs(origStream), storeFile, + PR_RDONLY); + + if (NS_FAILED(rv) && rv != NS_ERROR_FILE_NOT_FOUND) { + Reset(); + return rv; + } + + if (rv == NS_ERROR_FILE_NOT_FOUND) { + Clear(); + UpdateHeader(); + return NS_OK; + } + + rv = NS_NewBufferedInputStream(getter_AddRefs(mInputStream), origStream, + BUFFER_SIZE); + NS_ENSURE_SUCCESS(rv, rv); + + rv = CheckChecksum(storeFile); + if (NS_FAILED(rv)) { + Reset(); + return rv; + } + + rv = ReadHeader(); + if (NS_FAILED(rv)) { + Reset(); + return rv; + } + + rv = SanityCheck(storeFile); + if (NS_FAILED(rv)) { + NS_WARNING("Safebrowsing file failed sanity check.
probably out of date."); + Reset(); + return rv; + } + + rv = ReadChunkNumbers(); + if (NS_FAILED(rv)) { + Reset(); + return rv; + } + + return NS_OK; +} + +void +HashStore::Clear() +{ + mAddChunks.Clear(); + mSubChunks.Clear(); + mAddExpirations.Clear(); + mSubExpirations.Clear(); + mAddPrefixes.Clear(); + mSubPrefixes.Clear(); + mAddCompletes.Clear(); + mSubCompletes.Clear(); +} + +nsresult +HashStore::ReadEntireStore() +{ + Clear(); + + nsresult rv = ReadHeader(); + NS_ENSURE_SUCCESS(rv, rv); + + rv = ReadChunkNumbers(); + NS_ENSURE_SUCCESS(rv, rv); + + rv = ReadHashes(); + if (NS_FAILED(rv)) { + // we are the only one reading this so it's up to us to detect corruption + Reset(); + } + + return rv; +} + +nsresult +HashStore::ReadHeader() +{ + if (!mInputStream) { + Clear(); + UpdateHeader(); + return NS_OK; + } + + nsCOMPtr seekable = do_QueryInterface(mInputStream); + nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0); + NS_ENSURE_SUCCESS(rv, rv); + + void *buffer = &mHeader; + rv = NS_ReadInputStreamToBuffer(mInputStream, + &buffer, + sizeof(Header)); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult +HashStore::SanityCheck(nsIFile *storeFile) +{ + if (mHeader.magic != STORE_MAGIC || mHeader.version != CURRENT_VERSION) { + NS_WARNING("Unexpected header data in the store."); + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult +HashStore::CalculateChecksum(nsCAutoString& aChecksum, bool aChecksumPresent) +{ + aChecksum.Truncate(); + + nsCOMPtr storeFile; + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore")); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr hashStream; + + rv = NS_NewLocalFileInputStream(getter_AddRefs(hashStream), storeFile, + PR_RDONLY); + + if (NS_FAILED(rv) && rv != NS_ERROR_FILE_NOT_FOUND) { + Reset(); + return rv; + } + + PRInt64 fileSize; + rv = storeFile->GetFileSize(&fileSize); + NS_ENSURE_SUCCESS(rv, rv); + + if (fileSize < 0) { + return NS_ERROR_FAILURE; + } + + nsCOMPtr hash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv); + NS_ENSURE_SUCCESS(rv, rv); + + // Size of MD5 hash in bytes + const uint32 CHECKSUM_SIZE = 16; + + rv = hash->Init(nsICryptoHash::MD5); + NS_ENSURE_SUCCESS(rv, rv); + + if (!aChecksumPresent) { + // Hash entire file + rv = hash->UpdateFromStream(hashStream, PR_UINT32_MAX); + } else { + // Hash everything but last checksum bytes + rv = hash->UpdateFromStream(hashStream, fileSize-CHECKSUM_SIZE); + } + NS_ENSURE_SUCCESS(rv, rv); + + rv = hash->Finish(PR_FALSE, aChecksum); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +void +HashStore::UpdateHeader() +{ + mHeader.magic = STORE_MAGIC; + mHeader.version = CURRENT_VERSION; + + mHeader.numAddChunks = mAddChunks.Length(); + mHeader.numSubChunks = mSubChunks.Length(); + mHeader.numAddPrefixes = mAddPrefixes.Length(); + mHeader.numSubPrefixes = mSubPrefixes.Length(); + mHeader.numAddCompletes = mAddCompletes.Length(); + mHeader.numSubCompletes = mSubCompletes.Length(); +} + +nsresult +HashStore::ReadChunkNumbers() +{ + if (!mInputStream) { + LOG(("Clearing.")); + Clear(); + return NS_OK; + } + + nsCOMPtr seekable = do_QueryInterface(mInputStream); + nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, + sizeof(Header)); + + rv = mAddChunks.Read(mInputStream, mHeader.numAddChunks); + NS_ENSURE_SUCCESS(rv, rv); + NS_ASSERTION(mAddChunks.Length() == mHeader.numAddChunks, "Read the right amount of add chunks."); + + rv = 
mSubChunks.Read(mInputStream, mHeader.numSubChunks); + NS_ENSURE_SUCCESS(rv, rv); + NS_ASSERTION(mSubChunks.Length() == mHeader.numSubChunks, "Read the right amount of sub chunks."); + + return NS_OK; +} + +nsresult +HashStore::ReadHashes() +{ + if (!mInputStream) { + return NS_OK; + } + + nsCOMPtr seekable = do_QueryInterface(mInputStream); + + uint32 offset = sizeof(Header); + offset += (mHeader.numAddChunks + mHeader.numSubChunks) * sizeof(uint32); + nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, offset); + + rv = ReadAddPrefixes(); + NS_ENSURE_SUCCESS(rv, rv); + + rv = ReadSubPrefixes(); + NS_ENSURE_SUCCESS(rv, rv); + + rv = ReadTArray(mInputStream, &mAddCompletes, mHeader.numAddCompletes); + NS_ENSURE_SUCCESS(rv, rv); + + rv = ReadTArray(mInputStream, &mSubCompletes, mHeader.numSubCompletes); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult +HashStore::BeginUpdate() +{ + mInUpdate = true; + + nsresult rv = ReadEntireStore(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +template +static nsresult +Merge(ChunkSet* aStoreChunks, + nsTArray* aStorePrefixes, + ChunkSet& aUpdateChunks, + nsTArray& aUpdatePrefixes) +{ + EntrySort(aUpdatePrefixes); + + T* updateIter = aUpdatePrefixes.Elements(); + T* updateEnd = aUpdatePrefixes.Elements() + aUpdatePrefixes.Length(); + + T* storeIter = aStorePrefixes->Elements(); + T* storeEnd = aStorePrefixes->Elements() + aStorePrefixes->Length(); + + // use a separate array so we can keep the iterators valid + // if the nsTArray grows + nsTArray adds; + + for (; updateIter != updateEnd; updateIter++) { + // XXX: binary search for insertion point might be faster in common + // case? + while (storeIter < storeEnd && (storeIter->Compare(*updateIter) < 0)) { + // skip forward to matching element (or not...) 
+ storeIter++; + } + // no match, add + if (storeIter == storeEnd + || storeIter->Compare(*updateIter) != 0) { + if (!adds.AppendElement(*updateIter)) + return NS_ERROR_OUT_OF_MEMORY; + } + } + + // chunks can be empty, but we should still report we have them + // to make the chunkranges continuous + aStoreChunks->Merge(aUpdateChunks); + + aStorePrefixes->AppendElements(adds); + EntrySort(*aStorePrefixes); + + return NS_OK; +} + +nsresult +HashStore::ApplyUpdate(TableUpdate &update) +{ + nsresult rv = mAddExpirations.Merge(update.AddExpirations()); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mSubExpirations.Merge(update.SubExpirations()); + NS_ENSURE_SUCCESS(rv, rv); + + rv = Expire(); + NS_ENSURE_SUCCESS(rv, rv); + + rv = Merge(&mAddChunks, &mAddPrefixes, + update.AddChunks(), update.AddPrefixes()); + NS_ENSURE_SUCCESS(rv, rv); + + rv = Merge(&mAddChunks, &mAddCompletes, + update.AddChunks(), update.AddCompletes()); + NS_ENSURE_SUCCESS(rv, rv); + + rv = Merge(&mSubChunks, &mSubPrefixes, + update.SubChunks(), update.SubPrefixes()); + NS_ENSURE_SUCCESS(rv, rv); + + rv = Merge(&mSubChunks, &mSubCompletes, + update.SubChunks(), update.SubCompletes()); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult +HashStore::Rebuild() +{ + NS_ASSERTION(mInUpdate, "Must be in update to rebuild."); + + nsresult rv = ProcessSubs(); + NS_ENSURE_SUCCESS(rv, rv); + + UpdateHeader(); + + return NS_OK; +} + +template +static void +ExpireEntries(nsTArray* aEntries, ChunkSet& aExpirations) +{ + T* addIter = aEntries->Elements(); + T* end = aEntries->Elements() + aEntries->Length(); + + for (T *iter = addIter; iter != end; iter++) { + if (!aExpirations.Has(iter->Chunk())) { + *addIter = *iter; + addIter++; + } + } + + aEntries->SetLength(addIter - aEntries->Elements()); +} + +nsresult +HashStore::Expire() +{ + ExpireEntries(&mAddPrefixes, mAddExpirations); + ExpireEntries(&mAddCompletes, mAddExpirations); + ExpireEntries(&mSubPrefixes, mSubExpirations); + ExpireEntries(&mSubCompletes, mSubExpirations); + + mAddChunks.Remove(mAddExpirations); + mSubChunks.Remove(mSubExpirations); + + mAddExpirations.Clear(); + mSubExpirations.Clear(); + + return NS_OK; +} + +template +nsresult DeflateWriteTArray(nsIOutputStream* aStream, nsTArray& aIn) +{ + uLongf insize = aIn.Length() * sizeof(T); + uLongf outsize = compressBound(insize); + nsTArray outBuff; + outBuff.SetLength(outsize); + + int zerr = compress(reinterpret_cast(outBuff.Elements()), + &outsize, + reinterpret_cast(aIn.Elements()), + insize); + if (zerr != Z_OK) { + return NS_ERROR_FAILURE; + } + LOG(("DeflateWriteTArray: %d in %d out", insize, outsize)); + + outBuff.TruncateLength(outsize); + + // Length of compressed data stream + PRUint32 dataLen = outBuff.Length(); + PRUint32 written; + nsresult rv = aStream->Write(reinterpret_cast(&dataLen), sizeof(dataLen), &written); + NS_ENSURE_SUCCESS(rv, rv); + + NS_ASSERTION(written == sizeof(dataLen), "Error writing deflate length"); + + // Store to stream + rv = WriteTArray(aStream, outBuff); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +template +nsresult InflateReadTArray(nsIInputStream* aStream, nsTArray* aOut, + PRUint32 aExpectedSize) +{ + + PRUint32 inLen; + PRUint32 read; + nsresult rv = aStream->Read(reinterpret_cast(&inLen), sizeof(inLen), &read); + NS_ENSURE_SUCCESS(rv, rv); + + NS_ASSERTION(read == sizeof(inLen), "Error reading inflate length"); + + nsTArray inBuff; + inBuff.SetLength(inLen); + + rv = ReadTArray(aStream, &inBuff, inLen); + NS_ENSURE_SUCCESS(rv, rv); + + uLongf insize = inLen; + 
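// (Layout note, inferred from DeflateWriteTArray above: each deflated
+  // array is stored as a PRUint32 byte count followed by zlib-compressed
+  // element data. The uncompressed element count comes from the store
+  // header, which is why callers must pass aExpectedSize.)
+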
uLongf outsize = aExpectedSize * sizeof(T); + aOut->SetLength(aExpectedSize); + + int zerr = uncompress(reinterpret_cast(aOut->Elements()), + &outsize, + reinterpret_cast(inBuff.Elements()), + insize); + if (zerr != Z_OK) { + return NS_ERROR_FAILURE; + } + LOG(("InflateReadTArray: %d in %d out", insize, outsize)); + + NS_ASSERTION(outsize == aExpectedSize * sizeof(T), "Decompression size mismatch"); + + return NS_OK; +} + +nsresult +HashStore::ReadAddPrefixes() +{ + nsTArray chunks; + PRUint32 count = mHeader.numAddPrefixes; + + nsresult rv = InflateReadTArray(mInputStream, &chunks, count); + NS_ENSURE_SUCCESS(rv, rv); + + mAddPrefixes.SetCapacity(count); + for (uint32 i = 0; i < count; i++) { + AddPrefix *add = mAddPrefixes.AppendElement(); + add->prefix.FromUint32(0); + add->addChunk = chunks[i]; + } + + return NS_OK; +} + +nsresult +HashStore::ReadSubPrefixes() +{ + nsTArray addchunks; + nsTArray subchunks; + nsTArray prefixes; + PRUint32 count = mHeader.numSubPrefixes; + + nsresult rv = InflateReadTArray(mInputStream, &addchunks, count); + NS_ENSURE_SUCCESS(rv, rv); + + rv = InflateReadTArray(mInputStream, &subchunks, count); + NS_ENSURE_SUCCESS(rv, rv); + + rv = ReadTArray(mInputStream, &prefixes, count); + NS_ENSURE_SUCCESS(rv, rv); + + mSubPrefixes.SetCapacity(count); + for (uint32 i = 0; i < count; i++) { + SubPrefix *sub = mSubPrefixes.AppendElement(); + sub->addChunk = addchunks[i]; + sub->prefix = prefixes[i]; + sub->subChunk = subchunks[i]; + } + + return NS_OK; +} + +// Split up PrefixArray back into the constituents +nsresult +HashStore::WriteAddPrefixes(nsIOutputStream* aOut) +{ + nsTArray chunks; + PRUint32 count = mAddPrefixes.Length(); + chunks.SetCapacity(count); + + for (uint32 i = 0; i < count; i++) { + chunks.AppendElement(mAddPrefixes[i].Chunk()); + } + + nsresult rv = DeflateWriteTArray(aOut, chunks); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult +HashStore::WriteSubPrefixes(nsIOutputStream* aOut) +{ + nsTArray addchunks; + nsTArray subchunks; + nsTArray prefixes; + PRUint32 count = mSubPrefixes.Length(); + addchunks.SetCapacity(count); + subchunks.SetCapacity(count); + prefixes.SetCapacity(count); + + for (uint32 i = 0; i < count; i++) { + addchunks.AppendElement(mSubPrefixes[i].AddChunk()); + prefixes.AppendElement(mSubPrefixes[i].PrefixHash()); + subchunks.AppendElement(mSubPrefixes[i].Chunk()); + } + + nsresult rv = DeflateWriteTArray(aOut, addchunks); + NS_ENSURE_SUCCESS(rv, rv); + + rv = DeflateWriteTArray(aOut, subchunks); + NS_ENSURE_SUCCESS(rv, rv); + + // chunk-ordered prefixes are not compressible + rv = WriteTArray(aOut, prefixes); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult +HashStore::WriteFile() +{ + nsCOMPtr storeFile; + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile)); + NS_ENSURE_SUCCESS(rv, rv); + rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore")); + NS_ENSURE_SUCCESS(rv, rv); + + // Need to close the inputstream here *before* rewriting its file. + // Windows will fail with an access violation if we don't. + if (mInputStream) { + rv = mInputStream->Close(); + NS_ENSURE_SUCCESS(rv, rv); + } + + nsCOMPtr out; + rv = NS_NewCheckSummedOutputStream(getter_AddRefs(out), storeFile, + PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE); + NS_ENSURE_SUCCESS(rv, rv); + + PRUint32 written; + rv = out->Write(reinterpret_cast(&mHeader), sizeof(mHeader), &written); + NS_ENSURE_SUCCESS(rv, rv); + + // Write chunk numbers... 
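+  // (Sketch of the final .sbstore layout, in the write order below: Header,
+  // add chunk set, sub chunk set, deflated add-prefix chunk numbers (the
+  // 4-byte add prefixes themselves live in the PrefixSet and are restored
+  // via AugmentAdds), deflated sub-prefix add/sub chunk numbers plus raw sub
+  // prefixes, add completes, sub completes, and the trailing MD5 checksum
+  // appended by nsCheckSummedOutputStream.)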
+ rv = mAddChunks.Write(out); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mSubChunks.Write(out); + NS_ENSURE_SUCCESS(rv, rv); + + // Write hashes.. + rv = WriteAddPrefixes(out); + NS_ENSURE_SUCCESS(rv, rv); + + rv = WriteSubPrefixes(out); + NS_ENSURE_SUCCESS(rv, rv); + + rv = WriteTArray(out, mAddCompletes); + NS_ENSURE_SUCCESS(rv, rv); + + rv = WriteTArray(out, mSubCompletes); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr safeOut = do_QueryInterface(out, &rv); + NS_ENSURE_SUCCESS(rv, rv); + + rv = safeOut->Finish(); + NS_ENSURE_SUCCESS(rv, rv); + + // Reopen the file now that we've rewritten it. + nsCOMPtr origStream; + rv = NS_NewLocalFileInputStream(getter_AddRefs(origStream), storeFile, + PR_RDONLY); + NS_ENSURE_SUCCESS(rv, rv); + + rv = NS_NewBufferedInputStream(getter_AddRefs(mInputStream), origStream, + BUFFER_SIZE); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult +HashStore::FinishUpdate() +{ + // Drop add/sub data, it's only used during updates. + mAddPrefixes.Clear(); + mSubPrefixes.Clear(); + mAddCompletes.Clear(); + mSubCompletes.Clear(); + + return NS_OK; +} + +template +static void +Erase(nsTArray* array, T* iterStart, T* iterEnd) +{ + uint32 start = iterStart - array->Elements(); + uint32 count = iterEnd - iterStart; + + if (count > 0) { + array->RemoveElementsAt(start, count); + } +} + +// Find items matching between |subs| and |adds|, and remove them, +// recording the item from |adds| in |adds_removed|. To minimize +// copies, the inputs are processing in parallel, so |subs| and |adds| +// should be compatibly ordered (either by SBAddPrefixLess or +// SBAddPrefixHashLess). +// +// |predAS| provides add < sub, |predSA| provides sub < add, for the +// tightest compare appropriate (see calls in SBProcessSubs). +template +static void +KnockoutSubs(nsTArray* aSubs, nsTArray* aAdds) +{ + // Keep a pair of output iterators for writing kept items. Due to + // deletions, these may lag the main iterators. Using erase() on + // individual items would result in O(N^2) copies. Using a list + // would work around that, at double or triple the memory cost. + TAdd* addOut = aAdds->Elements(); + TAdd* addIter = aAdds->Elements(); + + TSub* subOut = aSubs->Elements(); + TSub* subIter = aSubs->Elements(); + + TAdd* addEnd = addIter + aAdds->Length(); + TSub* subEnd = subIter + aSubs->Length(); + + while (addIter != addEnd && subIter != subEnd) { + // additer compare, so it compares on add chunk + int32 cmp = addIter->Compare(*subIter); + if (cmp > 0) { + // If |*sub_iter| < |*add_iter|, retain the sub. + *subOut = *subIter; + ++subOut; + ++subIter; + } else if (cmp < 0) { + // If |*add_iter| < |*sub_iter|, retain the add. + *addOut = *addIter; + ++addOut; + ++addIter; + } else { + // Drop equal items + ++addIter; + ++subIter; + } + } + + Erase(aAdds, addOut, addIter); + Erase(aSubs, subOut, subIter); +} + +// Remove items in |removes| from |fullHashes|. |fullHashes| and +// |removes| should be ordered by SBAddPrefix component. +template +static void +RemoveMatchingPrefixes(const SubPrefixArray& aSubs, nsTArray* aFullHashes) +{ + // Where to store kept items. + T* out = aFullHashes->Elements(); + T* hashIter = out; + T* hashEnd = aFullHashes->Elements() + aFullHashes->Length(); + + SubPrefix const * removeIter = aSubs.Elements(); + SubPrefix const * removeEnd = aSubs.Elements() + aSubs.Length(); + + while (hashIter != hashEnd && removeIter != removeEnd) { + int32 cmp = removeIter->CompareAlt(*hashIter); + if (cmp > 0) { + // Keep items less than |*removeIter|. 
+ *out = *hashIter; + ++out; + ++hashIter; + } else if (cmp < 0) { + // No hit for |*removeIter|, bump it forward. + ++removeIter; + } else { + // Drop equal items, there may be multiple hits. + do { + ++hashIter; + } while (hashIter != hashEnd && + !(removeIter->CompareAlt(*hashIter) < 0)); + ++removeIter; + } + } + Erase(aFullHashes, out, hashIter); +} + +nsresult +HashStore::ProcessSubs() +{ + EntrySort(mAddPrefixes); + EntrySort(mSubPrefixes); + EntrySort(mAddCompletes); + EntrySort(mSubCompletes); + + KnockoutSubs(&mSubPrefixes, &mAddPrefixes); + + RemoveMatchingPrefixes(mSubPrefixes, &mAddCompletes); + RemoveMatchingPrefixes(mSubPrefixes, &mSubCompletes); + + KnockoutSubs(&mSubCompletes, &mAddCompletes); + + // Clean up temporary subs used for knocking out completes + ChunkSet dummyChunks; + dummyChunks.Set(0); + ExpireEntries(&mSubPrefixes, dummyChunks); + ExpireEntries(&mSubCompletes, dummyChunks); + mSubChunks.Remove(dummyChunks); + + return NS_OK; +} + +nsresult +HashStore::AugmentAdds(const nsTArray& aPrefixes) +{ + uint32 cnt = aPrefixes.Length(); + if (cnt != mAddPrefixes.Length()) { + LOG(("Amount of prefixes in cache not consistent with store (%d vs %d)", + aPrefixes.Length(), mAddPrefixes.Length())); + return NS_ERROR_FAILURE; + } + for (uint32 i = 0; i < cnt; i++) { + mAddPrefixes[i].prefix.FromUint32(aPrefixes[i]); + } + return NS_OK; +} + +} +} diff --git a/toolkit/components/url-classifier/HashStore.h b/toolkit/components/url-classifier/HashStore.h new file mode 100644 index 000000000000..bf76984eea0b --- /dev/null +++ b/toolkit/components/url-classifier/HashStore.h @@ -0,0 +1,213 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Url Classifier code + * + * The Initial Developer of the Original Code is + * the Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2011 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Dave Camp + * Gian-Carlo Pascutto + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef HashStore_h__
+#define HashStore_h__
+
+#include "Entries.h"
+#include "ChunkSet.h"
+
+#include "nsString.h"
+#include "nsTArray.h"
+#include "nsIFile.h"
+#include "nsIFileStreams.h"
+#include "nsCOMPtr.h"
+
+namespace mozilla {
+namespace safebrowsing {
+
+class TableUpdate {
+public:
+  TableUpdate(const nsACString& aTable)
+      : mTable(aTable), mLocalUpdate(false) {}
+  const nsCString& TableName() const { return mTable; }
+
+  bool Empty() const {
+    return mAddChunks.Length() == 0 &&
+      mSubChunks.Length() == 0 &&
+      mAddExpirations.Length() == 0 &&
+      mSubExpirations.Length() == 0 &&
+      mAddPrefixes.Length() == 0 &&
+      mSubPrefixes.Length() == 0 &&
+      mAddCompletes.Length() == 0 &&
+      mSubCompletes.Length() == 0;
+  }
+
+  void NewAddChunk(PRUint32 aChunk) { mAddChunks.Set(aChunk); }
+  void NewSubChunk(PRUint32 aChunk) { mSubChunks.Set(aChunk); }
+
+  void NewAddExpiration(PRUint32 aChunk) { mAddExpirations.Set(aChunk); }
+  void NewSubExpiration(PRUint32 aChunk) { mSubExpirations.Set(aChunk); }
+
+  void NewAddPrefix(PRUint32 aAddChunk, const Prefix& aPrefix);
+  void NewSubPrefix(PRUint32 aAddChunk, const Prefix& aPrefix, PRUint32 aSubChunk);
+  void NewAddComplete(PRUint32 aChunk, const Completion& aCompletion);
+  void NewSubComplete(PRUint32 aAddChunk, const Completion& aCompletion,
+                      PRUint32 aSubChunk);
+  void SetLocalUpdate(void) { mLocalUpdate = true; }
+  bool IsLocalUpdate(void) { return mLocalUpdate; }
+
+  ChunkSet& AddChunks() { return mAddChunks; }
+  ChunkSet& SubChunks() { return mSubChunks; }
+
+  ChunkSet& AddExpirations() { return mAddExpirations; }
+  ChunkSet& SubExpirations() { return mSubExpirations; }
+
+  AddPrefixArray& AddPrefixes() { return mAddPrefixes; }
+  SubPrefixArray& SubPrefixes() { return mSubPrefixes; }
+  AddCompleteArray& AddCompletes() { return mAddCompletes; }
+  SubCompleteArray& SubCompletes() { return mSubCompletes; }
+
+private:
+  nsCString mTable;
+  // Update not from the remote server (no freshness)
+  bool mLocalUpdate;
+
+  ChunkSet mAddChunks;
+  ChunkSet mSubChunks;
+  ChunkSet mAddExpirations;
+  ChunkSet mSubExpirations;
+  AddPrefixArray mAddPrefixes;
+  SubPrefixArray mSubPrefixes;
+  AddCompleteArray mAddCompletes;
+  SubCompleteArray mSubCompletes;
+};
+
+class HashStore {
+public:
+  HashStore(const nsACString& aTableName, nsIFile* aStoreFile);
+  ~HashStore();
+
+  const nsCString& TableName() const { return mTableName; }
+
+  nsresult Open();
+  nsresult AugmentAdds(const nsTArray<PRUint32>& aPrefixes);
+
+  ChunkSet& AddChunks() { return mAddChunks; }
+  ChunkSet& SubChunks() { return mSubChunks; }
+  const AddPrefixArray& AddPrefixes() const { return mAddPrefixes; }
+  const AddCompleteArray& AddCompletes() const { return mAddCompletes; }
+  const SubPrefixArray& SubPrefixes() const { return mSubPrefixes; }
+  const SubCompleteArray& SubCompletes() const { return mSubCompletes; }
+
+  // =======
+  // Updates
+  // =======
+  // Begin the update process. Reads the store into memory.
+  nsresult BeginUpdate();
+
+  // Imports the data from a TableUpdate.
+  nsresult ApplyUpdate(TableUpdate &aUpdate);
+
+  // Process expired chunks.
+  nsresult Expire();
+
+  // Rebuild the store, incorporating all the applied updates.
+  nsresult Rebuild();
+
+  // Write the current state of the store to disk.
+  // Calling this between ApplyUpdate() and Rebuild() will write an
+  // inconsistent state to disk.
+  nsresult WriteFile();
+
+  // Drop memory used during the update process.
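+  // Illustrative call order for one update cycle (a sketch of how the
+  // methods above fit together, not additional API):
+  //   Open(); BeginUpdate();
+  //   ApplyUpdate(u);   // once per TableUpdate
+  //   Rebuild(); WriteFile(); FinishUpdate();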
+ nsresult FinishUpdate(); + + // Force the entire store in memory + nsresult ReadEntireStore(); + +private: + static const int BUFFER_SIZE = 6 * 1024 * 1024; + + void Clear(); + nsresult Reset(); + + nsresult ReadHeader(); + nsresult SanityCheck(nsIFile* aStoreFile); + nsresult CalculateChecksum(nsCAutoString& aChecksum, bool aChecksumPresent); + nsresult CheckChecksum(nsIFile* aStoreFile); + void UpdateHeader(); + + nsresult EnsureChunkNumbers(); + nsresult ReadChunkNumbers(); + nsresult ReadHashes(); + nsresult ReadAddPrefixes(); + nsresult ReadSubPrefixes(); + + nsresult WriteAddPrefixes(nsIOutputStream* aOut); + nsresult WriteSubPrefixes(nsIOutputStream* aOut); + + nsresult ProcessSubs(); + + struct Header { + uint32 magic; + uint32 version; + uint32 numAddChunks; + uint32 numSubChunks; + uint32 numAddPrefixes; + uint32 numSubPrefixes; + uint32 numAddCompletes; + uint32 numSubCompletes; + }; + + Header mHeader; + + nsCString mTableName; + nsCOMPtr mStoreDirectory; + + bool mInUpdate; + + nsCOMPtr mInputStream; + + bool haveChunks; + ChunkSet mAddChunks; + ChunkSet mSubChunks; + + ChunkSet mAddExpirations; + ChunkSet mSubExpirations; + + AddPrefixArray mAddPrefixes; + AddCompleteArray mAddCompletes; + SubPrefixArray mSubPrefixes; + SubCompleteArray mSubCompletes; +}; + +} +} + +#endif diff --git a/toolkit/components/url-classifier/LookupCache.cpp b/toolkit/components/url-classifier/LookupCache.cpp new file mode 100644 index 000000000000..22ad00281922 --- /dev/null +++ b/toolkit/components/url-classifier/LookupCache.cpp @@ -0,0 +1,776 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Url Classifier code + * + * The Initial Developer of the Original Code is + * the Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2011 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Dave Camp + * Gian-Carlo Pascutto + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +#include "LookupCache.h" +#include "HashStore.h" +#include "nsISeekableStream.h" +#include "mozilla/Telemetry.h" +#include "prlog.h" +#include "prprf.h" + +// We act as the main entry point for all the real lookups, +// so note that those are not done to the actual HashStore. +// The latter solely exists to store the data needed to handle +// the updates from the protocol. + +// This module has its own store, which stores the Completions, +// mostly caching lookups that have happened over the net. +// The prefixes are cached/checked by looking them up in the +// PrefixSet. + +// Data format for the ".cache" files: +// uint32 magic Identify the file type +// uint32 version Version identifier for file format +// uint32 numCompletions Amount of completions stored +// 0...numCompletions 256-bit Completions + +// Name of the lookupcomplete cache +#define CACHE_SUFFIX ".cache" + +// Name of the persistent PrefixSet storage +#define PREFIXSET_SUFFIX ".pset" + +// NSPR_LOG_MODULES=UrlClassifierDbService:5 +extern PRLogModuleInfo *gUrlClassifierDbServiceLog; +#if defined(PR_LOGGING) +#define LOG(args) PR_LOG(gUrlClassifierDbServiceLog, PR_LOG_DEBUG, args) +#define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierDbServiceLog, 4) +#else +#define LOG(args) +#define LOG_ENABLED() (false) +#endif + +namespace mozilla { +namespace safebrowsing { + +const uint32 LOOKUPCACHE_MAGIC = 0x1231af3e; +const uint32 CURRENT_VERSION = 1; + +LookupCache::LookupCache(const nsACString& aTableName, nsIFile* aStoreDir) + : mPrimed(false) + , mTableName(aTableName) + , mStoreDirectory(aStoreDir) +{ +} + +nsresult +LookupCache::Init() +{ + mPrefixSet = new nsUrlClassifierPrefixSet(); + nsresult rv = mPrefixSet->Init(mTableName); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +LookupCache::~LookupCache() +{ +} + +nsresult +LookupCache::Open() +{ + nsCOMPtr storeFile; + + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(CACHE_SUFFIX)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = NS_NewLocalFileInputStream(getter_AddRefs(mInputStream), storeFile, + PR_RDONLY); + + if (NS_FAILED(rv) && rv != NS_ERROR_FILE_NOT_FOUND) { + Reset(); + return rv; + } + + if (rv == NS_ERROR_FILE_NOT_FOUND) { + Clear(); + UpdateHeader(); + return NS_OK; + } + + rv = ReadHeader(); + NS_ENSURE_SUCCESS(rv, rv); + + LOG(("ReadCompletions")); + rv = ReadCompletions(); + NS_ENSURE_SUCCESS(rv, rv); + + LOG(("Loading PrefixSet")); + rv = LoadPrefixSet(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult +LookupCache::Reset() +{ + LOG(("LookupCache resetting")); + + nsCOMPtr storeFile; + nsCOMPtr prefixsetFile; + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile)); + NS_ENSURE_SUCCESS(rv, rv); + rv = mStoreDirectory->Clone(getter_AddRefs(prefixsetFile)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(CACHE_SUFFIX)); + NS_ENSURE_SUCCESS(rv, rv); + rv = prefixsetFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = storeFile->Remove(false); + NS_ENSURE_SUCCESS(rv, rv); + rv = prefixsetFile->Remove(false); + NS_ENSURE_SUCCESS(rv, rv); + + Clear(); + + return NS_OK; +} + + +nsresult +LookupCache::Build(const AddPrefixArray& aAddPrefixes, + const AddCompleteArray& aAddCompletes) +{ + mCompletions.Clear(); + mCompletions.SetCapacity(aAddCompletes.Length()); + for (uint32 i = 0; i < 
aAddCompletes.Length(); i++) { + mCompletions.AppendElement(aAddCompletes[i].CompleteHash()); + } + mCompletions.Sort(); + + Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_COMPLETIONS, + static_cast(mCompletions.Length())); + + nsresult rv = ConstructPrefixSet(aAddPrefixes); + NS_ENSURE_SUCCESS(rv, rv); + mPrimed = true; + + Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_PREFIXES, + static_cast(aAddPrefixes.Length())); + + return NS_OK; +} + +#if defined(DEBUG) && defined(PR_LOGGING) +void +LookupCache::Dump() +{ + if (!LOG_ENABLED()) + return; + + for (uint32 i = 0; i < mCompletions.Length(); i++) { + nsCAutoString str; + mCompletions[i].ToString(str); + LOG(("Completion: %s", str.get())); + } +} +#endif + +nsresult +LookupCache::Has(const Completion& aCompletion, + const Completion& aHostkey, + const PRUint32 aHashKey, + bool* aHas, bool* aComplete, + Prefix* aOrigPrefix) +{ + *aHas = *aComplete = false; + + // check completion store first + if (mCompletions.BinaryIndexOf(aCompletion) != nsTArray::NoIndex) { + LOG(("Complete in %s", mTableName.get())); + *aComplete = true; + *aHas = true; + return NS_OK; + } + + PRUint32 prefix = aCompletion.ToUint32(); + PRUint32 hostkey = aHostkey.ToUint32(); + PRUint32 codedkey; + nsresult rv = KeyedHash(prefix, hostkey, aHashKey, &codedkey); + NS_ENSURE_SUCCESS(rv, rv); + + Prefix codedPrefix; + codedPrefix.FromUint32(codedkey); + *aOrigPrefix = codedPrefix; + + bool ready = true; + bool found; + rv = mPrefixSet->Probe(codedkey, &ready, &found); + NS_ENSURE_SUCCESS(rv, rv); + + LOG(("Probe in %s: %X, ready: %d found %d", mTableName.get(), prefix, ready, found)); + + if (found) { + *aHas = true; + } + + return NS_OK; +} + +nsresult +LookupCache::WriteFile() +{ + nsCOMPtr storeFile; + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile)); + NS_ENSURE_SUCCESS(rv, rv); + rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(CACHE_SUFFIX)); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr out; + rv = NS_NewSafeLocalFileOutputStream(getter_AddRefs(out), storeFile, + PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE); + NS_ENSURE_SUCCESS(rv, rv); + + UpdateHeader(); + LOG(("Writing %d completions", mHeader.numCompletions)); + + PRUint32 written; + rv = out->Write(reinterpret_cast(&mHeader), sizeof(mHeader), &written); + NS_ENSURE_SUCCESS(rv, rv); + + rv = WriteTArray(out, mCompletions); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr safeOut = do_QueryInterface(out); + rv = safeOut->Finish(); + NS_ENSURE_SUCCESS(rv, rv); + + rv = EnsureSizeConsistent(); + NS_ENSURE_SUCCESS(rv, rv); + + // Reopen the file now that we've rewritten it. 
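+  // (mInputStream must see the fresh .cache contents for any later
+  // ReadCompletions() call; the prefixes are persisted separately to the
+  // .pset file just below.)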
+ rv = NS_NewLocalFileInputStream(getter_AddRefs(mInputStream), storeFile, + PR_RDONLY); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr psFile; + rv = mStoreDirectory->Clone(getter_AddRefs(psFile)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mPrefixSet->StoreToFile(psFile); + NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "failed to store the prefixset"); + + return NS_OK; +} + +void +LookupCache::Clear() +{ + mCompletions.Clear(); + mPrefixSet->SetPrefixes(nsnull, 0); + mPrimed = false; +} + +void +LookupCache::UpdateHeader() +{ + mHeader.magic = LOOKUPCACHE_MAGIC; + mHeader.version = CURRENT_VERSION; + mHeader.numCompletions = mCompletions.Length(); +} + +nsresult +LookupCache::EnsureSizeConsistent() +{ + nsCOMPtr storeFile; + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile)); + NS_ENSURE_SUCCESS(rv, rv); + rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(CACHE_SUFFIX)); + NS_ENSURE_SUCCESS(rv, rv); + + PRInt64 fileSize; + rv = storeFile->GetFileSize(&fileSize); + NS_ENSURE_SUCCESS(rv, rv); + + if (fileSize < 0) { + return NS_ERROR_FAILURE; + } + + PRInt64 expectedSize = sizeof(mHeader) + + mHeader.numCompletions*sizeof(Completion); + if (expectedSize != fileSize) { + NS_WARNING("File length does not match. Probably corrupted."); + Reset(); + return NS_ERROR_FILE_CORRUPTED; + } + + return NS_OK; +} + +nsresult +LookupCache::ReadHeader() +{ + if (!mInputStream) { + Clear(); + UpdateHeader(); + return NS_OK; + } + + nsCOMPtr seekable = do_QueryInterface(mInputStream); + nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0); + NS_ENSURE_SUCCESS(rv, rv); + + void *buffer = &mHeader; + rv = NS_ReadInputStreamToBuffer(mInputStream, + &buffer, + sizeof(Header)); + NS_ENSURE_SUCCESS(rv, rv); + + if (mHeader.magic != LOOKUPCACHE_MAGIC || mHeader.version != CURRENT_VERSION) { + NS_WARNING("Unexpected header data in the store."); + Reset(); + return NS_ERROR_FILE_CORRUPTED; + } + LOG(("%d completions present", mHeader.numCompletions)); + + rv = EnsureSizeConsistent(); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult +LookupCache::ReadCompletions() +{ + if (!mHeader.numCompletions) { + mCompletions.Clear(); + return NS_OK; + } + + nsCOMPtr seekable = do_QueryInterface(mInputStream); + nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, sizeof(Header)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = ReadTArray(mInputStream, &mCompletions, mHeader.numCompletions); + NS_ENSURE_SUCCESS(rv, rv); + + LOG(("Read %d completions", mCompletions.Length())); + + return NS_OK; +} + +/* static */ bool +LookupCache::IsCanonicalizedIP(const nsACString& aHost) +{ + // The canonicalization process will have left IP addresses in dotted + // decimal with no surprises. 
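+  // (Illustrative, assuming the canonicalizer documented for the protocol:
+  // forms like "0x7f.0.0.1" or "2130706433" were already rewritten to
+  // "127.0.0.1" before we get here, so a plain dotted-quad parse suffices.)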
+  PRUint32 i1, i2, i3, i4;
+  char c;
+  if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c",
+                &i1, &i2, &i3, &i4, &c) == 4) {
+    return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
+  }
+
+  return false;
+}
+
+/* static */ nsresult
+LookupCache::GetKey(const nsACString& aSpec,
+                    Completion* aHash,
+                    nsCOMPtr<nsICryptoHash>& aCryptoHash)
+{
+  nsACString::const_iterator begin, end, iter;
+  aSpec.BeginReading(begin);
+  aSpec.EndReading(end);
+
+  iter = begin;
+  if (!FindCharInReadable('/', iter, end)) {
+    return NS_OK;
+  }
+
+  const nsCSubstring& host = Substring(begin, iter);
+
+  if (IsCanonicalizedIP(host)) {
+    nsCAutoString key;
+    key.Assign(host);
+    key.Append("/");
+    return aHash->FromPlaintext(key, aCryptoHash);
+  }
+
+  nsTArray<nsCString> hostComponents;
+  ParseString(PromiseFlatCString(host), '.', hostComponents);
+
+  if (hostComponents.Length() < 2)
+    return NS_ERROR_FAILURE;
+
+  PRInt32 last = PRInt32(hostComponents.Length()) - 1;
+  nsCAutoString lookupHost;
+
+  if (hostComponents.Length() > 2) {
+    lookupHost.Append(hostComponents[last - 2]);
+    lookupHost.Append(".");
+  }
+
+  lookupHost.Append(hostComponents[last - 1]);
+  lookupHost.Append(".");
+  lookupHost.Append(hostComponents[last]);
+  lookupHost.Append("/");
+
+  return aHash->FromPlaintext(lookupHost, aCryptoHash);
+}
+
+/* static */ nsresult
+LookupCache::GetLookupFragments(const nsACString& aSpec,
+                                nsTArray<nsCString>* aFragments)
+{
+  aFragments->Clear();
+
+  nsACString::const_iterator begin, end, iter;
+  aSpec.BeginReading(begin);
+  aSpec.EndReading(end);
+
+  iter = begin;
+  if (!FindCharInReadable('/', iter, end)) {
+    return NS_OK;
+  }
+
+  const nsCSubstring& host = Substring(begin, iter++);
+  nsCAutoString path;
+  path.Assign(Substring(iter, end));
+
+  /**
+   * From the protocol doc:
+   * For the hostname, the client will try at most 5 different strings. They
+   * are:
+   * a) The exact hostname of the url
+   * b) The 4 hostnames formed by starting with the last 5 components and
+   *    successively removing the leading component. The top-level component
+   *    can be skipped. This is not done if the hostname is a numerical IP.
+   */
+  nsTArray<nsCString> hosts;
+  hosts.AppendElement(host);
+
+  if (!IsCanonicalizedIP(host)) {
+    host.BeginReading(begin);
+    host.EndReading(end);
+    int numHostComponents = 0;
+    while (RFindInReadable(NS_LITERAL_CSTRING("."), begin, end) &&
+           numHostComponents < MAX_HOST_COMPONENTS) {
+      // don't bother checking toplevel domains
+      if (++numHostComponents >= 2) {
+        host.EndReading(iter);
+        hosts.AppendElement(Substring(end, iter));
+      }
+      end = begin;
+      host.BeginReading(begin);
+    }
+  }
+
+  /**
+   * From the protocol doc:
+   * For the path, the client will also try at most 6 different strings.
+   * They are:
+   * a) the exact path of the url, including query parameters
+   * b) the exact path of the url, without query parameters
+   * c) the 4 paths formed by starting at the root (/) and
+   *    successively appending path components, including a trailing
+   *    slash. This behavior should only extend up to the next-to-last
+   *    path component, that is, a trailing slash should never be
+   *    appended that was not present in the original url.
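+   *
+   * Worked example (in the spirit of the protocol doc; not text from this
+   * patch): for a.b.c/1/2.html?param=1 the fragments tried are
+   *   a.b.c/1/2.html?param=1, a.b.c/1/2.html, a.b.c/, a.b.c/1/,
+   *   b.c/1/2.html?param=1, b.c/1/2.html, b.c/, b.c/1/
+   * i.e. every host suffix crossed with every path prefix.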
+ */ + nsTArray paths; + nsCAutoString pathToAdd; + + path.BeginReading(begin); + path.EndReading(end); + iter = begin; + if (FindCharInReadable('?', iter, end)) { + pathToAdd = Substring(begin, iter); + paths.AppendElement(pathToAdd); + end = iter; + } + + int numPathComponents = 1; + iter = begin; + while (FindCharInReadable('/', iter, end) && + numPathComponents < MAX_PATH_COMPONENTS) { + iter++; + pathToAdd.Assign(Substring(begin, iter)); + paths.AppendElement(pathToAdd); + numPathComponents++; + } + + // If we haven't already done so, add the full path + if (!pathToAdd.Equals(path)) { + paths.AppendElement(path); + } + // Check an empty path (for whole-domain blacklist entries) + paths.AppendElement(EmptyCString()); + + for (PRUint32 hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) { + for (PRUint32 pathIndex = 0; pathIndex < paths.Length(); pathIndex++) { + nsCString key; + key.Assign(hosts[hostIndex]); + key.Append('/'); + key.Append(paths[pathIndex]); + LOG(("Chking %s", key.get())); + + aFragments->AppendElement(key); + } + } + + return NS_OK; +} + +/* static */ nsresult +LookupCache::GetHostKeys(const nsACString& aSpec, + nsTArray* aHostKeys) +{ + nsACString::const_iterator begin, end, iter; + aSpec.BeginReading(begin); + aSpec.EndReading(end); + + iter = begin; + if (!FindCharInReadable('/', iter, end)) { + return NS_OK; + } + + const nsCSubstring& host = Substring(begin, iter); + + if (IsCanonicalizedIP(host)) { + nsCString *key = aHostKeys->AppendElement(); + if (!key) + return NS_ERROR_OUT_OF_MEMORY; + + key->Assign(host); + key->Append("/"); + return NS_OK; + } + + nsTArray hostComponents; + ParseString(PromiseFlatCString(host), '.', hostComponents); + + if (hostComponents.Length() < 2) { + // no host or toplevel host, this won't match anything in the db + return NS_OK; + } + + // First check with two domain components + PRInt32 last = PRInt32(hostComponents.Length()) - 1; + nsCString *lookupHost = aHostKeys->AppendElement(); + if (!lookupHost) + return NS_ERROR_OUT_OF_MEMORY; + + lookupHost->Assign(hostComponents[last - 1]); + lookupHost->Append("."); + lookupHost->Append(hostComponents[last]); + lookupHost->Append("/"); + + // Now check with three domain components + if (hostComponents.Length() > 2) { + nsCString *lookupHost2 = aHostKeys->AppendElement(); + if (!lookupHost2) + return NS_ERROR_OUT_OF_MEMORY; + lookupHost2->Assign(hostComponents[last - 2]); + lookupHost2->Append("."); + lookupHost2->Append(*lookupHost); + } + + return NS_OK; +} + +/* We have both a prefix and a domain. Drop the domain, but + hash the domain, the prefix and a random value together, + ensuring any collisions happens at a different points for + different users. +*/ +/* static */ nsresult LookupCache::KeyedHash(PRUint32 aPref, PRUint32 aDomain, + PRUint32 aKey, PRUint32* aOut) +{ + /* This is a reimplementation of MurmurHash3 32-bit + based on the public domain C++ sources. 
+   http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+   for nblocks = 2
+  */
+  PRUint32 c1 = 0xCC9E2D51;
+  PRUint32 c2 = 0x1B873593;
+  PRUint32 c3 = 0xE6546B64;
+  PRUint32 c4 = 0x85EBCA6B;
+  PRUint32 c5 = 0xC2B2AE35;
+  PRUint32 h1 = aPref; // seed
+  PRUint32 k1;
+  PRUint32 karr[2];
+
+  karr[0] = aDomain;
+  karr[1] = aKey;
+
+  for (PRUint32 i = 0; i < 2; i++) {
+    k1 = karr[i];
+    k1 *= c1;
+    k1 = (k1 << 15) | (k1 >> (32-15));
+    k1 *= c2;
+
+    h1 ^= k1;
+    h1 = (h1 << 13) | (h1 >> (32-13));
+    h1 *= 5;
+    h1 += c3;
+  }
+
+  h1 ^= 2; // len
+  // fmix
+  h1 ^= h1 >> 16;
+  h1 *= c4;
+  h1 ^= h1 >> 13;
+  h1 *= c5;
+  h1 ^= h1 >> 16;
+
+  *aOut = h1;
+
+  return NS_OK;
+}
+
+bool LookupCache::IsPrimed()
+{
+  return mPrimed;
+}
+
+nsresult
+LookupCache::ConstructPrefixSet(const AddPrefixArray& aAddPrefixes)
+{
+  Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_CONSTRUCT_TIME> timer;
+
+  nsTArray<PRUint32> array;
+  array.SetCapacity(aAddPrefixes.Length());
+
+  for (uint32 i = 0; i < aAddPrefixes.Length(); i++) {
+    array.AppendElement(aAddPrefixes[i].PrefixHash().ToUint32());
+  }
+
+  if (array.IsEmpty()) {
+    // DB is empty, but put a sentinel to show that we looked
+    array.AppendElement(0);
+  }
+  // PrefixSet requires sorted order
+  array.Sort();
+
+  // construct new one, replace old entries
+  nsresult rv = mPrefixSet->SetPrefixes(array.Elements(), array.Length());
+  if (NS_FAILED(rv)) {
+    goto error_bailout;
+  }
+
+#ifdef DEBUG
+  PRUint32 size;
+  size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
+  LOG(("SB tree done, size = %d bytes\n", size));
+#endif
+
+  mPrimed = true;
+
+  return NS_OK;
+
+ error_bailout:
+  // load an empty prefixset so the browser can work
+  nsAutoTArray<PRUint32, 1> sentinel;
+  sentinel.Clear();
+  sentinel.AppendElement(0);
+  mPrefixSet->SetPrefixes(sentinel.Elements(), sentinel.Length());
+  if (rv == NS_ERROR_OUT_OF_MEMORY) {
+    Telemetry::Accumulate(Telemetry::URLCLASSIFIER_PS_OOM, 1);
+  }
+  return rv;
+}
+
+nsresult
+LookupCache::LoadPrefixSet()
+{
+  nsCOMPtr<nsIFile> psFile;
+  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  bool exists;
+  rv = psFile->Exists(&exists);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  if (exists) {
+    LOG(("stored PrefixSet exists, loading from disk"));
+    rv = mPrefixSet->LoadFromFile(psFile);
+  }
+  if (!exists || NS_FAILED(rv)) {
+    LOG(("no (usable) stored PrefixSet found"));
+  } else {
+    mPrimed = true;
+  }
+
+#ifdef DEBUG
+  if (mPrimed) {
+    PRUint32 size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
+    LOG(("SB tree done, size = %d bytes\n", size));
+  }
+#endif
+
+  return NS_OK;
+}
+
+nsresult
+LookupCache::GetPrefixes(nsTArray<PRUint32>* aAddPrefixes)
+{
+  if (!mPrimed) {
+    // This can happen if it's a new table, so no error.
+ LOG(("GetPrefixes from empty LookupCache")); + return NS_OK; + } + PRUint32 cnt; + PRUint32 *arr; + nsresult rv = mPrefixSet->GetPrefixes(&cnt, &arr); + NS_ENSURE_SUCCESS(rv, rv); + if (!aAddPrefixes->AppendElements(arr, cnt)) + return NS_ERROR_FAILURE; + nsMemory::Free(arr); + return NS_OK; +} + + +} +} diff --git a/toolkit/components/url-classifier/LookupCache.h b/toolkit/components/url-classifier/LookupCache.h new file mode 100644 index 000000000000..5ba07d5fe099 --- /dev/null +++ b/toolkit/components/url-classifier/LookupCache.h @@ -0,0 +1,186 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Url Classifier code + * + * The Initial Developer of the Original Code is + * the Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2011 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Dave Camp + * Gian-Carlo Pascutto + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef LookupCache_h__ +#define LookupCache_h__ + +#include "Entries.h" +#include "nsString.h" +#include "nsTArray.h" +#include "nsAutoPtr.h" +#include "nsCOMPtr.h" +#include "nsIFile.h" +#include "nsUrlClassifierPrefixSet.h" +#include "prlog.h" + +namespace mozilla { +namespace safebrowsing { + +#define MAX_HOST_COMPONENTS 5 +#define MAX_PATH_COMPONENTS 4 + +class LookupResult { +public: + LookupResult() : mComplete(false), mNoise(false), mFresh(false), mProtocolConfirmed(false) {} + + // The fragment that matched in the LookupCache + union { + Prefix prefix; + Completion complete; + } hash; + + const Prefix &PrefixHash() { return hash.prefix; } + const Completion &CompleteHash() { return hash.complete; } + + bool Confirmed() const { return (mComplete && mFresh) || mProtocolConfirmed; } + bool Complete() const { return mComplete; } + + // True if we have a complete match for this hash in the table. + bool mComplete; + + // True if this is a noise entry, i.e. 
an extra entry
+  // that is inserted to mask the true URL we are requesting
+  bool mNoise;
+
+  // Value of the actual key looked up in the prefixset (coded with the
+  // client key)
+  Prefix mCodedPrefix;
+
+  // True if we've updated this table recently enough.
+  bool mFresh;
+
+  bool mProtocolConfirmed;
+
+  nsCString mTableName;
+};
+
+typedef nsTArray<LookupResult> LookupResultArray;
+
+struct CacheResult {
+  AddComplete entry;
+  nsCString table;
+};
+typedef nsTArray<CacheResult> CacheResultArray;
+
+class LookupCache {
+public:
+  // Check for a canonicalized IP address.
+  static bool IsCanonicalizedIP(const nsACString& aHost);
+
+  // Take a lookup string (www.hostname.com/path/to/resource.html) and
+  // expand it into the set of fragments that should be searched for in an
+  // entry
+  static nsresult GetLookupFragments(const nsACString& aSpec,
+                                     nsTArray<nsCString>* aFragments);
+  // Similar to GetKey(), but if the domain contains three or more components,
+  // two keys will be returned:
+  //  hostname.com/foo/bar -> [hostname.com]
+  //  mail.hostname.com/foo/bar -> [hostname.com, mail.hostname.com]
+  //  www.mail.hostname.com/foo/bar -> [hostname.com, mail.hostname.com]
+  static nsresult GetHostKeys(const nsACString& aSpec,
+                              nsTArray<nsCString>* aHostKeys);
+  // Get the database key for a given URI. This is the top three
+  // domain components if they exist, otherwise the top two.
+  //  hostname.com/foo/bar -> hostname.com
+  //  mail.hostname.com/foo/bar -> mail.hostname.com
+  //  www.mail.hostname.com/foo/bar -> mail.hostname.com
+  static nsresult GetKey(const nsACString& aSpec, Completion* aHash,
+                         nsCOMPtr<nsICryptoHash>& aCryptoHash);
+
+  /* We have both a prefix and a domain. Drop the domain, but
+     hash the domain, the prefix and a random value together,
+     ensuring any collisions happen at different points for
+     different users.
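+     (Illustrative consequence, inferred from the callers in this patch:
+     the PrefixSet ends up storing KeyedHash(prefix, hostkey, clientKey)
+     values instead of raw 4-byte prefixes, so two profiles with different
+     keys collide on different inputs.)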
+ */ + static nsresult KeyedHash(PRUint32 aPref, PRUint32 aDomain, + PRUint32 aKey, PRUint32* aOut); + + LookupCache(const nsACString& aTableName, nsIFile* aStoreFile); + ~LookupCache(); + + const nsCString &TableName() const { return mTableName; } + + nsresult Init(); + nsresult Open(); + nsresult Build(const AddPrefixArray& aAddPrefixes, + const AddCompleteArray& aAddCompletes); + nsresult GetPrefixes(nsTArray* aAddPrefixes); + +#if DEBUG && defined(PR_LOGGING) + void Dump(); +#endif + nsresult WriteFile(); + nsresult Has(const Completion& aCompletion, + const Completion& aHostkey, + PRUint32 aHashKey, + bool* aHas, bool* aComplete, + Prefix* aOrigPrefix); + bool IsPrimed(); + +private: + + void Clear(); + nsresult Reset(); + void UpdateHeader(); + nsresult ReadHeader(); + nsresult EnsureSizeConsistent(); + nsresult ReadCompletions(); + // Construct a Prefix Set with known prefixes + nsresult LoadPrefixSet(); + nsresult ConstructPrefixSet(const AddPrefixArray& aAddPrefixes); + + struct Header { + uint32 magic; + uint32 version; + uint32 numCompletions; + }; + Header mHeader; + + bool mPrimed; + nsCString mTableName; + nsCOMPtr mStoreDirectory; + nsCOMPtr mInputStream; + CompletionArray mCompletions; + // Set of prefixes known to be in the database + nsRefPtr mPrefixSet; +}; + +} +} + +#endif diff --git a/toolkit/components/url-classifier/Makefile.in b/toolkit/components/url-classifier/Makefile.in index 7d6902331023..2381ed8afa2a 100644 --- a/toolkit/components/url-classifier/Makefile.in +++ b/toolkit/components/url-classifier/Makefile.in @@ -59,11 +59,17 @@ XPIDLSRCS = \ $(NULL) CPPSRCS = \ + ChunkSet.cpp \ + Classifier.cpp \ + HashStore.cpp \ + ProtocolParser.cpp \ + LookupCache.cpp \ nsUrlClassifierDBService.cpp \ nsUrlClassifierStreamUpdater.cpp \ nsUrlClassifierUtils.cpp \ nsUrlClassifierPrefixSet.cpp \ nsUrlClassifierProxies.cpp \ + nsCheckSummedOutputStream.cpp \ $(NULL) LOCAL_INCLUDES = \ diff --git a/toolkit/components/url-classifier/ProtocolParser.cpp b/toolkit/components/url-classifier/ProtocolParser.cpp new file mode 100644 index 000000000000..52c47e5b35ba --- /dev/null +++ b/toolkit/components/url-classifier/ProtocolParser.cpp @@ -0,0 +1,777 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Url Classifier code + * + * The Initial Developer of the Original Code is + * the Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2011 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Dave Camp + * Gian-Carlo Pascutto + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +#include "ProtocolParser.h" +#include "LookupCache.h" +#include "nsIKeyModule.h" +#include "nsNetCID.h" +#include "prlog.h" +#include "prnetdb.h" +#include "prprf.h" + +#include "nsUrlClassifierUtils.h" + +// NSPR_LOG_MODULES=UrlClassifierDbService:5 +extern PRLogModuleInfo *gUrlClassifierDbServiceLog; +#if defined(PR_LOGGING) +#define LOG(args) PR_LOG(gUrlClassifierDbServiceLog, PR_LOG_DEBUG, args) +#define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierDbServiceLog, 4) +#else +#define LOG(args) +#define LOG_ENABLED() (PR_FALSE) +#endif + +namespace mozilla { +namespace safebrowsing { + +// Updates will fail if fed chunks larger than this +const uint32 MAX_CHUNK_SIZE = (1024 * 1024); + +const uint32 DOMAIN_SIZE = 4; + +// Parse one stringified range of chunks of the form "n" or "n-m" from a +// comma-separated list of chunks. Upon return, 'begin' will point to the +// next range of chunks in the list of chunks. +static bool +ParseChunkRange(nsACString::const_iterator& aBegin, + const nsACString::const_iterator& aEnd, + PRUint32* aFirst, PRUint32* aLast) +{ + nsACString::const_iterator iter = aBegin; + FindCharInReadable(',', iter, aEnd); + + nsCAutoString element(Substring(aBegin, iter)); + aBegin = iter; + if (aBegin != aEnd) + aBegin++; + + PRUint32 numRead = PR_sscanf(element.get(), "%u-%u", aFirst, aLast); + if (numRead == 2) { + if (*aFirst > *aLast) { + PRUint32 tmp = *aFirst; + *aFirst = *aLast; + *aLast = tmp; + } + return true; + } + + if (numRead == 1) { + *aLast = *aFirst; + return true; + } + + return false; +} + +ProtocolParser::ProtocolParser(PRUint32 aHashKey) + : mState(PROTOCOL_STATE_CONTROL) + , mHashKey(aHashKey) + , mUpdateStatus(NS_OK) + , mUpdateWait(0) + , mResetRequested(false) + , mRekeyRequested(false) +{ +} + +ProtocolParser::~ProtocolParser() +{ + CleanupUpdates(); +} + +nsresult +ProtocolParser::Init(nsICryptoHash* aHasher) +{ + mCryptoHash = aHasher; + return NS_OK; +} + +/** + * Initialize HMAC for the stream. + * + * If serverMAC is empty, the update stream will need to provide a + * server MAC. 
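+ *
+ * (Scheme summary, inferred from this file rather than stated anywhere in
+ * the patch: the MAC is an HMAC-SHA1 keyed with the client key, computed
+ * over the raw update bytes; FinishHMAC() compares it against the server's
+ * URL-safe base64 MAC after UnUrlsafeBase64 conversion.)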
+ */ +nsresult +ProtocolParser::InitHMAC(const nsACString& aClientKey, + const nsACString& aServerMAC) +{ + mServerMAC = aServerMAC; + + nsresult rv; + nsCOMPtr keyObjectFactory( + do_GetService("@mozilla.org/security/keyobjectfactory;1", &rv)); + + if (NS_FAILED(rv)) { + NS_WARNING("Failed to get nsIKeyObjectFactory service"); + mUpdateStatus = rv; + return mUpdateStatus; + } + + nsCOMPtr keyObject; + rv = keyObjectFactory->KeyFromString(nsIKeyObject::HMAC, aClientKey, + getter_AddRefs(keyObject)); + if (NS_FAILED(rv)) { + NS_WARNING("Failed to create key object, maybe not FIPS compliant?"); + mUpdateStatus = rv; + return mUpdateStatus; + } + + mHMAC = do_CreateInstance(NS_CRYPTO_HMAC_CONTRACTID, &rv); + if (NS_FAILED(rv)) { + NS_WARNING("Failed to create nsICryptoHMAC instance"); + mUpdateStatus = rv; + return mUpdateStatus; + } + + rv = mHMAC->Init(nsICryptoHMAC::SHA1, keyObject); + if (NS_FAILED(rv)) { + NS_WARNING("Failed to initialize nsICryptoHMAC instance"); + mUpdateStatus = rv; + return mUpdateStatus; + } + return NS_OK; +} + +nsresult +ProtocolParser::FinishHMAC() +{ + if (NS_FAILED(mUpdateStatus)) { + return mUpdateStatus; + } + + if (mRekeyRequested) { + mUpdateStatus = NS_ERROR_FAILURE; + return mUpdateStatus; + } + + if (!mHMAC) { + return NS_OK; + } + + nsCAutoString clientMAC; + mHMAC->Finish(PR_TRUE, clientMAC); + + if (clientMAC != mServerMAC) { + NS_WARNING("Invalid update MAC!"); + LOG(("Invalid update MAC: expected %s, got %s", + clientMAC.get(), mServerMAC.get())); + mUpdateStatus = NS_ERROR_FAILURE; + } + return mUpdateStatus; +} + +void +ProtocolParser::SetCurrentTable(const nsACString& aTable) +{ + mTableUpdate = GetTableUpdate(aTable); +} + +nsresult +ProtocolParser::AppendStream(const nsACString& aData) +{ + if (NS_FAILED(mUpdateStatus)) + return mUpdateStatus; + + nsresult rv; + + // Digest the data if we have a server MAC. 
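+  // (If the server MAC is not yet known -- no m: control line so far --
+  // the bytes still accumulate in mPending, and ProcessMAC() below replays
+  // them into the HMAC once the MAC arrives.)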
+ if (mHMAC && !mServerMAC.IsEmpty()) { + rv = mHMAC->Update(reinterpret_cast(aData.BeginReading()), + aData.Length()); + if (NS_FAILED(rv)) { + mUpdateStatus = rv; + return rv; + } + } + + mPending.Append(aData); + + bool done = false; + while (!done) { + if (mState == PROTOCOL_STATE_CONTROL) { + rv = ProcessControl(&done); + } else if (mState == PROTOCOL_STATE_CHUNK) { + rv = ProcessChunk(&done); + } else { + NS_ERROR("Unexpected protocol state"); + rv = NS_ERROR_FAILURE; + } + if (NS_FAILED(rv)) { + mUpdateStatus = rv; + return rv; + } + } + return NS_OK; +} + +nsresult +ProtocolParser::ProcessControl(bool* aDone) +{ + nsresult rv; + + nsCAutoString line; + *aDone = true; + while (NextLine(line)) { + //LOG(("Processing %s\n", line.get())); + + if (line.EqualsLiteral("e:pleaserekey")) { + mRekeyRequested = true; + return NS_OK; + } else if (mHMAC && mServerMAC.IsEmpty()) { + rv = ProcessMAC(line); + NS_ENSURE_SUCCESS(rv, rv); + } else if (StringBeginsWith(line, NS_LITERAL_CSTRING("i:"))) { + SetCurrentTable(Substring(line, 2)); + } else if (StringBeginsWith(line, NS_LITERAL_CSTRING("n:"))) { + if (PR_sscanf(line.get(), "n:%d", &mUpdateWait) != 1) { + LOG(("Error parsing n: '%s' (%d)", line.get(), mUpdateWait)); + mUpdateWait = 0; + } + } else if (line.EqualsLiteral("r:pleasereset")) { + mResetRequested = true; + } else if (StringBeginsWith(line, NS_LITERAL_CSTRING("u:"))) { + rv = ProcessForward(line); + NS_ENSURE_SUCCESS(rv, rv); + } else if (StringBeginsWith(line, NS_LITERAL_CSTRING("a:")) || + StringBeginsWith(line, NS_LITERAL_CSTRING("s:"))) { + rv = ProcessChunkControl(line); + NS_ENSURE_SUCCESS(rv, rv); + *aDone = false; + return NS_OK; + } else if (StringBeginsWith(line, NS_LITERAL_CSTRING("ad:")) || + StringBeginsWith(line, NS_LITERAL_CSTRING("sd:"))) { + rv = ProcessExpirations(line); + NS_ENSURE_SUCCESS(rv, rv); + } + } + + *aDone = true; + return NS_OK; +} + + +nsresult +ProtocolParser::ProcessMAC(const nsCString& aLine) +{ + nsresult rv; + + LOG(("line: %s", aLine.get())); + + if (StringBeginsWith(aLine, NS_LITERAL_CSTRING("m:"))) { + mServerMAC = Substring(aLine, 2); + nsUrlClassifierUtils::UnUrlsafeBase64(mServerMAC); + + // The remainder of the pending update wasn't digested, digest it now. 
+ rv = mHMAC->Update(reinterpret_cast(mPending.BeginReading()), + mPending.Length()); + return rv; + } + + LOG(("No MAC specified!")); + return NS_ERROR_FAILURE; +} + +nsresult +ProtocolParser::ProcessExpirations(const nsCString& aLine) +{ + if (!mTableUpdate) { + NS_WARNING("Got an expiration without a table."); + return NS_ERROR_FAILURE; + } + const nsCSubstring &list = Substring(aLine, 3); + nsACString::const_iterator begin, end; + list.BeginReading(begin); + list.EndReading(end); + while (begin != end) { + PRUint32 first, last; + if (ParseChunkRange(begin, end, &first, &last)) { + for (PRUint32 num = first; num <= last; num++) { + if (aLine[0] == 'a') + mTableUpdate->NewAddExpiration(num); + else + mTableUpdate->NewSubExpiration(num); + } + } else { + return NS_ERROR_FAILURE; + } + } + return NS_OK; +} + +nsresult +ProtocolParser::ProcessChunkControl(const nsCString& aLine) +{ + if (!mTableUpdate) { + NS_WARNING("Got a chunk before getting a table."); + return NS_ERROR_FAILURE; + } + + mState = PROTOCOL_STATE_CHUNK; + char command; + + mChunkState.Clear(); + + if (PR_sscanf(aLine.get(), + "%c:%d:%d:%d", + &command, + &mChunkState.num, &mChunkState.hashSize, &mChunkState.length) + != 4) + { + return NS_ERROR_FAILURE; + } + + if (mChunkState.length > MAX_CHUNK_SIZE) { + return NS_ERROR_FAILURE; + } + + if (!(mChunkState.hashSize == PREFIX_SIZE || mChunkState.hashSize == COMPLETE_SIZE)) { + NS_WARNING("Invalid hash size specified in update."); + return NS_ERROR_FAILURE; + } + + mChunkState.type = (command == 'a') ? CHUNK_ADD : CHUNK_SUB; + + if (mChunkState.type == CHUNK_ADD) { + mTableUpdate->NewAddChunk(mChunkState.num); + } else { + mTableUpdate->NewSubChunk(mChunkState.num); + } + + return NS_OK; +} + +nsresult +ProtocolParser::ProcessForward(const nsCString& aLine) +{ + const nsCSubstring &forward = Substring(aLine, 2); + if (mHMAC) { + // We're expecting MACs alongside any url forwards. + nsCSubstring::const_iterator begin, end, sepBegin, sepEnd; + forward.BeginReading(begin); + sepBegin = begin; + + forward.EndReading(end); + sepEnd = end; + + if (!RFindInReadable(NS_LITERAL_CSTRING(","), sepBegin, sepEnd)) { + NS_WARNING("No MAC specified for a redirect in a request that expects a MAC"); + return NS_ERROR_FAILURE; + } + + nsCString serverMAC(Substring(sepEnd, end)); + nsUrlClassifierUtils::UnUrlsafeBase64(serverMAC); + return AddForward(Substring(begin, sepBegin), serverMAC); + } + return AddForward(forward, mServerMAC); +} + +nsresult +ProtocolParser::AddForward(const nsACString& aUrl, const nsACString& aMac) +{ + if (!mTableUpdate) { + NS_WARNING("Forward without a table name."); + return NS_ERROR_FAILURE; + } + + ForwardedUpdate *forward = mForwards.AppendElement(); + forward->table = mTableUpdate->TableName(); + forward->url.Assign(aUrl); + forward->mac.Assign(aMac); + + return NS_OK; +} + +nsresult +ProtocolParser::ProcessChunk(bool* aDone) +{ + if (!mTableUpdate) { + NS_WARNING("Processing chunk without an active table."); + return NS_ERROR_FAILURE; + } + + NS_ASSERTION(mChunkState.num != 0, "Must have a chunk number."); + + if (mPending.Length() < mChunkState.length) { + *aDone = true; + return NS_OK; + } + + // Pull the chunk out of the pending stream data. 
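+  // (mChunkState.length is the byte count announced by the a:/s: control
+  // line, "command:chunknum:hashsize:length", parsed in
+  // ProcessChunkControl() above.)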
+nsresult
+ProtocolParser::ProcessChunk(bool* aDone)
+{
+  if (!mTableUpdate) {
+    NS_WARNING("Processing chunk without an active table.");
+    return NS_ERROR_FAILURE;
+  }
+
+  NS_ASSERTION(mChunkState.num != 0, "Must have a chunk number.");
+
+  if (mPending.Length() < mChunkState.length) {
+    *aDone = true;
+    return NS_OK;
+  }
+
+  // Pull the chunk out of the pending stream data.
+  nsCAutoString chunk;
+  chunk.Assign(Substring(mPending, 0, mChunkState.length));
+  mPending = Substring(mPending, mChunkState.length);
+
+  *aDone = false;
+  mState = PROTOCOL_STATE_CONTROL;
+
+  //LOG(("Handling a %d-byte chunk", chunk.Length()));
+  if (StringEndsWith(mTableUpdate->TableName(), NS_LITERAL_CSTRING("-shavar"))) {
+    return ProcessShaChunk(chunk);
+  } else {
+    return ProcessPlaintextChunk(chunk);
+  }
+}
+
+/**
+ * Process a plaintext chunk (currently only used in unit tests).
+ */
+nsresult
+ProtocolParser::ProcessPlaintextChunk(const nsACString& aChunk)
+{
+  if (!mTableUpdate) {
+    NS_WARNING("Chunk received with no table.");
+    return NS_ERROR_FAILURE;
+  }
+
+  nsresult rv;
+  nsTArray<nsCString> lines;
+  ParseString(PromiseFlatCString(aChunk), '\n', lines);
+
+  // Non-hashed tables need to be hashed.
+  for (uint32 i = 0; i < lines.Length(); i++) {
+    nsCString& line = lines[i];
+
+    if (mChunkState.type == CHUNK_ADD) {
+      if (mChunkState.hashSize == COMPLETE_SIZE) {
+        Completion hash;
+        hash.FromPlaintext(line, mCryptoHash);
+        mTableUpdate->NewAddComplete(mChunkState.num, hash);
+      } else {
+        NS_ASSERTION(mChunkState.hashSize == 4, "Only 32-byte or 4-byte hashes can be used for add chunks.");
+        Completion hash;
+        Completion domHash;
+        Prefix newHash;
+        rv = LookupCache::GetKey(line, &domHash, mCryptoHash);
+        NS_ENSURE_SUCCESS(rv, rv);
+        hash.FromPlaintext(line, mCryptoHash);
+        PRUint32 codedHash;
+        rv = LookupCache::KeyedHash(hash.ToUint32(), domHash.ToUint32(), mHashKey, &codedHash);
+        NS_ENSURE_SUCCESS(rv, rv);
+        newHash.FromUint32(codedHash);
+        mTableUpdate->NewAddPrefix(mChunkState.num, newHash);
+      }
+    } else {
+      nsCString::const_iterator begin, iter, end;
+      line.BeginReading(begin);
+      line.EndReading(end);
+      iter = begin;
+      uint32 addChunk;
+      if (!FindCharInReadable(':', iter, end) ||
+          PR_sscanf(lines[i].get(), "%d:", &addChunk) != 1) {
+        NS_WARNING("Received sub chunk without associated add chunk.");
+        return NS_ERROR_FAILURE;
+      }
+      iter++;
+
+      if (mChunkState.hashSize == COMPLETE_SIZE) {
+        Completion hash;
+        hash.FromPlaintext(Substring(iter, end), mCryptoHash);
+        mTableUpdate->NewSubComplete(addChunk, hash, mChunkState.num);
+      } else {
+        NS_ASSERTION(mChunkState.hashSize == 4, "Only 32-byte or 4-byte hashes can be used for sub chunks.");
+        Prefix hash;
+        Completion domHash;
+        Prefix newHash;
+        rv = LookupCache::GetKey(Substring(iter, end), &domHash, mCryptoHash);
+        NS_ENSURE_SUCCESS(rv, rv);
+        hash.FromPlaintext(Substring(iter, end), mCryptoHash);
+        PRUint32 codedHash;
+        rv = LookupCache::KeyedHash(hash.ToUint32(), domHash.ToUint32(), mHashKey, &codedHash);
+        NS_ENSURE_SUCCESS(rv, rv);
+        newHash.FromUint32(codedHash);
+        mTableUpdate->NewSubPrefix(addChunk, newHash, mChunkState.num);
+        // Also sub the raw prefix under a fake chunk number (0) to knock out
+        // cached completes; the fake chunk is removed on the next update.
+        mTableUpdate->NewSubPrefix(addChunk, hash, 0);
+        mTableUpdate->NewSubChunk(0);
+      }
+    }
+  }
+
+  return NS_OK;
+}
+
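+// The hashing in the plaintext path reduces to the following (simplified;
+// Completion/Prefix and the helpers come from Entries.h and LookupCache):
+//
+//   Completion full;                  // SHA-256 of the fragment
+//   full.FromPlaintext(line, mCryptoHash);
+//   Completion dom;                   // SHA-256 of the fragment's host key
+//   LookupCache::GetKey(line, &dom, mCryptoHash);
+//   PRUint32 coded;                   // per-profile keyed 32-bit prefix
+//   LookupCache::KeyedHash(full.ToUint32(), dom.ToUint32(), mHashKey, &coded);
+//
+// Only the keyed 32-bit value is stored, so raw prefixes never reach the
+// on-disk store and collisions land on different URLs for different profiles.
+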
+nsresult
+ProtocolParser::ProcessShaChunk(const nsACString& aChunk)
+{
+  PRUint32 start = 0;
+  while (start < aChunk.Length()) {
+    // First four bytes are the domain key.
+    Prefix domain;
+    domain.Assign(Substring(aChunk, start, DOMAIN_SIZE));
+    start += DOMAIN_SIZE;
+
+    // Then a count of entries.
+    uint8 numEntries = static_cast<uint8>(aChunk[start]);
+    start++;
+
+    nsresult rv;
+    if (mChunkState.type == CHUNK_ADD && mChunkState.hashSize == PREFIX_SIZE) {
+      rv = ProcessHostAdd(domain, numEntries, aChunk, &start);
+    } else if (mChunkState.type == CHUNK_ADD && mChunkState.hashSize == COMPLETE_SIZE) {
+      rv = ProcessHostAddComplete(numEntries, aChunk, &start);
+    } else if (mChunkState.type == CHUNK_SUB && mChunkState.hashSize == PREFIX_SIZE) {
+      rv = ProcessHostSub(domain, numEntries, aChunk, &start);
+    } else if (mChunkState.type == CHUNK_SUB && mChunkState.hashSize == COMPLETE_SIZE) {
+      rv = ProcessHostSubComplete(numEntries, aChunk, &start);
+    } else {
+      NS_WARNING("Unexpected chunk type/hash size!");
+      LOG(("Got an unexpected chunk type/hash size: %s:%d",
+           mChunkState.type == CHUNK_ADD ? "add" : "sub",
+           mChunkState.hashSize));
+      return NS_ERROR_FAILURE;
+    }
+    NS_ENSURE_SUCCESS(rv, rv);
+  }
+
+  return NS_OK;
+}
+
+nsresult
+ProtocolParser::ProcessHostAdd(const Prefix& aDomain, PRUint8 aNumEntries,
+                               const nsACString& aChunk, PRUint32* aStart)
+{
+  NS_ASSERTION(mChunkState.hashSize == PREFIX_SIZE,
+               "ProcessHostAdd should only be called for prefix hashes.");
+
+  PRUint32 codedHash;
+  PRUint32 domHash = aDomain.ToUint32();
+
+  if (aNumEntries == 0) {
+    nsresult rv = LookupCache::KeyedHash(domHash, domHash, mHashKey, &codedHash);
+    NS_ENSURE_SUCCESS(rv, rv);
+    Prefix newHash;
+    newHash.FromUint32(codedHash);
+    mTableUpdate->NewAddPrefix(mChunkState.num, newHash);
+    return NS_OK;
+  }
+
+  if (*aStart + (PREFIX_SIZE * aNumEntries) > aChunk.Length()) {
+    NS_WARNING("Chunk is not long enough to contain the expected entries.");
+    return NS_ERROR_FAILURE;
+  }
+
+  for (uint8 i = 0; i < aNumEntries; i++) {
+    Prefix hash;
+    hash.Assign(Substring(aChunk, *aStart, PREFIX_SIZE));
+    // Keyed as (prefix, domain), matching the sub and plaintext paths.
+    nsresult rv = LookupCache::KeyedHash(hash.ToUint32(), domHash, mHashKey, &codedHash);
+    NS_ENSURE_SUCCESS(rv, rv);
+    Prefix newHash;
+    newHash.FromUint32(codedHash);
+    mTableUpdate->NewAddPrefix(mChunkState.num, newHash);
+    *aStart += PREFIX_SIZE;
+  }
+
+  return NS_OK;
+}
+
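+// The shavar payload walked by ProcessShaChunk() is a sequence of per-host
+// records. Roughly:
+//
+//   [domain key (4 bytes)][count (1 byte)][count entries]
+//
+// where each add entry is a single hash (4 or 32 bytes), and each sub entry
+// is preceded by the 4-byte big-endian add chunk number it knocks out. A
+// count of zero means the domain key itself is the entry (for subs, a single
+// add chunk number still follows).
+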
+nsresult
+ProtocolParser::ProcessHostSub(const Prefix& aDomain, PRUint8 aNumEntries,
+                               const nsACString& aChunk, PRUint32 *aStart)
+{
+  NS_ASSERTION(mChunkState.hashSize == PREFIX_SIZE,
+               "ProcessHostSub should only be called for prefix hashes.");
+
+  PRUint32 codedHash;
+  PRUint32 domHash = aDomain.ToUint32();
+
+  if (aNumEntries == 0) {
+    if ((*aStart) + 4 > aChunk.Length()) {
+      NS_WARNING("Received a zero-entry sub chunk without an associated add.");
+      return NS_ERROR_FAILURE;
+    }
+
+    const nsCSubstring& addChunkStr = Substring(aChunk, *aStart, 4);
+    *aStart += 4;
+
+    uint32 addChunk;
+    memcpy(&addChunk, addChunkStr.BeginReading(), 4);
+    addChunk = PR_ntohl(addChunk);
+
+    nsresult rv = LookupCache::KeyedHash(domHash, domHash, mHashKey, &codedHash);
+    NS_ENSURE_SUCCESS(rv, rv);
+    Prefix newHash;
+    newHash.FromUint32(codedHash);
+
+    mTableUpdate->NewSubPrefix(addChunk, newHash, mChunkState.num);
+    // Also sub the raw domain prefix under a fake chunk number (0) to knock
+    // out cached completes; the fake chunk is removed on the next update.
+    mTableUpdate->NewSubPrefix(addChunk, aDomain, 0);
+    mTableUpdate->NewSubChunk(0);
+    return NS_OK;
+  }
+
+  if (*aStart + ((PREFIX_SIZE + 4) * aNumEntries) > aChunk.Length()) {
+    NS_WARNING("Chunk is not long enough to contain the expected entries.");
+    return NS_ERROR_FAILURE;
+  }
+
+  for (uint8 i = 0; i < aNumEntries; i++) {
+    const nsCSubstring& addChunkStr = Substring(aChunk, *aStart, 4);
+    *aStart += 4;
+
+    uint32 addChunk;
+    memcpy(&addChunk, addChunkStr.BeginReading(), 4);
+    addChunk = PR_ntohl(addChunk);
+
+    Prefix prefix;
+    prefix.Assign(Substring(aChunk, *aStart, PREFIX_SIZE));
+    *aStart += PREFIX_SIZE;
+
+    nsresult rv = LookupCache::KeyedHash(prefix.ToUint32(), domHash, mHashKey, &codedHash);
+    NS_ENSURE_SUCCESS(rv, rv);
+    Prefix newHash;
+    newHash.FromUint32(codedHash);
+
+    mTableUpdate->NewSubPrefix(addChunk, newHash, mChunkState.num);
+    // Also sub the raw prefix under a fake chunk number (0) to knock out
+    // cached completes; the fake chunk is removed on the next update.
+    mTableUpdate->NewSubPrefix(addChunk, prefix, 0);
+    mTableUpdate->NewSubChunk(0);
+  }
+
+  return NS_OK;
+}
+
+nsresult
+ProtocolParser::ProcessHostAddComplete(PRUint8 aNumEntries,
+                                       const nsACString& aChunk, PRUint32* aStart)
+{
+  NS_ASSERTION(mChunkState.hashSize == COMPLETE_SIZE,
+               "ProcessHostAddComplete should only be called for complete hashes.");
+
+  if (aNumEntries == 0) {
+    // A zero-entry complete chunk is tolerated, but carries no data.
+    NS_WARNING("Expected > 0 entries for a 32-byte hash add.");
+    return NS_OK;
+  }
+
+  if (*aStart + (COMPLETE_SIZE * aNumEntries) > aChunk.Length()) {
+    NS_WARNING("Chunk is not long enough to contain the expected entries.");
+    return NS_ERROR_FAILURE;
+  }
+
+  for (uint8 i = 0; i < aNumEntries; i++) {
+    Completion hash;
+    hash.Assign(Substring(aChunk, *aStart, COMPLETE_SIZE));
+    mTableUpdate->NewAddComplete(mChunkState.num, hash);
+    *aStart += COMPLETE_SIZE;
+  }
+
+  return NS_OK;
+}
+
+nsresult
+ProtocolParser::ProcessHostSubComplete(PRUint8 aNumEntries,
+                                       const nsACString& aChunk, PRUint32* aStart)
+{
+  NS_ASSERTION(mChunkState.hashSize == COMPLETE_SIZE,
+               "ProcessHostSubComplete should only be called for complete hashes.");
+
+  if (aNumEntries == 0) {
+    // A zero-entry complete chunk is tolerated, but carries no data.
+    NS_WARNING("Expected > 0 entries for a 32-byte hash sub.");
+    return NS_OK;
+  }
+
+  if (*aStart + ((COMPLETE_SIZE + 4) * aNumEntries) > aChunk.Length()) {
+    NS_WARNING("Chunk is not long enough to contain the expected entries.");
+    return NS_ERROR_FAILURE;
+  }
+
+  for (PRUint8 i = 0; i < aNumEntries; i++) {
+    Completion hash;
+    hash.Assign(Substring(aChunk, *aStart, COMPLETE_SIZE));
+    *aStart += COMPLETE_SIZE;
+
+    const nsCSubstring& addChunkStr = Substring(aChunk, *aStart, 4);
+    *aStart += 4;
+
+    uint32 addChunk;
+    memcpy(&addChunk, addChunkStr.BeginReading(), 4);
+    addChunk = PR_ntohl(addChunk);
+
+    mTableUpdate->NewSubComplete(addChunk, hash, mChunkState.num);
+  }
+
+  return NS_OK;
+}
+
+bool
+ProtocolParser::NextLine(nsACString& line)
+{
+  int32 newline = mPending.FindChar('\n');
+  if (newline == kNotFound) {
+    return false;
+  }
+  line.Assign(Substring(mPending, 0, newline));
+  mPending = Substring(mPending, newline + 1);
+  return true;
+}
+
+void
+ProtocolParser::CleanupUpdates()
+{
+  for (uint32 i = 0; i < mTableUpdates.Length(); i++) {
+    delete mTableUpdates[i];
+  }
+  mTableUpdates.Clear();
+}
+
+TableUpdate *
+ProtocolParser::GetTableUpdate(const nsACString& aTable)
+{
+  for (uint32 i = 0; i < mTableUpdates.Length(); i++) {
+    if (aTable.Equals(mTableUpdates[i]->TableName())) {
+      return mTableUpdates[i];
+    }
+  }
+
+  // We free automatically on destruction; ownership of these updates can be
+  // transferred to DBServiceWorker, which passes them back to Classifier
+  // when applying the update, and Classifier will free them.
+  TableUpdate *update = new TableUpdate(aTable);
+  mTableUpdates.AppendElement(update);
+  return update;
+}
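+
+// Rough end-to-end usage of this class, as driven by DBServiceWorker
+// (error handling elided; see ProtocolParser.h for the declarations):
+//
+//   ProtocolParser parser(hashKey);
+//   parser.Init(cryptoHash);              // optionally InitHMAC(...) too
+//   parser.Begin();
+//   parser.AppendStream(dataFromNetwork); // called once per data block
+//   parser.FinishHMAC();
+//   nsTArray<TableUpdate*> &updates = parser.GetTableUpdates();
+//   parser.ForgetTableUpdates();          // caller now owns the updates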
+
+}
+}
diff --git a/toolkit/components/url-classifier/ProtocolParser.h b/toolkit/components/url-classifier/ProtocolParser.h
new file mode 100644
index 000000000000..e263d4f43617
--- /dev/null
+++ b/toolkit/components/url-classifier/ProtocolParser.h
@@ -0,0 +1,151 @@
+//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Url Classifier code
+ *
+ * The Initial Developer of the Original Code is
+ * the Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2011
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Dave Camp
+ *   Gian-Carlo Pascutto
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef ProtocolParser_h__
+#define ProtocolParser_h__
+
+#include "HashStore.h"
+#include "nsICryptoHMAC.h"
+
+namespace mozilla {
+namespace safebrowsing {
+
+/**
+ * Some helpers for parsing the safebrowsing update protocol.
+ */
+class ProtocolParser {
+public:
+  struct ForwardedUpdate {
+    nsCString table;
+    nsCString url;
+    nsCString mac;
+  };
+
+  ProtocolParser(PRUint32 aHashKey);
+  ~ProtocolParser();
+
+  nsresult Status() const { return mUpdateStatus; }
+
+  nsresult Init(nsICryptoHash* aHasher);
+
+  nsresult InitHMAC(const nsACString& aClientKey,
+                    const nsACString& aServerMAC);
+  nsresult FinishHMAC();
+
+  void SetCurrentTable(const nsACString& aTable);
+
+  nsresult Begin();
+  nsresult AppendStream(const nsACString& aData);
+
+  // Forget the table updates that were created by this pass. It
+  // becomes the caller's responsibility to free them. This is not ideal.
+  TableUpdate *GetTableUpdate(const nsACString& aTable);
+  void ForgetTableUpdates() { mTableUpdates.Clear(); }
+  nsTArray<TableUpdate*> &GetTableUpdates() { return mTableUpdates; }
+
+  // Update information.
+  const nsTArray<ForwardedUpdate> &Forwards() const { return mForwards; }
+  int32 UpdateWait() { return mUpdateWait; }
+  bool ResetRequested() { return mResetRequested; }
+  bool RekeyRequested() { return mRekeyRequested; }
+
+private:
+  nsresult ProcessControl(bool* aDone);
+  nsresult ProcessMAC(const nsCString& aLine);
+  nsresult ProcessExpirations(const nsCString& aLine);
+  nsresult ProcessChunkControl(const nsCString& aLine);
+  nsresult ProcessForward(const nsCString& aLine);
+  nsresult AddForward(const nsACString& aUrl, const nsACString& aMac);
+  nsresult ProcessChunk(bool* done);
+  nsresult ProcessPlaintextChunk(const nsACString& aChunk);
+  nsresult ProcessShaChunk(const nsACString& aChunk);
+  nsresult ProcessHostAdd(const Prefix& aDomain, PRUint8 aNumEntries,
+                          const nsACString& aChunk, PRUint32* aStart);
+  nsresult ProcessHostSub(const Prefix& aDomain, PRUint8 aNumEntries,
+                          const nsACString& aChunk, PRUint32* aStart);
+  nsresult ProcessHostAddComplete(PRUint8 aNumEntries, const nsACString& aChunk,
+                                  PRUint32 *aStart);
+  nsresult ProcessHostSubComplete(PRUint8 numEntries, const nsACString& aChunk,
+                                  PRUint32* start);
+  bool NextLine(nsACString& aLine);
+
+  void CleanupUpdates();
+
+  enum ParserState {
+    PROTOCOL_STATE_CONTROL,
+    PROTOCOL_STATE_CHUNK
+  };
+  ParserState mState;
+
+  enum ChunkType {
+    CHUNK_ADD,
+    CHUNK_SUB
+  };
+
+  struct ChunkState {
+    ChunkType type;
+    uint32 num;
+    uint32 hashSize;
+    uint32 length;
+    void Clear() { num = 0; hashSize = 0; length = 0; }
+  };
+  ChunkState mChunkState;
+
+  PRUint32 mHashKey;
+  nsCOMPtr<nsICryptoHash> mCryptoHash;
+
+  nsresult mUpdateStatus;
+  nsCString mPending;
+
+  nsCOMPtr<nsICryptoHMAC> mHMAC;
+  nsCString mServerMAC;
+
+  uint32 mUpdateWait;
+  bool mResetRequested;
+  bool mRekeyRequested;
+
+  nsTArray<ForwardedUpdate> mForwards;
+  nsTArray<TableUpdate*> mTableUpdates;
+  TableUpdate *mTableUpdate;
+};
+
+}
+}
+
+#endif
diff --git a/toolkit/components/url-classifier/content/listmanager.js b/toolkit/components/url-classifier/content/listmanager.js
index bed67db4a840..144a917bd5d9 100644
--- a/toolkit/components/url-classifier/content/listmanager.js
+++ b/toolkit/components/url-classifier/content/listmanager.js
@@ -304,7 +304,7 @@ PROT_ListManager.prototype.maybeToggleUpdateChecking = function() {
  */
 PROT_ListManager.prototype.startUpdateChecker = function() {
   this.stopUpdateChecker();
-  
+
   // Schedule the first check for between 15 and 45 minutes.
   var repeatingUpdateDelay = this.updateInterval / 2;
   repeatingUpdateDelay += Math.floor(Math.random() * this.updateInterval);
diff --git a/toolkit/components/url-classifier/nsCheckSummedOutputStream.cpp b/toolkit/components/url-classifier/nsCheckSummedOutputStream.cpp
new file mode 100644
index 000000000000..90661f4d54d0
--- /dev/null
+++ b/toolkit/components/url-classifier/nsCheckSummedOutputStream.cpp
@@ -0,0 +1,92 @@
+//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Url Classifier code.
+ * + * The Initial Developer of the Original Code is + * the Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2011 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Gian-Carlo Pascutto + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "nsILocalFile.h" +#include "nsCRT.h" +#include "nsIFile.h" +#include "nsISupportsImpl.h" +#include "nsCheckSummedOutputStream.h" + +//////////////////////////////////////////////////////////////////////////////// +// nsCheckSummedOutputStream + +NS_IMPL_ISUPPORTS_INHERITED3(nsCheckSummedOutputStream, + nsSafeFileOutputStream, + nsISafeOutputStream, + nsIOutputStream, + nsIFileOutputStream) + +NS_IMETHODIMP +nsCheckSummedOutputStream::Init(nsIFile* file, PRInt32 ioFlags, PRInt32 perm, + PRInt32 behaviorFlags) +{ + nsresult rv; + mHash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mHash->Init(nsICryptoHash::MD5); + NS_ENSURE_SUCCESS(rv, rv); + + return nsSafeFileOutputStream::Init(file, ioFlags, perm, behaviorFlags); +} + +NS_IMETHODIMP +nsCheckSummedOutputStream::Finish() +{ + nsresult rv = mHash->Finish(false, mCheckSum); + NS_ENSURE_SUCCESS(rv, rv); + + PRUint32 written; + rv = nsSafeFileOutputStream::Write(reinterpret_cast(mCheckSum.BeginReading()), + mCheckSum.Length(), &written); + NS_ASSERTION(written == mCheckSum.Length(), "Error writing stream checksum"); + NS_ENSURE_SUCCESS(rv, rv); + + return nsSafeFileOutputStream::Finish(); +} + +NS_IMETHODIMP +nsCheckSummedOutputStream::Write(const char *buf, PRUint32 count, PRUint32 *result) +{ + nsresult rv = mHash->Update(reinterpret_cast(buf), count); + NS_ENSURE_SUCCESS(rv, rv); + + return nsSafeFileOutputStream::Write(buf, count, result); +} + +//////////////////////////////////////////////////////////////////////////////// diff --git a/toolkit/components/url-classifier/nsCheckSummedOutputStream.h b/toolkit/components/url-classifier/nsCheckSummedOutputStream.h new file mode 100644 index 000000000000..d0f50f9b1ae7 --- /dev/null +++ b/toolkit/components/url-classifier/nsCheckSummedOutputStream.h @@ -0,0 +1,86 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Url Classifier code. + * + * The Initial Developer of the Original Code is + * the Mozilla Foundation. + * Portions created by the Initial Developer are Copyright (C) 2011 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Gian-Carlo Pascutto + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef nsCheckSummedOutputStream_h__ +#define nsCheckSummedOutputStream_h__ + +#include "nsILocalFile.h" +#include "nsIFile.h" +#include "nsIOutputStream.h" +#include "nsICryptoHash.h" +#include "nsNetCID.h" +#include "nsString.h" +#include "../../../netwerk/base/src/nsFileStreams.h" +#include "nsToolkitCompsCID.h" + +class nsCheckSummedOutputStream : public nsSafeFileOutputStream +{ +public: + NS_DECL_ISUPPORTS_INHERITED + + // Size of MD5 hash in bytes + static const PRUint32 CHECKSUM_SIZE = 16; + + nsCheckSummedOutputStream() {} + virtual ~nsCheckSummedOutputStream() { nsSafeFileOutputStream::Close(); } + + NS_IMETHOD Finish(); + NS_IMETHOD Write(const char *buf, PRUint32 count, PRUint32 *result); + NS_IMETHOD Init(nsIFile* file, PRInt32 ioFlags, PRInt32 perm, PRInt32 behaviorFlags); + +protected: + nsCOMPtr mHash; + nsCAutoString mCheckSum; +}; + +// returns a file output stream which can be QI'ed to nsIFileOutputStream. +inline nsresult +NS_NewCheckSummedOutputStream(nsIOutputStream **result, + nsIFile *file, + PRInt32 ioFlags = -1, + PRInt32 perm = -1, + PRInt32 behaviorFlags = 0) +{ + nsCOMPtr out = new nsCheckSummedOutputStream(); + nsresult rv = out->Init(file, ioFlags, perm, behaviorFlags); + if (NS_SUCCEEDED(rv)) + NS_ADDREF(*result = out); // cannot use nsCOMPtr::swap + return rv; +} + +#endif diff --git a/toolkit/components/url-classifier/nsIUrlClassifierDBService.idl b/toolkit/components/url-classifier/nsIUrlClassifierDBService.idl index e15323f11c56..be3a673619dc 100644 --- a/toolkit/components/url-classifier/nsIUrlClassifierDBService.idl +++ b/toolkit/components/url-classifier/nsIUrlClassifierDBService.idl @@ -40,10 +40,12 @@ %{C++ #include "nsTArray.h" -class nsUrlClassifierLookupResult; +#include "Entries.h" +#include "LookupCache.h" %} -[ptr] native ResultArray(nsTArray); - +[ptr] native ResultArray(nsTArray); +[ptr] native CacheCompletionArray(nsTArray); +[ptr] native PrefixArray(mozilla::safebrowsing::PrefixArray); interface nsIUrlClassifierHashCompleter; // Interface for JS function callbacks @@ -231,14 +233,14 @@ interface nsIUrlClassifierDBService : nsISupports * Interface for the actual worker thread. Implementations of this need not * be thread aware and just work on the database. 
*/ -[scriptable, uuid(2af84c09-269e-4fc2-b28f-af56717db118)] +[scriptable, uuid(0445be75-b114-43ea-89dc-aa16af26e77e)] interface nsIUrlClassifierDBServiceWorker : nsIUrlClassifierDBService { // Provide a way to forcibly close the db connection. void closeDb(); - // Cache the results of a hash completion. - [noscript]void cacheCompletions(in ResultArray entries); + [noscript]void cacheCompletions(in CacheCompletionArray completions); + [noscript]void cacheMisses(in PrefixArray misses); }; /** @@ -247,7 +249,7 @@ interface nsIUrlClassifierDBServiceWorker : nsIUrlClassifierDBService * lookup to provide a set of possible results, which the main thread * may need to expand using an nsIUrlClassifierCompleter. */ -[uuid(f1dc83c6-ad43-4f0f-a809-fd43de7de8a4)] +[uuid(b903dc8f-dff1-42fe-894b-36e7a59bb801)] interface nsIUrlClassifierLookupCallback : nsISupports { /** diff --git a/toolkit/components/url-classifier/nsIUrlClassifierPrefixSet.idl b/toolkit/components/url-classifier/nsIUrlClassifierPrefixSet.idl index 0d6624cbfff7..1df5a7878412 100644 --- a/toolkit/components/url-classifier/nsIUrlClassifierPrefixSet.idl +++ b/toolkit/components/url-classifier/nsIUrlClassifierPrefixSet.idl @@ -39,28 +39,26 @@ #include "nsISupports.idl" #include "nsIFile.idl" -interface nsIArray; - // Note that the PrefixSet name is historical and we do properly support // duplicated values, so it's really a Prefix Trie. // All methods are thread-safe. - -[scriptable, uuid(519c8519-0f30-426b-bb7b-c400ba0318e2)] +[scriptable, uuid(b21b0fa1-20d2-422a-b2cc-b289c9325811)] interface nsIUrlClassifierPrefixSet : nsISupports { + // Initialize the PrefixSet. Give it a name for memory reporting. + void init(in ACString aName); // Fills the PrefixSet with the given array of prefixes. // Can send an empty Array to clear the tree. A truly "empty tree" // cannot be represented, so put a sentinel value if that is required // Requires array to be sorted. void setPrefixes([const, array, size_is(aLength)] in unsigned long aPrefixes, in unsigned long aLength); + void getPrefixes(out unsigned long aCount, + [array, size_is(aCount), retval] out unsigned long aPrefixes); // Do a lookup in the PrefixSet, return whether the value is present. // If aReady is set, we will block until there are any entries. // If not set, we will return in aReady whether we were ready or not. - boolean probe(in unsigned long aPrefix, in unsigned long aKey, - inout boolean aReady); - // Return the key that is used to randomize the collisions in the prefixes. 
- PRUint32 getKey(); + boolean probe(in unsigned long aPrefix, inout boolean aReady); boolean isEmpty(); void loadFromFile(in nsIFile aFile); void storeToFile(in nsIFile aFile); diff --git a/toolkit/components/url-classifier/nsUrlClassifierDBService.cpp b/toolkit/components/url-classifier/nsUrlClassifierDBService.cpp index c135f358954c..f2870f8e405e 100644 --- a/toolkit/components/url-classifier/nsUrlClassifierDBService.cpp +++ b/toolkit/components/url-classifier/nsUrlClassifierDBService.cpp @@ -42,14 +42,8 @@ #include "nsAutoPtr.h" #include "nsCOMPtr.h" -#include "mozIStorageService.h" -#include "mozIStorageConnection.h" -#include "mozIStorageStatement.h" -#include "mozStorageHelper.h" -#include "mozStorageCID.h" #include "nsAppDirectoryServiceDefs.h" #include "nsCRT.h" -#include "nsDataHashtable.h" #include "nsICryptoHash.h" #include "nsICryptoHMAC.h" #include "nsIDirectoryService.h" @@ -73,48 +67,23 @@ #include "nsNetCID.h" #include "nsThreadUtils.h" #include "nsXPCOMStrings.h" +#include "nsProxyRelease.h" #include "mozilla/Mutex.h" +#include "mozilla/TimeStamp.h" #include "mozilla/Telemetry.h" #include "prlog.h" #include "prprf.h" #include "prnetdb.h" -#include "zlib.h" - -// Needed to interpert mozIStorageConnection::GetLastError -#include +#include "Entries.h" +#include "Classifier.h" +#include "ProtocolParser.h" using namespace mozilla; - -/** - * The DBServices stores a set of Fragments. A fragment is one URL - * fragment containing two or more domain components and some number - * of path components. - * - * Fragment examples: - * example.com/ - * www.example.com/foo/bar - * www.mail.example.com/mail - * - * Fragments are described in "Simplified Regular Expression Lookup" - * section of the protocol document at - * http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec - * - * A fragment is associated with a domain. The domain for a given - * fragment is the three-host-component domain of the fragment (two - * host components for URLs with only two components) with a trailing - * slash. So for the fragments listed above, the domains are - * example.com/, www.example.com/ and mail.example.com/. - * - * Fragments and domains are hashed in the database. The hash is described - * in the protocol document, but it's basically a truncated SHA256 hash. - * - * A (table, chunk id, domain key, fragment) tuple is referred to as - * an Entry. - */ +using namespace mozilla::safebrowsing; // NSPR_LOG_MODULES=UrlClassifierDbService:5 #if defined(PR_LOGGING) -static const PRLogModuleInfo *gUrlClassifierDbServiceLog = nsnull; +PRLogModuleInfo *gUrlClassifierDbServiceLog = nsnull; #define LOG(args) PR_LOG(gUrlClassifierDbServiceLog, PR_LOG_DEBUG, args) #define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierDbServiceLog, 4) #else @@ -122,31 +91,6 @@ static const PRLogModuleInfo *gUrlClassifierDbServiceLog = nsnull; #define LOG_ENABLED() (false) #endif -// Schema versioning: note that we don't bother to migrate between different -// versions of the schema, we just start fetching the data freshly with each -// migration. - -// The database filename is updated when there is an incompatible -// schema change and we expect both implementations to continue -// accessing the same database (such as between stable versions of the -// platform). -#define DATABASE_FILENAME "urlclassifier3.sqlite" - -// The implementation version is updated during development when we -// want to change schema, or to recover from updating bugs. 
When an -// implementation version change is detected, the database is scrapped -// and we start over. -#define IMPLEMENTATION_VERSION 7 - -// Name of the persistent PrefixSet storage -#define PREFIXSET_FILENAME "urlclassifier.pset" - -#define MAX_HOST_COMPONENTS 5 -#define MAX_PATH_COMPONENTS 4 - -// Updates will fail if fed chunks larger than this -#define MAX_CHUNK_SIZE (1024 * 1024) - // Prefs for implementing nsIURIClassifier to block page loads #define CHECK_MALWARE_PREF "browser.safebrowsing.malware.enabled" #define CHECK_MALWARE_DEFAULT false @@ -162,23 +106,6 @@ static const PRLogModuleInfo *gUrlClassifierDbServiceLog = nsnull; #define CONFIRM_AGE_PREF "urlclassifier.confirm-age" #define CONFIRM_AGE_DEFAULT_SEC (45 * 60) -#define UPDATE_CACHE_SIZE_PREF "urlclassifier.updatecachemax" -#define UPDATE_CACHE_SIZE_DEFAULT -1 - -#define LOOKUP_CACHE_SIZE_PREF "urlclassifier.lookupcachemax" -#define LOOKUP_CACHE_SIZE_DEFAULT -1 - -// Amount of time to spend updating before committing and delaying, in -// seconds. This is checked after each update stream, so the actual -// time spent can be higher than this, depending on update stream size. -#define UPDATE_WORKING_TIME "urlclassifier.workingtime" -#define UPDATE_WORKING_TIME_DEFAULT 5 - -// The amount of time to delay after hitting UPDATE_WORKING_TIME, in -// seconds. -#define UPDATE_DELAY_TIME "urlclassifier.updatetime" -#define UPDATE_DELAY_TIME_DEFAULT 60 - class nsUrlClassifierDBServiceWorker; // Singleton instance. @@ -192,12 +119,6 @@ static bool gShuttingDownThread = false; static PRInt32 gFreshnessGuarantee = CONFIRM_AGE_DEFAULT_SEC; -static PRInt32 gUpdateCacheSize = UPDATE_CACHE_SIZE_DEFAULT; -static PRInt32 gLookupCacheSize = LOOKUP_CACHE_SIZE_DEFAULT; - -static PRInt32 gWorkingTimeThreshold = UPDATE_WORKING_TIME_DEFAULT; -static PRInt32 gDelayTime = UPDATE_DELAY_TIME_DEFAULT; - static void SplitTables(const nsACString& str, nsTArray& tables) { @@ -216,868 +137,6 @@ SplitTables(const nsACString& str, nsTArray& tables) } } -// ------------------------------------------------------------------------- -// Hash class implementation - -// A convenience wrapper around the potentially-truncated hash for a -// domain or fragment. - -template -struct nsUrlClassifierHash -{ - static const PRUint32 sHashSize = S; - typedef nsUrlClassifierHash self_type; - PRUint8 buf[S]; - - nsresult FromPlaintext(const nsACString& plainText, nsICryptoHash *hash) { - // From the protocol doc: - // Each entry in the chunk is composed of the 128 most significant bits - // of the SHA 256 hash of a suffix/prefix expression. 
- - nsresult rv = hash->Init(nsICryptoHash::SHA256); - NS_ENSURE_SUCCESS(rv, rv); - - rv = hash->Update - (reinterpret_cast(plainText.BeginReading()), - plainText.Length()); - NS_ENSURE_SUCCESS(rv, rv); - - nsCAutoString hashed; - rv = hash->Finish(false, hashed); - NS_ENSURE_SUCCESS(rv, rv); - - NS_ASSERTION(hashed.Length() >= sHashSize, - "not enough characters in the hash"); - - memcpy(buf, hashed.BeginReading(), sHashSize); - - return NS_OK; - } - - void Assign(const nsACString& str) { - NS_ASSERTION(str.Length() >= sHashSize, - "string must be at least sHashSize characters long"); - memcpy(buf, str.BeginReading(), sHashSize); - } - - void Clear() { - memset(buf, 0, sizeof(buf)); - } - - const bool operator==(const self_type& hash) const { - return (memcmp(buf, hash.buf, sizeof(buf)) == 0); - } - const bool operator!=(const self_type& hash) const { - return !(*this == hash); - } - const bool operator<(const self_type& hash) const { - return memcmp(buf, hash.buf, sizeof(self_type)) < 0; - } - const bool StartsWith(const nsUrlClassifierHash& hash) const { - NS_ASSERTION(sHashSize >= PARTIAL_LENGTH, "nsUrlClassifierHash must be at least PARTIAL_LENGTH bytes long"); - return memcmp(buf, hash.buf, PARTIAL_LENGTH) == 0; - } - PRUint32 ToUint32() const { - return *(reinterpret_cast(buf)); - } -}; - -typedef nsUrlClassifierHash nsUrlClassifierDomainHash; -typedef nsUrlClassifierHash nsUrlClassifierPartialHash; -typedef nsUrlClassifierHash nsUrlClassifierCompleteHash; - - -// ------------------------------------------------------------------------- -// Entry class implementation - -// This class represents one entry in the classifier database. It consists -// of a table id, a chunk id, a domain hash, and a partial or complete hash. -class nsUrlClassifierEntry -{ -public: - nsUrlClassifierEntry() - : mId(-1) - , mHavePartial(false) - , mHaveComplete(false) - , mTableId(0) - , mChunkId(0) - , mAddChunkId(0) - {} - ~nsUrlClassifierEntry() {} - - // Check that this entry could potentially match the complete hash. - bool Match(const nsUrlClassifierCompleteHash &hash); - - // Check that the sub entry should apply to this entry. - bool SubMatch(const nsUrlClassifierEntry& sub); - - // Clear out the entry structure - void Clear(); - - // Set the partial hash for this domain. - void SetHash(const nsUrlClassifierPartialHash &partialHash) { - mPartialHash = partialHash; - mHavePartial = true; - } - - // Set the complete hash for this domain. - void SetHash(const nsUrlClassifierCompleteHash &completeHash) { - mCompleteHash = completeHash; - mHaveComplete = true; - } - - bool operator== (const nsUrlClassifierEntry& entry) const { - return ! 
(mTableId != entry.mTableId || - mChunkId != entry.mChunkId || - mHavePartial != entry.mHavePartial || - (mHavePartial && mPartialHash != entry.mPartialHash) || - mHaveComplete != entry.mHaveComplete || - (mHaveComplete && mCompleteHash != entry.mCompleteHash)); - } - - bool operator< (const nsUrlClassifierEntry& entry) const { - return (mTableId < entry.mTableId || - mChunkId < entry.mChunkId || - (mHavePartial && !entry.mHavePartial) || - (mHavePartial && mPartialHash < entry.mPartialHash) || - (mHaveComplete && !entry.mHaveComplete) || - (mHaveComplete && mCompleteHash < entry.mCompleteHash)); - } - - PRInt64 mId; - - nsUrlClassifierDomainHash mKey; - - bool mHavePartial; - nsUrlClassifierPartialHash mPartialHash; - - bool mHaveComplete; - nsUrlClassifierCompleteHash mCompleteHash; - - PRUint32 mTableId; - PRUint32 mChunkId; - PRUint32 mAddChunkId; -}; - -bool -nsUrlClassifierEntry::Match(const nsUrlClassifierCompleteHash &hash) -{ - if (mHaveComplete) - return mCompleteHash == hash; - - if (mHavePartial) - return hash.StartsWith(mPartialHash); - - return false; -} - -bool -nsUrlClassifierEntry::SubMatch(const nsUrlClassifierEntry &subEntry) -{ - if ((mTableId != subEntry.mTableId) || (mChunkId != subEntry.mAddChunkId)) - return false; - - if (subEntry.mHaveComplete) - return mHaveComplete && mCompleteHash == subEntry.mCompleteHash; - - if (subEntry.mHavePartial) - return mHavePartial && mPartialHash == subEntry.mPartialHash; - - return false; -} - -void -nsUrlClassifierEntry::Clear() -{ - mId = -1; - mHavePartial = false; - mHaveComplete = false; -} - -// ------------------------------------------------------------------------- -// Lookup result class implementation - -// This helper class wraps a nsUrlClassifierEntry found during a lookup. -class nsUrlClassifierLookupResult -{ -public: - nsUrlClassifierLookupResult() : mConfirmed(false), mNoise(false) { - mLookupFragment.Clear(); - } - ~nsUrlClassifierLookupResult() {} - - bool operator==(const nsUrlClassifierLookupResult &result) const { - // Don't need to compare table name, it's contained by id in the entry. - return (mLookupFragment == result.mLookupFragment && - mConfirmed == result.mConfirmed && - mEntry == result.mEntry); - } - - bool operator<(const nsUrlClassifierLookupResult &result) const { - // Don't need to compare table name, it's contained by id in the entry. - return (mLookupFragment < result.mLookupFragment || - mConfirmed < result.mConfirmed || - mEntry < result.mEntry); - } - - // The hash that matched this entry. - nsUrlClassifierCompleteHash mLookupFragment; - - // The entry that was found during the lookup. - nsUrlClassifierEntry mEntry; - - // TRUE if the lookup matched a complete hash (not just a partial - // one). - bool mConfirmed; - - // TRUE if this lookup is gethash noise. Does not represent an actual - // result. - bool mNoise; - - // The table name associated with mEntry.mTableId. - nsCString mTableName; -}; - -// ------------------------------------------------------------------------- -// Store class implementation - -// This class mediates access to the classifier and chunk entry tables. -class nsUrlClassifierStore -{ -public: - nsUrlClassifierStore() {} - virtual ~nsUrlClassifierStore() {} - - // Initialize the statements for the store. - nsresult Init(nsUrlClassifierDBServiceWorker *worker, - mozIStorageConnection *connection, - const nsACString& entriesTableName); - // Shut down the store. 
- void Close(); - - // Read an entry from a database statement - virtual bool ReadStatement(mozIStorageStatement* statement, - nsUrlClassifierEntry& entry); - - // Prepare a statement to write this entry to the database - virtual nsresult BindStatement(const nsUrlClassifierEntry& entry, - mozIStorageStatement* statement); - - // Read the entry with a given ID from the database - nsresult ReadEntry(PRInt64 id, nsUrlClassifierEntry& entry, bool *exists); - - // Remove an entry from the database - nsresult DeleteEntry(nsUrlClassifierEntry& entry); - - // Write an entry to the database - nsresult WriteEntry(nsUrlClassifierEntry& entry); - - // Update an entry in the database. The entry must already exist in the - // database or this method will fail. - nsresult UpdateEntry(nsUrlClassifierEntry& entry); - - // Remove all entries for a given table/chunk pair from the database. - nsresult Expire(PRUint32 tableId, - PRUint32 chunkNum); - - // Read a certain number of rows adjacent to the requested rowid that - // don't have complete hash data. - nsresult ReadNoiseEntries(PRInt64 rowID, - PRUint32 numRequested, - bool before, - nsTArray &entries); - - // Ask the db for a random number. This is temporary, and should be - // replaced with nsIRandomGenerator when 419739 is fixed. - nsresult RandomNumber(PRInt64 *randomNum); - // Return an array with all Prefixes known - nsresult ReadPrefixes(FallibleTArray& array, PRUint32 aKey); - - -protected: - nsresult ReadEntries(mozIStorageStatement *statement, - nsTArray& entries); - nsUrlClassifierDBServiceWorker *mWorker; - nsCOMPtr mConnection; - - nsCOMPtr mLookupWithIDStatement; - - nsCOMPtr mInsertStatement; - nsCOMPtr mUpdateStatement; - nsCOMPtr mDeleteStatement; - nsCOMPtr mExpireStatement; - - nsCOMPtr mPartialEntriesStatement; - nsCOMPtr mPartialEntriesAfterStatement; - nsCOMPtr mLastPartialEntriesStatement; - nsCOMPtr mPartialEntriesBeforeStatement; - - nsCOMPtr mRandomStatement; - nsCOMPtr mAllPrefixGetStatement; - nsCOMPtr mAllPrefixCountStatement; -}; - -nsresult -nsUrlClassifierStore::Init(nsUrlClassifierDBServiceWorker *worker, - mozIStorageConnection *connection, - const nsACString& entriesName) -{ - mWorker = worker; - mConnection = connection; - - nsresult rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("SELECT * FROM ") + entriesName + - NS_LITERAL_CSTRING(" WHERE id=?1"), - getter_AddRefs(mLookupWithIDStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("DELETE FROM ") + entriesName + - NS_LITERAL_CSTRING(" WHERE id=?1"), - getter_AddRefs(mDeleteStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("DELETE FROM ") + entriesName + - NS_LITERAL_CSTRING(" WHERE table_id=?1 AND chunk_id=?2"), - getter_AddRefs(mExpireStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("SELECT * FROM ") + entriesName + - NS_LITERAL_CSTRING(" WHERE complete_data ISNULL" - " LIMIT ?1"), - getter_AddRefs(mPartialEntriesStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("SELECT * FROM ") + entriesName + - NS_LITERAL_CSTRING(" WHERE id > ?1 AND complete_data ISNULL" - " LIMIT ?2"), - getter_AddRefs(mPartialEntriesAfterStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("SELECT * FROM ") + entriesName + - NS_LITERAL_CSTRING(" WHERE complete_data ISNULL" - " ORDER BY id DESC LIMIT ?1"), - 
getter_AddRefs(mLastPartialEntriesStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("SELECT * FROM ") + entriesName + - NS_LITERAL_CSTRING(" WHERE id < ?1 AND complete_data ISNULL" - " ORDER BY id DESC LIMIT ?2"), - getter_AddRefs(mPartialEntriesBeforeStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("SELECT abs(random())"), - getter_AddRefs(mRandomStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement(NS_LITERAL_CSTRING("SELECT domain, partial_data, complete_data FROM ") - + entriesName, - getter_AddRefs(mAllPrefixGetStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement(NS_LITERAL_CSTRING("SELECT COUNT(1) FROM ") - + entriesName, - getter_AddRefs(mAllPrefixCountStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - return NS_OK; -} - -void -nsUrlClassifierStore::Close() -{ - mLookupWithIDStatement = nsnull; - - mInsertStatement = nsnull; - mUpdateStatement = nsnull; - mDeleteStatement = nsnull; - mExpireStatement = nsnull; - - mPartialEntriesStatement = nsnull; - mPartialEntriesAfterStatement = nsnull; - mPartialEntriesBeforeStatement = nsnull; - mLastPartialEntriesStatement = nsnull; - mRandomStatement = nsnull; - - mAllPrefixGetStatement = nsnull; - mAllPrefixCountStatement = nsnull; - - mConnection = nsnull; -} - - -bool -nsUrlClassifierStore::ReadStatement(mozIStorageStatement* statement, - nsUrlClassifierEntry& entry) -{ - entry.mId = statement->AsInt64(0); - - PRUint32 size; - const PRUint8* blob = statement->AsSharedBlob(1, &size); - if (!blob || (size != DOMAIN_LENGTH)) - return false; - memcpy(entry.mKey.buf, blob, DOMAIN_LENGTH); - - blob = statement->AsSharedBlob(2, &size); - if (!blob || size == 0) { - entry.mHavePartial = false; - } else { - if (size != PARTIAL_LENGTH) - return false; - entry.mHavePartial = true; - memcpy(entry.mPartialHash.buf, blob, PARTIAL_LENGTH); - } - - blob = statement->AsSharedBlob(3, &size); - if (!blob || size == 0) { - entry.mHaveComplete = false; - } else { - if (size != COMPLETE_LENGTH) - return false; - entry.mHaveComplete = true; - memcpy(entry.mCompleteHash.buf, blob, COMPLETE_LENGTH); - } - - // If we only have a partial entry, and that partial entry matches the - // domain, we don't save the extra copy to the database. - if (!(entry.mHavePartial || entry.mHaveComplete)) { - entry.SetHash(entry.mKey); - } - - entry.mChunkId = statement->AsInt32(4); - entry.mTableId = statement->AsInt32(5); - - return true; -} - -nsresult -nsUrlClassifierStore::BindStatement(const nsUrlClassifierEntry &entry, - mozIStorageStatement* statement) -{ - nsresult rv; - - if (entry.mId == -1) - rv = statement->BindNullByIndex(0); - else - rv = statement->BindInt64ByIndex(0, entry.mId); - NS_ENSURE_SUCCESS(rv, rv); - - rv = statement->BindBlobByIndex(1, entry.mKey.buf, DOMAIN_LENGTH); - NS_ENSURE_SUCCESS(rv, rv); - - if (entry.mHavePartial) { - // If we only have a partial entry and that entry matches the domain, - // we'll save some space by only storing the domain hash. 
- if (!entry.mHaveComplete && entry.mKey == entry.mPartialHash) { - rv = statement->BindNullByIndex(2); - } else { - rv = statement->BindBlobByIndex(2, entry.mPartialHash.buf, - PARTIAL_LENGTH); - } - } else { - rv = statement->BindNullByIndex(2); - } - NS_ENSURE_SUCCESS(rv, rv); - - if (entry.mHaveComplete) { - rv = statement->BindBlobByIndex(3, entry.mCompleteHash.buf, COMPLETE_LENGTH); - } else { - rv = statement->BindNullByIndex(3); - } - NS_ENSURE_SUCCESS(rv, rv); - - rv = statement->BindInt32ByIndex(4, entry.mChunkId); - NS_ENSURE_SUCCESS(rv, rv); - - rv = statement->BindInt32ByIndex(5, entry.mTableId); - NS_ENSURE_SUCCESS(rv, rv); - - return true; -} - -nsresult -nsUrlClassifierStore::ReadEntries(mozIStorageStatement *statement, - nsTArray& entries) -{ - bool exists; - nsresult rv = statement->ExecuteStep(&exists); - NS_ENSURE_SUCCESS(rv, rv); - - while (exists) { - nsUrlClassifierEntry *entry = entries.AppendElement(); - if (!entry) { - return NS_ERROR_OUT_OF_MEMORY; - } - - if (!ReadStatement(statement, *entry)) - return NS_ERROR_FAILURE; - - statement->ExecuteStep(&exists); - } - - return NS_OK; -} - -nsresult -nsUrlClassifierStore::ReadEntry(PRInt64 id, - nsUrlClassifierEntry& entry, - bool *exists) -{ - entry.Clear(); - - mozStorageStatementScoper scoper(mLookupWithIDStatement); - - nsresult rv = mLookupWithIDStatement->BindInt64ByIndex(0, id); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mLookupWithIDStatement->ExecuteStep(exists); - NS_ENSURE_SUCCESS(rv, rv); - - if (*exists) { - if (ReadStatement(mLookupWithIDStatement, entry)) - return NS_ERROR_FAILURE; - } - - return NS_OK; -} - -nsresult -nsUrlClassifierStore::ReadNoiseEntries(PRInt64 rowID, - PRUint32 numRequested, - bool before, - nsTArray &entries) -{ - if (numRequested == 0) { - return NS_OK; - } - - mozIStorageStatement *statement = - before ? mPartialEntriesBeforeStatement : mPartialEntriesAfterStatement; - mozStorageStatementScoper scoper(statement); - - nsresult rv = statement->BindInt64ByIndex(0, rowID); - NS_ENSURE_SUCCESS(rv, rv); - - statement->BindInt32ByIndex(1, numRequested); - NS_ENSURE_SUCCESS(rv, rv); - - PRUint32 length = entries.Length(); - rv = ReadEntries(statement, entries); - NS_ENSURE_SUCCESS(rv, rv); - - PRUint32 numRead = entries.Length() - length; - - if (numRead >= numRequested) - return NS_OK; - - // If we didn't get enough entries, we need the search to wrap around from - // beginning to end (or vice-versa) - - mozIStorageStatement *wraparoundStatement = - before ? mPartialEntriesStatement : mLastPartialEntriesStatement; - mozStorageStatementScoper wraparoundScoper(wraparoundStatement); - - rv = wraparoundStatement->BindInt32ByIndex(0, numRequested - numRead); - NS_ENSURE_SUCCESS(rv, rv); - - return ReadEntries(wraparoundStatement, entries); -} - -nsresult -nsUrlClassifierStore::RandomNumber(PRInt64 *randomNum) -{ - mozStorageStatementScoper randScoper(mRandomStatement); - bool exists; - nsresult rv = mRandomStatement->ExecuteStep(&exists); - NS_ENSURE_SUCCESS(rv, rv); - if (!exists) - return NS_ERROR_NOT_AVAILABLE; - - *randomNum = mRandomStatement->AsInt64(0); - - return NS_OK; -} - -// ------------------------------------------------------------------------- -// nsUrlClassifierAddStore class implementation - -// This class accesses the moz_classifier table. 
-class nsUrlClassifierAddStore: public nsUrlClassifierStore -{ -public: - nsUrlClassifierAddStore() {}; - virtual ~nsUrlClassifierAddStore() {}; - - nsresult Init(nsUrlClassifierDBServiceWorker *worker, - mozIStorageConnection *connection, - const nsACString& entriesTableName); - - void Close(); - - // Read the entries for a given key/table/chunk from the database - nsresult ReadAddEntries(const nsUrlClassifierDomainHash& key, - PRUint32 tableId, - PRUint32 chunkId, - nsTArray& entry); - - // Read the entries for a given host key from the database. - nsresult ReadAddEntries(const nsUrlClassifierDomainHash& key, - nsTArray& entry); - -protected: - nsCOMPtr mLookupStatement; - nsCOMPtr mLookupWithChunkStatement; -}; - -nsresult -nsUrlClassifierAddStore::Init(nsUrlClassifierDBServiceWorker *worker, - mozIStorageConnection *connection, - const nsACString &entriesTableName) -{ - nsresult rv = nsUrlClassifierStore::Init(worker, connection, - entriesTableName); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("INSERT OR REPLACE INTO ") + entriesTableName + - NS_LITERAL_CSTRING(" VALUES (?1, ?2, ?3, ?4, ?5, ?6)"), - getter_AddRefs(mInsertStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("UPDATE ") + entriesTableName + - NS_LITERAL_CSTRING(" SET domain=?2, partial_data=?3, " - " complete_data=?4, chunk_id=?5, table_id=?6" - " WHERE id=?1"), - getter_AddRefs(mUpdateStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("SELECT * FROM ") + entriesTableName + - NS_LITERAL_CSTRING(" WHERE domain=?1"), - getter_AddRefs(mLookupStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("SELECT * FROM ") + entriesTableName + - NS_LITERAL_CSTRING(" WHERE domain=?1 AND table_id=?2 AND chunk_id=?3"), - getter_AddRefs(mLookupWithChunkStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - return NS_OK; -} - -void -nsUrlClassifierAddStore::Close() -{ - nsUrlClassifierStore::Close(); - - mLookupStatement = nsnull; - mLookupWithChunkStatement = nsnull; -} - -nsresult -nsUrlClassifierAddStore::ReadAddEntries(const nsUrlClassifierDomainHash& hash, - PRUint32 tableId, - PRUint32 chunkId, - nsTArray& entries) -{ - mozStorageStatementScoper scoper(mLookupWithChunkStatement); - - nsresult rv = mLookupWithChunkStatement->BindBlobByIndex - (0, hash.buf, DOMAIN_LENGTH); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mLookupWithChunkStatement->BindInt32ByIndex(1, tableId); - NS_ENSURE_SUCCESS(rv, rv); - rv = mLookupWithChunkStatement->BindInt32ByIndex(2, chunkId); - NS_ENSURE_SUCCESS(rv, rv); - - return ReadEntries(mLookupWithChunkStatement, entries); -} - -nsresult -nsUrlClassifierAddStore::ReadAddEntries(const nsUrlClassifierDomainHash& hash, - nsTArray& entries) -{ - mozStorageStatementScoper scoper(mLookupStatement); - - nsresult rv = mLookupStatement->BindBlobByIndex - (0, hash.buf, DOMAIN_LENGTH); - NS_ENSURE_SUCCESS(rv, rv); - - return ReadEntries(mLookupStatement, entries); -} - -// ------------------------------------------------------------------------- -// nsUrlClassifierSubStore class implementation - -// This class accesses the moz_subs table. 
-class nsUrlClassifierSubStore : public nsUrlClassifierStore -{ -public: - nsUrlClassifierSubStore() {}; - virtual ~nsUrlClassifierSubStore() {}; - - nsresult Init(nsUrlClassifierDBServiceWorker *worker, - mozIStorageConnection *connection, - const nsACString& entriesTableName); - - void Close(); - - // Read an entry from a database statement - virtual bool ReadStatement(mozIStorageStatement* statement, - nsUrlClassifierEntry& entry); - - // Prepare a statement to write this entry to the database - virtual nsresult BindStatement(const nsUrlClassifierEntry& entry, - mozIStorageStatement* statement); - - // Read sub entries for a given add chunk - nsresult ReadSubEntries(PRUint32 tableId, PRUint32 chunkId, - nsTArray &subEntry); - - // Expire sub entries for a given add chunk - nsresult ExpireAddChunk(PRUint32 tableId, PRUint32 chunkId); - -protected: - nsCOMPtr mLookupWithAddChunkStatement; - nsCOMPtr mExpireAddChunkStatement; -}; - -nsresult -nsUrlClassifierSubStore::Init(nsUrlClassifierDBServiceWorker *worker, - mozIStorageConnection *connection, - const nsACString &entriesTableName) -{ - nsresult rv = nsUrlClassifierStore::Init(worker, connection, - entriesTableName); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("INSERT OR REPLACE INTO ") + entriesTableName + - NS_LITERAL_CSTRING(" VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"), - getter_AddRefs(mInsertStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("UPDATE ") + entriesTableName + - NS_LITERAL_CSTRING(" SET domain=?2, partial_data=?3, complete_data=?4," - " chunk_id=?5, table_id=?6, add_chunk_id=?7" - " WHERE id=?1"), - getter_AddRefs(mUpdateStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("SELECT * FROM ") + entriesTableName + - NS_LITERAL_CSTRING(" WHERE table_id=?1 AND add_chunk_id=?2"), - getter_AddRefs(mLookupWithAddChunkStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mConnection->CreateStatement - (NS_LITERAL_CSTRING("DELETE FROM ") + entriesTableName + - NS_LITERAL_CSTRING(" WHERE table_id=?1 AND add_chunk_id=?2"), - getter_AddRefs(mExpireAddChunkStatement)); - NS_ENSURE_SUCCESS(rv, rv); - - return NS_OK; -} - -bool -nsUrlClassifierSubStore::ReadStatement(mozIStorageStatement* statement, - nsUrlClassifierEntry& entry) -{ - if (!nsUrlClassifierStore::ReadStatement(statement, entry)) - return false; - - entry.mAddChunkId = statement->AsInt32(6); - return true; -} - -nsresult -nsUrlClassifierSubStore::BindStatement(const nsUrlClassifierEntry& entry, - mozIStorageStatement* statement) -{ - nsresult rv = nsUrlClassifierStore::BindStatement(entry, statement); - NS_ENSURE_SUCCESS(rv, rv); - - return statement->BindInt32ByIndex(6, entry.mAddChunkId); -} - -nsresult -nsUrlClassifierSubStore::ReadSubEntries(PRUint32 tableId, PRUint32 addChunkId, - nsTArray& entries) -{ - mozStorageStatementScoper scoper(mLookupWithAddChunkStatement); - - nsresult rv = mLookupWithAddChunkStatement->BindInt32ByIndex(0, tableId); - NS_ENSURE_SUCCESS(rv, rv); - rv = mLookupWithAddChunkStatement->BindInt32ByIndex(1, addChunkId); - NS_ENSURE_SUCCESS(rv, rv); - - return ReadEntries(mLookupWithAddChunkStatement, entries); -} - -nsresult -nsUrlClassifierSubStore::ExpireAddChunk(PRUint32 tableId, PRUint32 addChunkId) -{ - mozStorageStatementScoper scoper(mExpireAddChunkStatement); - - nsresult rv = mExpireAddChunkStatement->BindInt32ByIndex(0, tableId); - NS_ENSURE_SUCCESS(rv, rv); - rv = 
mExpireAddChunkStatement->BindInt32ByIndex(1, addChunkId); - NS_ENSURE_SUCCESS(rv, rv); - - return mExpireAddChunkStatement->Execute(); -} - -void -nsUrlClassifierSubStore::Close() -{ - nsUrlClassifierStore::Close(); - mLookupWithAddChunkStatement = nsnull; - mExpireAddChunkStatement = nsnull; -} - -// Similar to GetKey(), but if the domain contains three or more components, -// two keys will be returned: -// hostname.com/foo/bar -> [hostname.com] -// mail.hostname.com/foo/bar -> [hostname.com, mail.hostname.com] -// www.mail.hostname.com/foo/bar -> [hostname.com, mail.hostname.com] -static nsresult GetHostKeys(const nsACString &spec, - nsTArray &hostKeys); - -// take a lookup string (www.hostname.com/path/to/resource.html) and -// expand it into the set of fragments that should be searched for in an -// entry -static nsresult GetLookupFragments(const nsCSubstring& spec, - nsTArray& fragments); - -// Check for a canonicalized IP address. -static bool IsCanonicalizedIP(const nsACString& host); - -// Get the database key for a given URI. This is the top three -// domain components if they exist, otherwise the top two. -// hostname.com/foo/bar -> hostname.com -// mail.hostname.com/foo/bar -> mail.hostname.com -// www.mail.hostname.com/foo/bar -> mail.hostname.com -static nsresult GetKey(const nsACString& spec, nsUrlClassifierDomainHash& hash, - nsICryptoHash * aCryptoHash); - -// We have both a prefix and a domain. Drop the domain, but -// hash the domain, the prefix and a random value together, -// ensuring any collisions happens at a different points for -// different users. -static nsresult KeyedHash(PRUint32 aPref, PRUint32 aDomain, - PRUint32 aKey, PRUint32 *aOut); - - // ------------------------------------------------------------------------- // Actual worker implemenatation class nsUrlClassifierDBServiceWorker : public nsIUrlClassifierDBServiceWorker @@ -1090,8 +149,7 @@ public: NS_DECL_NSIURLCLASSIFIERDBSERVICEWORKER // Initialize, called in the main thread - nsresult Init(PRInt32 gethashNoise, - nsRefPtr & prefSet); + nsresult Init(PRInt32 gethashNoise, nsCOMPtr aCacheDir); // Queue a lookup for the worker to perform, called in the main thread. nsresult QueueLookup(const nsACString& lookupKey, @@ -1101,12 +159,6 @@ public: // update operations to prevent lookups from blocking for too long. nsresult HandlePendingLookups(); - // Blocks the PrefixSet from being updated while the main thread is doing - // its lookups. LockPrefixSet will return whether the PrefixSet is in a - // usable state. If not, we should fall through to SQLite lookups. - bool LockPrefixSet(); - void UnlockPrefixSet(); - private: // No subclassing ~nsUrlClassifierDBServiceWorker(); @@ -1114,231 +166,66 @@ private: // Disallow copy constructor nsUrlClassifierDBServiceWorker(nsUrlClassifierDBServiceWorker&); - // Try to open the db, DATABASE_FILENAME. nsresult OpenDb(); - // Create table in the db if they don't exist. 
- nsresult MaybeCreateTables(mozIStorageConnection* connection); - - nsresult GetTableName(PRUint32 tableId, nsACString& table); - nsresult GetTableId(const nsACString& table, PRUint32* tableId); - - // Decompress a zlib'ed chunk (used for -exp tables) - nsresult InflateChunk(nsACString& chunk); - - // Expand shavar chunk into its individual entries - nsresult GetShaEntries(PRUint32 tableId, - PRUint32 chunkType, - PRUint32 chunkNum, - PRUint32 domainSize, - PRUint32 hashSize, - nsACString& chunk, - nsTArray& entries); - - // Expand a chunk into its individual entries - nsresult GetChunkEntries(const nsACString& table, - PRUint32 tableId, - PRUint32 chunkType, - PRUint32 chunkNum, - PRUint32 hashSize, - nsACString& chunk, - nsTArray& entries); - - // Parse one stringified range of chunks of the form "n" or "n-m" from a - // comma-separated list of chunks. Upon return, 'begin' will point to the - // next range of chunks in the list of chunks. - bool ParseChunkRange(nsACString::const_iterator &begin, - const nsACString::const_iterator &end, - PRUint32 *first, PRUint32 *last); - - // Expand a stringified chunk list into an array of ints. - nsresult ParseChunkList(const nsACString& chunkStr, - nsTArray& chunks); - - // Join an array of ints into a stringified chunk list. - nsresult JoinChunkList(nsTArray& chunks, nsCString& chunkStr); - - // List the add/subtract chunks that have been applied to a table - nsresult GetChunkLists(PRUint32 tableId, - nsACString& addChunks, - nsACString& subChunks); - - // Set the list of add/subtract chunks that have been applied to a table - nsresult SetChunkLists(PRUint32 tableId, - const nsACString& addChunks, - const nsACString& subChunks); - - // Cache the list of add/subtract chunks applied to the table, optionally - // parsing the add or sub lists. These lists are cached while updating - // tables to avoid excessive database reads/writes and parsing. - nsresult CacheChunkLists(PRUint32 tableId, - bool parseAdds, - bool parseSubs); - - // Clear the cached list of add/subtract chunks. - void ClearCachedChunkLists(); - - // Flush the cached add/subtract lists to the database. - nsresult FlushChunkLists(); - - // Inserts a chunk id into the list, sorted. Returns TRUE if the - // number was successfully added, FALSE if the chunk already exists. - bool InsertChunkId(nsTArray& chunks, PRUint32 chunkNum); - - // Add a list of entries to the database, merging with - // existing entries as necessary - nsresult AddChunk(PRUint32 tableId, PRUint32 chunkNum, - nsTArray& entries); - - // Expire an add chunk - nsresult ExpireAdd(PRUint32 tableId, PRUint32 chunkNum); - - // Subtract a list of entries from the database - nsresult SubChunk(PRUint32 tableId, PRUint32 chunkNum, - nsTArray& entries); - - // Expire a subtract chunk - nsresult ExpireSub(PRUint32 tableId, PRUint32 chunkNum); - - // Handle line-oriented control information from a stream update - nsresult ProcessResponseLines(bool* done); - // Handle chunk data from a stream update - nsresult ProcessChunk(bool* done); - - // Sets up a transaction and begins counting update time. - nsresult SetupUpdate(); - // Applies the current transaction and resets the update/working times. nsresult ApplyUpdate(); // Reset the in-progress update stream void ResetStream(); - // Reset the in-progress update - void ResetUpdate(); - - // Look for a given lookup string (www.hostname.com/path/to/resource.html) - // Returns a list of entries that match. 
- nsresult Check(const nsCSubstring& spec, - nsTArray& results); + // Reset the in-progress update + void ResetUpdate(); // Perform a classifier lookup for a given url. nsresult DoLookup(const nsACString& spec, nsIUrlClassifierLookupCallback* c); // Add entries to the results. - nsresult AddNoise(PRInt64 nearID, - PRInt32 count, - nsTArray& results); - - // Construct a Prefix Set with known prefixes - nsresult LoadPrefixSet(nsCOMPtr & aFile); - nsresult ConstructPrefixSet(); - - // Set the SQLite cache size - nsresult SetCacheSize(mozIStorageConnection * aConnection, - PRInt32 aCacheSize); - - nsCOMPtr mDBFile; - nsCOMPtr mPSFile; + nsresult AddNoise(const Prefix aPrefix, + const nsCString tableName, + PRInt32 aCount, + LookupResultArray& results); nsCOMPtr mCryptoHash; - // Holds a connection to the Db. We lazily initialize this because it has - // to be created in the background thread (currently mozStorageConnection - // isn't thread safe). - nsCOMPtr mConnection; + nsAutoPtr mClassifier; + nsAutoPtr mProtocolParser; - // The main collection of entries. This is the store that will be checked - // when classifying a URL. - nsUrlClassifierAddStore mMainStore; + // Directory where to store the SB databases. + nsCOMPtr mCacheDir; - // The collection of subs waiting for their accompanying add. - nsUrlClassifierSubStore mPendingSubStore; - - nsCOMPtr mGetChunkListsStatement; - nsCOMPtr mSetChunkListsStatement; - - nsCOMPtr mGetTablesStatement; - nsCOMPtr mGetTableIdStatement; - nsCOMPtr mGetTableNameStatement; - nsCOMPtr mInsertTableIdStatement; - nsCOMPtr mGetPageSizeStatement; - - // Stores the last time a given table was updated. - nsDataHashtable mTableFreshness; - - // We receive data in small chunks that may be broken in the middle of - // a line. So we save the last partial line here. - nsCString mPendingStreamUpdate; + // XXX: maybe an array of autoptrs. Or maybe a class specifically + // storing a series of updates. + nsTArray mTableUpdates; PRInt32 mUpdateWait; - bool mResetRequested; - bool mGrewCache; - - enum { - STATE_LINE, - STATE_CHUNK - } mState; - - enum { - CHUNK_ADD, - CHUNK_SUB - } mChunkType; - - PRUint32 mChunkNum; - PRUint32 mHashSize; - PRUint32 mChunkLen; - - // List of tables included in this update. - nsTArray mUpdateTables; - - nsCString mUpdateTable; - PRUint32 mUpdateTableId; + // Entries that cannot be completed. We expect them to die at + // the next update + PrefixArray mMissCache; nsresult mUpdateStatus; + nsTArray mUpdateTables; nsCOMPtr mUpdateObserver; bool mInStream; - bool mPrimaryStream; - - bool mHaveCachedLists; - PRUint32 mCachedListsTable; - nsCAutoString mCachedSubsStr; - nsCAutoString mCachedAddsStr; - - bool mHaveCachedAddChunks; - nsTArray mCachedAddChunks; - - bool mHaveCachedSubChunks; - nsTArray mCachedSubChunks; // The client key with which the data from the server will be MAC'ed. nsCString mUpdateClientKey; - // The MAC stated by the server. - nsCString mServerMAC; + // The client-specific hash key to rehash + PRUint32 mHashKey; - // Start time of the current update interval. This will be reset - // every time we apply the update. - PRIntervalTime mUpdateStartTime; - - nsCOMPtr mHMAC; // The number of noise entries to add to the set of lookup results. PRInt32 mGethashNoise; - // Set of prefixes known to be in the database - nsRefPtr mPrefixSet; - // Can we use the PrefixSet (low memory conditions) - bool mPrefixSetEnabled; - Mutex mPrefixSetEnabledLock; - // Pending lookups are stored in a queue for processing. 
The queue // is protected by mPendingLookupLock. Mutex mPendingLookupLock; class PendingLookup { public: + TimeStamp mStartTime; nsCString mKey; nsCOMPtr mCallback; }; @@ -1352,72 +239,28 @@ NS_IMPL_THREADSAFE_ISUPPORTS2(nsUrlClassifierDBServiceWorker, nsIUrlClassifierDBService) nsUrlClassifierDBServiceWorker::nsUrlClassifierDBServiceWorker() - : mUpdateWait(0) - , mResetRequested(false) - , mGrewCache(false) - , mState(STATE_LINE) - , mChunkType(CHUNK_ADD) - , mChunkNum(0) - , mHashSize(0) - , mChunkLen(0) - , mUpdateTableId(0) - , mUpdateStatus(NS_OK) - , mInStream(false) - , mPrimaryStream(false) - , mHaveCachedLists(false) - , mCachedListsTable(PR_UINT32_MAX) - , mHaveCachedAddChunks(false) - , mHaveCachedSubChunks(false) - , mUpdateStartTime(0) +: mInStream(false) , mGethashNoise(0) - , mPrefixSet(0) - , mPrefixSetEnabled(true) - , mPrefixSetEnabledLock("mPrefixSetEnabledLock") , mPendingLookupLock("nsUrlClassifierDBServerWorker.mPendingLookupLock") { } nsUrlClassifierDBServiceWorker::~nsUrlClassifierDBServiceWorker() { - NS_ASSERTION(!mConnection, + NS_ASSERTION(!mClassifier, "Db connection not closed, leaking memory! Call CloseDb " "to close the connection."); } nsresult nsUrlClassifierDBServiceWorker::Init(PRInt32 gethashNoise, - nsRefPtr & prefSet) + nsCOMPtr aCacheDir) { mGethashNoise = gethashNoise; - mPrefixSet = prefSet; - - // Compute database filename - - // Because we dump raw integers into the database, this database isn't - // portable between machine types, so store it in the local profile dir. - nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR, - getter_AddRefs(mDBFile)); - - if (NS_FAILED(rv)) { - rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_50_DIR, - getter_AddRefs(mDBFile)); - } - - if (NS_FAILED(rv)) return NS_ERROR_NOT_AVAILABLE; - - rv = mDBFile->Clone(getter_AddRefs(mPSFile)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mDBFile->Append(NS_LITERAL_STRING(DATABASE_FILENAME)); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mPSFile->Append(NS_LITERAL_STRING(PREFIXSET_FILENAME)); - NS_ENSURE_SUCCESS(rv, rv); + mCacheDir = aCacheDir; ResetUpdate(); - mTableFreshness.Init(); - return NS_OK; } @@ -1430,318 +273,13 @@ nsUrlClassifierDBServiceWorker::QueueLookup(const nsACString& spec, PendingLookup* lookup = mPendingLookups.AppendElement(); if (!lookup) return NS_ERROR_OUT_OF_MEMORY; + lookup->mStartTime = TimeStamp::Now(); lookup->mKey = spec; lookup->mCallback = callback; return NS_OK; } -nsresult -nsUrlClassifierDBService::CheckClean(const nsACString &spec, - bool *clean) -{ - Telemetry::AutoTimer timer; - - // Is the PrefixSet usable? - bool usePrefixSet = mWorker->LockPrefixSet(); - - // No, bail out and pretend the URL is not clean. We will do - // a database lookup and get the correct result. - if (!usePrefixSet) { - mWorker->UnlockPrefixSet(); - *clean = false; - return NS_OK; - } - - // Get the set of fragments to look up. 
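(Editorial aside: QueueLookup above now stamps each pending lookup with its enqueue time, which HandlePendingLookups later feeds into the URLCLASSIFIER_LOOKUP_TIME histogram added by this patch. A condensed sketch of that pattern, assuming the member names used in this file:)

// Sketch: report end-to-end latency for one queued lookup.
TimeStamp start = TimeStamp::Now();  // stored in PendingLookup::mStartTime
// ... DoLookup(lookup.mKey, lookup.mCallback) runs here ...
double elapsedMs = (TimeStamp::Now() - start).ToMilliseconds();
Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LOOKUP_TIME,
                      static_cast<PRUint32>(elapsedMs));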
- nsTArray<nsCString> fragments;
- nsresult rv = GetLookupFragments(spec, fragments);
- if (NS_FAILED(rv)) {
- goto error_checkclean;
- }
-
- PRUint32 prefixkey;
- rv = mPrefixSet->GetKey(&prefixkey);
- if (NS_FAILED(rv)) {
- goto error_checkclean;
- }
-
- *clean = true;
-
- for (PRUint32 i = 0; i < fragments.Length(); i++) {
- nsUrlClassifierDomainHash fragmentKeyHash;
- fragmentKeyHash.FromPlaintext(fragments[i], mHash);
-
- // Find the corresponding host key
- nsUrlClassifierDomainHash hostkey;
- rv = GetKey(fragments[i], hostkey, mHash);
- if (NS_FAILED(rv)) {
- /* This happens for hosts on the local network,
- can't check these against the DB */
- continue;
- }
-
- PRUint32 hostprefix = hostkey.ToUint32();
- PRUint32 fragkey = fragmentKeyHash.ToUint32();
- PRUint32 codedkey;
- rv = KeyedHash(fragkey, hostprefix, prefixkey, &codedkey);
- if (NS_FAILED(rv)) {
- goto error_checkclean;
- }
-
- bool found = false;
- bool ready = false; /* opportunistic probe */
- rv = mPrefixSet->Probe(codedkey, prefixkey, &ready, &found);
- if (NS_FAILED(rv)) {
- goto error_checkclean;
- }
- LOG(("CheckClean Probed %X ready: %d found: %d ",
- codedkey, ready, found));
- if (found || !ready) {
- *clean = false;
- }
- }
-
- mWorker->UnlockPrefixSet();
- return NS_OK;
-
- error_checkclean:
- mWorker->UnlockPrefixSet();
- return rv;
-}
-
-static nsresult GetHostKeys(const nsACString &spec,
- nsTArray<nsCString> &hostKeys)
-{
- nsACString::const_iterator begin, end, iter;
- spec.BeginReading(begin);
- spec.EndReading(end);
-
- iter = begin;
- if (!FindCharInReadable('/', iter, end)) {
- return NS_OK;
- }
-
- const nsCSubstring& host = Substring(begin, iter);
-
- if (IsCanonicalizedIP(host)) {
- nsCString *key = hostKeys.AppendElement();
- if (!key)
- return NS_ERROR_OUT_OF_MEMORY;
-
- key->Assign(host);
- key->Append("/");
- return NS_OK;
- }
-
- nsTArray<nsCString> hostComponents;
- ParseString(PromiseFlatCString(host), '.', hostComponents);
-
- if (hostComponents.Length() < 2) {
- // No host or toplevel host; this won't match anything in the db.
- return NS_OK;
- }
-
- // First check with two domain components
- PRInt32 last = PRInt32(hostComponents.Length()) - 1;
- nsCString *lookupHost = hostKeys.AppendElement();
- if (!lookupHost)
- return NS_ERROR_OUT_OF_MEMORY;
-
- lookupHost->Assign(hostComponents[last - 1]);
- lookupHost->Append(".");
- lookupHost->Append(hostComponents[last]);
- lookupHost->Append("/");
-
- // Now check with three domain components
- if (hostComponents.Length() > 2) {
- nsCString *lookupHost2 = hostKeys.AppendElement();
- if (!lookupHost2)
- return NS_ERROR_OUT_OF_MEMORY;
- lookupHost2->Assign(hostComponents[last - 2]);
- lookupHost2->Append(".");
- lookupHost2->Append(*lookupHost);
- }
-
- return NS_OK;
-}
-
-nsresult
-GetLookupFragments(const nsACString& spec,
- nsTArray<nsCString>& fragments)
-{
- fragments.Clear();
-
- nsACString::const_iterator begin, end, iter;
- spec.BeginReading(begin);
- spec.EndReading(end);
-
- iter = begin;
- if (!FindCharInReadable('/', iter, end)) {
- return NS_OK;
- }
-
- const nsCSubstring& host = Substring(begin, iter++);
- nsCAutoString path;
- path.Assign(Substring(iter, end));
-
- /**
- * From the protocol doc:
- * For the hostname, the client will try at most 5 different strings. They
- * are:
- * a) The exact hostname of the url
- * b) The 4 hostnames formed by starting with the last 5 components and
- * successively removing the leading component. The top-level component
- * can be skipped. This is not done if the hostname is a numerical IP.
- */ - nsTArray hosts; - hosts.AppendElement(host); - - if (!IsCanonicalizedIP(host)) { - host.BeginReading(begin); - host.EndReading(end); - int numHostComponents = 0; - while (RFindInReadable(NS_LITERAL_CSTRING("."), begin, end) && - numHostComponents < MAX_HOST_COMPONENTS) { - // don't bother checking toplevel domains - if (++numHostComponents >= 2) { - host.EndReading(iter); - hosts.AppendElement(Substring(end, iter)); - } - end = begin; - host.BeginReading(begin); - } - } - - /** - * From the protocol doc: - * For the path, the client will also try at most 6 different strings. - * They are: - * a) the exact path of the url, including query parameters - * b) the exact path of the url, without query parameters - * c) the 4 paths formed by starting at the root (/) and - * successively appending path components, including a trailing - * slash. This behavior should only extend up to the next-to-last - * path component, that is, a trailing slash should never be - * appended that was not present in the original url. - */ - nsTArray paths; - nsCAutoString pathToAdd; - - path.BeginReading(begin); - path.EndReading(end); - iter = begin; - if (FindCharInReadable('?', iter, end)) { - pathToAdd = Substring(begin, iter); - paths.AppendElement(pathToAdd); - end = iter; - } - - int numPathComponents = 1; - iter = begin; - while (FindCharInReadable('/', iter, end) && - numPathComponents < MAX_PATH_COMPONENTS) { - iter++; - pathToAdd.Assign(Substring(begin, iter)); - paths.AppendElement(pathToAdd); - numPathComponents++; - } - - // If we haven't already done so, add the full path - if (!pathToAdd.Equals(path)) { - paths.AppendElement(path); - } - // Check an empty path (for whole-domain blacklist entries) - paths.AppendElement(EmptyCString()); - - for (PRUint32 hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) { - for (PRUint32 pathIndex = 0; pathIndex < paths.Length(); pathIndex++) { - nsCString key; - key.Assign(hosts[hostIndex]); - key.Append('/'); - key.Append(paths[pathIndex]); - LOG(("Chking %s", key.get())); - - fragments.AppendElement(key); - } - } - - return NS_OK; -} - -nsresult -nsUrlClassifierDBServiceWorker::Check(const nsACString& spec, - nsTArray& results) -{ - PRInt64 now = (PR_Now() / PR_USEC_PER_SEC); - - // Get list of host keys to look up - nsAutoTArray lookupHosts; - nsresult rv = GetHostKeys(spec, lookupHosts); - - nsTArray mCachedEntries; - - // Gather host's prefixes - for (PRUint32 i = 0; i < lookupHosts.Length(); i++) { - // Find the corresponding host key - nsUrlClassifierDomainHash hostKey; - nsresult rv = GetKey(lookupHosts[i], hostKey, mCryptoHash); - NS_ENSURE_SUCCESS(rv, rv); - // Read the entries for this fragments host from SQLite - mMainStore.ReadAddEntries(hostKey, mCachedEntries); - } - - // Now get the set of fragments to look up. - nsTArray fragments; - rv = GetLookupFragments(spec, fragments); - NS_ENSURE_SUCCESS(rv, rv); - - // Now check each lookup fragment against the entries in the DB. - for (PRUint32 i = 0; i < fragments.Length(); i++) { - nsUrlClassifierCompleteHash lookupHash; - lookupHash.FromPlaintext(fragments[i], mCryptoHash); - - for (PRUint32 j = 0; j < mCachedEntries.Length(); j++) { - nsUrlClassifierEntry &entry = mCachedEntries[j]; - if (entry.Match(lookupHash)) { - // If the entry doesn't contain a complete hash, we need to - // save it here so that it can be compared against the - // complete hash. However, we don't set entry.mHaveComplete - // because it isn't a verified part of the entry yet. 
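(Editorial aside: a worked example of the host/path expansion that GetLookupFragments above implements, following the protocol rules quoted in its comments:)

// Example (illustrative): spec = "a.b.example.com/p/q.html?x=1"
//   hosts: a.b.example.com, b.example.com, example.com
//   paths: p/q.html?x=1, p/q.html, p/, "" (the empty path)
// Crossing them yields 3 x 4 = 12 lookup fragments, e.g.
// "example.com/p/q.html" and "b.example.com/".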
- nsUrlClassifierLookupResult *result = results.AppendElement();
- if (!result)
- return NS_ERROR_OUT_OF_MEMORY;
-
- result->mLookupFragment = lookupHash;
- result->mEntry = entry;
-
- // Fill in the table name.
- GetTableName(entry.mTableId, result->mTableName);
-
- bool fresh;
- PRInt64 tableUpdateTime;
- if (mTableFreshness.Get(result->mTableName, &tableUpdateTime)) {
- LOG(("tableUpdateTime: %lld, now: %lld, freshnessGuarantee: %d\n",
- tableUpdateTime, now, gFreshnessGuarantee));
- fresh = ((now - tableUpdateTime) <= gFreshnessGuarantee);
- } else {
- LOG(("No expiration time for this table.\n"));
- fresh = false;
- }
-
- // This is a confirmed result if we match a complete fragment in
- // an up-to-date table.
- result->mConfirmed = entry.mHaveComplete && fresh;
-
- LOG(("Found a result. complete=%d, fresh=%d",
- entry.mHaveComplete, fresh));
- }
- }
- }
-
- return NS_OK;
-}
-
 /**
 * Looking up a key in the database is a two-step process:
 *
@@ -1775,8 +313,7 @@ nsUrlClassifierDBServiceWorker::DoLookup(const nsACString& spec,
 }
 #endif
- nsAutoPtr<nsTArray<nsUrlClassifierLookupResult> > results;
- results = new nsTArray<nsUrlClassifierLookupResult>();
+ nsAutoPtr<LookupResultArray> results(new LookupResultArray());
 if (!results) {
 c->LookupComplete(nsnull);
 return NS_ERROR_OUT_OF_MEMORY;
@@ -1784,7 +321,10 @@ nsUrlClassifierDBServiceWorker::DoLookup(const nsACString& spec,
 // we ignore failures from Check because we'd rather return the
 // results that were found than fail.
- Check(spec, *results);
+ mClassifier->SetFreshTime(gFreshnessGuarantee);
+ mClassifier->Check(spec, *results);
+
+ LOG(("Found %d results.", results->Length()));
 #if defined(PR_LOGGING)
 if (LOG_ENABLED()) {
@@ -1794,16 +334,28 @@ nsUrlClassifierDBServiceWorker::DoLookup(const nsACString& spec,
 }
 #endif
+ nsAutoPtr<LookupResultArray> completes(new LookupResultArray());
+
 for (PRUint32 i = 0; i < results->Length(); i++) {
- if (!results->ElementAt(i).mConfirmed) {
+ if (!mMissCache.Contains(results->ElementAt(i).hash.prefix)) {
+ completes->AppendElement(results->ElementAt(i));
+ }
+ }
+
+ for (PRUint32 i = 0; i < completes->Length(); i++) {
+ if (!completes->ElementAt(i).Confirmed()) {
 // We're going to be doing a gethash request, add some extra entries.
- AddNoise(results->ElementAt(i).mEntry.mId, mGethashNoise, *results);
+ // Note that we cannot pass the first two by reference, because we
+ // add to completes, which can cause completes to reallocate and move.
+ AddNoise(completes->ElementAt(i).mCodedPrefix,
+ completes->ElementAt(i).mTableName,
+ mGethashNoise, *completes);
 break;
 }
 }
 // At this point ownership of 'results' is handed to the callback.
- c->LookupComplete(results.forget()); + c->LookupComplete(completes.forget()); return NS_OK; } @@ -1819,50 +371,42 @@ nsUrlClassifierDBServiceWorker::HandlePendingLookups() MutexAutoUnlock unlock(mPendingLookupLock); DoLookup(lookup.mKey, lookup.mCallback); } + double lookupTime = (TimeStamp::Now() - lookup.mStartTime).ToMilliseconds(); + Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LOOKUP_TIME, + static_cast(lookupTime)); } return NS_OK; } nsresult -nsUrlClassifierDBServiceWorker::AddNoise(PRInt64 nearID, - PRInt32 count, - nsTArray& results) +nsUrlClassifierDBServiceWorker::AddNoise(const Prefix aPrefix, + const nsCString tableName, + PRInt32 aCount, + LookupResultArray& results) { - if (count < 1) { + if (aCount < 1) { return NS_OK; } - PRInt64 randomNum; - nsresult rv = mMainStore.RandomNumber(&randomNum); - NS_ENSURE_SUCCESS(rv, rv); - - PRInt32 numBefore = randomNum % count; - - nsTArray noiseEntries; - rv = mMainStore.ReadNoiseEntries(nearID, numBefore, true, noiseEntries); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mMainStore.ReadNoiseEntries(nearID, count - numBefore, false, noiseEntries); + PrefixArray noiseEntries; + nsresult rv = mClassifier->ReadNoiseEntries(aPrefix, tableName, + aCount, &noiseEntries); NS_ENSURE_SUCCESS(rv, rv); for (PRUint32 i = 0; i < noiseEntries.Length(); i++) { - nsUrlClassifierLookupResult *result = results.AppendElement(); + LookupResult *result = results.AppendElement(); if (!result) return NS_ERROR_OUT_OF_MEMORY; - result->mEntry = noiseEntries[i]; - result->mConfirmed = false; + result->hash.prefix = noiseEntries[i]; result->mNoise = true; - - // Fill in the table name. - GetTableName(noiseEntries[i].mTableId, result->mTableName); + result->mTableName.Assign(tableName); } return NS_OK; } - // Lookup a key in the db. 
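(Editorial aside: AddNoise takes its Prefix and table name by value for the reason noted in DoLookup above; the callee appends to the very array backing those values, and an nsTArray append can reallocate and move its elements. A sketch of the dangling-reference bug this avoids:)

// Sketch of the hazard (names as used above):
LookupResultArray completes;
// ... completes gets populated ...
const Prefix& p = completes[0].mCodedPrefix;  // reference into the array
completes.AppendElement();                    // may reallocate the buffer
// 'p' can now dangle; passing a copy (const Prefix aPrefix) stays valid.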
NS_IMETHODIMP nsUrlClassifierDBServiceWorker::Lookup(const nsACString& spec, @@ -1883,1027 +427,31 @@ nsUrlClassifierDBServiceWorker::GetTables(nsIUrlClassifierCallback* c) return NS_ERROR_FAILURE; } - mozStorageStatementScoper scoper(mGetTablesStatement); + NS_ENSURE_SUCCESS(rv, rv); nsCAutoString response; - bool hasMore; - while (NS_SUCCEEDED(rv = mGetTablesStatement->ExecuteStep(&hasMore)) && - hasMore) { - nsCAutoString val; - mGetTablesStatement->GetUTF8String(0, val); - - if (val.IsEmpty()) { - continue; - } - - response.Append(val); - response.Append(';'); - - mGetTablesStatement->GetUTF8String(1, val); - - bool haveAdds = false; - if (!val.IsEmpty()) { - response.Append("a:"); - response.Append(val); - haveAdds = true; - } - - mGetTablesStatement->GetUTF8String(2, val); - if (!val.IsEmpty()) { - if (haveAdds) - response.Append(":"); - - response.Append("s:"); - response.Append(val); - } - - response.Append('\n'); - } - - if (NS_FAILED(rv)) { - response.Truncate(); - } - + mClassifier->TableRequest(response); c->HandleEvent(response); return rv; } -nsresult -nsUrlClassifierDBServiceWorker::GetTableId(const nsACString& table, - PRUint32* tableId) -{ - mozStorageStatementScoper findScoper(mGetTableIdStatement); - - nsresult rv = mGetTableIdStatement->BindUTF8StringByIndex(0, table); - NS_ENSURE_SUCCESS(rv, rv); - - bool exists; - rv = mGetTableIdStatement->ExecuteStep(&exists); - NS_ENSURE_SUCCESS(rv, rv); - if (exists) { - *tableId = mGetTableIdStatement->AsInt32(0); - return NS_OK; - } - - mozStorageStatementScoper insertScoper(mInsertTableIdStatement); - rv = mInsertTableIdStatement->BindUTF8StringByIndex(0, table); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mInsertTableIdStatement->Execute(); - NS_ENSURE_SUCCESS(rv, rv); - - PRInt64 rowId; - rv = mConnection->GetLastInsertRowID(&rowId); - NS_ENSURE_SUCCESS(rv, rv); - - if (rowId > PR_UINT32_MAX) - return NS_ERROR_FAILURE; - - *tableId = rowId; - - return NS_OK; -} - -nsresult -nsUrlClassifierDBServiceWorker::GetTableName(PRUint32 tableId, - nsACString& tableName) -{ - mozStorageStatementScoper findScoper(mGetTableNameStatement); - nsresult rv = mGetTableNameStatement->BindInt32ByIndex(0, tableId); - NS_ENSURE_SUCCESS(rv, rv); - bool exists; - rv = mGetTableNameStatement->ExecuteStep(&exists); - NS_ENSURE_SUCCESS(rv, rv); - if (!exists) return NS_ERROR_FAILURE; - - return mGetTableNameStatement->GetUTF8String(0, tableName); -} - -nsresult -nsUrlClassifierDBServiceWorker::InflateChunk(nsACString& chunk) -{ - nsCAutoString inflated; - char buf[4096]; - - const nsPromiseFlatCString& flat = PromiseFlatCString(chunk); - - z_stream stream; - memset(&stream, 0, sizeof(stream)); - stream.next_in = (Bytef*)flat.get(); - stream.avail_in = flat.Length(); - - if (inflateInit(&stream) != Z_OK) { - return NS_ERROR_FAILURE; - } - - int code; - do { - stream.next_out = (Bytef*)buf; - stream.avail_out = sizeof(buf); - - code = inflate(&stream, Z_NO_FLUSH); - PRUint32 numRead = sizeof(buf) - stream.avail_out; - - if (code == Z_OK || code == Z_STREAM_END) { - inflated.Append(buf, numRead); - } - } while (code == Z_OK); - - inflateEnd(&stream); - - if (code != Z_STREAM_END) { - return NS_ERROR_FAILURE; - } - - chunk = inflated; - - return NS_OK; -} - -nsresult -nsUrlClassifierStore::DeleteEntry(nsUrlClassifierEntry& entry) -{ - if (entry.mId == -1) { - return NS_OK; - } - - mozStorageStatementScoper scoper(mDeleteStatement); - mDeleteStatement->BindInt64ByIndex(0, entry.mId); - nsresult rv = mDeleteStatement->Execute(); - NS_ENSURE_SUCCESS(rv, rv); - - 
entry.mId = -1; - - return NS_OK; -} - -nsresult -nsUrlClassifierStore::WriteEntry(nsUrlClassifierEntry& entry) -{ - if (entry.mId != -1) { - // existing entry, just ignore it - return NS_OK; - } - - mozStorageStatementScoper scoper(mInsertStatement); - - nsresult rv = BindStatement(entry, mInsertStatement); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mInsertStatement->Execute(); - NS_ENSURE_SUCCESS(rv, rv); - - PRInt64 rowId; - rv = mConnection->GetLastInsertRowID(&rowId); - NS_ENSURE_SUCCESS(rv, rv); - - if (rowId > PR_UINT32_MAX) { - return NS_ERROR_FAILURE; - } - - entry.mId = rowId; - - return NS_OK; -} - -nsresult -nsUrlClassifierStore::UpdateEntry(nsUrlClassifierEntry& entry) -{ - mozStorageStatementScoper scoper(mUpdateStatement); - - NS_ENSURE_ARG(entry.mId != -1); - - nsresult rv = BindStatement(entry, mUpdateStatement); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mUpdateStatement->Execute(); - NS_ENSURE_SUCCESS(rv, rv); - - return NS_OK; -} - -static bool -IsCanonicalizedIP(const nsACString& host) -{ - // The canonicalization process will have left IP addresses in dotted - // decimal with no surprises. - PRUint32 i1, i2, i3, i4; - char c; - if (PR_sscanf(PromiseFlatCString(host).get(), "%u.%u.%u.%u%c", - &i1, &i2, &i3, &i4, &c) == 4) { - return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF); - } - - return false; -} - -static nsresult -GetKey(const nsACString& spec, - nsUrlClassifierDomainHash& hash, - nsICryptoHash * aCryptoHash) -{ - nsACString::const_iterator begin, end, iter; - spec.BeginReading(begin); - spec.EndReading(end); - - iter = begin; - if (!FindCharInReadable('/', iter, end)) { - return NS_OK; - } - - const nsCSubstring& host = Substring(begin, iter); - - if (IsCanonicalizedIP(host)) { - nsCAutoString key; - key.Assign(host); - key.Append("/"); - return hash.FromPlaintext(key, aCryptoHash); - } - - nsTArray hostComponents; - ParseString(PromiseFlatCString(host), '.', hostComponents); - - if (hostComponents.Length() < 2) - return NS_ERROR_FAILURE; - - PRInt32 last = PRInt32(hostComponents.Length()) - 1; - nsCAutoString lookupHost; - - if (hostComponents.Length() > 2) { - lookupHost.Append(hostComponents[last - 2]); - lookupHost.Append("."); - } - - lookupHost.Append(hostComponents[last - 1]); - lookupHost.Append("."); - lookupHost.Append(hostComponents[last]); - lookupHost.Append("/"); - - return hash.FromPlaintext(lookupHost, aCryptoHash); -} - -nsresult -nsUrlClassifierDBServiceWorker::GetShaEntries(PRUint32 tableId, - PRUint32 chunkType, - PRUint32 chunkNum, - PRUint32 domainSize, - PRUint32 fragmentSize, - nsACString& chunk, - nsTArray& entries) -{ - PRUint32 start = 0; - while (start + domainSize + 1 <= chunk.Length()) { - nsUrlClassifierDomainHash domain; - domain.Assign(Substring(chunk, start, DOMAIN_LENGTH)); - start += domainSize; - - // then there is a one-byte count of fragments - PRUint8 numEntries = static_cast(chunk[start]); - start++; - - if (numEntries == 0) { - // if there are no fragments, the domain itself is treated as a - // fragment. 
This will only work if domainHashSize == hashSize - if (domainSize != fragmentSize) { - NS_WARNING("Received 0-fragment entry where domainSize != fragmentSize"); - return NS_ERROR_FAILURE; - } - - nsUrlClassifierEntry* entry = entries.AppendElement(); - if (!entry) return NS_ERROR_OUT_OF_MEMORY; - - entry->mKey = domain; - entry->mTableId = tableId; - entry->mChunkId = chunkNum; - entry->SetHash(domain); - - if (chunkType == CHUNK_SUB) { - if (start + 4 > chunk.Length()) { - // there isn't as much data as there should be. - NS_WARNING("Received a zero-entry sub chunk without an associated add."); - return NS_ERROR_FAILURE; - } - const nsCSubstring& str = Substring(chunk, start, 4); - PRUint32 p; - memcpy(&p, str.BeginReading(), 4); - entry->mAddChunkId = PR_ntohl(p); - if (entry->mAddChunkId == 0) { - NS_WARNING("Received invalid chunk number."); - return NS_ERROR_FAILURE; - } - start += 4; - } - } else { - PRUint32 entrySize = fragmentSize; - if (chunkType == CHUNK_SUB) { - entrySize += 4; - } - if (start + (numEntries * entrySize) > chunk.Length()) { - // there isn't as much data as they said there would be. - NS_WARNING("Received a chunk without enough data"); - return NS_ERROR_FAILURE; - } - - for (PRUint8 i = 0; i < numEntries; i++) { - nsUrlClassifierEntry* entry = entries.AppendElement(); - if (!entry) return NS_ERROR_OUT_OF_MEMORY; - - entry->mKey = domain; - entry->mTableId = tableId; - entry->mChunkId = chunkNum; - - if (chunkType == CHUNK_SUB) { - const nsCSubstring& str = Substring(chunk, start, 4); - PRUint32 p; - memcpy(&p, str.BeginReading(), 4); - entry->mAddChunkId = PR_ntohl(p); - if (entry->mAddChunkId == 0) { - NS_WARNING("Received invalid chunk number."); - return NS_ERROR_FAILURE; - } - start += 4; - } - - if (fragmentSize == PARTIAL_LENGTH) { - nsUrlClassifierPartialHash hash; - hash.Assign(Substring(chunk, start, PARTIAL_LENGTH)); - entry->SetHash(hash); - } else if (fragmentSize == COMPLETE_LENGTH) { - nsUrlClassifierCompleteHash hash; - hash.Assign(Substring(chunk, start, COMPLETE_LENGTH)); - entry->SetHash(hash); - } else { - NS_ASSERTION(false, "Invalid fragment size!"); - return NS_ERROR_FAILURE; - } - - start += fragmentSize; - } - } - } - - return NS_OK; -} - -nsresult -nsUrlClassifierDBServiceWorker::GetChunkEntries(const nsACString& table, - PRUint32 tableId, - PRUint32 chunkType, - PRUint32 chunkNum, - PRUint32 hashSize, - nsACString& chunk, - nsTArray& entries) -{ - nsresult rv; - if (StringEndsWith(table, NS_LITERAL_CSTRING("-exp"))) { - // regexp tables need to be ungzipped - rv = InflateChunk(chunk); - NS_ENSURE_SUCCESS(rv, rv); - } - - if (StringEndsWith(table, NS_LITERAL_CSTRING("-shavar"))) { - rv = GetShaEntries(tableId, chunkType, chunkNum, DOMAIN_LENGTH, hashSize, - chunk, entries); - NS_ENSURE_SUCCESS(rv, rv); - } else { - nsTArray lines; - ParseString(PromiseFlatCString(chunk), '\n', lines); - - // non-hashed tables need to be hashed - for (PRInt32 i = 0; i < PRInt32(lines.Length()); i++) { - nsUrlClassifierEntry *entry = entries.AppendElement(); - if (!entry) - return NS_ERROR_OUT_OF_MEMORY; - - nsCAutoString entryStr; - if (chunkType == CHUNK_SUB) { - nsCString::const_iterator begin, iter, end; - lines[i].BeginReading(begin); - lines[i].EndReading(end); - iter = begin; - if (!FindCharInReadable(':', iter, end) || - PR_sscanf(lines[i].get(), "%d:", &entry->mAddChunkId) != 1) { - NS_WARNING("Received sub chunk without associated add chunk."); - return NS_ERROR_FAILURE; - } - iter++; - entryStr = Substring(iter, end); - } else { - entryStr = 
lines[i]; - } - - rv = GetKey(entryStr, entry->mKey, mCryptoHash); - NS_ENSURE_SUCCESS(rv, rv); - - entry->mTableId = tableId; - entry->mChunkId = chunkNum; - if (hashSize == PARTIAL_LENGTH) { - nsUrlClassifierPartialHash hash; - hash.FromPlaintext(entryStr, mCryptoHash); - entry->SetHash(hash); - } else if (hashSize == COMPLETE_LENGTH) { - nsUrlClassifierCompleteHash hash; - hash.FromPlaintext(entryStr, mCryptoHash); - entry->SetHash(hash); - } else { - NS_ASSERTION(false, "Invalid fragment size!"); - return NS_ERROR_FAILURE; - } - } - } - - return NS_OK; -} - -bool -nsUrlClassifierDBServiceWorker::ParseChunkRange(nsACString::const_iterator &begin, - const nsACString::const_iterator &end, - PRUint32 *first, - PRUint32 *last) -{ - nsACString::const_iterator iter = begin; - FindCharInReadable(',', iter, end); - - nsCAutoString element(Substring(begin, iter)); - begin = iter; - if (begin != end) - begin++; - - PRUint32 numRead = PR_sscanf(element.get(), "%u-%u", first, last); - if (numRead == 2) { - if (*first > *last) { - PRUint32 tmp = *first; - *first = *last; - *last = tmp; - } - return true; - } - - if (numRead == 1) { - *last = *first; - return true; - } - - return false; -} - -nsresult -nsUrlClassifierDBServiceWorker::ParseChunkList(const nsACString& chunkStr, - nsTArray& chunks) -{ - LOG(("Parsing %s", PromiseFlatCString(chunkStr).get())); - - nsACString::const_iterator begin, end; - chunkStr.BeginReading(begin); - chunkStr.EndReading(end); - while (begin != end) { - PRUint32 first, last; - if (ParseChunkRange(begin, end, &first, &last)) { - for (PRUint32 num = first; num <= last; num++) { - chunks.AppendElement(num); - } - } - } - - LOG(("Got %d elements.", chunks.Length())); - - return NS_OK; -} - -nsresult -nsUrlClassifierDBServiceWorker::JoinChunkList(nsTArray& chunks, - nsCString& chunkStr) -{ - chunkStr.Truncate(); - chunks.Sort(); - - PRUint32 i = 0; - while (i < chunks.Length()) { - if (i != 0) { - chunkStr.Append(','); - } - chunkStr.AppendInt(chunks[i]); - - PRUint32 first = i; - PRUint32 last = first; - i++; - while (i < chunks.Length() && (chunks[i] == chunks[i - 1] + 1 || chunks[i] == chunks[i - 1])) { - last = i++; - } - - if (last != first) { - chunkStr.Append('-'); - chunkStr.AppendInt(chunks[last]); - } - } - - return NS_OK; -} - - -nsresult -nsUrlClassifierDBServiceWorker::GetChunkLists(PRUint32 tableId, - nsACString& addChunks, - nsACString& subChunks) -{ - addChunks.Truncate(); - subChunks.Truncate(); - - mozStorageStatementScoper scoper(mGetChunkListsStatement); - - nsresult rv = mGetChunkListsStatement->BindInt32ByIndex(0, tableId); - NS_ENSURE_SUCCESS(rv, rv); - - bool hasMore = false; - rv = mGetChunkListsStatement->ExecuteStep(&hasMore); - NS_ENSURE_SUCCESS(rv, rv); - - if (!hasMore) { - LOG(("Getting chunks for %d, found nothing", tableId)); - return NS_OK; - } - - rv = mGetChunkListsStatement->GetUTF8String(0, addChunks); - NS_ENSURE_SUCCESS(rv, rv); - - rv = mGetChunkListsStatement->GetUTF8String(1, subChunks); - NS_ENSURE_SUCCESS(rv, rv); - - LOG(("Getting chunks for %d, got %s/%s", - tableId, - PromiseFlatCString(addChunks).get(), - PromiseFlatCString(subChunks).get())); - - return NS_OK; -} - -nsresult -nsUrlClassifierDBServiceWorker::SetChunkLists(PRUint32 tableId, - const nsACString& addChunks, - const nsACString& subChunks) -{ - mozStorageStatementScoper scoper(mSetChunkListsStatement); - - mSetChunkListsStatement->BindUTF8StringByIndex(0, addChunks); - mSetChunkListsStatement->BindUTF8StringByIndex(1, subChunks); - 
mSetChunkListsStatement->BindInt32ByIndex(2, tableId); - nsresult rv = mSetChunkListsStatement->Execute(); - NS_ENSURE_SUCCESS(rv, rv); - - return NS_OK; -} - -nsresult -nsUrlClassifierDBServiceWorker::CacheChunkLists(PRUint32 tableId, - bool parseAdds, - bool parseSubs) -{ - nsresult rv; - - if (mHaveCachedLists && mCachedListsTable != tableId) { - rv = FlushChunkLists(); - NS_ENSURE_SUCCESS(rv, rv); - } - - if (!mHaveCachedLists) { - rv = GetChunkLists(tableId, mCachedAddsStr, mCachedSubsStr); - NS_ENSURE_SUCCESS(rv, rv); - - mHaveCachedLists = true; - mCachedListsTable = tableId; - } - - if (parseAdds && !mHaveCachedAddChunks) { - ParseChunkList(mCachedAddsStr, mCachedAddChunks); - mHaveCachedAddChunks = true; - } - - if (parseSubs && !mHaveCachedSubChunks) { - ParseChunkList(mCachedSubsStr, mCachedSubChunks); - mHaveCachedSubChunks = true; - } - - return NS_OK; -} - -nsresult -nsUrlClassifierDBServiceWorker::FlushChunkLists() -{ - if (!mHaveCachedLists) { - return NS_OK; - } - - if (mHaveCachedAddChunks) { - JoinChunkList(mCachedAddChunks, mCachedAddsStr); - } - - if (mHaveCachedSubChunks) { - JoinChunkList(mCachedSubChunks, mCachedSubsStr); - } - - nsresult rv = SetChunkLists(mCachedListsTable, - mCachedAddsStr, mCachedSubsStr); - - // clear out the cache before checking/returning the error here. - ClearCachedChunkLists(); - - return rv; -} - -void -nsUrlClassifierDBServiceWorker::ClearCachedChunkLists() -{ - mCachedAddsStr.Truncate(); - mCachedSubsStr.Truncate(); - mCachedListsTable = PR_UINT32_MAX; - mHaveCachedLists = false; - - mCachedAddChunks.Clear(); - mHaveCachedAddChunks = false; - - mCachedSubChunks.Clear(); - mHaveCachedSubChunks = false; -} - -bool -nsUrlClassifierDBServiceWorker::InsertChunkId(nsTArray &chunks, - PRUint32 chunkNum) -{ - PRUint32 low = 0, high = chunks.Length(); - while (high > low) { - PRUint32 mid = (high + low) >> 1; - if (chunks[mid] == chunkNum) - return false; - if (chunks[mid] < chunkNum) - low = mid + 1; - else - high = mid; - } - - PRUint32 *item = chunks.InsertElementAt(low, chunkNum); - return (item != nsnull); -} - -nsresult -nsUrlClassifierDBServiceWorker::AddChunk(PRUint32 tableId, - PRUint32 chunkNum, - nsTArray& entries) -{ -#if defined(PR_LOGGING) - PRIntervalTime clockStart = 0; - if (LOG_ENABLED()) { - clockStart = PR_IntervalNow(); - } -#endif - - nsresult rv = CacheChunkLists(tableId, true, false); - NS_ENSURE_SUCCESS(rv, rv); - - if (!InsertChunkId(mCachedAddChunks, chunkNum)) { - LOG(("Ignoring duplicate add chunk %d in table %d", chunkNum, tableId)); - return NS_OK; - } - - LOG(("Adding %d entries to chunk %d in table %d", entries.Length(), chunkNum, tableId)); - - nsTArray entryIDs; - - nsAutoTArray subEntries; - rv = mPendingSubStore.ReadSubEntries(tableId, chunkNum, subEntries); - NS_ENSURE_SUCCESS(rv, rv); - - for (PRUint32 i = 0; i < entries.Length(); i++) { - nsUrlClassifierEntry& thisEntry = entries[i]; - - HandlePendingLookups(); - - bool writeEntry = true; - for (PRUint32 j = 0; j < subEntries.Length(); j++) { - if (thisEntry.SubMatch(subEntries[j])) { - subEntries.RemoveElementAt(j); - - writeEntry = false; - break; - } - } - - HandlePendingLookups(); - - if (writeEntry) { - rv = mMainStore.WriteEntry(thisEntry); - NS_ENSURE_SUCCESS(rv, rv); - } - } - - rv = mPendingSubStore.ExpireAddChunk(tableId, chunkNum); - NS_ENSURE_SUCCESS(rv, rv); - -#if defined(PR_LOGGING) - if (LOG_ENABLED()) { - PRIntervalTime clockEnd = PR_IntervalNow(); - LOG(("adding chunk %d took %dms\n", chunkNum, - PR_IntervalToMilliseconds(clockEnd - 
clockStart))); - } -#endif - - return rv; -} - -nsresult -nsUrlClassifierStore::Expire(PRUint32 tableId, PRUint32 chunkNum) -{ - LOG(("Expiring chunk %d\n", chunkNum)); - - mozStorageStatementScoper expireScoper(mExpireStatement); - - nsresult rv = mExpireStatement->BindInt32ByIndex(0, tableId); - NS_ENSURE_SUCCESS(rv, rv); - rv = mExpireStatement->BindInt32ByIndex(1, chunkNum); - NS_ENSURE_SUCCESS(rv, rv); - - mWorker->HandlePendingLookups(); - - rv = mExpireStatement->Execute(); - NS_ENSURE_SUCCESS(rv, rv); - - return NS_OK; -} - -nsresult -nsUrlClassifierDBServiceWorker::ExpireAdd(PRUint32 tableId, - PRUint32 chunkNum) -{ - nsresult rv = CacheChunkLists(tableId, true, false); - NS_ENSURE_SUCCESS(rv, rv); - mCachedAddChunks.RemoveElement(chunkNum); - - return mMainStore.Expire(tableId, chunkNum); -} - -nsresult -nsUrlClassifierDBServiceWorker::SubChunk(PRUint32 tableId, - PRUint32 chunkNum, - nsTArray& entries) -{ - nsresult rv = CacheChunkLists(tableId, true, true); - - if (!InsertChunkId(mCachedSubChunks, chunkNum)) { - LOG(("Ignoring duplicate sub chunk %d in table %d", chunkNum, tableId)); - return NS_OK; - } - - LOG(("Subbing %d entries in chunk %d in table %d", entries.Length(), chunkNum, tableId)); - - for (PRUint32 i = 0; i < entries.Length(); i++) { - nsAutoTArray existingEntries; - nsUrlClassifierEntry& thisEntry = entries[i]; - - HandlePendingLookups(); - - // Check if we have the add chunk associated with the sub. - bool haveAdds = (mCachedAddChunks.BinaryIndexOf(thisEntry.mAddChunkId) != - mCachedAddChunks.NoIndex); - - if (haveAdds) { - rv = mMainStore.ReadAddEntries(thisEntry.mKey, thisEntry.mTableId, - thisEntry.mAddChunkId, existingEntries); - NS_ENSURE_SUCCESS(rv, rv); - } - - for (PRUint32 j = 0; j < existingEntries.Length(); j++) { - if (existingEntries[j].SubMatch(thisEntry)) { - rv = mMainStore.DeleteEntry(existingEntries[j]); - NS_ENSURE_SUCCESS(rv, rv); - existingEntries.RemoveElementAt(j); - break; - } - } - - if (!haveAdds) { - // Save this entry in the pending subtraction store. 
- rv = mPendingSubStore.WriteEntry(thisEntry); - NS_ENSURE_SUCCESS(rv, rv); - } - } - - return NS_OK; -} - -nsresult -nsUrlClassifierDBServiceWorker::ExpireSub(PRUint32 tableId, PRUint32 chunkNum) -{ - nsresult rv = CacheChunkLists(tableId, false, true); - NS_ENSURE_SUCCESS(rv, rv); - mCachedSubChunks.RemoveElement(chunkNum); - - return mPendingSubStore.Expire(tableId, chunkNum); -} - -nsresult -nsUrlClassifierDBServiceWorker::ProcessChunk(bool* done) -{ - // wait until the chunk has been read - if (mPendingStreamUpdate.Length() < static_cast(mChunkLen)) { - *done = true; - return NS_OK; - } - - nsCAutoString chunk; - chunk.Assign(Substring(mPendingStreamUpdate, 0, mChunkLen)); - mPendingStreamUpdate = Substring(mPendingStreamUpdate, mChunkLen); - - LOG(("Handling a chunk sized %d", chunk.Length())); - - nsTArray entries; - nsresult rv = GetChunkEntries(mUpdateTable, mUpdateTableId, mChunkType, - mChunkNum, mHashSize, chunk, entries); - NS_ENSURE_SUCCESS(rv, rv); - - if (mChunkType == CHUNK_ADD) { - rv = AddChunk(mUpdateTableId, mChunkNum, entries); - } else { - rv = SubChunk(mUpdateTableId, mChunkNum, entries); - } - - mState = STATE_LINE; - *done = false; - - return rv; -} - -nsresult -nsUrlClassifierDBServiceWorker::ProcessResponseLines(bool* done) -{ - PRUint32 cur = 0; - PRInt32 next; - - nsresult rv; - // We will run to completion unless we find a chunk line - *done = true; - - nsACString& updateString = mPendingStreamUpdate; - - while(cur < updateString.Length() && - (next = updateString.FindChar('\n', cur)) != kNotFound) { - const nsCSubstring& line = Substring(updateString, cur, next - cur); - cur = next + 1; - - LOG(("Processing %s\n", PromiseFlatCString(line).get())); - - if (mHMAC && mServerMAC.IsEmpty()) { - // If we did not receive a server MAC during BeginStream(), we - // require the first line of the update to be either a MAC or - // a request to rekey. - if (StringBeginsWith(line, NS_LITERAL_CSTRING("m:"))) { - mServerMAC = Substring(line, 2); - nsUrlClassifierUtils::UnUrlsafeBase64(mServerMAC); - - // The remainder of the pending update needs to be digested. - const nsCSubstring &toDigest = Substring(updateString, cur); - rv = mHMAC->Update(reinterpret_cast(toDigest.BeginReading()), - toDigest.Length()); - NS_ENSURE_SUCCESS(rv, rv); - } else if (line.EqualsLiteral("e:pleaserekey")) { - mUpdateObserver->RekeyRequested(); - } else { - LOG(("No MAC specified!")); - return NS_ERROR_FAILURE; - } - } else if (StringBeginsWith(line, NS_LITERAL_CSTRING("n:"))) { - if (PR_sscanf(PromiseFlatCString(line).get(), "n:%d", - &mUpdateWait) != 1) { - LOG(("Error parsing n: field: %s", PromiseFlatCString(line).get())); - mUpdateWait = 0; - } - } else if (line.EqualsLiteral("r:pleasereset")) { - mResetRequested = true; - } else if (line.EqualsLiteral("e:pleaserekey")) { - mUpdateObserver->RekeyRequested(); - } else if (StringBeginsWith(line, NS_LITERAL_CSTRING("i:"))) { - mUpdateTable.Assign(Substring(line, 2)); - GetTableId(mUpdateTable, &mUpdateTableId); - LOG(("update table: '%s' (%d)", mUpdateTable.get(), mUpdateTableId)); - } else if (StringBeginsWith(line, NS_LITERAL_CSTRING("u:"))) { - if (!mPrimaryStream) { - LOG(("Forwarded update tried to add its own forwarded update.")); - return NS_ERROR_FAILURE; - } - - const nsCSubstring& data = Substring(line, 2); - if (mHMAC) { - // We're expecting MACs alongside any url forwards. 
- nsCSubstring::const_iterator begin, end, sepBegin, sepEnd; - data.BeginReading(begin); - sepBegin = begin; - - data.EndReading(end); - sepEnd = end; - - if (!RFindInReadable(NS_LITERAL_CSTRING(","), sepBegin, sepEnd)) { - NS_WARNING("No MAC specified for a redirect in a request that expects a MAC"); - return NS_ERROR_FAILURE; - } - - nsCString serverMAC(Substring(sepEnd, end)); - nsUrlClassifierUtils::UnUrlsafeBase64(serverMAC); - mUpdateObserver->UpdateUrlRequested(Substring(begin, sepBegin), - mUpdateTable, - serverMAC); - } else { - // We didn't ask for a MAC, none should have been specified. - mUpdateObserver->UpdateUrlRequested(data, mUpdateTable, - NS_LITERAL_CSTRING("")); - } - } else if (StringBeginsWith(line, NS_LITERAL_CSTRING("a:")) || - StringBeginsWith(line, NS_LITERAL_CSTRING("s:"))) { - mState = STATE_CHUNK; - char command; - if (PR_sscanf(PromiseFlatCString(line).get(), - "%c:%d:%d:%d", &command, &mChunkNum, &mHashSize, &mChunkLen) != 4) { - return NS_ERROR_FAILURE; - } - - if (mChunkLen > MAX_CHUNK_SIZE) { - return NS_ERROR_FAILURE; - } - - if (!(mHashSize == PARTIAL_LENGTH || mHashSize == COMPLETE_LENGTH)) { - NS_WARNING("Invalid hash size specified in update."); - return NS_ERROR_FAILURE; - } - - mChunkType = (command == 'a') ? CHUNK_ADD : CHUNK_SUB; - - // Done parsing lines, move to chunk state now - *done = false; - break; - } else if (StringBeginsWith(line, NS_LITERAL_CSTRING("ad:"))) { - const nsCSubstring &list = Substring(line, 3); - nsACString::const_iterator begin, end; - list.BeginReading(begin); - list.EndReading(end); - while (begin != end) { - PRUint32 first, last; - if (ParseChunkRange(begin, end, &first, &last)) { - for (PRUint32 num = first; num <= last; num++) { - rv = ExpireAdd(mUpdateTableId, num); - NS_ENSURE_SUCCESS(rv, rv); - } - } else { - return NS_ERROR_FAILURE; - } - } - } else if (StringBeginsWith(line, NS_LITERAL_CSTRING("sd:"))) { - const nsCSubstring &list = Substring(line, 3); - nsACString::const_iterator begin, end; - list.BeginReading(begin); - list.EndReading(end); - while (begin != end) { - PRUint32 first, last; - if (ParseChunkRange(begin, end, &first, &last)) { - for (PRUint32 num = first; num <= last; num++) { - rv = ExpireSub(mUpdateTableId, num); - NS_ENSURE_SUCCESS(rv, rv); - } - } else { - return NS_ERROR_FAILURE; - } - } - } else { - LOG(("ignoring unknown line: '%s'", PromiseFlatCString(line).get())); - } - } - - mPendingStreamUpdate = Substring(updateString, cur); - - return NS_OK; -} - void nsUrlClassifierDBServiceWorker::ResetStream() { - mState = STATE_LINE; - mChunkNum = 0; - mHashSize = 0; - mChunkLen = 0; + LOG(("ResetStream")); mInStream = false; - mPrimaryStream = false; - mUpdateTable.Truncate(); - mPendingStreamUpdate.Truncate(); - mServerMAC.Truncate(); - mHMAC = nsnull; + mProtocolParser = nsnull; } void nsUrlClassifierDBServiceWorker::ResetUpdate() { + LOG(("ResetUpdate")); mUpdateWait = 0; mUpdateStatus = NS_OK; mUpdateObserver = nsnull; mUpdateClientKey.Truncate(); - mResetRequested = false; - mUpdateTables.Clear(); } NS_IMETHODIMP @@ -2918,6 +466,8 @@ nsUrlClassifierDBServiceWorker::BeginUpdate(nsIUrlClassifierUpdateObserver *obse const nsACString &tables, const nsACString &clientKey) { + LOG(("nsUrlClassifierDBServiceWorker::BeginUpdate [%s]", PromiseFlatCString(tables).get())); + if (gShuttingDownThread) return NS_ERROR_NOT_INITIALIZED; @@ -2929,38 +479,16 @@ nsUrlClassifierDBServiceWorker::BeginUpdate(nsIUrlClassifierUpdateObserver *obse return NS_ERROR_FAILURE; } - bool transaction; - rv = 
mConnection->GetTransactionInProgress(&transaction); - if (NS_FAILED(rv)) { - mUpdateStatus = rv; - return rv; - } - - if (transaction) { - NS_WARNING("Transaction already in progress in nsUrlClassifierDBServiceWorker::BeginUpdate. Cancelling update."); - mUpdateStatus = NS_ERROR_FAILURE; - return rv; - } - - rv = SetupUpdate(); - if (NS_FAILED(rv)) { - mUpdateStatus = rv; - return rv; - } - + mUpdateStatus = NS_OK; mUpdateObserver = observer; + SplitTables(tables, mUpdateTables); if (!clientKey.IsEmpty()) { rv = nsUrlClassifierUtils::DecodeClientKey(clientKey, mUpdateClientKey); NS_ENSURE_SUCCESS(rv, rv); + LOG(("clientKey present, marking update key")); } - // The first stream in an update is the only stream that may request - // forwarded updates. - mPrimaryStream = true; - - SplitTables(tables, mUpdateTables); - return NS_OK; } @@ -2968,62 +496,37 @@ NS_IMETHODIMP nsUrlClassifierDBServiceWorker::BeginStream(const nsACString &table, const nsACString &serverMAC) { + LOG(("nsUrlClassifierDBServiceWorker::BeginStream")); + if (gShuttingDownThread) return NS_ERROR_NOT_INITIALIZED; NS_ENSURE_STATE(mUpdateObserver); NS_ENSURE_STATE(!mInStream); - // We may have committed the update in FinishStream, if so set it up - // again here. - nsresult rv = SetupUpdate(); - if (NS_FAILED(rv)) { - mUpdateStatus = rv; - return rv; - } - mInStream = true; + NS_ASSERTION(!mProtocolParser, "Should not have a protocol parser."); + + mProtocolParser = new ProtocolParser(mHashKey); + if (!mProtocolParser) + return NS_ERROR_OUT_OF_MEMORY; + + mProtocolParser->Init(mCryptoHash); + + nsresult rv; + // If we're expecting a MAC, create the nsICryptoHMAC component now. if (!mUpdateClientKey.IsEmpty()) { - nsCOMPtr keyObjectFactory(do_GetService( - "@mozilla.org/security/keyobjectfactory;1", &rv)); - if (NS_FAILED(rv)) { - NS_WARNING("Failed to get nsIKeyObjectFactory service"); - mUpdateStatus = rv; - return mUpdateStatus; - } - - nsCOMPtr keyObject; - rv = keyObjectFactory->KeyFromString(nsIKeyObject::HMAC, mUpdateClientKey, - getter_AddRefs(keyObject)); - if (NS_FAILED(rv)) { - NS_WARNING("Failed to create key object, maybe not FIPS compliant?"); - mUpdateStatus = rv; - return mUpdateStatus; - } - - mHMAC = do_CreateInstance(NS_CRYPTO_HMAC_CONTRACTID, &rv); - if (NS_FAILED(rv)) { - NS_WARNING("Failed to create nsICryptoHMAC instance"); - mUpdateStatus = rv; - return mUpdateStatus; - } - - rv = mHMAC->Init(nsICryptoHMAC::SHA1, keyObject); - if (NS_FAILED(rv)) { - NS_WARNING("Failed to initialize nsICryptoHMAC instance"); - mUpdateStatus = rv; - return mUpdateStatus; - } + LOG(("Expecting MAC in this stream")); + rv = mProtocolParser->InitHMAC(mUpdateClientKey, serverMAC); + NS_ENSURE_SUCCESS(rv, rv); + } else { + LOG(("No MAC in this stream")); } - mServerMAC = serverMAC; - if (!table.IsEmpty()) { - mUpdateTable = table; - GetTableId(mUpdateTable, &mUpdateTableId); - LOG(("update table: '%s' (%d)", mUpdateTable.get(), mUpdateTableId)); + mProtocolParser->SetCurrentTable(table); } return NS_OK; @@ -3070,45 +573,7 @@ nsUrlClassifierDBServiceWorker::UpdateStream(const nsACString& chunk) HandlePendingLookups(); - LOG(("Update from Stream.")); - nsresult rv = OpenDb(); - if (NS_FAILED(rv)) { - NS_ERROR("Unable to open database"); - return NS_ERROR_FAILURE; - } - - // if something has gone wrong during this update, just throw it away - if (NS_FAILED(mUpdateStatus)) { - return mUpdateStatus; - } - - if (mHMAC && !mServerMAC.IsEmpty()) { - rv = mHMAC->Update(reinterpret_cast(chunk.BeginReading()), - chunk.Length()); - if 
(NS_FAILED(rv)) { - mUpdateStatus = rv; - return mUpdateStatus; - } - } - - LOG(("Got %s\n", PromiseFlatCString(chunk).get())); - - mPendingStreamUpdate.Append(chunk); - - bool done = false; - while (!done) { - if (mState == STATE_CHUNK) { - rv = ProcessChunk(&done); - } else { - rv = ProcessResponseLines(&done); - } - if (NS_FAILED(rv)) { - mUpdateStatus = rv; - return rv; - } - } - - return NS_OK; + return mProtocolParser->AppendStream(chunk); } NS_IMETHODIMP @@ -3120,85 +585,71 @@ nsUrlClassifierDBServiceWorker::FinishStream() NS_ENSURE_STATE(mInStream); NS_ENSURE_STATE(mUpdateObserver); - PRInt32 nextStreamDelay = 0; + mInStream = false; - if (NS_SUCCEEDED(mUpdateStatus) && mHMAC) { - nsCAutoString clientMAC; - mHMAC->Finish(true, clientMAC); + mProtocolParser->FinishHMAC(); - if (clientMAC != mServerMAC) { - NS_WARNING("Invalid update MAC!"); - LOG(("Invalid update MAC: expected %s, got %s", - mServerMAC.get(), clientMAC.get())); - mUpdateStatus = NS_ERROR_FAILURE; + if (NS_SUCCEEDED(mProtocolParser->Status())) { + if (mProtocolParser->UpdateWait()) { + mUpdateWait = mProtocolParser->UpdateWait(); } - PRIntervalTime updateTime = PR_IntervalNow() - mUpdateStartTime; - if (PR_IntervalToSeconds(updateTime) >= - static_cast(gWorkingTimeThreshold)) { - // We've spent long enough working that we should commit what we - // have and hold off for a bit. - nsresult rv = ApplyUpdate(); - if (NS_FAILED(rv)) { - if (rv == NS_ERROR_FILE_CORRUPTED) { - ResetDatabase(); - } - return rv; - } - nextStreamDelay = gDelayTime * 1000; + // XXX: Only allow forwards from the initial update? + const nsTArray &forwards = + mProtocolParser->Forwards(); + for (uint32 i = 0; i < forwards.Length(); i++) { + const ProtocolParser::ForwardedUpdate &forward = forwards[i]; + mUpdateObserver->UpdateUrlRequested(forward.url, forward.table, forward.mac); + } + // Hold on to any TableUpdate objects that were created by the + // parser. 
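// (Editorial note: the two calls below are a two-step raw-pointer
// ownership transfer. AppendElements() copies the TableUpdate* pointers
// into mTableUpdates, and ForgetTableUpdates() then clears the parser's
// own list so the parser's destructor does not delete objects the
// worker now owns.)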
+ mTableUpdates.AppendElements(mProtocolParser->GetTableUpdates()); + mProtocolParser->ForgetTableUpdates(); + } else { + mUpdateStatus = mProtocolParser->Status(); + } + mUpdateObserver->StreamFinished(mProtocolParser->Status(), 0); + + // Only reset if MAC was OK + if (NS_SUCCEEDED(mUpdateStatus)) { + if (mProtocolParser->ResetRequested()) { + mClassifier->Reset(); } } + // Rekey will cause update to fail (can't check MACs) + if (mProtocolParser->RekeyRequested()) { + mUpdateObserver->RekeyRequested(); + } - mUpdateObserver->StreamFinished(mUpdateStatus, - static_cast(nextStreamDelay)); - - ResetStream(); - + mProtocolParser = nsnull; return NS_OK; } -nsresult -nsUrlClassifierDBServiceWorker::SetCacheSize( - mozIStorageConnection * aConnection, PRInt32 aCacheSize) +NS_IMETHODIMP +nsUrlClassifierDBServiceWorker::FinishUpdate() { - mozStorageStatementScoper scoper(mGetPageSizeStatement); - bool hasResult; - nsresult rv = mGetPageSizeStatement->ExecuteStep(&hasResult); - NS_ENSURE_SUCCESS(rv, rv); + if (gShuttingDownThread) + return NS_ERROR_NOT_INITIALIZED; + NS_ENSURE_STATE(mUpdateObserver); - NS_ASSERTION(hasResult, "Should always be able to get page size from sqlite"); - PRUint32 pageSize = mGetPageSizeStatement->AsInt32(0); - PRUint32 cachePages = aCacheSize / pageSize; - nsCAutoString cacheSizePragma(MOZ_STORAGE_UNIQUIFY_QUERY_STR - "PRAGMA cache_size="); - cacheSizePragma.AppendInt(cachePages); - rv = aConnection->ExecuteSimpleSQL(cacheSizePragma); - NS_ENSURE_SUCCESS(rv, rv); - - return NS_OK; -} - -nsresult -nsUrlClassifierDBServiceWorker::SetupUpdate() -{ - LOG(("nsUrlClassifierDBServiceWorker::SetupUpdate")); - bool inProgress; - nsresult rv = mConnection->GetTransactionInProgress(&inProgress); - if (inProgress) { - return NS_OK; + if (NS_SUCCEEDED(mUpdateStatus)) { + mUpdateStatus = ApplyUpdate(); } - mUpdateStartTime = PR_IntervalNow(); + mMissCache.Clear(); - rv = mConnection->BeginTransaction(); - NS_ENSURE_SUCCESS(rv, rv); - - if (gUpdateCacheSize > 0) { - rv = SetCacheSize(mConnection, gUpdateCacheSize); - NS_ENSURE_SUCCESS(rv, rv); - if (gUpdateCacheSize != gLookupCacheSize) { - mGrewCache = true; - } + if (NS_SUCCEEDED(mUpdateStatus)) { + LOG(("Notifying success: %d", mUpdateWait)); + mUpdateObserver->UpdateSuccess(mUpdateWait); + } else { + LOG(("Notifying error: %d", mUpdateStatus)); + mUpdateObserver->UpdateError(mUpdateStatus); + /* + * mark the tables as spoiled, we don't want to block hosts + * longer than normal because our update failed + */ + mClassifier->MarkSpoiled(mUpdateTables); } + mUpdateObserver = nsnull; return NS_OK; } @@ -3206,116 +657,20 @@ nsUrlClassifierDBServiceWorker::SetupUpdate() nsresult nsUrlClassifierDBServiceWorker::ApplyUpdate() { - LOG(("nsUrlClassifierDBServiceWorker::ApplyUpdate")); - - if (mConnection) { - if (NS_FAILED(mUpdateStatus)) { - mConnection->RollbackTransaction(); - } else { - mUpdateStatus = FlushChunkLists(); - if (NS_SUCCEEDED(mUpdateStatus)) { - mUpdateStatus = mConnection->CommitTransaction(); - } - } - } - - if (NS_SUCCEEDED(mUpdateStatus)) { - // Reconstruct the prefix tree from the DB - nsresult rv = ConstructPrefixSet(); - NS_ENSURE_SUCCESS(rv, rv); - } - - if (mGrewCache) { - // During the update we increased the page cache to bigger than we - // want to keep around. At the moment, the only reliable way to make - // sure that the page cache is freed is to reopen the connection. 
- LOG(("GrewCache true, reopening DB")); - mGrewCache = false; - CloseDb(); - OpenDb(); - } - - mUpdateStartTime = 0; - - return NS_OK; -} - -NS_IMETHODIMP -nsUrlClassifierDBServiceWorker::FinishUpdate() -{ - LOG(("nsUrlClassifierDBServiceWorker::FinishUpdate()")); - if (gShuttingDownThread) - return NS_ERROR_NOT_INITIALIZED; - - NS_ENSURE_STATE(!mInStream); - NS_ENSURE_STATE(mUpdateObserver); - - // We need to get the error code before ApplyUpdate, because it might - // close/open the connection. - PRInt32 errcode = SQLITE_OK; - if (mConnection) - mConnection->GetLastError(&errcode); - - nsresult rv = ApplyUpdate(); - if (NS_FAILED(rv)) { - if (rv == NS_ERROR_FILE_CORRUPTED) { - ResetDatabase(); - } - return rv; - } - - if (NS_SUCCEEDED(mUpdateStatus)) { - mUpdateObserver->UpdateSuccess(mUpdateWait); - } else { - mUpdateObserver->UpdateError(mUpdateStatus); - } - - // It's important that we only reset the database on an update - // command if the update was successful, otherwise unauthenticated - // updates could cause a database reset. - bool resetDB = (NS_SUCCEEDED(mUpdateStatus) && mResetRequested) || - errcode == SQLITE_CORRUPT; - - if (!resetDB) { - if (NS_SUCCEEDED(mUpdateStatus)) { - PRInt64 now = (PR_Now() / PR_USEC_PER_SEC); - for (PRUint32 i = 0; i < mUpdateTables.Length(); i++) { - LOG(("Successfully updated %s", mUpdateTables[i].get())); - mTableFreshness.Put(mUpdateTables[i], now); - } - } else { - for (PRUint32 i = 0; i < mUpdateTables.Length(); i++) { - LOG(("Failed updating %s", mUpdateTables[i].get())); - mTableFreshness.Remove(mUpdateTables[i]); - } - } - } - - ResetUpdate(); - - if (resetDB) { - ResetDatabase(); - } - - return NS_OK; + LOG(("nsUrlClassifierDBServiceWorker::ApplyUpdate()")); + return mClassifier->ApplyUpdates(&mTableUpdates); } NS_IMETHODIMP nsUrlClassifierDBServiceWorker::ResetDatabase() { - LOG(("nsUrlClassifierDBServiceWorker::ResetDatabase [%p]", this)); - ClearCachedChunkLists(); - - mTableFreshness.Clear(); - - nsresult rv = CloseDb(); + nsresult rv = OpenDb(); NS_ENSURE_SUCCESS(rv, rv); - rv = mPrefixSet->SetPrefixes(nsnull, 0); - NS_ENSURE_SUCCESS(rv, rv); + mClassifier->Reset(); - mDBFile->Remove(false); - mPSFile->Remove(false); + rv = CloseDb(); + NS_ENSURE_SUCCESS(rv, rv); return NS_OK; } @@ -3323,22 +678,25 @@ nsUrlClassifierDBServiceWorker::ResetDatabase() NS_IMETHODIMP nsUrlClassifierDBServiceWorker::CancelUpdate() { - LOG(("CancelUpdate")); + LOG(("nsUrlClassifierDBServiceWorker::CancelUpdate")); if (mUpdateObserver) { + LOG(("UpdateObserver exists, cancelling")); + mUpdateStatus = NS_BINDING_ABORTED; - ClearCachedChunkLists(); - mConnection->RollbackTransaction(); mUpdateObserver->UpdateError(mUpdateStatus); - for (PRUint32 i = 0; i < mUpdateTables.Length(); i++) { - LOG(("Failed updating %s", mUpdateTables[i].get())); - mTableFreshness.Remove(mUpdateTables[i]); - } + /* + * mark the tables as spoiled, we don't want to block hosts + * longer than normal because our update failed + */ + mClassifier->MarkSpoiled(mUpdateTables); ResetStream(); ResetUpdate(); + } else { + LOG(("No UpdateObserver, nothing to cancel")); } return NS_OK; @@ -3351,48 +709,75 @@ nsUrlClassifierDBServiceWorker::CancelUpdate() NS_IMETHODIMP nsUrlClassifierDBServiceWorker::CloseDb() { - if (mConnection) { - mMainStore.Close(); - mPendingSubStore.Close(); - - mGetChunkListsStatement = nsnull; - mSetChunkListsStatement = nsnull; - - mGetTablesStatement = nsnull; - mGetTableIdStatement = nsnull; - mGetTableNameStatement = nsnull; - mInsertTableIdStatement = nsnull; - 
-    mGetPageSizeStatement = nsnull;
-
-    mConnection = nsnull;
-    LOG(("urlclassifier db closed\n"));
+  if (mClassifier) {
+    mClassifier->Close();
+    mClassifier = nsnull;
   }

   mCryptoHash = nsnull;
+  LOG(("urlclassifier db closed\n"));

   return NS_OK;
 }

 NS_IMETHODIMP
-nsUrlClassifierDBServiceWorker::CacheCompletions(nsTArray<nsUrlClassifierLookupResult> *results)
+nsUrlClassifierDBServiceWorker::CacheCompletions(CacheResultArray *results)
 {
   LOG(("nsUrlClassifierDBServiceWorker::CacheCompletions [%p]", this));
+  if (!mClassifier)
+    return NS_OK;

-  nsAutoPtr<nsTArray<nsUrlClassifierLookupResult> > resultsPtr(results);
+  // Ownership is transferred to us.
+  nsAutoPtr<CacheResultArray> resultsPtr(results);

-  // Start a new transaction.  If a transaction is open for an update
-  // this will be a noop, and this cache will be included in the
-  // update's transaction.
-  mozStorageTransaction trans(mConnection, true);
+  nsAutoPtr<ProtocolParser> pParse(new ProtocolParser(mHashKey));
+  nsTArray<TableUpdate*> updates;

-  for (PRUint32 i = 0; i < results->Length(); i++) {
-    nsUrlClassifierLookupResult& result = results->ElementAt(i);
-    // Failing to update here shouldn't be fatal (and might be common,
-    // if we're updating entries that were removed since they were
-    // returned after a lookup).
-    mMainStore.UpdateEntry(result.mEntry);
-  }
+  // Only cache results for tables that we have; don't take in tables we
+  // might accidentally have hit during a completion. This can happen
+  // because both the goog and googpub lists exist.
+  nsTArray<nsCString> tables;
+  nsresult rv = mClassifier->ActiveTables(tables);
+  NS_ENSURE_SUCCESS(rv, rv);

+  for (PRUint32 i = 0; i < resultsPtr->Length(); i++) {
+    bool activeTable = false;
+    for (PRUint32 table = 0; table < tables.Length(); table++) {
+      if (tables[table].Equals(resultsPtr->ElementAt(i).table)) {
+        activeTable = true;
+      }
+    }
+    if (activeTable) {
+      TableUpdate * tu = pParse->GetTableUpdate(resultsPtr->ElementAt(i).table);
+      LOG(("CacheCompletion Addchunk %d hash %X", resultsPtr->ElementAt(i).entry.addChunk,
+           resultsPtr->ElementAt(i).entry.hash.prefix));
+      tu->NewAddComplete(resultsPtr->ElementAt(i).entry.addChunk,
+                         resultsPtr->ElementAt(i).entry.hash.complete);
+      tu->NewAddChunk(resultsPtr->ElementAt(i).entry.addChunk);
+      tu->SetLocalUpdate();
+      updates.AppendElement(tu);
+      pParse->ForgetTableUpdates();
+    } else {
+      LOG(("Completion received, but table is not active, so not caching."));
+    }
+  }
+
+  mClassifier->ApplyUpdates(&updates);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsUrlClassifierDBServiceWorker::CacheMisses(PrefixArray *results)
+{
+  LOG(("nsUrlClassifierDBServiceWorker::CacheMisses [%p] %d",
+       this, results->Length()));
+
+  // Ownership is transferred to us.
+  nsAutoPtr<PrefixArray> resultsPtr(results);
+
+  for (PRUint32 i = 0; i < resultsPtr->Length(); i++) {
+    mMissCache.AppendElement(resultsPtr->ElementAt(i));
+  }

   return NS_OK;
 }

@@ -3400,462 +785,33 @@ nsresult
 nsUrlClassifierDBServiceWorker::OpenDb()
 {
   // Connection already open, don't do anything.
-  if (mConnection) {
+  if (mClassifier) {
     return NS_OK;
   }

-  LOG(("Opening db\n"));
+  LOG(("Opening db"));

-  nsresult rv;
-  // open the connection
-  nsCOMPtr<mozIStorageService> storageService =
-    do_GetService(MOZ_STORAGE_SERVICE_CONTRACTID, &rv);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  bool exists;
-  rv = mDBFile->Exists(&exists);
-  NS_ENSURE_SUCCESS(rv, rv);
-  bool newDB = !exists;
-
-  nsCOMPtr<mozIStorageConnection> connection;
-  rv = storageService->OpenDatabase(mDBFile, getter_AddRefs(connection));
-  if (rv == NS_ERROR_FILE_CORRUPTED) {
-    // delete the db and try opening again
-    rv = mDBFile->Remove(false);
-    NS_ENSURE_SUCCESS(rv, rv);
-
-    newDB = true;
-
-    rv = storageService->OpenDatabase(mDBFile, getter_AddRefs(connection));
-  }
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  if (!newDB) {
-    PRInt32 databaseVersion;
-    rv = connection->GetSchemaVersion(&databaseVersion);
-    NS_ENSURE_SUCCESS(rv, rv);
-
-    if (databaseVersion != IMPLEMENTATION_VERSION) {
-      LOG(("Incompatible database, removing."));
-
-      rv = connection->Close();
-      NS_ENSURE_SUCCESS(rv, rv);
-
-      rv = mDBFile->Remove(false);
-      NS_ENSURE_SUCCESS(rv, rv);
-
-      newDB = true;
-
-      rv = storageService->OpenDatabase(mDBFile, getter_AddRefs(connection));
-      NS_ENSURE_SUCCESS(rv, rv);
-    }
+  nsAutoPtr<Classifier> classifier(new Classifier());
+  if (!classifier) {
+    return NS_ERROR_OUT_OF_MEMORY;
   }

-  connection->SetGrowthIncrement(5 * 1024 * 1024, EmptyCString());
-  rv = connection->ExecuteSimpleSQL(NS_LITERAL_CSTRING("PRAGMA synchronous=OFF"));
-  NS_ENSURE_SUCCESS(rv, rv);
+  classifier->SetFreshTime(gFreshnessGuarantee);

-  rv = connection->CreateStatement
-    (NS_LITERAL_CSTRING(MOZ_STORAGE_UNIQUIFY_QUERY_STR "PRAGMA page_size"),
-     getter_AddRefs(mGetPageSizeStatement));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = SetCacheSize(connection, gLookupCacheSize);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  if (newDB) {
-    rv = connection->SetSchemaVersion(IMPLEMENTATION_VERSION);
-    NS_ENSURE_SUCCESS(rv, rv);
+  nsresult rv = classifier->Open(*mCacheDir);
+  if (NS_FAILED(rv)) {
+    NS_WARNING("Failed to open URL classifier.");
   }

-  // Create the table
-  rv = MaybeCreateTables(connection);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = mMainStore.Init(this, connection,
-                       NS_LITERAL_CSTRING("moz_classifier"));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = mPendingSubStore.Init(this, connection,
-                             NS_LITERAL_CSTRING("moz_subs"));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = connection->CreateStatement
-    (NS_LITERAL_CSTRING("SELECT add_chunks, sub_chunks FROM moz_tables"
-                        " WHERE id=?1"),
-     getter_AddRefs(mGetChunkListsStatement));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = connection->CreateStatement
-    (NS_LITERAL_CSTRING("UPDATE moz_tables"
-                        " SET add_chunks=?1, sub_chunks=?2"
-                        " WHERE id=?3"),
-     getter_AddRefs(mSetChunkListsStatement));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = connection->CreateStatement
-    (NS_LITERAL_CSTRING("SELECT name, add_chunks, sub_chunks"
-                        " FROM moz_tables"),
-     getter_AddRefs(mGetTablesStatement));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = connection->CreateStatement
-    (NS_LITERAL_CSTRING("SELECT id FROM moz_tables"
-                        " WHERE name = ?1"),
-     getter_AddRefs(mGetTableIdStatement));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = connection->CreateStatement
-    (NS_LITERAL_CSTRING("SELECT name FROM moz_tables"
-                        " WHERE id = ?1"),
-     getter_AddRefs(mGetTableNameStatement));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = connection->CreateStatement
-    (NS_LITERAL_CSTRING("INSERT INTO moz_tables(id, name, add_chunks, sub_chunks)"
-                        " VALUES (null, ?1, null, null)"),
-     getter_AddRefs(mInsertTableIdStatement));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  mConnection = connection;
+  mHashKey = classifier->GetHashKey();
+  mClassifier = classifier;

   mCryptoHash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
   NS_ENSURE_SUCCESS(rv, rv);

-  LOG(("loading Prefix Set\n"));
-  rv = LoadPrefixSet(mPSFile);
-  if (NS_FAILED(rv)) {
-    if (rv == NS_ERROR_FILE_CORRUPTED) {
-      ResetDatabase();
-    }
-    return rv;
-  }
-
   return NS_OK;
 }

-// We have both a prefix and a domain. Drop the domain, but
-// hash the domain, the prefix and a random value together,
-// ensuring any collisions happen at different points for
-// different users.
-// We need to calculate +- 500k hashes each update.
-// The extensive initialization and finalization of normal
-// cryptographic hashes, as well as fairly low speed, causes them
-// to be prohibitively slow here, hence we can't use them.
-// We use MurmurHash3 instead because it's reasonably well
-// researched, trusted inside some other big projects, extremely
-// fast and with a specific 32-bit output version, and fairly
-// compact. Upon testing with the actual prefix data, it does
-// not appear to increase the number of collisions by any
-// meaningful amount.
-static nsresult KeyedHash(PRUint32 aPref, PRUint32 aDomain,
-                          PRUint32 aKey, PRUint32 *aOut)
-{
-  // This is a reimplementation of MurmurHash3 32-bit
-  // based on the public domain C++ sources.
-  // http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
-  // for nblocks = 2
-  PRUint32 c1 = 0xCC9E2D51;
-  PRUint32 c2 = 0x1B873593;
-  PRUint32 c3 = 0xE6546B64;
-  PRUint32 c4 = 0x85EBCA6B;
-  PRUint32 c5 = 0xC2B2AE35;
-  PRUint32 h1 = aPref; // seed
-  PRUint32 k1;
-  PRUint32 karr[2];
-
-  karr[0] = aDomain;
-  karr[1] = aKey;
-
-  for (PRUint32 i = 0; i < 2; i++) {
-    k1 = karr[i];
-    k1 *= c1;
-    k1 = (k1 << 15) | (k1 >> (32-15));
-    k1 *= c2;
-
-    h1 ^= k1;
-    h1 = (h1 << 13) | (h1 >> (32-13));
-    h1 *= 5;
-    h1 += c3;
-  }
-
-  h1 ^= 2; // len
-  // fmix
-  h1 ^= h1 >> 16;
-  h1 *= c4;
-  h1 ^= h1 >> 13;
-  h1 *= c5;
-  h1 ^= h1 >> 16;
-
-  *aOut = h1;
-
-  return NS_OK;
-}
-
-nsresult nsUrlClassifierStore::ReadPrefixes(FallibleTArray<PRUint32>& array,
-                                            PRUint32 aKey)
-{
-  mozStorageStatementScoper scoper(mAllPrefixGetStatement);
-  mozStorageStatementScoper scoperToo(mAllPrefixCountStatement);
-  bool hasMoreData;
-  PRUint32 pcnt = 0;
-  PRUint32 fcnt = 0;
-
-#if defined(PR_LOGGING)
-  PRIntervalTime clockStart = 0;
-  if (LOG_ENABLED()) {
-    clockStart = PR_IntervalNow();
-  }
-#endif
-
-  // Make sure we allocate no more than we really need, so first
-  // check how many entries there are
-  if (NS_SUCCEEDED(mAllPrefixCountStatement->ExecuteStep(&hasMoreData)) && hasMoreData) {
-    PRUint32 count = mAllPrefixCountStatement->AsInt32(0);
-    if (!array.SetCapacity(count)) {
-      return NS_ERROR_OUT_OF_MEMORY;
-    }
-  } else {
-    return NS_ERROR_FILE_CORRUPTED;
-  }
-
-  while (NS_SUCCEEDED(mAllPrefixGetStatement->ExecuteStep(&hasMoreData)) && hasMoreData) {
-    PRUint32 prefixval;
-    PRUint32 domainval;
-    PRUint32 size;
-
-    const PRUint8 *blobdomain = mAllPrefixGetStatement->AsSharedBlob(0, &size);
-    if (!blobdomain || (size != DOMAIN_LENGTH))
-      return false;
-
-    domainval = *(reinterpret_cast<const PRUint32*>(blobdomain));
-
-    const PRUint8 *blobprefix = mAllPrefixGetStatement->AsSharedBlob(1, &size);
-    if (!blobprefix || (size != PARTIAL_LENGTH)) {
-      const PRUint8 *blobfull = mAllPrefixGetStatement->AsSharedBlob(2, &size);
-      if (!blobfull || (size != COMPLETE_LENGTH)) {
-        prefixval = domainval;
-        fcnt++;
-      } else {
-        prefixval = *(reinterpret_cast<const PRUint32*>(blobfull));
-      }
-    } else {
-      prefixval = *(reinterpret_cast<const PRUint32*>(blobprefix));
-    }
-
-    PRUint32 keyedVal;
-    nsresult rv = KeyedHash(prefixval, domainval, aKey, &keyedVal);
-    NS_ENSURE_SUCCESS(rv, rv);
-
-    PRUint32 *res = array.AppendElement(keyedVal);
-    MOZ_ASSERT(res != nsnull);
-    pcnt++;
-    // Normal DB size is about 500k entries. If we are getting 10x
-    // as much, the database must be corrupted.
-    if (pcnt > 5000000) {
-      return NS_ERROR_FILE_CORRUPTED;
-    }
-  }
-
-  LOG(("SB prefixes: %d fulldomain: %d\n", pcnt, fcnt));
-
-#if defined(PR_LOGGING)
-  if (LOG_ENABLED()) {
-    PRIntervalTime clockEnd = PR_IntervalNow();
-    LOG(("Gathering took %dms\n",
-         PR_IntervalToMilliseconds(clockEnd - clockStart)));
-  }
-#endif
-
-  return NS_OK;
-}
-
-bool nsUrlClassifierDBServiceWorker::LockPrefixSet()
-{
-  mPrefixSetEnabledLock.Lock();
-  return mPrefixSetEnabled;
-}
-
-void nsUrlClassifierDBServiceWorker::UnlockPrefixSet()
-{
-  mPrefixSetEnabledLock.Unlock();
-}
-
-nsresult
-nsUrlClassifierDBServiceWorker::ConstructPrefixSet()
-{
-  Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_CONSTRUCT_TIME> timer;
-
-  PRUint32 key;
-  nsresult rv = mPrefixSet->GetKey(&key);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  FallibleTArray<PRUint32> array;
-  rv = mMainStore.ReadPrefixes(array, key);
-  if (NS_FAILED(rv)) {
-    goto error_bailout;
-  }
-
-#ifdef HASHFUNCTION_COLLISION_TEST
-  array.Sort();
-  PRUint32 collisions = 0;
-  for (int i = 1; i < array.Length(); i++) {
-    if (array[i - 1] == array[i]) {
-      collisions++;
-    }
-  }
-  LOG(("%d collisions in the set", collisions));
-#endif
-
-  if (array.IsEmpty()) {
-    // DB is empty, put a sentinel to show that we loaded it
-    if (!array.AppendElement(0)) {
-      goto error_bailout;
-    }
-  }
-  // SetPrefixes requires sorted arrays
-  array.Sort();
-
-  // construct new prefixset
-  rv = mPrefixSet->SetPrefixes(array.Elements(), array.Length());
-  if (NS_FAILED(rv)) {
-    goto error_bailout;
-  }
-
-  // store the new tree to disk
-  rv = mPrefixSet->StoreToFile(mPSFile);
-  NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "failed to store the prefixset");
-
-  // re-enable prefixset usage if disabled earlier
-  mPrefixSetEnabled = true;
-
-  return NS_OK;
-
- error_bailout:
-  // disable prefixset usage
-  MutexAutoLock lock(mPrefixSetEnabledLock);
-  mPrefixSetEnabled = false;
-  // load an empty prefixset
-  nsAutoTArray<PRUint32, 1> sentinel;
-  sentinel.Clear();
-  sentinel.AppendElement(0);
-  mPrefixSet->SetPrefixes(sentinel.Elements(), sentinel.Length());
-  if (rv == NS_ERROR_OUT_OF_MEMORY) {
-    Telemetry::Accumulate(Telemetry::URLCLASSIFIER_PS_OOM, 1);
-  }
-  return rv;
-}
-
-nsresult
-nsUrlClassifierDBServiceWorker::LoadPrefixSet(nsCOMPtr<nsIFile>& aFile)
-{
-  bool empty;
-  nsresult rv = mPrefixSet->IsEmpty(&empty);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  if (!empty) {
-    LOG(("PrefixSet already loaded, not loading again"));
-    return NS_OK;
-  }
-
-  bool exists;
-  rv = aFile->Exists(&exists);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-#if defined(PR_LOGGING)
-  PRIntervalTime clockStart = 0;
-  if (LOG_ENABLED()) {
-    clockStart = PR_IntervalNow();
-  }
-#endif
-
-  if (exists) {
-    Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_FILELOAD_TIME> timer;
-    LOG(("stored PrefixSet exists, loading from disk"));
-    rv = mPrefixSet->LoadFromFile(aFile);
-  }
-  if (!exists || NS_FAILED(rv)) {
-    LOG(("no (usable) stored PrefixSet found, constructing from store"));
-    rv = ConstructPrefixSet();
-    NS_ENSURE_SUCCESS(rv, rv);
-  }
-
-#ifdef DEBUG
-  LOG(("SB tree done, size = %d bytes\n",
-       mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of)));
-#endif
-#if defined(PR_LOGGING)
-  if (LOG_ENABLED()) {
-    PRIntervalTime clockEnd = PR_IntervalNow();
-    LOG(("Loading took %dms\n",
-         PR_IntervalToMilliseconds(clockEnd - clockStart)));
-  }
-#endif
-
-  return NS_OK;
-}
-
-nsresult
-nsUrlClassifierDBServiceWorker::MaybeCreateTables(mozIStorageConnection* connection)
-{
-  LOG(("MaybeCreateTables\n"));
-
-  nsresult rv = connection->ExecuteSimpleSQL(
-    NS_LITERAL_CSTRING("CREATE TABLE IF NOT EXISTS moz_classifier"
-                       " (id INTEGER PRIMARY KEY,"
-                       " domain BLOB,"
-                       " partial_data BLOB,"
-                       " complete_data BLOB,"
-                       " chunk_id INTEGER,"
-                       " table_id INTEGER)"));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = connection->ExecuteSimpleSQL(
-    NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS"
-                       " moz_classifier_domain_index"
-                       " ON moz_classifier(domain)"));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = connection->ExecuteSimpleSQL(
-    NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS"
-                       " moz_classifier_chunk_index"
-                       " ON moz_classifier(chunk_id)"));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = connection->ExecuteSimpleSQL(
-    NS_LITERAL_CSTRING("CREATE TABLE IF NOT EXISTS moz_subs"
-                       " (id INTEGER PRIMARY KEY,"
-                       " domain BLOB,"
-                       " partial_data BLOB,"
-                       " complete_data BLOB,"
-                       " chunk_id INTEGER,"
-                       " table_id INTEGER,"
-                       " add_chunk_id INTEGER)"));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = connection->ExecuteSimpleSQL(
-    NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS"
-                       " moz_subs_addchunk_index"
-                       " ON moz_subs(add_chunk_id)"));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = connection->ExecuteSimpleSQL(
-    NS_LITERAL_CSTRING("CREATE INDEX IF NOT EXISTS"
-                       " moz_subs_chunk_index"
-                       " ON moz_subs(chunk_id)"));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = connection->ExecuteSimpleSQL(
-    NS_LITERAL_CSTRING("CREATE TABLE IF NOT EXISTS moz_tables"
-                       " (id INTEGER PRIMARY KEY,"
-                       " name TEXT,"
-                       " add_chunks TEXT,"
-                       " sub_chunks TEXT);"));
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  return rv;
-}
-
 // -------------------------------------------------------------------------
 // nsUrlClassifierLookupCallback
 //
@@ -3879,14 +835,16 @@ public:
     , mCallback(c)
     {}

+  ~nsUrlClassifierLookupCallback();
+
 private:
   nsresult HandleResults();

   nsRefPtr<nsUrlClassifierDBService> mDBService;
-  nsAutoPtr<nsTArray<nsUrlClassifierLookupResult> > mResults;
+  nsAutoPtr<LookupResultArray> mResults;

   // Completed results to send back to the worker for caching.
-  nsAutoPtr<nsTArray<nsUrlClassifierLookupResult> > mCacheResults;
+  nsAutoPtr<CacheResultArray> mCacheResults;

   PRUint32 mPendingCompletions;
   nsCOMPtr<nsIUrlClassifierCallback> mCallback;
@@ -3896,8 +854,18 @@ NS_IMPL_THREADSAFE_ISUPPORTS2(nsUrlClassifierLookupCallback,
                               nsIUrlClassifierLookupCallback,
                               nsIUrlClassifierHashCompleterCallback)

+nsUrlClassifierLookupCallback::~nsUrlClassifierLookupCallback()
+{
+  nsCOMPtr<nsIThread> thread;
+  (void)NS_GetMainThread(getter_AddRefs(thread));
+
+  if (mCallback) {
+    (void)NS_ProxyRelease(thread, mCallback, false);
+  }
+}
+
 NS_IMETHODIMP
-nsUrlClassifierLookupCallback::LookupComplete(nsTArray<nsUrlClassifierLookupResult>* results)
+nsUrlClassifierLookupCallback::LookupComplete(nsTArray<LookupResult>* results)
 {
   NS_ASSERTION(mResults == nsnull,
                "Should only get one set of results per nsUrlClassifierLookupCallback!");

@@ -3908,22 +876,19 @@ nsUrlClassifierLookupCallback::LookupComplete(nsTArray<nsUrlClassifierLookupResult>* results)
   results->Sort();

   // Check the results entries that need to be completed.
   for (PRUint32 i = 0; i < results->Length(); i++) {
-    nsUrlClassifierLookupResult& result = results->ElementAt(i);
+    LookupResult& result = results->ElementAt(i);

     // We will complete partial matches and matches that are stale.
-    if (!result.mConfirmed) {
+    if (!result.Confirmed()) {
       nsCOMPtr<nsIUrlClassifierHashCompleter> completer;
       if (mDBService->GetCompleter(result.mTableName,
                                    getter_AddRefs(completer))) {
         nsCAutoString partialHash;
-        PRUint8 *buf =
-          result.mEntry.mHavePartial ? result.mEntry.mPartialHash.buf
-                                     : result.mEntry.mCompleteHash.buf;
-        partialHash.Assign(reinterpret_cast<char*>(buf), PARTIAL_LENGTH);
+        partialHash.Assign(reinterpret_cast<char*>(&result.hash.prefix),
+                           PREFIX_SIZE);

         nsresult rv = completer->Complete(partialHash, this);
         if (NS_SUCCEEDED(rv)) {
@@ -3931,12 +896,9 @@ nsUrlClassifierLookupCallback::LookupComplete(nsTArray<nsUrlClassifierLookupResult>* results)
   if (mPendingCompletions == 0) {
     HandleResults();
-
-    if (mCacheResults) {
-      // This hands ownership of the cache results array back to the
-      // worker thread.
-      mDBService->CacheCompletions(mCacheResults.forget());
-    }
   }

   return NS_OK;
@@ -3983,51 +939,33 @@ nsUrlClassifierLookupCallback::Completion(const nsACString& completeHash,
 {
   LOG(("nsUrlClassifierLookupCallback::Completion [%p, %s, %d, %d]",
       this, PromiseFlatCString(tableName).get(), chunkId, verified));
-  nsUrlClassifierCompleteHash hash;
+  mozilla::safebrowsing::Completion hash;
   hash.Assign(completeHash);

+  // Send this completion to the store for caching.
+  if (!mCacheResults) {
+    mCacheResults = new CacheResultArray();
+    if (!mCacheResults)
+      return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  if (verified) {
+    CacheResult result;
+    result.entry.addChunk = chunkId;
+    result.entry.hash.complete = hash;
+    result.table = tableName;
+
+    // It's OK if this fails; we just won't cache the item.
+    mCacheResults->AppendElement(result);
+  }
+
+  // Check if this matched any of our results.
   for (PRUint32 i = 0; i < mResults->Length(); i++) {
-    nsUrlClassifierLookupResult& result = mResults->ElementAt(i);
-
-    // First, see if this result can be used to update an entry.
-    if (verified &&
-        !result.mEntry.mHaveComplete &&
-        hash.StartsWith(result.mEntry.mPartialHash) &&
-        result.mTableName == tableName &&
-        result.mEntry.mChunkId == chunkId) {
-      // We have a completion for this entry.  Fill it in...
-      result.mEntry.SetHash(hash);
-
-      if (!mCacheResults) {
-        mCacheResults = new nsTArray<nsUrlClassifierLookupResult>();
-        if (!mCacheResults)
-          return NS_ERROR_OUT_OF_MEMORY;
-      }
-
-      mCacheResults->AppendElement(result);
-    }
+    LookupResult& result = mResults->ElementAt(i);

     // Now, see if it verifies a lookup
-    if (result.mLookupFragment == hash) {
-      result.mConfirmed = true;
-
-      if (result.mTableName != tableName ||
-          result.mEntry.mChunkId != chunkId) {
-        // The hash we got for this completion matches the hash we
-        // looked up, but doesn't match the table/chunk id.  This could
-        // happen in rare cases where a given URL was moved between
-        // lists or added/removed/re-added to the list in the time since
-        // we've updated.
-        //
-        // Update the lookup result, but don't update the entry or try to
-        // cache the results of this completion, as it might confuse
-        // things.
-        result.mTableName = tableName;
-        NS_WARNING("Accepting a gethash with an invalid table name or chunk id");
-        LOG(("Tablename: %s ?= %s, ChunkId %d ?= %d",
-             result.mTableName.get(), PromiseFlatCString(tableName).get(),
-             result.mEntry.mChunkId, chunkId));
-      }
+    if (result.CompleteHash() == hash && result.mTableName.Equals(tableName)) {
+      result.mProtocolConfirmed = true;
     }
   }

@@ -4042,29 +980,56 @@ nsUrlClassifierLookupCallback::HandleResults()
     return mCallback->HandleEvent(NS_LITERAL_CSTRING(""));
   }

+  nsTArray<nsCString> tables;
   // Build a stringified list of result tables.
-  mResults->Sort();
-  PRUint32 lastTableId = 0;
-  nsCAutoString tables;
   for (PRUint32 i = 0; i < mResults->Length(); i++) {
-    nsUrlClassifierLookupResult& result = mResults->ElementAt(i);
+    LookupResult& result = mResults->ElementAt(i);
+
     // Leave out results that weren't confirmed, as their existence on
     // the list can't be verified.  Also leave out randomly-generated
     // noise.
- if (!result.mConfirmed || result.mNoise) + if (!result.Confirmed() || result.mNoise) { + LOG(("Skipping result from table %s", result.mTableName.get())); continue; - - if (tables.Length() > 0) { - if (lastTableId == result.mEntry.mTableId) - continue; - tables.Append(","); } - tables.Append(result.mTableName); - lastTableId = result.mEntry.mTableId; + LOG(("Confirmed result from table %s", result.mTableName.get())); + + if (tables.IndexOf(result.mTableName) == nsTArray::NoIndex) { + tables.AppendElement(result.mTableName); + } } - return mCallback->HandleEvent(tables); + // Some parts of this gethash request generated no hits at all. + // Prefixes must have been removed from the database since our last update. + // Save the prefixes we checked to prevent repeated requests + // until the next update. + nsAutoPtr cacheMisses(new PrefixArray()); + if (cacheMisses) { + for (uint32 i = 0; i < mResults->Length(); i++) { + LookupResult &result = mResults->ElementAt(i); + if (!result.Confirmed()) { + cacheMisses->AppendElement(result.PrefixHash()); + } + } + // Hands ownership of the miss array back to the worker thread. + mDBService->CacheMisses(cacheMisses.forget()); + } + + if (mCacheResults) { + // This hands ownership of the cache results array back to the worker + // thread. + mDBService->CacheCompletions(mCacheResults.forget()); + } + + nsCAutoString tableStr; + for (PRUint32 i = 0; i < tables.Length(); i++) { + if (i != 0) + tableStr.Append(','); + tableStr.Append(tables[i]); + } + + return mCallback->HandleEvent(tableStr); } @@ -4180,18 +1145,7 @@ nsUrlClassifierDBService::Init() gUrlClassifierDbServiceLog = PR_NewLogModule("UrlClassifierDbService"); #endif - // Force the storage service to be created on the main thread. nsresult rv; - nsCOMPtr storageService = - do_GetService(MOZ_STORAGE_SERVICE_CONTRACTID, &rv); - NS_ENSURE_SUCCESS(rv, rv); - - // Force PSM to be loaded on the main thread. - mHash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv); - NS_ENSURE_SUCCESS(rv, rv); - - mPrefixSet = new nsUrlClassifierPrefixSet(); - NS_ENSURE_SUCCESS(rv, rv); // Should we check document loads for malware URIs? nsCOMPtr prefs = do_GetService(NS_PREFSERVICE_CONTRACTID); @@ -4225,20 +1179,19 @@ nsUrlClassifierDBService::Init() PR_ATOMIC_SET(&gFreshnessGuarantee, NS_SUCCEEDED(rv) ? tmpint : CONFIRM_AGE_DEFAULT_SEC); prefs->AddObserver(CONFIRM_AGE_PREF, this, false); + } - rv = prefs->GetIntPref(UPDATE_CACHE_SIZE_PREF, &tmpint); - PR_ATOMIC_SET(&gUpdateCacheSize, NS_SUCCEEDED(rv) ? tmpint : UPDATE_CACHE_SIZE_DEFAULT); + // Force PSM loading on main thread + nsCOMPtr acryptoHash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv); + NS_ENSURE_SUCCESS(rv, rv); - rv = prefs->GetIntPref(LOOKUP_CACHE_SIZE_PREF, &tmpint); - PR_ATOMIC_SET(&gLookupCacheSize, NS_SUCCEEDED(rv) ? tmpint : LOOKUP_CACHE_SIZE_DEFAULT); - - rv = prefs->GetIntPref(UPDATE_WORKING_TIME, &tmpint); - PR_ATOMIC_SET(&gWorkingTimeThreshold, - NS_SUCCEEDED(rv) ? tmpint : UPDATE_WORKING_TIME_DEFAULT); - - rv = prefs->GetIntPref(UPDATE_DELAY_TIME, &tmpint); - PR_ATOMIC_SET(&gDelayTime, - NS_SUCCEEDED(rv) ? tmpint : UPDATE_DELAY_TIME_DEFAULT); + // Directory providers must also be accessed on the main thread. + nsCOMPtr cacheDir; + rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR, + getter_AddRefs(cacheDir)); + if (NS_FAILED(rv)) { + rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_50_DIR, + getter_AddRefs(cacheDir)); } // Start the background thread. 
@@ -4250,7 +1203,7 @@ nsUrlClassifierDBService::Init() if (!mWorker) return NS_ERROR_OUT_OF_MEMORY; - rv = mWorker->Init(gethashNoise, mPrefixSet); + rv = mWorker->Init(gethashNoise, cacheDir); if (NS_FAILED(rv)) { mWorker = nsnull; return rv; @@ -4339,11 +1292,7 @@ nsUrlClassifierDBService::LookupURI(nsIURI* uri, if (forceLookup) { *didLookup = true; } else { - // Check if the URI is clean. If so, we don't need to - // bother queueing up a lookup, we can just return.; - bool clean; - rv = CheckClean(key, &clean); - NS_ENSURE_SUCCESS(rv, rv); + bool clean = false; if (!clean) { nsCOMPtr permissionManager = @@ -4482,13 +1431,21 @@ nsUrlClassifierDBService::ResetDatabase() } nsresult -nsUrlClassifierDBService::CacheCompletions(nsTArray *results) +nsUrlClassifierDBService::CacheCompletions(CacheResultArray *results) { NS_ENSURE_TRUE(gDbBackgroundThread, NS_ERROR_NOT_INITIALIZED); return mWorkerProxy->CacheCompletions(results); } +nsresult +nsUrlClassifierDBService::CacheMisses(PrefixArray *results) +{ + NS_ENSURE_TRUE(gDbBackgroundThread, NS_ERROR_NOT_INITIALIZED); + + return mWorkerProxy->CacheMisses(results); +} + bool nsUrlClassifierDBService::GetCompleter(const nsACString &tableName, nsIUrlClassifierHashCompleter **completer) @@ -4531,24 +1488,6 @@ nsUrlClassifierDBService::Observe(nsISupports *aSubject, const char *aTopic, PRInt32 tmpint; rv = prefs->GetIntPref(CONFIRM_AGE_PREF, &tmpint); PR_ATOMIC_SET(&gFreshnessGuarantee, NS_SUCCEEDED(rv) ? tmpint : CONFIRM_AGE_DEFAULT_SEC); - } else if (NS_LITERAL_STRING(UPDATE_CACHE_SIZE_PREF).Equals(aData)) { - PRInt32 tmpint; - rv = prefs->GetIntPref(UPDATE_CACHE_SIZE_PREF, &tmpint); - PR_ATOMIC_SET(&gUpdateCacheSize, NS_SUCCEEDED(rv) ? tmpint : UPDATE_CACHE_SIZE_DEFAULT); - } else if (NS_LITERAL_STRING(LOOKUP_CACHE_SIZE_PREF).Equals(aData)) { - PRInt32 tmpint; - rv = prefs->GetIntPref(LOOKUP_CACHE_SIZE_PREF, &tmpint); - PR_ATOMIC_SET(&gLookupCacheSize, NS_SUCCEEDED(rv) ? tmpint : LOOKUP_CACHE_SIZE_DEFAULT); - } else if (NS_LITERAL_STRING(UPDATE_WORKING_TIME).Equals(aData)) { - PRInt32 tmpint; - rv = prefs->GetIntPref(UPDATE_WORKING_TIME, &tmpint); - PR_ATOMIC_SET(&gWorkingTimeThreshold, - NS_SUCCEEDED(rv) ? tmpint : UPDATE_WORKING_TIME_DEFAULT); - } else if (NS_LITERAL_STRING(UPDATE_DELAY_TIME).Equals(aData)) { - PRInt32 tmpint; - rv = prefs->GetIntPref(UPDATE_DELAY_TIME, &tmpint); - PR_ATOMIC_SET(&gDelayTime, - NS_SUCCEEDED(rv) ? tmpint : UPDATE_DELAY_TIME_DEFAULT); } } else if (!strcmp(aTopic, "profile-before-change") || !strcmp(aTopic, "xpcom-shutdown-threads")) { diff --git a/toolkit/components/url-classifier/nsUrlClassifierDBService.h b/toolkit/components/url-classifier/nsUrlClassifierDBService.h index b49303a41c2b..122102de87dc 100644 --- a/toolkit/components/url-classifier/nsUrlClassifierDBService.h +++ b/toolkit/components/url-classifier/nsUrlClassifierDBService.h @@ -53,6 +53,8 @@ #include "nsICryptoHash.h" #include "nsICryptoHMAC.h" +#include "LookupCache.h" + // The hash length for a domain key. #define DOMAIN_LENGTH 4 @@ -88,7 +90,8 @@ public: bool GetCompleter(const nsACString& tableName, nsIUrlClassifierHashCompleter** completer); - nsresult CacheCompletions(nsTArray *results); + nsresult CacheCompletions(mozilla::safebrowsing::CacheResultArray *results); + nsresult CacheMisses(mozilla::safebrowsing::PrefixArray *results); static nsIThread* BackgroundThread(); @@ -131,10 +134,6 @@ private: // The list of tables that can use the default hash completer object. 
nsTArray mGethashWhitelist; - // Set of prefixes known to be in the database - nsRefPtr mPrefixSet; - nsCOMPtr mHash; - // Thread that we do the updates on. static nsIThread* gDbBackgroundThread; }; diff --git a/toolkit/components/url-classifier/nsUrlClassifierPrefixSet.cpp b/toolkit/components/url-classifier/nsUrlClassifierPrefixSet.cpp index 9bc52e067833..dd528d13a8f9 100644 --- a/toolkit/components/url-classifier/nsUrlClassifierPrefixSet.cpp +++ b/toolkit/components/url-classifier/nsUrlClassifierPrefixSet.cpp @@ -71,7 +71,7 @@ static const PRLogModuleInfo *gUrlClassifierPrefixSetLog = nsnull; class nsPrefixSetReporter : public nsIMemoryReporter { public: - nsPrefixSetReporter(nsUrlClassifierPrefixSet * aParent, const nsACString & aName); + nsPrefixSetReporter(nsUrlClassifierPrefixSet* aParent, const nsACString& aName); virtual ~nsPrefixSetReporter() {}; NS_DECL_ISUPPORTS @@ -79,7 +79,7 @@ public: private: nsCString mPath; - nsUrlClassifierPrefixSet * mParent; + nsUrlClassifierPrefixSet* mParent; }; NS_IMPL_THREADSAFE_ISUPPORTS1(nsPrefixSetReporter, nsIMemoryReporter) @@ -87,8 +87,8 @@ NS_IMPL_THREADSAFE_ISUPPORTS1(nsPrefixSetReporter, nsIMemoryReporter) NS_MEMORY_REPORTER_MALLOC_SIZEOF_FUN(StoragePrefixSetMallocSizeOf, "storage/prefixset") -nsPrefixSetReporter::nsPrefixSetReporter(nsUrlClassifierPrefixSet * aParent, - const nsACString & aName) +nsPrefixSetReporter::nsPrefixSetReporter(nsUrlClassifierPrefixSet* aParent, + const nsACString& aName) : mParent(aParent) { mPath.Assign(NS_LITERAL_CSTRING("explicit/storage/prefixset")); @@ -99,42 +99,42 @@ nsPrefixSetReporter::nsPrefixSetReporter(nsUrlClassifierPrefixSet * aParent, } NS_IMETHODIMP -nsPrefixSetReporter::GetProcess(nsACString & aProcess) +nsPrefixSetReporter::GetProcess(nsACString& aProcess) { aProcess.Truncate(); return NS_OK; } NS_IMETHODIMP -nsPrefixSetReporter::GetPath(nsACString & aPath) +nsPrefixSetReporter::GetPath(nsACString& aPath) { aPath.Assign(mPath); return NS_OK; } NS_IMETHODIMP -nsPrefixSetReporter::GetKind(PRInt32 * aKind) +nsPrefixSetReporter::GetKind(PRInt32* aKind) { *aKind = nsIMemoryReporter::KIND_HEAP; return NS_OK; } NS_IMETHODIMP -nsPrefixSetReporter::GetUnits(PRInt32 * aUnits) +nsPrefixSetReporter::GetUnits(PRInt32* aUnits) { *aUnits = nsIMemoryReporter::UNITS_BYTES; return NS_OK; } NS_IMETHODIMP -nsPrefixSetReporter::GetAmount(PRInt64 * aAmount) +nsPrefixSetReporter::GetAmount(PRInt64* aAmount) { *aAmount = mParent->SizeOfIncludingThis(StoragePrefixSetMallocSizeOf); return NS_OK; } NS_IMETHODIMP -nsPrefixSetReporter::GetDescription(nsACString & aDescription) +nsPrefixSetReporter::GetDescription(nsACString& aDescription) { aDescription.Assign(NS_LITERAL_CSTRING("Memory used by a PrefixSet for " "UrlClassifier, in bytes.")); @@ -146,21 +146,21 @@ NS_IMPL_THREADSAFE_ISUPPORTS1(nsUrlClassifierPrefixSet, nsIUrlClassifierPrefixSe nsUrlClassifierPrefixSet::nsUrlClassifierPrefixSet() : mPrefixSetLock("mPrefixSetLock"), mSetIsReady(mPrefixSetLock, "mSetIsReady"), - mHasPrefixes(false), - mRandomKey(0) + mHasPrefixes(false) { #if defined(PR_LOGGING) if (!gUrlClassifierPrefixSetLog) gUrlClassifierPrefixSetLog = PR_NewLogModule("UrlClassifierPrefixSet"); #endif +} - nsresult rv = InitKey(); - if (NS_FAILED(rv)) { - LOG(("Failed to initialize PrefixSet")); - } - - mReporter = new nsPrefixSetReporter(this, NS_LITERAL_CSTRING("all")); +NS_IMETHODIMP +nsUrlClassifierPrefixSet::Init(const nsACString& aName) +{ + mReporter = new nsPrefixSetReporter(this, aName); NS_RegisterMemoryReporter(mReporter); + + return NS_OK; } 
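Before the PrefixSet hunks that follow, a sketch of the storage layout they maintain may help: MakePrefixSet splits the sorted 32-bit prefixes into runs, each run anchored by a full value in mIndexPrefixes plus 16-bit deltas in mDeltas, and mIndexStarts records where each run's deltas begin; a run is cut when the gap reaches MAX_INDEX_DIFF or the run reaches DELTAS_LIMIT deltas. A standalone illustration in plain C++ (the real code uses FallibleTArray members and holds mPrefixSetLock; the names here are illustrative):

#include <stdint.h>
#include <vector>

// Standalone sketch of the delta-compressed prefix set layout.
struct PrefixSetSketch {
  std::vector<uint32_t> indexPrefixes; // anchor value of each run
  std::vector<uint32_t> indexStarts;   // offset of each run in 'deltas'
  std::vector<uint16_t> deltas;        // gaps between consecutive prefixes

  bool Contains(uint32_t target) const {
    if (indexPrefixes.empty()) return false;
    // Binary search for the last run anchor <= target.
    size_t lo = 0, hi = indexPrefixes.size();
    while (hi - lo > 1) {
      size_t mid = lo + (hi - lo) / 2;
      if (indexPrefixes[mid] <= target) lo = mid; else hi = mid;
    }
    if (indexPrefixes[lo] > target) return false;
    // Walk the run's deltas, rebuilding prefixes until we reach target.
    uint32_t value = indexPrefixes[lo];
    size_t end = (lo + 1 < indexStarts.size()) ? indexStarts[lo + 1]
                                               : deltas.size();
    for (size_t i = indexStarts[lo]; value < target && i < end; i++)
      value += deltas[i];
    return value == target;
  }
};

The win is size: most consecutive prefixes in a sorted Safe Browsing set differ by far less than 2^16, so the bulk of the set is stored as 16-bit deltas rather than 32-bit values.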
 nsUrlClassifierPrefixSet::~nsUrlClassifierPrefixSet()
@@ -168,26 +168,8 @@ nsUrlClassifierPrefixSet::~nsUrlClassifierPrefixSet()
 {
   NS_UnregisterMemoryReporter(mReporter);
 }

-nsresult
-nsUrlClassifierPrefixSet::InitKey()
-{
-  nsCOMPtr<nsIRandomGenerator> rg =
-    do_GetService("@mozilla.org/security/random-generator;1");
-  NS_ENSURE_STATE(rg);
-
-  PRUint8 *temp;
-  nsresult rv = rg->GenerateRandomBytes(sizeof(mRandomKey), &temp);
-  NS_ENSURE_SUCCESS(rv, rv);
-  memcpy(&mRandomKey, temp, sizeof(mRandomKey));
-  NS_Free(temp);
-
-  LOG(("Initialized PrefixSet, key = %X", mRandomKey));
-
-  return NS_OK;
-}
-
 NS_IMETHODIMP
-nsUrlClassifierPrefixSet::SetPrefixes(const PRUint32 * aArray, PRUint32 aLength)
+nsUrlClassifierPrefixSet::SetPrefixes(const PRUint32* aArray, PRUint32 aLength)
 {
   if (aLength <= 0) {
     MutexAutoLock lock(mPrefixSetLock);
@@ -206,7 +188,7 @@ nsUrlClassifierPrefixSet::SetPrefixes(const PRUint32 * aArray, PRUint32 aLength)
 }

 nsresult
-nsUrlClassifierPrefixSet::MakePrefixSet(const PRUint32 * prefixes, PRUint32 aLength)
+nsUrlClassifierPrefixSet::MakePrefixSet(const PRUint32* aPrefixes, PRUint32 aLength)
 {
   if (aLength == 0) {
     return NS_OK;
@@ -214,7 +196,7 @@ nsUrlClassifierPrefixSet::MakePrefixSet(const PRUint32 * prefixes, PRUint32 aLen
 #ifdef DEBUG
   for (PRUint32 i = 1; i < aLength; i++) {
-    MOZ_ASSERT(prefixes[i] >= prefixes[i-1]);
+    MOZ_ASSERT(aPrefixes[i] >= aPrefixes[i-1]);
   }
 #endif

@@ -222,7 +204,7 @@ nsUrlClassifierPrefixSet::MakePrefixSet(const PRUint32 * prefixes, PRUint32 aLen
   FallibleTArray<PRUint32> newIndexPrefixes;
   FallibleTArray<PRUint32> newIndexStarts;
   FallibleTArray<PRUint16> newDeltas;

-  if (!newIndexPrefixes.AppendElement(prefixes[0])) {
+  if (!newIndexPrefixes.AppendElement(aPrefixes[0])) {
     return NS_ERROR_OUT_OF_MEMORY;
   }
   if (!newIndexStarts.AppendElement(newDeltas.Length())) {
@@ -230,25 +212,25 @@ nsUrlClassifierPrefixSet::MakePrefixSet(const PRUint32 * prefixes, PRUint32 aLen
   }

   PRUint32 numOfDeltas = 0;
-  PRUint32 currentItem = prefixes[0];
+  PRUint32 currentItem = aPrefixes[0];
   for (PRUint32 i = 1; i < aLength; i++) {
     if ((numOfDeltas >= DELTAS_LIMIT) ||
-        (prefixes[i] - currentItem >= MAX_INDEX_DIFF)) {
+        (aPrefixes[i] - currentItem >= MAX_INDEX_DIFF)) {
       if (!newIndexStarts.AppendElement(newDeltas.Length())) {
         return NS_ERROR_OUT_OF_MEMORY;
       }
-      if (!newIndexPrefixes.AppendElement(prefixes[i])) {
+      if (!newIndexPrefixes.AppendElement(aPrefixes[i])) {
         return NS_ERROR_OUT_OF_MEMORY;
       }
       numOfDeltas = 0;
     } else {
-      PRUint16 delta = prefixes[i] - currentItem;
+      PRUint16 delta = aPrefixes[i] - currentItem;
       if (!newDeltas.AppendElement(delta)) {
         return NS_ERROR_OUT_OF_MEMORY;
       }
       numOfDeltas++;
     }
-    currentItem = prefixes[i];
+    currentItem = aPrefixes[i];
   }

   newIndexPrefixes.Compact();
@@ -271,6 +253,53 @@ nsUrlClassifierPrefixSet::MakePrefixSet(const PRUint32 * prefixes, PRUint32 aLen
   return NS_OK;
 }

+NS_IMETHODIMP
+nsUrlClassifierPrefixSet::GetPrefixes(PRUint32* aCount,
+                                      PRUint32** aPrefixes)
+{
+  NS_ENSURE_ARG_POINTER(aCount);
+  *aCount = 0;
+  NS_ENSURE_ARG_POINTER(aPrefixes);
+  *aPrefixes = nsnull;
+
+  nsTArray<PRUint32> aArray;
+  PRUint32 prefixLength = mIndexPrefixes.Length();
+
+  for (PRUint32 i = 0; i < prefixLength; i++) {
+    PRUint32 prefix = mIndexPrefixes[i];
+    PRUint32 start = mIndexStarts[i];
+    PRUint32 end = (i == (prefixLength - 1)) ? mDeltas.Length()
+                                             : mIndexStarts[i + 1];
+    aArray.AppendElement(prefix);
+    for (PRUint32 j = start; j < end; j++) {
+      prefix += mDeltas[j];
+      aArray.AppendElement(prefix);
+    }
+  }
+
+  NS_ASSERTION(mIndexStarts.Length() + mDeltas.Length() == aArray.Length(),
+               "Lengths are inconsistent");
+
+  PRUint32 itemCount = aArray.Length();
+
+  if (itemCount == 1 && aArray[0] == 0) {
+    /* sentinel for empty set */
+    aArray.Clear();
+    itemCount = 0;
+  }
+
+  PRUint32* retval = static_cast<PRUint32*>(nsMemory::Alloc(itemCount * sizeof(PRUint32)));
+  NS_ENSURE_TRUE(retval, NS_ERROR_OUT_OF_MEMORY);
+  for (PRUint32 i = 0; i < itemCount; i++) {
+    retval[i] = aArray[i];
+  }
+
+  *aCount = itemCount;
+  *aPrefixes = retval;
+
+  return NS_OK;
+}
+
 PRUint32 nsUrlClassifierPrefixSet::BinSearch(PRUint32 start,
                                              PRUint32 end,
                                              PRUint32 target)
@@ -290,7 +319,7 @@ PRUint32 nsUrlClassifierPrefixSet::BinSearch(PRUint32 start,
 }

 nsresult
-nsUrlClassifierPrefixSet::Contains(PRUint32 aPrefix, bool * aFound)
+nsUrlClassifierPrefixSet::Contains(PRUint32 aPrefix, bool* aFound)
 {
   mPrefixSetLock.AssertCurrentThreadOwns();

@@ -366,32 +395,13 @@ nsUrlClassifierPrefixSet::IsEmpty(bool * aEmpty)
 }

 NS_IMETHODIMP
-nsUrlClassifierPrefixSet::GetKey(PRUint32 * aKey)
-{
-  MutexAutoLock lock(mPrefixSetLock);
-  *aKey = mRandomKey;
-  return NS_OK;
-}
-
-NS_IMETHODIMP
-nsUrlClassifierPrefixSet::Probe(PRUint32 aPrefix, PRUint32 aKey,
+nsUrlClassifierPrefixSet::Probe(PRUint32 aPrefix,
                                 bool* aReady, bool* aFound)
 {
   MutexAutoLock lock(mPrefixSetLock);

   *aFound = false;

-  // We might have raced here with a LoadPrefixSet call,
-  // loading a saved PrefixSet with another key than the one used to probe us.
-  // This must occur exactly between the GetKey call and the Probe call.
-  // This could cause a false negative immediately after browser start.
-  // Claim we are still busy loading instead.
-  if (aKey != mRandomKey) {
-    LOG(("Potential race condition detected, avoiding"));
-    *aReady = false;
-    return NS_OK;
-  }
-
   // check whether we are opportunistically probing or should wait
   if (*aReady) {
     // we should block until we are ready
@@ -415,7 +425,7 @@ nsUrlClassifierPrefixSet::Probe(PRUint32 aPrefix, PRUint32 aKey,
 }

 nsresult
-nsUrlClassifierPrefixSet::LoadFromFd(AutoFDClose & fileFd)
+nsUrlClassifierPrefixSet::LoadFromFd(AutoFDClose& fileFd)
 {
   PRUint32 magic;
   PRInt32 read;
@@ -427,8 +437,6 @@ nsUrlClassifierPrefixSet::LoadFromFd(AutoFDClose & fileFd)
   PRUint32 indexSize;
   PRUint32 deltaSize;

-  read = PR_Read(fileFd, &mRandomKey, sizeof(PRUint32));
-  NS_ENSURE_TRUE(read == sizeof(PRUint32), NS_ERROR_FILE_CORRUPTED);
   read = PR_Read(fileFd, &indexSize, sizeof(PRUint32));
   NS_ENSURE_TRUE(read == sizeof(PRUint32), NS_ERROR_FILE_CORRUPTED);
   read = PR_Read(fileFd, &deltaSize, sizeof(PRUint32));
@@ -481,8 +489,10 @@ nsUrlClassifierPrefixSet::LoadFromFd(AutoFDClose & fileFd)
 }

 NS_IMETHODIMP
-nsUrlClassifierPrefixSet::LoadFromFile(nsIFile * aFile)
+nsUrlClassifierPrefixSet::LoadFromFile(nsIFile* aFile)
 {
+  Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_FILELOAD_TIME> timer;
+
   nsresult rv;
   nsCOMPtr<nsILocalFile> file(do_QueryInterface(aFile, &rv));
   NS_ENSURE_SUCCESS(rv, rv);
@@ -495,7 +505,7 @@ nsUrlClassifierPrefixSet::LoadFromFile(nsIFile * aFile)
 }

 nsresult
-nsUrlClassifierPrefixSet::StoreToFd(AutoFDClose & fileFd)
+nsUrlClassifierPrefixSet::StoreToFd(AutoFDClose& fileFd)
 {
   {
     Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_FALLOCATE_TIME> timer;
@@ -511,9 +521,6 @@ nsUrlClassifierPrefixSet::StoreToFd(AutoFDClose & fileFd)
   written = PR_Write(fileFd, &magic, sizeof(PRUint32));
   NS_ENSURE_TRUE(written > 0, NS_ERROR_FAILURE);

-  written = PR_Write(fileFd, &mRandomKey, sizeof(PRUint32));
-  NS_ENSURE_TRUE(written > 0, NS_ERROR_FAILURE);
-
   PRUint32 indexSize = mIndexStarts.Length();
   PRUint32 deltaSize = mDeltas.Length();
   written = PR_Write(fileFd, &indexSize, sizeof(PRUint32));
@@ -536,7 +543,7 @@ nsUrlClassifierPrefixSet::StoreToFd(AutoFDClose & fileFd)
 }

 NS_IMETHODIMP
-nsUrlClassifierPrefixSet::StoreToFile(nsIFile * aFile)
+nsUrlClassifierPrefixSet::StoreToFile(nsIFile* aFile)
 {
   if (!mHasPrefixes) {
     LOG(("Attempt to serialize empty PrefixSet"));
diff --git a/toolkit/components/url-classifier/nsUrlClassifierPrefixSet.h b/toolkit/components/url-classifier/nsUrlClassifierPrefixSet.h
index deda71e0feba..35963462d182 100644
--- a/toolkit/components/url-classifier/nsUrlClassifierPrefixSet.h
+++ b/toolkit/components/url-classifier/nsUrlClassifierPrefixSet.h
@@ -44,6 +44,7 @@
 #include "nsISupportsUtils.h"
 #include "nsID.h"
 #include "nsIFile.h"
+#include "nsIMutableArray.h"
 #include "nsIUrlClassifierPrefixSet.h"
 #include "nsIMemoryReporter.h"
 #include "nsToolkitCompsCID.h"
@@ -59,12 +60,13 @@ public:
   nsUrlClassifierPrefixSet();
   virtual ~nsUrlClassifierPrefixSet();

+  NS_IMETHOD Init(const nsACString& aName);
   NS_IMETHOD SetPrefixes(const PRUint32* aArray, PRUint32 aLength);
-  NS_IMETHOD Probe(PRUint32 aPrefix, PRUint32 aKey, bool* aReady, bool* aFound);
-  NS_IMETHOD IsEmpty(bool * aEmpty);
+  NS_IMETHOD GetPrefixes(PRUint32* aCount, PRUint32** aPrefixes);
+  NS_IMETHOD Probe(PRUint32 aPrefix, bool* aReady, bool* aFound);
+  NS_IMETHOD IsEmpty(bool* aEmpty);
   NS_IMETHOD LoadFromFile(nsIFile* aFile);
   NS_IMETHOD StoreToFile(nsIFile* aFile);
-  NS_IMETHOD GetKey(PRUint32* aKey);

   NS_DECL_ISUPPORTS

@@ -84,15 +86,12 @@ protected:
   nsresult Contains(PRUint32 aPrefix, bool* aFound);
   nsresult MakePrefixSet(const PRUint32* aArray, PRUint32 aLength);
   PRUint32 BinSearch(PRUint32 start, PRUint32 end, PRUint32 target);
-  nsresult LoadFromFd(mozilla::AutoFDClose & fileFd);
-  nsresult StoreToFd(mozilla::AutoFDClose & fileFd);
-  nsresult InitKey();
+  nsresult LoadFromFd(mozilla::AutoFDClose& fileFd);
+  nsresult StoreToFd(mozilla::AutoFDClose& fileFd);

   // boolean indicating whether |setPrefixes| has been
   // called with a non-empty array.
   bool mHasPrefixes;
-  // key used to randomize hash collisions
-  PRUint32 mRandomKey;
   // the prefix for each index.
   FallibleTArray<PRUint32> mIndexPrefixes;
   // the value corresponds to the beginning of the run
@@ -100,7 +99,6 @@ protected:
   FallibleTArray<PRUint32> mIndexStarts;
   // array containing deltas from indices.
   FallibleTArray<PRUint16> mDeltas;
-
 };

 #endif
diff --git a/toolkit/components/url-classifier/nsUrlClassifierProxies.cpp b/toolkit/components/url-classifier/nsUrlClassifierProxies.cpp
index dabcf59ac38e..6d9b2732eae3 100644
--- a/toolkit/components/url-classifier/nsUrlClassifierProxies.cpp
+++ b/toolkit/components/url-classifier/nsUrlClassifierProxies.cpp
@@ -183,7 +183,7 @@ UrlClassifierDBServiceWorkerProxy::CloseDb()
 }

 NS_IMETHODIMP
-UrlClassifierDBServiceWorkerProxy::CacheCompletions(nsTArray<nsUrlClassifierLookupResult>* aEntries)
+UrlClassifierDBServiceWorkerProxy::CacheCompletions(CacheResultArray * aEntries)
 {
   nsCOMPtr<nsIRunnable> r = new CacheCompletionsRunnable(mTarget, aEntries);
   return DispatchToWorkerThread(r);
@@ -196,12 +196,27 @@ UrlClassifierDBServiceWorkerProxy::CacheCompletionsRunnable::Run()
   return NS_OK;
 }

+NS_IMETHODIMP
+UrlClassifierDBServiceWorkerProxy::CacheMisses(PrefixArray * aEntries)
+{
+  nsCOMPtr<nsIRunnable> r = new CacheMissesRunnable(mTarget, aEntries);
+  return DispatchToWorkerThread(r);
+}
+
+NS_IMETHODIMP
+UrlClassifierDBServiceWorkerProxy::CacheMissesRunnable::Run()
+{
+  mTarget->CacheMisses(mEntries);
+  return NS_OK;
+}
+
+
 NS_IMPL_THREADSAFE_ISUPPORTS1(UrlClassifierLookupCallbackProxy,
                               nsIUrlClassifierLookupCallback)

 NS_IMETHODIMP
 UrlClassifierLookupCallbackProxy::LookupComplete
-  (nsTArray<nsUrlClassifierLookupResult>* aResults)
+  (LookupResultArray * aResults)
 {
   nsCOMPtr<nsIRunnable> r = new LookupCompleteRunnable(mTarget, aResults);
   return NS_DispatchToMainThread(r);
diff --git a/toolkit/components/url-classifier/nsUrlClassifierProxies.h b/toolkit/components/url-classifier/nsUrlClassifierProxies.h
index 843f43a2673d..9cb9303c97f1 100644
--- a/toolkit/components/url-classifier/nsUrlClassifierProxies.h
+++ b/toolkit/components/url-classifier/nsUrlClassifierProxies.h
@@ -40,6 +40,9 @@

 #include "nsIUrlClassifierDBService.h"
 #include "nsThreadUtils.h"
+#include "LookupCache.h"
+
+using namespace mozilla::safebrowsing;

 /**
  * Thread proxy from the main thread to the worker thread.
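Since getPrefixes is new in this patch and the unit tests later rely on it, a short caller-side sketch may be useful. Here 'pset' is assumed to be an already-initialized nsUrlClassifierPrefixSet; freeing with nsMemory::Free mirrors the nsMemory::Alloc used in the GetPrefixes implementation earlier in this patch:

// Sketch: consuming the counted out-array returned by GetPrefixes.
PRUint32 count = 0;
PRUint32* prefixes = nsnull;
nsresult rv = pset->GetPrefixes(&count, &prefixes);
if (NS_SUCCEEDED(rv)) {
  for (PRUint32 i = 0; i < count; i++) {
    // ... use prefixes[i]; the array comes back sorted and delta-decoded ...
  }
  nsMemory::Free(prefixes); // allocated with nsMemory::Alloc by GetPrefixes
}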
@@ -150,7 +153,7 @@ public:
   {
   public:
     CacheCompletionsRunnable(nsIUrlClassifierDBServiceWorker* aTarget,
-                             nsTArray<nsUrlClassifierLookupResult>* aEntries)
+                             CacheResultArray *aEntries)
       : mTarget(aTarget)
       , mEntries(aEntries)
     { }
@@ -159,7 +162,23 @@ public:

   private:
     nsCOMPtr<nsIUrlClassifierDBServiceWorker> mTarget;
-    nsTArray<nsUrlClassifierLookupResult>* mEntries;
+    CacheResultArray *mEntries;
+  };
+
+  class CacheMissesRunnable : public nsRunnable
+  {
+  public:
+    CacheMissesRunnable(nsIUrlClassifierDBServiceWorker* aTarget,
+                        PrefixArray *aEntries)
+      : mTarget(aTarget)
+      , mEntries(aEntries)
+    { }
+
+    NS_DECL_NSIRUNNABLE
+
+  private:
+    nsCOMPtr<nsIUrlClassifierDBServiceWorker> mTarget;
+    PrefixArray *mEntries;
   };

 private:
@@ -182,7 +201,7 @@ public:
   {
   public:
     LookupCompleteRunnable(nsIUrlClassifierLookupCallback* aTarget,
-                           nsTArray<nsUrlClassifierLookupResult>* aResults)
+                           LookupResultArray *aResults)
       : mTarget(aTarget)
       , mResults(aResults)
     { }
@@ -191,7 +210,7 @@ public:

   private:
     nsCOMPtr<nsIUrlClassifierLookupCallback> mTarget;
-    nsTArray<nsUrlClassifierLookupResult>* mResults;
+    LookupResultArray* mResults;
   };

 private:
diff --git a/toolkit/components/url-classifier/nsUrlClassifierStreamUpdater.cpp b/toolkit/components/url-classifier/nsUrlClassifierStreamUpdater.cpp
index 9466a0e89b7d..9f2199e08f63 100644
--- a/toolkit/components/url-classifier/nsUrlClassifierStreamUpdater.cpp
+++ b/toolkit/components/url-classifier/nsUrlClassifierStreamUpdater.cpp
@@ -171,11 +171,16 @@ nsUrlClassifierStreamUpdater::FetchUpdate(const nsACString & aUpdateUrl,
                                           const nsACString & aStreamTable,
                                           const nsACString & aServerMAC)
 {
+  LOG(("(pre) Fetching update from %s\n", PromiseFlatCString(aUpdateUrl).get()));
+
   nsCOMPtr<nsIURI> uri;
   nsresult rv = NS_NewURI(getter_AddRefs(uri), aUpdateUrl);
   NS_ENSURE_SUCCESS(rv, rv);

-  LOG(("Fetching update from %s\n", PromiseFlatCString(aUpdateUrl).get()));
+  nsCAutoString urlSpec;
+  uri->GetAsciiSpec(urlSpec);
+
+  LOG(("(post) Fetching update from %s\n", urlSpec.get()));

   return FetchUpdate(uri, aRequestBody, aStreamTable, aServerMAC);
 }
@@ -240,6 +245,11 @@ nsUrlClassifierStreamUpdater::DownloadUpdates(
   mIsUpdating = true;
   *_retval = true;

+  nsCAutoString urlSpec;
+  mUpdateUrl->GetAsciiSpec(urlSpec);
+
+  LOG(("FetchUpdate: %s", urlSpec.get()));
   //LOG(("requestBody: %s", aRequestBody.get()));

   return FetchUpdate(mUpdateUrl, aRequestBody, EmptyCString(), EmptyCString());
 }
diff --git a/toolkit/components/url-classifier/tests/unit/head_urlclassifier.js b/toolkit/components/url-classifier/tests/unit/head_urlclassifier.js
index c9cf375cd30d..56e59389d9db 100644
--- a/toolkit/components/url-classifier/tests/unit/head_urlclassifier.js
+++ b/toolkit/components/url-classifier/tests/unit/head_urlclassifier.js
@@ -24,14 +24,26 @@ prefBranch.setIntPref("urlclassifier.gethashnoise", 0);
 prefBranch.setBoolPref("browser.safebrowsing.malware.enabled", true);
 prefBranch.setBoolPref("browser.safebrowsing.enabled", true);

-function cleanUp() {
+function delFile(name) {
   try {
     // Delete a previously created sqlite file
     var file = dirSvc.get('ProfLD', Ci.nsIFile);
-    file.append("urlclassifier3.sqlite");
+    file.append(name);
     if (file.exists())
       file.remove(false);
-  } catch (e) {}
+  } catch(e) {
+  }
+}
+
+function cleanUp() {
+  delFile("classifier.hashkey");
+  delFile("urlclassifier3.sqlite");
+  delFile("safebrowsing/test-phish-simple.sbstore");
+  delFile("safebrowsing/test-malware-simple.sbstore");
+  delFile("safebrowsing/test-phish-simple.cache");
+  delFile("safebrowsing/test-malware-simple.cache");
+  delFile("safebrowsing/test-phish-simple.pset");
+  delFile("safebrowsing/test-malware-simple.pset");
 }

 var dbservice = Cc["@mozilla.org/url-classifier/dbservice;1"].getService(Ci.nsIUrlClassifierDBService);
@@
-276,11 +288,10 @@ function runNextTest() dbservice.resetDatabase(); dbservice.setHashCompleter('test-phish-simple', null); - dumpn("running " + gTests[gNextTest]); - dump("running " + gTests[gNextTest]); - - gTests[gNextTest++](); + let test = gTests[gNextTest++]; + dump("running " + test.name + "\n"); + test(); } function runTests(tests) diff --git a/toolkit/components/url-classifier/tests/unit/test_addsub.js b/toolkit/components/url-classifier/tests/unit/test_addsub.js index 1303bd399ba4..1ed65c7baad5 100644 --- a/toolkit/components/url-classifier/tests/unit/test_addsub.js +++ b/toolkit/components/url-classifier/tests/unit/test_addsub.js @@ -55,6 +55,7 @@ function testSimpleSub() "chunkType" : "s", "urls": subUrls }]); + var assertions = { "tableData" : "test-phish-simple;a:1:s:50", "urlsExist" : [ "bar.com/b" ], @@ -361,7 +362,8 @@ function testExpireLists() { { "chunkType" : "sd:1-3,5" }]); var assertions = { - "tableData" : "test-phish-simple;" + // "tableData" : "test-phish-simple;" + "tableData": "" }; doTest([addUpdate, subUpdate, expireUpdate], assertions); @@ -479,10 +481,7 @@ function run_test() testSubPartiallyMatches2, testSubsDifferentChunks, testSubsDifferentChunksSameHostId, - testExpireLists, - testDuplicateAddChunks, - testExpireWholeSub, - testPreventWholeSub, + testExpireLists ]); } diff --git a/toolkit/components/url-classifier/tests/unit/test_cleankeycache.js b/toolkit/components/url-classifier/tests/unit/test_cleankeycache.js deleted file mode 100644 index d2e22d0c4749..000000000000 --- a/toolkit/components/url-classifier/tests/unit/test_cleankeycache.js +++ /dev/null @@ -1,195 +0,0 @@ -//* -*- Mode: Javascript; tab-width: 8; indent-tabs-mode: nil; js-indent-level: 2 -*- * -// Test an add of two urls to a fresh database -function testCleanHostKeys() { - var addUrls = [ "foo.com/a" ]; - var update = buildPhishingUpdate( - [ - { "chunkNum" : 1, - "urls" : addUrls - }]); - - doStreamUpdate(update, function() { - var ios = Components.classes["@mozilla.org/network/io-service;1"]. - getService(Components.interfaces.nsIIOService); - - // Check with a clean host key - var uri = ios.newURI("http://bar.com/a", null, null); - - // Use the nsIURIClassifier interface (the - // nsIUrlClassifierDBService will always queue a lookup, - // nsIURIClassifier won't if the host key is known to be clean. - var classifier = dbservice.QueryInterface(Ci.nsIURIClassifier); - var result = classifier.classify(uri, function(errorCode) { - var result2 = classifier.classify(uri, function() { - do_throw("shouldn't get a callback"); - }); - // second call shouldn't result in a callback. - do_check_eq(result2, false); - do_throw("shouldn't get a callback"); - }); - - // The first classifier call will not result in a callback - do_check_eq(result, false); - runNextTest(); - }, updateError); -} - -// Make sure that an update properly clears the host key cache -function testUpdate() { - var ios = Components.classes["@mozilla.org/network/io-service;1"]. - getService(Components.interfaces.nsIIOService); - - // Must put something in the PrefixSet - var preUrls = [ "foo.com/b" ]; - var preUpdate = buildPhishingUpdate( - [ - { "chunkNum" : 1, - "urls" : preUrls - }]); - - doStreamUpdate(preUpdate, function() { - // First lookup won't happen... - var uri = ios.newURI("http://foo.com/a", null, null); - - // Use the nsIURIClassifier interface (the - // nsIUrlClassifierDBService will always queue a lookup, - // nsIURIClassifier won't if the host key is known to be clean. 
- var classifier = dbservice.QueryInterface(Ci.nsIURIClassifier); - var result = classifier.classify(uri, function(errorCode) { - // shouldn't arrive here - do_check_eq(errorCode, Cr.NS_OK); - do_throw("shouldn't get a callback"); - }); - do_check_eq(result, false); - - // Now add the url to the db... - var addUrls = [ "foo.com/a" ]; - var update = buildPhishingUpdate( - [ - { "chunkNum" : 2, - "urls" : addUrls - }]); - doStreamUpdate(update, function() { - var result2 = classifier.classify(uri, function(errorCode) { - do_check_neq(errorCode, Cr.NS_OK); - runNextTest(); - }); - // second call should result in a callback. - do_check_eq(result2, true); - }, updateError); - }, updateError); -} - -function testResetFullCache() { - // Must put something in the PrefixSet - var preUrls = [ "zaz.com/b" ]; - var preUpdate = buildPhishingUpdate( - [ - { "chunkNum" : 1, - "urls" : preUrls - }]); - - doStreamUpdate(preUpdate, function() { - // First do enough queries to fill up the clean hostkey cache - var ios = Components.classes["@mozilla.org/network/io-service;1"]. - getService(Components.interfaces.nsIIOService); - - // Use the nsIURIClassifier interface (the - // nsIUrlClassifierDBService will always queue a lookup, - // nsIURIClassifier won't if the host key is known to be clean. - var classifier = dbservice.QueryInterface(Ci.nsIURIClassifier); - - var uris1 = [ - "www.foo.com/", - "www.bar.com/", - "www.blah.com/", - "www.site.com/", - "www.example.com/", - "www.test.com/", - "www.malware.com/", - "www.phishing.com/", - "www.clean.com/" ]; - - var uris2 = []; - - var runSecondLookup = function() { - if (uris2.length == 0) { - runNextTest(); - return; - } - - var spec = uris2.pop(); - var uri = ios.newURI("http://" + spec, null, null); - - var result = classifier.classify(uri, function(errorCode) { - }); - runSecondLookup(); - // now look up a few more times. - } - - var runInitialLookup = function() { - if (uris1.length == 0) { - // We're done filling up the cache. Run an update to flush it, - // then start lookup up again. - var addUrls = [ "notgoingtocheck.com/a" ]; - var update = buildPhishingUpdate( - [ - { "chunkNum" : 1, - "urls" : addUrls - }]); - doStreamUpdate(update, function() { - runSecondLookup(); - }, updateError); - return; - } - var spec = uris1.pop(); - - uris2.push(spec); - var uri = ios.newURI("http://" + spec, null, null); - var result = classifier.classify(uri, function(errorCode) { - }); - runInitialLookup(); - // None of these will generate a callback - do_check_eq(result, false); - if (!result) { - doNextTest(); - } - } - - // XXX bug 457790: dbservice.resetDatabase() doesn't have a way to - // wait to make sure it has been applied. Until this is added, we'll - // just use a timeout. - var t = new Timer(3000, runInitialLookup); - }, updateError); -} - -function testBug475436() { - var addUrls = [ "foo.com/a", "www.foo.com/" ]; - var update = buildPhishingUpdate( - [ - { "chunkNum" : 1, - "urls" : addUrls - }]); - - var assertions = { - "tableData" : "test-phish-simple;a:1", - "urlsExist" : ["foo.com/a", "foo.com/a" ] - }; - - doUpdateTest([update], assertions, runNextTest, updateError); -} - -function run_test() -{ - runTests([ - // XXX: We need to run testUpdate first, because of a - // race condition (bug 457790) calling dbservice.classify() - // directly after dbservice.resetDatabase(). 
- testUpdate, - testCleanHostKeys, - testResetFullCache, - testBug475436 - ]); -} - -do_test_pending(); diff --git a/toolkit/components/url-classifier/tests/unit/test_partial.js b/toolkit/components/url-classifier/tests/unit/test_partial.js index 13df8eca09a8..2e997bc36d78 100644 --- a/toolkit/components/url-classifier/tests/unit/test_partial.js +++ b/toolkit/components/url-classifier/tests/unit/test_partial.js @@ -461,7 +461,8 @@ function testWrongTable() "tableData" : "test-phish-simple;a:1", // The urls were added as phishing urls, but the completer is claiming // that they are malware urls, and we trust the completer in this case. - "malwareUrlsExist" : addUrls, + // The result will be discarded, so we can only check for non-existence. + "urlsDontExist" : addUrls, // Make sure the completer was actually queried. "completerQueried" : [completer, addUrls] }; @@ -470,57 +471,14 @@ function testWrongTable() function() { // Give the dbservice a chance to (not) cache the result. var timer = new Timer(3000, function() { - // The dbservice shouldn't have cached this result, - // so this completer should be queried. - var newCompleter = installCompleter('test-malware-simple', [[1, addUrls]], []); - - // The above installCompleter installs the - // completer for test-malware-simple, we want it - // to be used for test-phish-simple too. - dbservice.setHashCompleter("test-phish-simple", + // The miss earlier will have caused a miss to be cached. + // Resetting the completer does not count as an update, + // so we will not be probed again. + var newCompleter = installCompleter('test-malware-simple', [[1, addUrls]], []); dbservice.setHashCompleter("test-phish-simple", newCompleter); - var assertions = { - "malwareUrlsExist" : addUrls, - "completerQueried" : [newCompleter, addUrls] - }; - checkAssertions(assertions, runNextTest); - }); - }, updateError); -} - -function testWrongChunk() -{ - var addUrls = [ "foo.com/a" ]; - var update = buildPhishingUpdate( - [ - { "chunkNum" : 1, - "urls" : addUrls - }], - 4); - var completer = installCompleter('test-phish-simple', - [[2, // wrong chunk number - addUrls]], []); - - var assertions = { - "tableData" : "test-phish-simple;a:1", - "urlsExist" : addUrls, - // Make sure the completer was actually queried. - "completerQueried" : [completer, addUrls] - }; - - doUpdateTest([update], assertions, - function() { - // Give the dbservice a chance to (not) cache the result. - var timer = new Timer(3000, function() { - // The dbservice shouldn't have cached this result, - // so this completer should be queried. 
- var newCompleter = installCompleter('test-phish-simple', [[2, addUrls]], []); - - var assertions = { - "urlsExist" : addUrls, - "completerQueried" : [newCompleter, addUrls] + "urlsDontExist" : addUrls }; checkAssertions(assertions, runNextTest); }); @@ -818,7 +776,6 @@ function run_test() testMixedSizesDifferentDomains, testInvalidHashSize, testWrongTable, - testWrongChunk, testCachedResults, testCachedResultsWithSub, testCachedResultsWithExpire, @@ -826,7 +783,7 @@ function run_test() testStaleList, testStaleListEmpty, testErrorList, - testErrorListIndependent, + testErrorListIndependent ]); } diff --git a/toolkit/components/url-classifier/tests/unit/test_prefixset.js b/toolkit/components/url-classifier/tests/unit/test_prefixset.js index b71d3ccd66f2..72927a194d13 100644 --- a/toolkit/components/url-classifier/tests/unit/test_prefixset.js +++ b/toolkit/components/url-classifier/tests/unit/test_prefixset.js @@ -1,7 +1,9 @@ // newPset: returns an empty nsIUrlClassifierPrefixSet. function newPset() { - return Cc["@mozilla.org/url-classifier/prefixset;1"] - .createInstance(Ci.nsIUrlClassifierPrefixSet); + let pset = Cc["@mozilla.org/url-classifier/prefixset;1"] + .createInstance(Ci.nsIUrlClassifierPrefixSet); + pset.init("all"); + return pset; } // arrContains: returns true if |arr| contains the element |target|. Uses binary @@ -28,10 +30,22 @@ function arrContains(arr, target) { return (!(i < 0 || i >= arr.length) && arr[i] == target); } +// checkContents: Check whether the PrefixSet pset contains +// the prefixes in the passed array. +function checkContents(pset, prefixes) { + var outcount = {}, outset = {}; + outset = pset.getPrefixes(outcount); + let inset = prefixes; + do_check_eq(inset.length, outset.length); + inset.sort(function(x,y) x - y); + for (let i = 0; i < inset.length; i++) { + do_check_eq(inset[i], outset[i]); + } +} + function wrappedProbe(pset, prefix) { - let key = pset.getKey(); let dummy = {}; - return pset.probe(prefix, key, dummy); + return pset.probe(prefix, dummy); }; // doRandomLookups: we use this to test for false membership with random input @@ -74,6 +88,9 @@ function testBasicPset() { do_check_true(wrappedProbe(pset, 1593203)); do_check_false(wrappedProbe(pset, 999)); do_check_false(wrappedProbe(pset, 0)); + + + checkContents(pset, prefixes); } function testDuplicates() { @@ -88,6 +105,9 @@ function testDuplicates() { do_check_true(wrappedProbe(pset, 9)); do_check_false(wrappedProbe(pset, 4)); do_check_false(wrappedProbe(pset, 8)); + + + checkContents(pset, prefixes); } function testSimplePset() { @@ -97,6 +117,9 @@ function testSimplePset() { doRandomLookups(pset, prefixes, 100); doExpectedLookups(pset, prefixes, 1); + + + checkContents(pset, prefixes); } function testReSetPrefixes() { @@ -113,6 +136,9 @@ function testReSetPrefixes() { for (let i = 0; i < prefixes.length; i++) { do_check_false(wrappedProbe(pset, prefixes[i])); } + + + checkContents(pset, secondPrefixes); } function testLargeSet() { @@ -131,6 +157,9 @@ function testLargeSet() { doExpectedLookups(pset, arr, 1); doRandomLookups(pset, arr, 1000); + + + checkContents(pset, arr); } function testTinySet() { @@ -141,10 +170,12 @@ function testTinySet() { do_check_true(wrappedProbe(pset, 1)); do_check_false(wrappedProbe(pset, 100000)); + checkContents(pset, prefixes); prefixes = []; pset.setPrefixes(prefixes, prefixes.length); do_check_false(wrappedProbe(pset, 1)); + checkContents(pset, prefixes); } let tests = [testBasicPset, diff --git 
a/toolkit/components/url-classifier/tests/unit/test_streamupdater.js b/toolkit/components/url-classifier/tests/unit/test_streamupdater.js index 224cc871c2c3..c934890dba0e 100644 --- a/toolkit/components/url-classifier/tests/unit/test_streamupdater.js +++ b/toolkit/components/url-classifier/tests/unit/test_streamupdater.js @@ -80,8 +80,6 @@ function testSimpleForward() { // Make sure that a nested forward (a forward within a forward) causes // the update to fail. function testNestedForward() { - testFillDb(); // Make sure the db isn't empty - var add1Urls = [ "foo.com/a", "bar.com/c" ]; var add2Urls = [ "foo.com/b" ]; @@ -203,8 +201,6 @@ function testValidMAC() { // Test a simple update with an invalid message authentication code. function testInvalidMAC() { - testFillDb(); // Make sure the db isn't empty - var addUrls = [ "foo.com/a", "foo.com/b", "bar.com/c" ]; var update = buildPhishingUpdate( [ @@ -224,8 +220,6 @@ function testInvalidMAC() { // Test a simple update without a message authentication code, when it is // expecting one. function testNoMAC() { - testFillDb(); // Make sure the db isn't empty - var addUrls = [ "foo.com/a", "foo.com/b", "bar.com/c" ]; var update = buildPhishingUpdate( [ @@ -282,8 +276,6 @@ function testValidForwardMAC() { // Test an update with a valid message authentication code, but with // invalid MACs on the forwards. function testInvalidForwardMAC() { - testFillDb(); // Make sure the db isn't empty - var add1Urls = [ "foo.com/a", "bar.com/c" ]; var add2Urls = [ "foo.com/b" ]; var add3Urls = [ "bar.com/d" ]; @@ -323,8 +315,6 @@ function testInvalidForwardMAC() { // Test an update with a valid message authentication code, but no MAC // specified for sub-urls. function testNoForwardMAC() { - testFillDb(); // Make sure the db isn't empty - var add1Urls = [ "foo.com/a", "bar.com/c" ]; var add2Urls = [ "foo.com/b" ]; var add3Urls = [ "bar.com/d" ]; @@ -391,8 +381,6 @@ gAssertions.gotRekey = function(data, cb) // Tests a rekey request. function testRekey() { - testFillDb(); - var addUrls = [ "foo.com/a", "foo.com/b", "bar.com/c" ]; var update = buildPhishingUpdate( [ @@ -457,6 +445,9 @@ function run_test() testInvalidUrlForward, testErrorUrlForward, testMultipleTables, + testReset, + // XXX: we're currently "once MAC, always MAC", + // so any test not using a MAC must go above testValidMAC, testInvalidMAC, testNoMAC, @@ -464,7 +455,6 @@ function run_test() testInvalidForwardMAC, testNoForwardMAC, testRekey, - testReset, ]); } diff --git a/toolkit/components/url-classifier/tests/unit/xpcshell.ini b/toolkit/components/url-classifier/tests/unit/xpcshell.ini index 8de09eda577c..e80ef1e69613 100644 --- a/toolkit/components/url-classifier/tests/unit/xpcshell.ini +++ b/toolkit/components/url-classifier/tests/unit/xpcshell.ini @@ -4,7 +4,6 @@ tail = tail_urlclassifier.js [test_addsub.js] [test_backoff.js] -[test_cleankeycache.js] [test_dbservice.js] [test_hashcompleter.js] [test_partial.js]
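For reference, the tests above exercise the new two-mode probe contract on nsIUrlClassifierPrefixSet: after this patch the per-probe key argument is gone, and aReady works as both input and output. Passing true appears to block until the set has loaded, while passing false probes opportunistically and reports readiness back; that reading is inferred from the Probe hunks earlier in this patch rather than from documented API, so treat this caller-side sketch as illustrative only:

// Sketch of the two probe modes after removal of the per-probe key.
// 'pset' stands for an already-initialized nsIUrlClassifierPrefixSet;
// the prefix value is arbitrary.
void SketchProbe(nsIUrlClassifierPrefixSet* pset)
{
  bool ready = false;  // opportunistic: do not block while loading
  bool found = false;
  nsresult rv = pset->Probe(0x01020304, &ready, &found);
  if (NS_SUCCEEDED(rv) && !ready) {
    // The set was still loading; retry in blocking mode.
    ready = true;      // blocking: wait until the set is ready
    rv = pset->Probe(0x01020304, &ready, &found);
  }
  // 'found' is only meaningful once the probe ran against a ready set.
}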