Bug 669410 - Add PrefixSet datastructure for SafeBrowsing. r=dcamp

This commit is contained in:
Gian-Carlo Pascutto 2011-09-08 22:15:08 +02:00
Родитель 7ee55d6822
Коммит 9a5f444432
9 изменённых файлов: 472 добавлений и 1 удалений

Просмотреть файл

@ -81,6 +81,9 @@
#define NS_PARENTALCONTROLSSERVICE_CONTRACTID \
"@mozilla.org/parental-controls-service;1"
#define NS_URLCLASSIFIERPREFIXSET_CONTRACTID \
"@mozilla.org/url-classifier/prefixset;1"
#define NS_URLCLASSIFIERDBSERVICE_CONTRACTID \
"@mozilla.org/url-classifier/dbservice;1"
@ -156,7 +159,11 @@
// {e7f70966-9a37-48d7-8aeb-35998f31090e}
#define NS_TYPEAHEADFIND_CID \
{ 0xe7f70966, 0x9a37, 0x48d7, { 0x8a, 0xeb, 0x35, 0x99, 0x8f, 0x31, 0x09, 0x0e} }
// {61a2318e-0a7a-483e-9105-367d4827f288}
#define NS_URLCLASSIFIERPREFIXSET_CID \
{ 0x61a2318e, 0x0a7a, 0x483e, { 0x91, 0x05, 0x36, 0x7d, 0x48, 0x27, 0xf2, 0x88} }
// {5eb7c3c1-ec1f-4007-87cc-eefb37d68ce6}
#define NS_URLCLASSIFIERDBSERVICE_CID \
{ 0x5eb7c3c1, 0xec1f, 0x4007, { 0x87, 0xcc, 0xee, 0xfb, 0x37, 0xd6, 0x8c, 0xe6} }

Просмотреть файл

@ -63,6 +63,7 @@
#include "nsUrlClassifierDBService.h"
#include "nsUrlClassifierStreamUpdater.h"
#include "nsUrlClassifierUtils.h"
#include "nsUrlClassifierPrefixSet.h"
#endif
#ifdef MOZ_FEEDS
@ -94,6 +95,7 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsDownloadProxy)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsTypeAheadFind)
#ifdef MOZ_URL_CLASSIFIER
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUrlClassifierPrefixSet)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUrlClassifierStreamUpdater)
NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsUrlClassifierUtils, Init)
@ -138,6 +140,7 @@ NS_DEFINE_NAMED_CID(NS_DOWNLOAD_CID);
NS_DEFINE_NAMED_CID(NS_FIND_SERVICE_CID);
NS_DEFINE_NAMED_CID(NS_TYPEAHEADFIND_CID);
#ifdef MOZ_URL_CLASSIFIER
NS_DEFINE_NAMED_CID(NS_URLCLASSIFIERPREFIXSET_CID);
NS_DEFINE_NAMED_CID(NS_URLCLASSIFIERDBSERVICE_CID);
NS_DEFINE_NAMED_CID(NS_URLCLASSIFIERSTREAMUPDATER_CID);
NS_DEFINE_NAMED_CID(NS_URLCLASSIFIERUTILS_CID);
@ -164,6 +167,7 @@ static const mozilla::Module::CIDEntry kToolkitCIDs[] = {
{ &kNS_FIND_SERVICE_CID, false, NULL, nsFindServiceConstructor },
{ &kNS_TYPEAHEADFIND_CID, false, NULL, nsTypeAheadFindConstructor },
#ifdef MOZ_URL_CLASSIFIER
{ &kNS_URLCLASSIFIERPREFIXSET_CID, false, NULL, nsUrlClassifierPrefixSetConstructor },
{ &kNS_URLCLASSIFIERDBSERVICE_CID, false, NULL, nsUrlClassifierDBServiceConstructor },
{ &kNS_URLCLASSIFIERSTREAMUPDATER_CID, false, NULL, nsUrlClassifierStreamUpdaterConstructor },
{ &kNS_URLCLASSIFIERUTILS_CID, false, NULL, nsUrlClassifierUtilsConstructor },
@ -192,6 +196,7 @@ static const mozilla::Module::ContractIDEntry kToolkitContracts[] = {
{ NS_FIND_SERVICE_CONTRACTID, &kNS_FIND_SERVICE_CID },
{ NS_TYPEAHEADFIND_CONTRACTID, &kNS_TYPEAHEADFIND_CID },
#ifdef MOZ_URL_CLASSIFIER
{ NS_URLCLASSIFIERPREFIXSET_CONTRACTID, &kNS_URLCLASSIFIERPREFIXSET_CID },
{ NS_URLCLASSIFIERDBSERVICE_CONTRACTID, &kNS_URLCLASSIFIERDBSERVICE_CID },
{ NS_URICLASSIFIERSERVICE_CONTRACTID, &kNS_URLCLASSIFIERDBSERVICE_CID },
{ NS_URLCLASSIFIERSTREAMUPDATER_CONTRACTID, &kNS_URLCLASSIFIERSTREAMUPDATER_CID },

Просмотреть файл

@ -53,6 +53,7 @@ XPIDLSRCS = \
nsIUrlClassifierDBService.idl \
nsIUrlClassifierHashCompleter.idl \
nsIUrlClassifierStreamUpdater.idl \
nsIUrlClassifierPrefixSet.idl \
nsIUrlClassifierUtils.idl \
nsIUrlListManager.idl \
$(NULL)
@ -61,6 +62,7 @@ CPPSRCS = \
nsUrlClassifierDBService.cpp \
nsUrlClassifierStreamUpdater.cpp \
nsUrlClassifierUtils.cpp \
nsUrlClassifierPrefixSet.cpp \
nsUrlClassifierProxies.cpp \
$(NULL)

Просмотреть файл

@ -0,0 +1,12 @@
#include "nsISupports.idl"
interface nsIArray;
[scriptable, uuid(6e9f759a-3f8d-4552-99ed-dbf0ea0a5f67)]
interface nsIUrlClassifierPrefixSet : nsISupports
{
void setPrefixes([const, array, size_is(aLength)] in unsigned long aPrefixes,
in unsigned long aLength);
boolean contains(in unsigned long aPrefix);
};

Просмотреть файл

@ -24,6 +24,7 @@
* Brett Wilson <brettw@gmail.com>
* Dave Camp <dcamp@mozilla.com>
* David Dahl <ddahl@mozilla.com>
* Gian-Carlo Pascutto <gpascutto@mozilla.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or

Просмотреть файл

@ -0,0 +1,207 @@
//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 et sw=2 tw=80: */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Url Classifier code
*
* The Initial Developer of the Original Code is
* the Mozilla Foundation.
* Portions created by the Initial Developer are Copyright (C) 2011
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Gian-Carlo Pascutto <gpascutto@mozilla.com>
* Mehdi Mulani <mars.martian+bugmail@gmail.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsAutoPtr.h"
#include "nsCOMPtr.h"
#include "nsTArray.h"
#include "nsUrlClassifierPrefixSet.h"
#include "nsIUrlClassifierPrefixSet.h"
#include "nsToolkitCompsCID.h"
#include "nsTArray.h"
#include "nsThreadUtils.h"
#include "mozilla/Mutex.h"
#include "prlog.h"
// NSPR_LOG_MODULES=UrlClassifierPrefixSet:5
#if defined(PR_LOGGING)
static const PRLogModuleInfo *gUrlClassifierPrefixSetLog = nsnull;
#define LOG(args) PR_LOG(gUrlClassifierPrefixSetLog, PR_LOG_DEBUG, args)
#define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierPrefixSetLog, 4)
#else
#define LOG(args)
#define LOG_ENABLED() (PR_FALSE)
#endif
NS_IMPL_THREADSAFE_ISUPPORTS1(nsUrlClassifierPrefixSet, nsIUrlClassifierPrefixSet);
nsUrlClassifierPrefixSet::nsUrlClassifierPrefixSet()
: mHasPrefixes(PR_FALSE)
{
#if defined(PR_LOGGING)
if (!gUrlClassifierPrefixSetLog)
gUrlClassifierPrefixSetLog = PR_NewLogModule("UrlClassifierPrefixSet");
#endif
LOG(("Instantiating PrefixSet"));
}
nsresult
nsUrlClassifierPrefixSet::SetPrefixes(const PRUint32 * aArray, PRUint32 aLength)
{
if (mHasPrefixes) {
mDeltas.Clear();
mIndexPrefixes.Clear();
mIndexStarts.Clear();
mHasPrefixes = PR_FALSE;
}
if (aLength > 0) {
// Ensure they are sorted before adding
nsTArray<PRUint32> prefixes;
prefixes.AppendElements(aArray, aLength);
prefixes.Sort();
AddPrefixes(prefixes.Elements(), prefixes.Length());
}
return NS_OK;
}
nsresult
nsUrlClassifierPrefixSet::AddPrefixes(const PRUint32 * prefixes, PRUint32 aLength)
{
if (aLength == 0) {
return NS_OK;
}
mIndexPrefixes.AppendElement(prefixes[0]);
mIndexStarts.AppendElement(mDeltas.Length());
PRUint32 numOfDeltas = 0;
PRUint32 currentItem = prefixes[0];
for (PRUint32 i = 1; i < aLength; i++) {
if ((numOfDeltas >= DELTAS_LIMIT) ||
(prefixes[i] - currentItem >= MAX_INDEX_DIFF)) {
mIndexStarts.AppendElement(mDeltas.Length());
mIndexPrefixes.AppendElement(prefixes[i]);
numOfDeltas = 0;
} else {
PRUint16 delta = prefixes[i] - currentItem;
mDeltas.AppendElement(delta);
numOfDeltas++;
}
currentItem = prefixes[i];
}
mIndexPrefixes.Compact();
mIndexStarts.Compact();
mDeltas.Compact();
LOG(("Total number of indices: %d", mIndexPrefixes.Length()));
LOG(("Total number of deltas: %d", mDeltas.Length()));
mHasPrefixes = PR_TRUE;
return NS_OK;
}
PRUint32 nsUrlClassifierPrefixSet::BinSearch(PRUint32 start,
PRUint32 end,
PRUint32 target)
{
while (start != end && end >= start) {
int i = start + ((end - start) >> 1);
int value = mIndexPrefixes[i];
if (value < target) {
start = i + 1;
} else if (value > target) {
end = i - 1;
} else {
return i;
}
}
return end;
}
nsresult
nsUrlClassifierPrefixSet::Contains(PRUint32 aPrefix, PRBool * aFound)
{
*aFound = PR_FALSE;
if (!mHasPrefixes) {
return NS_OK;
}
PRUint32 target = aPrefix;
// We want to do a "Price is Right" binary search, that is, we want to find
// the index of the value either equal to the target or the closest value
// that is less than the target.
//
if (target < mIndexPrefixes[0]) {
return NS_OK;
}
// |binsearch| does not necessarily return the correct index (when the
// target is not found) but rather it returns an index at least one away
// from the correct index.
// Because of this, we need to check if the target lies before the beginning
// of the indices.
int i = BinSearch(0, mIndexPrefixes.Length() - 1, target);
if (mIndexPrefixes[i] > target && i > 0) {
i--;
}
// Now search through the deltas for the target.
PRUint32 diff = target - mIndexPrefixes[i];
PRUint32 deltaIndex = mIndexStarts[i];
PRUint32 end = (i + 1 < mIndexStarts.Length()) ? mIndexStarts[i+1]
: mDeltas.Length();
while (diff > 0 && deltaIndex < end) {
diff -= mDeltas[deltaIndex];
deltaIndex++;
}
if (diff == 0) {
*aFound = PR_TRUE;
}
return NS_OK;
}
nsresult
nsUrlClassifierPrefixSet::EstimateSize(PRUint32 * aSize)
{
*aSize = sizeof(PRBool);
if (mHasPrefixes) {
*aSize += sizeof(PRUint16) * mDeltas.Length();
*aSize += sizeof(PRUint32) * mIndexPrefixes.Length();
*aSize += sizeof(PRUint32) * mIndexStarts.Length();
}
return NS_OK;
}

Просмотреть файл

@ -0,0 +1,83 @@
//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 et sw=2 tw=80: */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Url Classifier code
*
* The Initial Developer of the Original Code is
* the Mozilla Foundation
* Portions created by the Initial Developer are Copyright (C) 2011
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Gian-Carlo Pascutto <gpascutto@mozilla.com>
* Mehdi Mulani <mars.martian+bugmail@gmail.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsUrlClassifierPrefixSet_h_
#define nsUrlClassifierPrefixSet_h_
#include "nsISupportsUtils.h"
#include "nsID.h"
#include "nsIUrlClassifierPrefixSet.h"
#include "nsToolkitCompsCID.h"
class nsUrlClassifierPrefixSet : public nsIUrlClassifierPrefixSet
{
public:
nsUrlClassifierPrefixSet();
virtual ~nsUrlClassifierPrefixSet() {};
// Can send an empty Array to clean the tree
virtual nsresult SetPrefixes(const PRUint32*, PRUint32);
// Given prefixes must be in sorted order and bigger than
// anything currently in the Prefix Tree
virtual nsresult AddPrefixes(const PRUint32*, PRUint32);
virtual nsresult Contains(PRUint32, PRBool*);
virtual nsresult EstimateSize(PRUint32*);
NS_DECL_ISUPPORTS
protected:
static const PRUint32 DELTAS_LIMIT = 100;
static const PRUint32 MAX_INDEX_DIFF = (1 << 16);
PRUint32 BinSearch(PRUint32 start, PRUint32 end, PRUint32 target);
// boolean indicating whether |setPrefixes| has been
// called with a non-empty array.
PRBool mHasPrefixes;
// array containing deltas from indices.
nsTArray<PRUint16> mDeltas;
// the prefix for each index.
nsTArray<PRUint32> mIndexPrefixes;
// the value corresponds to the beginning of the run
// (an index in |_deltas|) for the index
nsTArray<PRUint32> mIndexStarts;
};
#endif

Просмотреть файл

@ -0,0 +1,153 @@
// newPset: returns an empty nsIUrlClassifierPrefixSet.
function newPset() {
return Cc["@mozilla.org/url-classifier/prefixset;1"]
.createInstance(Ci.nsIUrlClassifierPrefixSet);
}
// arrContains: returns true if |arr| contains the element |target|. Uses binary
// search and requires |arr| to be sorted.
function arrContains(arr, target) {
let start = 0;
let end = arr.length - 1;
let i = 0;
while (end > start) {
i = start + (end - start >> 1);
let value = arr[i];
if (value < target)
start = i+1;
else if (value > target)
end = i-1;
else
break;
}
if (start == end)
i = start;
return (!(i < 0 || i >= arr.length) && arr[i] == target);
}
// doRandomLookups: we use this to test for false membership with random input
// over the range of prefixes (unsigned 32-bits integers).
// pset: a nsIUrlClassifierPrefixSet to test.
// prefixes: an array of prefixes supposed to make up the prefix set.
// N: number of random lookups to make.
function doRandomLookups(pset, prefixes, N) {
for (let i = 0; i < N; i++) {
let randInt = prefixes[0];
while (arrContains(prefixes, randInt))
randInt = Math.floor(Math.random() * Math.pow(2, 32));
do_check_false(pset.contains(randInt));
}
}
// doExpectedLookups: we use this to test expected membership.
// pset: a nsIUrlClassifierPrefixSet to test.
// prefixes:
function doExpectedLookups(pset, prefixes, N) {
for (let i = 0; i < N; i++) {
prefixes.forEach(function (x) {
dump("Checking " + x + "\n");
do_check_true(pset.contains(x));
});
}
}
// testBasicPset: A very basic test of the prefix set to make sure that it
// exists and to give a basic example of its use.
function testBasicPset() {
let pset = Cc["@mozilla.org/url-classifier/prefixset;1"]
.createInstance(Ci.nsIUrlClassifierPrefixSet);
let prefixes = [2,100,50,2000,78000,1593203];
pset.setPrefixes(prefixes, prefixes.length);
do_check_true(pset.contains(100));
do_check_false(pset.contains(100000));
do_check_true(pset.contains(1593203));
do_check_false(pset.contains(999));
do_check_false(pset.contains(0));
}
function testDuplicates() {
let pset = Cc["@mozilla.org/url-classifier/prefixset;1"]
.createInstance(Ci.nsIUrlClassifierPrefixSet);
let prefixes = [1,1,2,2,2,3,3,3,3,3,3,5,6,6,7,7,9,9,9];
pset.setPrefixes(prefixes, prefixes.length);
do_check_true(pset.contains(1));
do_check_true(pset.contains(2));
do_check_true(pset.contains(5));
do_check_true(pset.contains(9));
do_check_false(pset.contains(4));
do_check_false(pset.contains(8));
}
function testSimplePset() {
let pset = newPset();
let prefixes = [1,2,100,400,123456789];
pset.setPrefixes(prefixes, prefixes.length);
doRandomLookups(pset, prefixes, 100);
doExpectedLookups(pset, prefixes, 1);
}
function testUnsortedPset() {
let pset = newPset();
let prefixes = [5,1,20,100,200000,100000];
pset.setPrefixes(prefixes, prefixes.length);
doRandomLookups(pset, prefixes, 100);
doExpectedLookups(pset, prefixes, 1);
}
function testReSetPrefixes() {
let pset = newPset();
let prefixes = [1, 5, 100, 1000, 150000];
pset.setPrefixes(prefixes, prefixes.length);
doExpectedLookups(pset, prefixes, 1);
let secondPrefixes = [12, 50, 300, 2000, 5000, 200000];
pset.setPrefixes(secondPrefixes, secondPrefixes.length);
doExpectedLookups(pset, secondPrefixes, 1);
for (let i = 0; i < prefixes.length; i++) {
do_check_false(pset.contains(prefixes[i]));
}
}
function testLargeSet() {
let N = 1000;
let arr = [];
for (let i = 0; i < N; i++) {
let randInt = Math.floor(Math.random() * Math.pow(2, 32));
arr.push(randInt);
}
arr.sort(function(x,y) x - y);
let pset = newPset();
pset.setPrefixes(arr, arr.length);
doExpectedLookups(pset, arr, 1);
doRandomLookups(pset, arr, 1000);
}
let tests = [testBasicPset,
testSimplePset,
testUnsortedPset,
testReSetPrefixes,
testLargeSet,
testDuplicates];
function run_test() {
// None of the tests use |executeSoon| or any sort of callbacks, so we can
// just run them in succession.
for (let i = 0; i < tests.length; i++) {
dump("Running " + tests[i].name + "\n");
tests[i]();
}
}

Просмотреть файл

@ -8,4 +8,5 @@ tail = tail_urlclassifier.js
[test_dbservice.js]
[test_hashcompleter.js]
[test_partial.js]
[test_prefixset.js]
[test_streamupdater.js]