зеркало из https://github.com/mozilla/gecko-dev.git
592 строки
15 KiB
C++
592 строки
15 KiB
C++
//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "LookupCache.h"
|
|
#include "HashStore.h"
|
|
#include "nsISeekableStream.h"
|
|
#include "mozilla/Telemetry.h"
|
|
#include "mozilla/Logging.h"
|
|
#include "nsNetUtil.h"
|
|
#include "prprf.h"
|
|
#include "Classifier.h"
|
|
|
|
// We act as the main entry point for all the real lookups,
|
|
// so note that those are not done to the actual HashStore.
|
|
// The latter solely exists to store the data needed to handle
|
|
// the updates from the protocol.
|
|
|
|
// This module provides a front for PrefixSet, mUpdateCompletions,
|
|
// and mGetHashCache, which together contain everything needed to
|
|
// provide a classification as long as the data is up to date.
|
|
|
|
// PrefixSet stores and provides lookups for 4-byte prefixes.
|
|
// mUpdateCompletions contains 32-byte completions which were
|
|
// contained in updates. They are retrieved from HashStore/.sbstore
|
|
// on startup.
|
|
// mGetHashCache contains 32-byte completions which were
|
|
// returned from the gethash server. They are not serialized,
|
|
// only cached until the next update.
|
|
|
|
// Name of the persistent PrefixSet storage
|
|
#define PREFIXSET_SUFFIX ".pset"
|
|
|
|
// MOZ_LOG=UrlClassifierDbService:5
|
|
extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
|
|
#define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
|
|
#define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
|
|
|
|
namespace mozilla {
|
|
namespace safebrowsing {
|
|
|
|
// Definition of the static LookupCacheV2::VER constant (presumably the
// version tag for this cache flavour — confirm against the class declaration).
const int LookupCacheV2::VER = 2;
|
|
|
|
// Creates a cache for table |aTableName| whose files live under
// |aRootStoreDir|. The cache starts out unprimed.
LookupCache::LookupCache(const nsACString& aTableName, nsIFile* aRootStoreDir)
  : mPrimed(false),
    mTableName(aTableName),
    mRootStoreDirectory(aRootStoreDir)
{
  // The handle passed here is the one that was just stored, so this call only
  // derives mStoreDirectory. Its result is intentionally not propagated: a
  // constructor has no way to report it.
  UpdateRootDirHandle(mRootStoreDirectory);
}
|
|
|
|
// Opens the cache by loading the persisted prefix set (if any).
// Returns the failure code from LoadPrefixSet(), NS_OK otherwise.
nsresult
LookupCache::Open()
{
  LOG(("Loading PrefixSet"));

  nsresult rv = LoadPrefixSet();
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}
|
|
|
|
// Points the cache at |aNewRootStoreDirectory| and re-derives the per-table
// store directory from it.
//
// Returns the clone failure if the new root cannot be cloned; otherwise
// returns the result of Classifier::GetPrivateStoreDirectory(). Note that
// when the latter fails, mStoreDirectory falls back to the root directory
// but the failure code is still returned.
nsresult
LookupCache::UpdateRootDirHandle(nsIFile* aNewRootStoreDirectory)
{
  nsresult rv;

  // Only take a fresh clone when the caller hands us a different handle.
  if (aNewRootStoreDirectory != mRootStoreDirectory) {
    rv = aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
    NS_ENSURE_SUCCESS(rv, rv);
  }

  rv = Classifier::GetPrivateStoreDirectory(mRootStoreDirectory, mTableName,
                                            getter_AddRefs(mStoreDirectory));
  if (NS_FAILED(rv)) {
    // Fall back to keeping the table's files directly in the root directory.
    LOG(("Failed to get private store directory for %s", mTableName.get()));
    mStoreDirectory = mRootStoreDirectory;
  }

  if (LOG_ENABLED()) {
    nsString storePath;
    mStoreDirectory->GetPath(storePath);
    LOG(("Private store directory for %s is %s", mTableName.get(),
         NS_ConvertUTF16toUTF8(storePath).get()));
  }

  return rv;
}
|
|
|
|
// Deletes this table's on-disk prefix set file and then clears all in-memory
// state. If any file operation fails (including the removal itself), the
// error is returned and the in-memory state is left untouched.
nsresult
LookupCache::Reset()
{
  LOG(("LookupCache resetting"));

  // Work on a clone so that mStoreDirectory itself is never modified.
  nsCOMPtr<nsIFile> storedSet;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storedSet));
  NS_ENSURE_SUCCESS(rv, rv);

  // <store directory>/<table name>.pset
  rv = storedSet->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = storedSet->Remove(false);
  NS_ENSURE_SUCCESS(rv, rv);

  ClearAll();

  return NS_OK;
}
|
|
|
|
// Merges the complete hashes in |aAddCompletes| into mGetHashCache, skipping
// any hash that is already cached. Always returns NS_OK.
//
// mGetHashCache must stay sorted because lookups binary-search it. Each new
// hash is therefore inserted at its sorted position. Appending first and
// sorting once at the end would leave the array unsorted while the loop is
// still binary-searching it, which lets duplicates (already cached, or
// repeated within |aAddCompletes|) slip through the membership test.
nsresult
LookupCache::AddCompletionsToCache(AddCompleteArray& aAddCompletes)
{
  for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
    const Completion& hash = aAddCompletes[i].CompleteHash();
    if (mGetHashCache.BinaryIndexOf(hash) == mGetHashCache.NoIndex) {
      mGetHashCache.InsertElementSorted(hash);
    }
  }

  return NS_OK;
}
|
|
|
|
#if defined(DEBUG)
// Debug helper: logs every entry of mGetHashCache as a hex string.
// Does nothing when debug logging is disabled.
void
LookupCache::DumpCache()
{
  if (!LOG_ENABLED()) {
    return;
  }

  const uint32_t cached = mGetHashCache.Length();
  for (uint32_t idx = 0; idx < cached; ++idx) {
    nsAutoCString hex;
    mGetHashCache[idx].ToHexString(hex);
    LOG(("Caches: %s", hex.get()));
  }
}
#endif
|
|
|
|
// Persists the prefix set to <store directory>/<table name>.pset.
//
// Failing to build the file path is returned to the caller. A failure of the
// store operation itself only produces a warning, and NS_OK is still
// returned.
nsresult
LookupCache::WriteFile()
{
  nsCOMPtr<nsIFile> target;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(target));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = target->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = StoreToFile(target);
  NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "failed to store the prefixset");

  return NS_OK;
}
|
|
|
|
void
|
|
LookupCache::ClearAll()
|
|
{
|
|
ClearCache();
|
|
ClearPrefixes();
|
|
mPrimed = false;
|
|
}
|
|
|
|
void
|
|
LookupCache::ClearCache()
|
|
{
|
|
mGetHashCache.Clear();
|
|
}
|
|
|
|
/* static */ bool
|
|
LookupCache::IsCanonicalizedIP(const nsACString& aHost)
|
|
{
|
|
// The canonicalization process will have left IP addresses in dotted
|
|
// decimal with no surprises.
|
|
uint32_t i1, i2, i3, i4;
|
|
char c;
|
|
if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c",
|
|
&i1, &i2, &i3, &i4, &c) == 4) {
|
|
return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// Expands |aSpec| ("host/path...") into every host/path combination that has
// to be looked up, and stores them in |aFragments| (which is cleared first).
// A spec that contains no '/' produces no fragments. Always returns NS_OK.
/* static */ nsresult
LookupCache::GetLookupFragments(const nsACString& aSpec,
                                nsTArray<nsCString>* aFragments)
{
  aFragments->Clear();

  nsACString::const_iterator begin, end, iter;
  aSpec.BeginReading(begin);
  aSpec.EndReading(end);

  // The host is everything in front of the first '/'.
  iter = begin;
  if (!FindCharInReadable('/', iter, end)) {
    return NS_OK;
  }

  const nsCSubstring& host = Substring(begin, iter);
  ++iter;  // step over the separating '/'
  nsAutoCString path;
  path.Assign(Substring(iter, end));

  /**
   * From the protocol doc:
   * For the hostname, the client will try at most 5 different strings. They
   * are:
   * a) The exact hostname of the url
   * b) The 4 hostnames formed by starting with the last 5 components and
   *    successively removing the leading component. The top-level component
   *    can be skipped. This is not done if the hostname is a numerical IP.
   */
  nsTArray<nsCString> hostCandidates;
  hostCandidates.AppendElement(host);

  if (!IsCanonicalizedIP(host)) {
    nsACString::const_iterator hostEnd;
    host.EndReading(hostEnd);

    host.BeginReading(begin);
    end = hostEnd;

    // Walk the dots from right to left. After a successful search, |begin|
    // and |end| bracket the dot that was found, so [end, hostEnd) is the
    // suffix that follows it.
    int dotsSeen = 0;
    while (RFindInReadable(NS_LITERAL_CSTRING("."), begin, end) &&
           dotsSeen < MAX_HOST_COMPONENTS) {
      ++dotsSeen;
      // don't bother checking toplevel domains
      if (dotsSeen >= 2) {
        hostCandidates.AppendElement(Substring(end, hostEnd));
      }
      // Continue the search in the part of the host left of this dot.
      end = begin;
      host.BeginReading(begin);
    }
  }

  /**
   * From the protocol doc:
   * For the path, the client will also try at most 6 different strings.
   * They are:
   * a) the exact path of the url, including query parameters
   * b) the exact path of the url, without query parameters
   * c) the 4 paths formed by starting at the root (/) and
   *    successively appending path components, including a trailing
   *    slash.  This behavior should only extend up to the next-to-last
   *    path component, that is, a trailing slash should never be
   *    appended that was not present in the original url.
   */
  nsTArray<nsCString> pathCandidates;
  nsAutoCString candidate;

  path.BeginReading(begin);
  path.EndReading(end);

  // Path without the query string; from here on only that part is scanned.
  iter = begin;
  if (FindCharInReadable('?', iter, end)) {
    candidate.Assign(Substring(begin, iter));
    pathCandidates.AppendElement(candidate);
    end = iter;
  }

  // Every prefix of the path that ends in '/', up to the component limit.
  int pathComponents = 1;
  iter = begin;
  while (FindCharInReadable('/', iter, end) &&
         pathComponents < MAX_PATH_COMPONENTS) {
    ++iter;
    candidate.Assign(Substring(begin, iter));
    pathCandidates.AppendElement(candidate);
    ++pathComponents;
  }

  // If we haven't already done so, add the full path
  if (!candidate.Equals(path)) {
    pathCandidates.AppendElement(path);
  }
  // Check an empty path (for whole-domain blacklist entries)
  pathCandidates.AppendElement(EmptyCString());

  // Emit the cross product "<host>/<path>".
  for (uint32_t h = 0; h < hostCandidates.Length(); h++) {
    for (uint32_t p = 0; p < pathCandidates.Length(); p++) {
      nsCString key;
      key.Assign(hostCandidates[h]);
      key.Append('/');
      key.Append(pathCandidates[p]);
      LOG(("Checking fragment %s", key.get()));

      aFragments->AppendElement(key);
    }
  }

  return NS_OK;
}
|
|
|
|
// Appends the host keys for |aSpec| ("host/path...") to |aHostKeys|. The
// array is not cleared first.
//
//  - No '/' in the spec: nothing is appended.
//  - Canonical IP host: a single key "<ip>/".
//  - Host with fewer than two dot-separated components: nothing is appended.
//  - Otherwise: a key made of the last two components ("b.c/") and, when
//    there are more than two components, a second key made of the last three
//    ("a.b.c/").
//
// Returns NS_ERROR_OUT_OF_MEMORY if an element cannot be appended, NS_OK
// otherwise.
/* static */ nsresult
LookupCache::GetHostKeys(const nsACString& aSpec,
                         nsTArray<nsCString>* aHostKeys)
{
  nsACString::const_iterator begin, end, iter;
  aSpec.BeginReading(begin);
  aSpec.EndReading(end);

  iter = begin;
  if (!FindCharInReadable('/', iter, end)) {
    return NS_OK;
  }

  const nsCSubstring& host = Substring(begin, iter);

  if (IsCanonicalizedIP(host)) {
    nsCString *key = aHostKeys->AppendElement();
    if (!key)
      return NS_ERROR_OUT_OF_MEMORY;

    key->Assign(host);
    key->Append("/");
    return NS_OK;
  }

  nsTArray<nsCString> hostComponents;
  ParseString(PromiseFlatCString(host), '.', hostComponents);

  if (hostComponents.Length() < 2) {
    // no host or toplevel host, this won't match anything in the db
    return NS_OK;
  }

  // Build the two-component key in a local string. The three-component key
  // below reuses it, and it must not be read back through a pointer into
  // *aHostKeys: the second AppendElement() may reallocate the array's storage
  // and leave such a pointer dangling.
  int32_t last = int32_t(hostComponents.Length()) - 1;
  nsAutoCString twoComponentKey;
  twoComponentKey.Assign(hostComponents[last - 1]);
  twoComponentKey.Append(".");
  twoComponentKey.Append(hostComponents[last]);
  twoComponentKey.Append("/");

  // First check with two domain components
  nsCString *lookupHost = aHostKeys->AppendElement();
  if (!lookupHost)
    return NS_ERROR_OUT_OF_MEMORY;

  lookupHost->Assign(twoComponentKey);

  // Now check with three domain components
  if (hostComponents.Length() > 2) {
    nsCString *lookupHost2 = aHostKeys->AppendElement();
    if (!lookupHost2)
      return NS_ERROR_OUT_OF_MEMORY;
    lookupHost2->Assign(hostComponents[last - 2]);
    lookupHost2->Append(".");
    lookupHost2->Append(twoComponentKey);
  }

  return NS_OK;
}
|
|
|
|
// Loads <store directory>/<table name>.pset if it exists and marks the cache
// as primed on success. A missing file is not an error.
//
// If loading fails, the error is returned. When that error is
// NS_ERROR_FILE_CORRUPTED, the cache is Reset() first.
nsresult
LookupCache::LoadPrefixSet()
{
  nsCOMPtr<nsIFile> psFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
  NS_ENSURE_SUCCESS(rv, rv);

  bool exists;
  rv = psFile->Exists(&exists);
  NS_ENSURE_SUCCESS(rv, rv);

  if (!exists) {
    LOG(("no (usable) stored PrefixSet found"));
  } else {
    LOG(("stored PrefixSet exists, loading from disk"));
    rv = LoadFromFile(psFile);
    if (NS_FAILED(rv)) {
      // A corrupted file is thrown away so it is not picked up again.
      if (rv == NS_ERROR_FILE_CORRUPTED) {
        Reset();
      }
      return rv;
    }
    mPrimed = true;
  }

#ifdef DEBUG
  if (mPrimed) {
    uint32_t size = SizeOfPrefixSet();
    LOG(("SB tree done, size = %d bytes\n", size));
  }
#endif

  return NS_OK;
}
|
|
|
|
// Creates the prefix set object for this table and initialises it with the
// table name. Returns the prefix set's Init() failure, NS_OK otherwise.
nsresult
LookupCacheV2::Init()
{
  mPrefixSet = new nsUrlClassifierPrefixSet();

  nsresult rv = mPrefixSet->Init(mTableName);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}
|
|
|
|
// Opens the cache: first the base-class part (the prefix set), then the
// completions. The first failure is returned.
nsresult
LookupCacheV2::Open()
{
  nsresult rv = LookupCache::Open();
  NS_ENSURE_SUCCESS(rv, rv);

  LOG(("Reading Completions"));

  rv = ReadCompletions();
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}
|
|
|
|
void
|
|
LookupCacheV2::ClearAll()
|
|
{
|
|
LookupCache::ClearAll();
|
|
mUpdateCompletions.Clear();
|
|
}
|
|
|
|
// Looks |aCompletion| up in this table.
//
//   *aHas      - set to true if the 4-byte prefix of |aCompletion| is in the
//                prefix set, or if the full hash is known.
//   *aComplete - set to true if the full hash is in mGetHashCache or in
//                mUpdateCompletions.
//
// Both outputs are false if the prefix set lookup fails, in which case its
// error is returned. Otherwise returns NS_OK.
nsresult
LookupCacheV2::Has(const Completion& aCompletion,
                   bool* aHas, bool* aComplete)
{
  *aHas = *aComplete = false;

  uint32_t prefix = aCompletion.ToUint32();

  bool found;
  nsresult rv = mPrefixSet->Contains(prefix, &found);
  NS_ENSURE_SUCCESS(rv, rv);

  LOG(("Probe in %s: %X, found %d", mTableName.get(), prefix, found));

  if (found) {
    *aHas = true;
  }

  // A full-hash match counts as a hit even if the prefix probe missed.
  const bool fullHashKnown =
    mGetHashCache.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex ||
    mUpdateCompletions.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex;
  if (fullHashKnown) {
    LOG(("Complete in %s", mTableName.get()));
    *aComplete = true;
    *aHas = true;
  }

  return NS_OK;
}
|
|
|
|
// Rebuilds the cache from the given add-prefixes and add-completes.
//
// mUpdateCompletions is replaced by the (sorted) complete hashes from
// |aAddCompletes|, which is emptied afterwards. The prefix set is rebuilt
// from |aAddPrefixes| via ConstructPrefixSet(). The sizes of both inputs are
// reported to telemetry. On success the cache is marked as primed.
nsresult
LookupCacheV2::Build(AddPrefixArray& aAddPrefixes,
                     AddCompleteArray& aAddCompletes)
{
  const uint32_t completeCount = aAddCompletes.Length();
  Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_COMPLETIONS,
                        static_cast<uint32_t>(completeCount));

  mUpdateCompletions.Clear();
  mUpdateCompletions.SetCapacity(completeCount);
  for (uint32_t idx = 0; idx < completeCount; ++idx) {
    mUpdateCompletions.AppendElement(aAddCompletes[idx].CompleteHash());
  }
  aAddCompletes.Clear();
  // Sorted so that the completions can be binary-searched.
  mUpdateCompletions.Sort();

  Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_PREFIXES,
                        static_cast<uint32_t>(aAddPrefixes.Length()));

  nsresult rv = ConstructPrefixSet(aAddPrefixes);
  NS_ENSURE_SUCCESS(rv, rv);
  mPrimed = true;

  return NS_OK;
}
|
|
|
|
// Fills |aAddPrefixes| with the prefixes held by the prefix set. When the
// cache has not been primed, |aAddPrefixes| is left untouched and NS_OK is
// returned.
nsresult
LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes)
{
  if (mPrimed) {
    return mPrefixSet->GetPrefixesNative(aAddPrefixes);
  }

  // This can happen if its a new table, so no error.
  LOG(("GetPrefixes from empty LookupCache"));
  return NS_OK;
}
|
|
|
|
// Replaces mUpdateCompletions with the add-completes found in this table's
// HashStore. Returns the store's Open() failure, NS_OK otherwise.
//
// NOTE(review): the entries are copied in store order and not sorted here,
// while Has() binary-searches mUpdateCompletions. Presumably the store
// hands them back already sorted — confirm against HashStore.
nsresult
LookupCacheV2::ReadCompletions()
{
  HashStore store(mTableName, mRootStoreDirectory);

  nsresult rv = store.Open();
  NS_ENSURE_SUCCESS(rv, rv);

  mUpdateCompletions.Clear();

  const AddCompleteArray& stored = store.AddCompletes();
  const uint32_t storedCount = stored.Length();
  for (uint32_t idx = 0; idx < storedCount; ++idx) {
    mUpdateCompletions.AppendElement(stored[idx].complete);
  }

  return NS_OK;
}
|
|
|
|
// Empties the prefix set by installing a zero-length prefix list.
nsresult
LookupCacheV2::ClearPrefixes()
{
  nsresult rv = mPrefixSet->SetPrefixes(nullptr, 0);
  return rv;
}
|
|
|
|
// Writes the prefix set to |aFile|; forwards to the prefix set object.
nsresult
LookupCacheV2::StoreToFile(nsIFile* aFile)
{
  nsresult rv = mPrefixSet->StoreToFile(aFile);
  return rv;
}
|
|
|
|
// Reads the prefix set from |aFile|; forwards to the prefix set object.
nsresult
LookupCacheV2::LoadFromFile(nsIFile* aFile)
{
  nsresult rv = mPrefixSet->LoadFromFile(aFile);
  return rv;
}
|
|
|
|
// Returns the size reported by the prefix set's SizeOfIncludingThis(),
// measured with moz_malloc_size_of.
size_t
LookupCacheV2::SizeOfPrefixSet()
{
  const size_t bytes = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
  return bytes;
}
|
|
|
|
#ifdef DEBUG
// Debug-only check that |aArray| is in non-decreasing order: asserts that
// every element is <= its successor. Empty and single-element arrays pass
// trivially.
template <class T>
static void EnsureSorted(T* aArray)
{
  typename T::elem_type* const first = aArray->Elements();
  typename T::elem_type* const last = first + aArray->Length();
  if (first == last) {
    return;
  }

  // Compare each element with the one right before it.
  for (typename T::elem_type* cur = first + 1; cur != last; ++cur) {
    MOZ_ASSERT(*(cur - 1) <= *cur);
  }
}
#endif
|
|
|
|
// Replaces the contents of the prefix set with the prefixes taken from
// |aAddPrefixes|, which is emptied in the process. The time spent is
// recorded in telemetry.
//
// Returns NS_ERROR_OUT_OF_MEMORY if the temporary array cannot be allocated,
// or the failure from SetPrefixes(). On success the cache is marked as
// primed.
nsresult
LookupCacheV2::ConstructPrefixSet(AddPrefixArray& aAddPrefixes)
{
  Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_CONSTRUCT_TIME> timer;

  // Flatten the add-prefix entries into plain 32-bit values.
  const uint32_t prefixCount = aAddPrefixes.Length();
  nsTArray<uint32_t> prefixes;
  if (!prefixes.SetCapacity(prefixCount, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  for (uint32_t idx = 0; idx < prefixCount; ++idx) {
    prefixes.AppendElement(aAddPrefixes[idx].PrefixHash().ToUint32());
  }
  aAddPrefixes.Clear();

#ifdef DEBUG
  // PrefixSet requires sorted order
  EnsureSorted(&prefixes);
#endif

  // construct new one, replace old entries
  nsresult rv = mPrefixSet->SetPrefixes(prefixes.Elements(), prefixes.Length());
  NS_ENSURE_SUCCESS(rv, rv);

#ifdef DEBUG
  uint32_t size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
  LOG(("SB tree done, size = %d bytes\n", size));
#endif

  mPrimed = true;

  return NS_OK;
}
|
|
|
|
#if defined(DEBUG)
// Debug helper: logs every entry of mUpdateCompletions as a hex string.
// Does nothing when debug logging is disabled.
void
LookupCacheV2::DumpCompletions()
{
  if (!LOG_ENABLED()) {
    return;
  }

  const uint32_t total = mUpdateCompletions.Length();
  for (uint32_t idx = 0; idx < total; ++idx) {
    nsAutoCString hex;
    mUpdateCompletions[idx].ToHexString(hex);
    LOG(("Update: %s", hex.get()));
  }
}
#endif
|
|
|
|
} // namespace safebrowsing
|
|
} // namespace mozilla
|