зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1380154 - Part 2: Generate a DAFSA and use it for eTLDs. r=jduell
This replaces our giant sorted array of eTLD names with a more compact DAFSA.
MozReview-Commit-ID: 3zMBzUM9QUg
This commit is contained in:
Родитель
65313fd340
Коммит
a750d8eb22
|
@ -19,6 +19,13 @@
|
||||||
#include "nsNetCID.h"
|
#include "nsNetCID.h"
|
||||||
#include "nsServiceManagerUtils.h"
|
#include "nsServiceManagerUtils.h"
|
||||||
|
|
||||||
|
namespace etld_dafsa {
|
||||||
|
|
||||||
|
// Generated file that includes kDafsa
|
||||||
|
#include "etld_data.inc"
|
||||||
|
|
||||||
|
} // namespace etld_dafsa
|
||||||
|
|
||||||
using namespace mozilla;
|
using namespace mozilla;
|
||||||
|
|
||||||
NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
|
NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
|
||||||
|
@ -26,56 +33,11 @@ NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
#define ETLD_STR_NUM_1(line) str##line
|
|
||||||
#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
|
|
||||||
#define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
|
|
||||||
|
|
||||||
const ETLDEntry ETLDEntry::entries[] = {
|
|
||||||
#define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
|
|
||||||
#include "etld_data.inc"
|
|
||||||
#undef ETLD_ENTRY
|
|
||||||
};
|
|
||||||
|
|
||||||
const union ETLDEntry::etld_strings ETLDEntry::strings = {
|
|
||||||
{
|
|
||||||
#define ETLD_ENTRY(name, ex, wild) name,
|
|
||||||
#include "etld_data.inc"
|
|
||||||
#undef ETLD_ENTRY
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* static */ const ETLDEntry*
|
|
||||||
ETLDEntry::GetEntry(const char* aDomain)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
if (BinarySearchIf(entries, 0, ArrayLength(ETLDEntry::entries),
|
|
||||||
Cmp(aDomain), &i)) {
|
|
||||||
return &entries[i];
|
|
||||||
}
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Dummy function to statically ensure that our indices don't overflow
|
|
||||||
// the storage provided for them.
|
|
||||||
void
|
|
||||||
ETLDEntry::FuncForStaticAsserts(void)
|
|
||||||
{
|
|
||||||
#define ETLD_ENTRY(name, ex, wild) \
|
|
||||||
static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
|
|
||||||
"invalid strtab index");
|
|
||||||
#include "etld_data.inc"
|
|
||||||
#undef ETLD_ENTRY
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef ETLD_ENTRY_OFFSET
|
|
||||||
#undef ETLD_STR_NUM
|
|
||||||
#undef ETLD_STR_NUM1
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
|
|
||||||
static nsEffectiveTLDService *gService = nullptr;
|
static nsEffectiveTLDService *gService = nullptr;
|
||||||
|
|
||||||
nsEffectiveTLDService::nsEffectiveTLDService()
|
nsEffectiveTLDService::nsEffectiveTLDService()
|
||||||
|
: mIDNService()
|
||||||
|
, mGraph(etld_dafsa::kDafsa)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,24 +48,6 @@ nsEffectiveTLDService::Init()
|
||||||
mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
|
mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
|
||||||
if (NS_FAILED(rv)) return rv;
|
if (NS_FAILED(rv)) return rv;
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
// Sanity-check the eTLD entries.
|
|
||||||
for (uint32_t i = 0; i < ArrayLength(ETLDEntry::entries); i++) {
|
|
||||||
const char* domain = ETLDEntry::entries[i].GetEffectiveTLDName();
|
|
||||||
nsDependentCString name(domain);
|
|
||||||
nsAutoCString normalizedName(domain);
|
|
||||||
MOZ_ASSERT(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
|
|
||||||
"normalization failure!");
|
|
||||||
MOZ_ASSERT(name.Equals(normalizedName), "domain not normalized!");
|
|
||||||
|
|
||||||
// Domains must be in sorted order for binary search to work.
|
|
||||||
if (i > 0) {
|
|
||||||
const char* domain0 = ETLDEntry::entries[i - 1].GetEffectiveTLDName();
|
|
||||||
MOZ_ASSERT(strcmp(domain0, domain) < 0, "domains not in sorted order!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
MOZ_ASSERT(!gService);
|
MOZ_ASSERT(!gService);
|
||||||
gService = this;
|
gService = this;
|
||||||
RegisterWeakMemoryReporter(this);
|
RegisterWeakMemoryReporter(this);
|
||||||
|
@ -244,6 +188,9 @@ nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
|
||||||
int32_t aAdditionalParts,
|
int32_t aAdditionalParts,
|
||||||
nsACString &aBaseDomain)
|
nsACString &aBaseDomain)
|
||||||
{
|
{
|
||||||
|
const int kExceptionRule = 1;
|
||||||
|
const int kWildcardRule = 2;
|
||||||
|
|
||||||
if (aHostname.IsEmpty())
|
if (aHostname.IsEmpty())
|
||||||
return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
|
return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
|
||||||
|
|
||||||
|
@ -280,19 +227,19 @@ nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
|
||||||
return NS_ERROR_INVALID_ARG;
|
return NS_ERROR_INVALID_ARG;
|
||||||
|
|
||||||
// Perform the lookup.
|
// Perform the lookup.
|
||||||
const ETLDEntry* entry = ETLDEntry::GetEntry(currDomain);
|
const int result = mGraph.Lookup(Substring(currDomain, end));
|
||||||
if (entry) {
|
if (result != Dafsa::kKeyNotFound) {
|
||||||
if (entry->IsWild() && prevDomain) {
|
if (result == kWildcardRule && prevDomain) {
|
||||||
// wildcard rules imply an eTLD one level inferior to the match.
|
// wildcard rules imply an eTLD one level inferior to the match.
|
||||||
eTLD = prevDomain;
|
eTLD = prevDomain;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (entry->IsNormal() || !nextDot) {
|
if ((result == kWildcardRule || result != kExceptionRule) || !nextDot) {
|
||||||
// specific match, or we've hit the top domain level
|
// specific match, or we've hit the top domain level
|
||||||
eTLD = currDomain;
|
eTLD = currDomain;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (entry->IsException()) {
|
if (result == kExceptionRule) {
|
||||||
// exception rules imply an eTLD one level superior to the match.
|
// exception rules imply an eTLD one level superior to the match.
|
||||||
eTLD = nextDot + 1;
|
eTLD = nextDot + 1;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -12,67 +12,11 @@
|
||||||
#include "nsString.h"
|
#include "nsString.h"
|
||||||
#include "nsCOMPtr.h"
|
#include "nsCOMPtr.h"
|
||||||
#include "mozilla/Attributes.h"
|
#include "mozilla/Attributes.h"
|
||||||
#include "mozilla/BinarySearch.h"
|
#include "mozilla/Dafsa.h"
|
||||||
#include "mozilla/MemoryReporting.h"
|
#include "mozilla/MemoryReporting.h"
|
||||||
|
|
||||||
class nsIIDNService;
|
class nsIIDNService;
|
||||||
|
|
||||||
// struct for static data generated from effective_tld_names.dat
|
|
||||||
struct ETLDEntry {
|
|
||||||
friend class nsEffectiveTLDService;
|
|
||||||
|
|
||||||
public:
|
|
||||||
bool IsNormal() const { return wild || !exception; }
|
|
||||||
bool IsException() const { return exception; }
|
|
||||||
bool IsWild() const { return wild; }
|
|
||||||
|
|
||||||
const char* GetEffectiveTLDName() const
|
|
||||||
{
|
|
||||||
return strings.strtab + strtab_index;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const ETLDEntry* GetEntry(const char* aDomain);
|
|
||||||
|
|
||||||
static const size_t ETLD_ENTRY_N_INDEX_BITS = 30;
|
|
||||||
|
|
||||||
// These fields must be public to allow static construction.
|
|
||||||
uint32_t strtab_index : ETLD_ENTRY_N_INDEX_BITS;
|
|
||||||
uint32_t exception : 1;
|
|
||||||
uint32_t wild : 1;
|
|
||||||
|
|
||||||
private:
|
|
||||||
struct Cmp {
|
|
||||||
int operator()(const ETLDEntry aEntry) const
|
|
||||||
{
|
|
||||||
return strcmp(mName, aEntry.GetEffectiveTLDName());
|
|
||||||
}
|
|
||||||
explicit Cmp(const char* aName) : mName(aName) {}
|
|
||||||
const char* mName;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define ETLD_STR_NUM_1(line) str##line
|
|
||||||
#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
|
|
||||||
struct etld_string_list {
|
|
||||||
#define ETLD_ENTRY(name, ex, wild) char ETLD_STR_NUM(__LINE__)[sizeof(name)];
|
|
||||||
#include "etld_data.inc"
|
|
||||||
#undef ETLD_ENTRY
|
|
||||||
};
|
|
||||||
|
|
||||||
// This static string table is all the eTLD domain names packed together.
|
|
||||||
static const union etld_strings {
|
|
||||||
struct etld_string_list list;
|
|
||||||
char strtab[1];
|
|
||||||
} strings;
|
|
||||||
|
|
||||||
// This is the static entries table. Each entry has an index into the string
|
|
||||||
// table. The entries are in sorted order so that binary search can be used.
|
|
||||||
static const ETLDEntry entries[];
|
|
||||||
|
|
||||||
void FuncForStaticAsserts(void);
|
|
||||||
#undef ETLD_STR_NUM
|
|
||||||
#undef ETLD_STR_NUM1
|
|
||||||
};
|
|
||||||
|
|
||||||
class nsEffectiveTLDService final
|
class nsEffectiveTLDService final
|
||||||
: public nsIEffectiveTLDService
|
: public nsIEffectiveTLDService
|
||||||
, public nsIMemoryReporter
|
, public nsIMemoryReporter
|
||||||
|
@ -93,6 +37,7 @@ private:
|
||||||
~nsEffectiveTLDService();
|
~nsEffectiveTLDService();
|
||||||
|
|
||||||
nsCOMPtr<nsIIDNService> mIDNService;
|
nsCOMPtr<nsIIDNService> mIDNService;
|
||||||
|
mozilla::Dafsa mGraph;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // EffectiveTLDService_h
|
#endif // EffectiveTLDService_h
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
import encodings.idna
|
import encodings.idna
|
||||||
|
import imp
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
@ -34,12 +36,7 @@ def getEffectiveTLDs(path):
|
||||||
assert domain not in domains, \
|
assert domain not in domains, \
|
||||||
"repeating domain %s makes no sense" % domain
|
"repeating domain %s makes no sense" % domain
|
||||||
domains.add(domain)
|
domains.add(domain)
|
||||||
entries.append(entry)
|
yield entry
|
||||||
|
|
||||||
# Sort the entries so we can use binary search on them.
|
|
||||||
entries.sort(key=EffectiveTLDEntry.domain)
|
|
||||||
|
|
||||||
return entries
|
|
||||||
|
|
||||||
def _normalizeHostname(domain):
|
def _normalizeHostname(domain):
|
||||||
"""
|
"""
|
||||||
|
@ -103,19 +100,37 @@ class EffectiveTLDEntry:
|
||||||
def main(output, effective_tld_filename):
|
def main(output, effective_tld_filename):
|
||||||
"""
|
"""
|
||||||
effective_tld_filename is the effective TLD file to parse.
|
effective_tld_filename is the effective TLD file to parse.
|
||||||
A C++ array of { domain, exception, wild } entries representing the
|
A C++ array of a binary representation of a DAFSA representing the
|
||||||
eTLD file is then printed to output.
|
eTLD file is then printed to output.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def boolStr(b):
|
# Find and load the `make_dafsa.py` script under xpcom/ds.
|
||||||
if b:
|
tld_dir = os.path.dirname(effective_tld_filename)
|
||||||
return "true"
|
make_dafsa_py = os.path.join(tld_dir, '../../xpcom/ds/make_dafsa.py')
|
||||||
return "false"
|
sys.path.append(os.path.dirname(make_dafsa_py))
|
||||||
|
with open(make_dafsa_py, 'r') as fh:
|
||||||
|
make_dafsa = imp.load_module('script', fh, make_dafsa_py,
|
||||||
|
('.py', 'r', imp.PY_SOURCE))
|
||||||
|
|
||||||
|
def typeEnum(etld):
|
||||||
|
"""
|
||||||
|
Maps the flags to the DAFSA's enum types.
|
||||||
|
"""
|
||||||
|
if etld.exception():
|
||||||
|
return 1
|
||||||
|
elif etld.wild():
|
||||||
|
return 2
|
||||||
|
else:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def dafsa_words():
|
||||||
|
"""
|
||||||
|
make_dafsa expects lines of the form "<domain_name><enum_value>"
|
||||||
|
"""
|
||||||
for etld in getEffectiveTLDs(effective_tld_filename):
|
for etld in getEffectiveTLDs(effective_tld_filename):
|
||||||
exception = boolStr(etld.exception())
|
yield "%s%d" % (etld.domain(), typeEnum(etld))
|
||||||
wild = boolStr(etld.wild())
|
|
||||||
output.write('ETLD_ENTRY("%s", %s, %s)\n' % (etld.domain(), exception, wild))
|
output.write(make_dafsa.words_to_cxx(dafsa_words()))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(sys.stdout, sys.argv[1])
|
main(sys.stdout, sys.argv[1])
|
||||||
|
|
|
@ -32,7 +32,7 @@ public:
|
||||||
/**
|
/**
|
||||||
* Initializes the DAFSA with a binary encoding generated by `make_dafsa.py`.
|
* Initializes the DAFSA with a binary encoding generated by `make_dafsa.py`.
|
||||||
*/
|
*/
|
||||||
explicit constexpr Dafsa(const Graph& aData) : mData(aData) {}
|
explicit Dafsa(const Graph& aData) : mData(aData) {}
|
||||||
|
|
||||||
~Dafsa() = default;
|
~Dafsa() = default;
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче