зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1380154 - Part 2: Generate a DAFSA and use it for eTLDs. r=jduell
This replaces our giant sorted array of eTLD names with a more compact DAFSA. MozReview-Commit-ID: 3zMBzUM9QUg
This commit is contained in:
Родитель
65313fd340
Коммит
a750d8eb22
|
@ -19,6 +19,13 @@
|
|||
#include "nsNetCID.h"
|
||||
#include "nsServiceManagerUtils.h"
|
||||
|
||||
// Keeps the generated table in its own namespace; this file refers to it as
// etld_dafsa::kDafsa when constructing the service's mGraph member.
namespace etld_dafsa {
|
||||
|
||||
// Generated file that includes kDafsa
|
||||
#include "etld_data.inc"
|
||||
|
||||
} // namespace etld_dafsa
|
||||
|
||||
using namespace mozilla;
|
||||
|
||||
NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
|
||||
|
@ -26,56 +33,11 @@ NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
|
|||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
// Two-step expansion: ETLD_STR_NUM(__LINE__) first expands __LINE__ to a
// number, then ETLD_STR_NUM_1 pastes it onto "str", giving str<line>.
#define ETLD_STR_NUM_1(line) str##line
|
||||
#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
|
||||
// Byte offset, inside etld_string_list, of the member named after the line of
// the current macro invocation. The `name` argument itself is not used; the
// member is selected purely through __LINE__.
#define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
|
||||
|
||||
// Entry table: etld_data.inc is included with ETLD_ENTRY expanding to one
// { string-table offset, exception flag, wildcard flag } initializer per entry.
const ETLDEntry ETLDEntry::entries[] = {
|
||||
#define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
|
||||
#include "etld_data.inc"
|
||||
#undef ETLD_ENTRY
|
||||
};
|
||||
|
||||
// String table: the same file is included again with ETLD_ENTRY expanding to
// just the name, so the names initialize the union's first member (the
// etld_string_list struct) one field per entry.
const union ETLDEntry::etld_strings ETLDEntry::strings = {
|
||||
{
|
||||
#define ETLD_ENTRY(name, ex, wild) name,
|
||||
#include "etld_data.inc"
|
||||
#undef ETLD_ENTRY
|
||||
}
|
||||
};
|
||||
|
||||
// Looks up aDomain in the entries table.
//
// Runs BinarySearchIf over the full range [0, ArrayLength(entries)) using a
// Cmp functor built from aDomain. Returns a pointer to the matching entry, or
// nullptr when the search reports no match.
/* static */ const ETLDEntry*
|
||||
ETLDEntry::GetEntry(const char* aDomain)
|
||||
{
|
||||
// Receives the index of the match when the search succeeds.
size_t i;
|
||||
if (BinarySearchIf(entries, 0, ArrayLength(ETLDEntry::entries),
|
||||
Cmp(aDomain), &i)) {
|
||||
return &entries[i];
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Dummy function to statically ensure that our indices don't overflow
|
||||
// the storage provided for them.
|
||||
void
|
||||
ETLDEntry::FuncForStaticAsserts(void)
|
||||
{
|
||||
// ETLD_ENTRY is redefined so that including etld_data.inc emits one
// static_assert per entry: the entry's string-table offset must be smaller
// than 2^ETLD_ENTRY_N_INDEX_BITS, i.e. fit in the strtab_index bit-field.
// The function body contains nothing but these compile-time checks.
#define ETLD_ENTRY(name, ex, wild) \
|
||||
static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
|
||||
"invalid strtab index");
|
||||
#include "etld_data.inc"
|
||||
#undef ETLD_ENTRY
|
||||
}
|
||||
|
||||
// Drop the table-building helper macros defined earlier in this file.
#undef ETLD_ENTRY_OFFSET
|
||||
#undef ETLD_STR_NUM
|
||||
// NOTE(review): the helper defined earlier is ETLD_STR_NUM_1 (underscore
// before the 1). ETLD_STR_NUM1 is never defined, so this #undef has no effect
// and ETLD_STR_NUM_1 remains defined past this point.
#undef ETLD_STR_NUM1
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
// File-local pointer to the service instance. It starts out null; Init()
// asserts that it is still null and then stores `this` in it.
static nsEffectiveTLDService *gService = nullptr;
|
||||
|
||||
// Constructor: default-initializes mIDNService (Init() assigns the IDN
// service to it later) and constructs mGraph from the generated
// etld_dafsa::kDafsa table. The body is empty.
nsEffectiveTLDService::nsEffectiveTLDService()
|
||||
: mIDNService()
|
||||
, mGraph(etld_dafsa::kDafsa)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -86,24 +48,6 @@ nsEffectiveTLDService::Init()
|
|||
mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
|
||||
if (NS_FAILED(rv)) return rv;
|
||||
|
||||
#ifdef DEBUG
|
||||
// Sanity-check the eTLD entries.
|
||||
for (uint32_t i = 0; i < ArrayLength(ETLDEntry::entries); i++) {
|
||||
const char* domain = ETLDEntry::entries[i].GetEffectiveTLDName();
|
||||
nsDependentCString name(domain);
|
||||
nsAutoCString normalizedName(domain);
|
||||
MOZ_ASSERT(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
|
||||
"normalization failure!");
|
||||
MOZ_ASSERT(name.Equals(normalizedName), "domain not normalized!");
|
||||
|
||||
// Domains must be in sorted order for binary search to work.
|
||||
if (i > 0) {
|
||||
const char* domain0 = ETLDEntry::entries[i - 1].GetEffectiveTLDName();
|
||||
MOZ_ASSERT(strcmp(domain0, domain) < 0, "domains not in sorted order!");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
MOZ_ASSERT(!gService);
|
||||
gService = this;
|
||||
RegisterWeakMemoryReporter(this);
|
||||
|
@ -244,6 +188,9 @@ nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
|
|||
int32_t aAdditionalParts,
|
||||
nsACString &aBaseDomain)
|
||||
{
|
||||
const int kExceptionRule = 1;
|
||||
const int kWildcardRule = 2;
|
||||
|
||||
if (aHostname.IsEmpty())
|
||||
return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
|
||||
|
||||
|
@ -280,19 +227,19 @@ nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
|
|||
return NS_ERROR_INVALID_ARG;
|
||||
|
||||
// Perform the lookup.
|
||||
const ETLDEntry* entry = ETLDEntry::GetEntry(currDomain);
|
||||
if (entry) {
|
||||
if (entry->IsWild() && prevDomain) {
|
||||
const int result = mGraph.Lookup(Substring(currDomain, end));
|
||||
if (result != Dafsa::kKeyNotFound) {
|
||||
if (result == kWildcardRule && prevDomain) {
|
||||
// wildcard rules imply an eTLD one level inferior to the match.
|
||||
eTLD = prevDomain;
|
||||
break;
|
||||
}
|
||||
if (entry->IsNormal() || !nextDot) {
|
||||
if ((result == kWildcardRule || result != kExceptionRule) || !nextDot) {
|
||||
// specific match, or we've hit the top domain level
|
||||
eTLD = currDomain;
|
||||
break;
|
||||
}
|
||||
if (entry->IsException()) {
|
||||
if (result == kExceptionRule) {
|
||||
// exception rules imply an eTLD one level superior to the match.
|
||||
eTLD = nextDot + 1;
|
||||
break;
|
||||
|
|
|
@ -12,67 +12,11 @@
|
|||
#include "nsString.h"
|
||||
#include "nsCOMPtr.h"
|
||||
#include "mozilla/Attributes.h"
|
||||
#include "mozilla/BinarySearch.h"
|
||||
#include "mozilla/Dafsa.h"
|
||||
#include "mozilla/MemoryReporting.h"
|
||||
|
||||
class nsIIDNService;
|
||||
|
||||
// struct for static data generated from effective_tld_names.dat
//
// Each ETLDEntry is one row of the static `entries` table: an offset into the
// packed string table plus two one-bit flags (exception, wild). The class also
// owns the string table itself and the binary-search lookup (GetEntry).
|
||||
struct ETLDEntry {
|
||||
friend class nsEffectiveTLDService;
|
||||
|
||||
public:
|
||||
// True for wildcard entries and for any entry that is not an exception.
bool IsNormal() const { return wild || !exception; }
|
||||
// True when the exception flag is set.
bool IsException() const { return exception; }
|
||||
// True when the wildcard flag is set.
bool IsWild() const { return wild; }
|
||||
|
||||
// Returns this entry's name: the string table base plus this entry's offset.
const char* GetEffectiveTLDName() const
|
||||
{
|
||||
return strings.strtab + strtab_index;
|
||||
}
|
||||
|
||||
// Looks up aDomain in `entries`; defined out of line.
static const ETLDEntry* GetEntry(const char* aDomain);
|
||||
|
||||
// Width in bits of the strtab_index bit-field below.
static const size_t ETLD_ENTRY_N_INDEX_BITS = 30;
|
||||
|
||||
// These fields must be public to allow static construction.
// The three bit-fields total 30 + 1 + 1 = 32 bits.
|
||||
uint32_t strtab_index : ETLD_ENTRY_N_INDEX_BITS;
|
||||
uint32_t exception : 1;
|
||||
uint32_t wild : 1;
|
||||
|
||||
private:
|
||||
// Comparator functor handed to the binary search: holds the name being
// searched for and strcmp()s it against a candidate entry's name, so the
// result is negative/zero/positive as mName sorts before/equal/after it.
struct Cmp {
|
||||
int operator()(const ETLDEntry aEntry) const
|
||||
{
|
||||
return strcmp(mName, aEntry.GetEffectiveTLDName());
|
||||
}
|
||||
explicit Cmp(const char* aName) : mName(aName) {}
|
||||
const char* mName;
|
||||
};
|
||||
|
||||
// Two-step expansion so __LINE__ becomes a number before being pasted onto
// "str", producing a distinct member name (str<line>) per entry.
#define ETLD_STR_NUM_1(line) str##line
|
||||
#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
|
||||
// One char array per ETLD_ENTRY in etld_data.inc, each sized with
// sizeof(name) for that entry's name argument.
struct etld_string_list {
|
||||
#define ETLD_ENTRY(name, ex, wild) char ETLD_STR_NUM(__LINE__)[sizeof(name)];
|
||||
#include "etld_data.inc"
|
||||
#undef ETLD_ENTRY
|
||||
};
|
||||
|
||||
// This static string table is all the eTLD domain names packed together.
// The union overlays the per-entry struct with a char array so that
// GetEffectiveTLDName() can address any name as strtab + offset, even though
// strtab is declared with a length of 1.
|
||||
static const union etld_strings {
|
||||
struct etld_string_list list;
|
||||
char strtab[1];
|
||||
} strings;
|
||||
|
||||
// This is the static entries table. Each entry has an index into the string
|
||||
// table. The entries are in sorted order so that binary search can be used.
|
||||
static const ETLDEntry entries[];
|
||||
|
||||
// Declared only to host compile-time checks; see its definition.
void FuncForStaticAsserts(void);
|
||||
#undef ETLD_STR_NUM
|
||||
// NOTE(review): the macro defined above is ETLD_STR_NUM_1 (underscore before
// the 1). ETLD_STR_NUM1 is never defined, so this #undef has no effect and
// ETLD_STR_NUM_1 stays defined for whatever includes this header.
#undef ETLD_STR_NUM1
|
||||
};
|
||||
|
||||
class nsEffectiveTLDService final
|
||||
: public nsIEffectiveTLDService
|
||||
, public nsIMemoryReporter
|
||||
|
@ -93,6 +37,7 @@ private:
|
|||
~nsEffectiveTLDService();
|
||||
|
||||
nsCOMPtr<nsIIDNService> mIDNService;
|
||||
mozilla::Dafsa mGraph;
|
||||
};
|
||||
|
||||
#endif // EffectiveTLDService_h
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
import codecs
|
||||
import encodings.idna
|
||||
import imp
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
|
@ -34,12 +36,7 @@ def getEffectiveTLDs(path):
|
|||
assert domain not in domains, \
|
||||
"repeating domain %s makes no sense" % domain
|
||||
domains.add(domain)
|
||||
entries.append(entry)
|
||||
|
||||
# Sort the entries so we can use binary search on them.
|
||||
entries.sort(key=EffectiveTLDEntry.domain)
|
||||
|
||||
return entries
|
||||
yield entry
|
||||
|
||||
def _normalizeHostname(domain):
|
||||
"""
|
||||
|
@ -103,19 +100,37 @@ class EffectiveTLDEntry:
|
|||
def main(output, effective_tld_filename):
|
||||
"""
|
||||
effective_tld_filename is the effective TLD file to parse.
|
||||
A C++ array of { domain, exception, wild } entries representing the
|
||||
A C++ array of a binary representation of a DAFSA representing the
|
||||
eTLD file is then printed to output.
|
||||
"""
|
||||
|
||||
# Renders a Python truth value as the C++ literal "true" or "false".
def boolStr(b):
|
||||
if b:
|
||||
return "true"
|
||||
return "false"
|
||||
# Find and load the `make_dafsa.py` script under xpcom/ds.
# The path is built relative to the directory of effective_tld_filename.
|
||||
tld_dir = os.path.dirname(effective_tld_filename)
|
||||
make_dafsa_py = os.path.join(tld_dir, '../../xpcom/ds/make_dafsa.py')
|
||||
# The script's directory is appended to sys.path before loading it
# (presumably so the script's own sibling imports resolve -- TODO confirm).
sys.path.append(os.path.dirname(make_dafsa_py))
|
||||
# NOTE(review): the `imp` module is deprecated in Python 3 and removed in
# 3.12; importlib is the replacement if this script must run there.
with open(make_dafsa_py, 'r') as fh:
|
||||
make_dafsa = imp.load_module('script', fh, make_dafsa_py,
|
||||
('.py', 'r', imp.PY_SOURCE))
|
||||
|
||||
# Returns 1 for an exception entry, 2 for a wildcard entry, 0 otherwise.
# exception() is tested first, so it wins if both flags are set. The values
# 1 and 2 equal the kExceptionRule / kWildcardRule constants that the C++
# lookup compares DAFSA results against.
def typeEnum(etld):
|
||||
"""
|
||||
Maps the flags to the DAFSA's enum types.
|
||||
"""
|
||||
if etld.exception():
|
||||
return 1
|
||||
elif etld.wild():
|
||||
return 2
|
||||
else:
|
||||
return 0
|
||||
|
||||
# Generator over the entries parsed from effective_tld_filename. As written
# here, each iteration writes an ETLD_ENTRY(...) line to output and then
# yields the domain immediately followed by its typeEnum digit.
def dafsa_words():
|
||||
"""
|
||||
make_dafsa expects lines of the form "<domain_name><enum_value>"
|
||||
"""
|
||||
for etld in getEffectiveTLDs(effective_tld_filename):
|
||||
exception = boolStr(etld.exception())
|
||||
wild = boolStr(etld.wild())
|
||||
output.write('ETLD_ENTRY("%s", %s, %s)\n' % (etld.domain(), exception, wild))
|
||||
yield "%s%d" % (etld.domain(), typeEnum(etld))
|
||||
|
||||
# Pass the generated words to make_dafsa.words_to_cxx and write whatever it
# returns to output.
output.write(make_dafsa.words_to_cxx(dafsa_words()))
|
||||
|
||||
# Script entry point: generate into stdout from the eTLD file named by the
# first command-line argument.
if __name__ == '__main__':
|
||||
main(sys.stdout, sys.argv[1])
|
||||
|
|
|
@ -32,7 +32,7 @@ public:
|
|||
/**
|
||||
* Initializes the DAFSA with a binary encoding generated by `make_dafsa.py`.
|
||||
*
* @param aData the encoded graph; it is used to initialize mData. The
*              constructor is explicit, so a Graph is never implicitly
*              converted to a Dafsa.
*
* NOTE(review): mData's declaration is not visible here, so whether it
* copies aData or merely refers to it should be confirmed.
*/
|
||||
explicit constexpr Dafsa(const Graph& aData) : mData(aData) {}
|
||||
explicit Dafsa(const Graph& aData) : mData(aData) {}
|
||||
|
||||
// Defaulted destructor: no custom teardown is written for this class.
~Dafsa() = default;
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче