Bug 1380154 - Part 2: Generate a DAFSA and use it for eTLDs. r=jduell

This replaces our giant sorted array of eTLD names with a more compact DAFSA.

MozReview-Commit-ID: 3zMBzUM9QUg
This commit is contained in:
Eric Rahm 2017-07-17 16:10:18 -07:00
Родитель 65313fd340
Коммит a750d8eb22
4 изменённых файлов: 50 добавлений и 143 удалений

Просмотреть файл

@ -19,6 +19,13 @@
#include "nsNetCID.h" #include "nsNetCID.h"
#include "nsServiceManagerUtils.h" #include "nsServiceManagerUtils.h"
// The generated data is pulled in inside its own namespace, so the table it
// provides is referred to as etld_dafsa::kDafsa.
namespace etld_dafsa {
// Generated file that includes kDafsa
#include "etld_data.inc"
} // namespace etld_dafsa
using namespace mozilla; using namespace mozilla;
NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService, NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
@ -26,56 +33,11 @@ NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
// ---------------------------------------------------------------------- // ----------------------------------------------------------------------
#define ETLD_STR_NUM_1(line) str##line
#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
#define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
// Static table of eTLD entries. etld_data.inc is included with ETLD_ENTRY
// defined so that every generated line expands to one aggregate initializer:
// { offset of the name inside etld_string_list, exception flag, wild flag }.
const ETLDEntry ETLDEntry::entries[] = {
#define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
#include "etld_data.inc"
#undef ETLD_ENTRY
};
// Static string table. etld_data.inc is included again, this time with
// ETLD_ENTRY expanding to just the name literal, so the names initialize the
// members of the union's etld_string_list one after another.
const union ETLDEntry::etld_strings ETLDEntry::strings = {
{
#define ETLD_ENTRY(name, ex, wild) name,
#include "etld_data.inc"
#undef ETLD_ENTRY
}
};
/* static */ const ETLDEntry*
ETLDEntry::GetEntry(const char* aDomain)
{
  // Binary-search the entries table for aDomain using the Cmp comparator.
  // matchIndex is only read when the search reports a match.
  size_t matchIndex;
  const bool found = BinarySearchIf(entries, 0, ArrayLength(ETLDEntry::entries),
                                    Cmp(aDomain), &matchIndex);
  return found ? &entries[matchIndex] : nullptr;
}
// Dummy function to statically ensure that our indices don't overflow
// the storage provided for them.
// Its body consists solely of static_asserts: etld_data.inc is included with
// ETLD_ENTRY expanding to one assertion per generated line, each checking that
// the entry's string-table offset is below 2^ETLD_ENTRY_N_INDEX_BITS.
void
ETLDEntry::FuncForStaticAsserts(void)
{
#define ETLD_ENTRY(name, ex, wild) \
static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
"invalid strtab index");
#include "etld_data.inc"
#undef ETLD_ENTRY
}
#undef ETLD_ENTRY_OFFSET
#undef ETLD_STR_NUM
// The helper macro is defined as ETLD_STR_NUM_1 (with an underscore before
// the digit); undefine that exact name so it does not stay defined past here.
#undef ETLD_STR_NUM_1
// ----------------------------------------------------------------------
static nsEffectiveTLDService *gService = nullptr; static nsEffectiveTLDService *gService = nullptr;
nsEffectiveTLDService::nsEffectiveTLDService() nsEffectiveTLDService::nsEffectiveTLDService()
: mIDNService()
, mGraph(etld_dafsa::kDafsa)
{ {
} }
@ -86,24 +48,6 @@ nsEffectiveTLDService::Init()
mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv); mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
if (NS_FAILED(rv)) return rv; if (NS_FAILED(rv)) return rv;
#ifdef DEBUG
// Sanity-check the eTLD entries.
for (uint32_t i = 0; i < ArrayLength(ETLDEntry::entries); i++) {
const char* domain = ETLDEntry::entries[i].GetEffectiveTLDName();
nsDependentCString name(domain);
nsAutoCString normalizedName(domain);
MOZ_ASSERT(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
"normalization failure!");
MOZ_ASSERT(name.Equals(normalizedName), "domain not normalized!");
// Domains must be in sorted order for binary search to work.
if (i > 0) {
const char* domain0 = ETLDEntry::entries[i - 1].GetEffectiveTLDName();
MOZ_ASSERT(strcmp(domain0, domain) < 0, "domains not in sorted order!");
}
}
#endif
MOZ_ASSERT(!gService); MOZ_ASSERT(!gService);
gService = this; gService = this;
RegisterWeakMemoryReporter(this); RegisterWeakMemoryReporter(this);
@ -244,6 +188,9 @@ nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
int32_t aAdditionalParts, int32_t aAdditionalParts,
nsACString &aBaseDomain) nsACString &aBaseDomain)
{ {
const int kExceptionRule = 1;
const int kWildcardRule = 2;
if (aHostname.IsEmpty()) if (aHostname.IsEmpty())
return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
@ -280,19 +227,19 @@ nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
return NS_ERROR_INVALID_ARG; return NS_ERROR_INVALID_ARG;
// Perform the lookup. // Perform the lookup.
const ETLDEntry* entry = ETLDEntry::GetEntry(currDomain); const int result = mGraph.Lookup(Substring(currDomain, end));
if (entry) { if (result != Dafsa::kKeyNotFound) {
if (entry->IsWild() && prevDomain) { if (result == kWildcardRule && prevDomain) {
// wildcard rules imply an eTLD one level inferior to the match. // wildcard rules imply an eTLD one level inferior to the match.
eTLD = prevDomain; eTLD = prevDomain;
break; break;
} }
if (entry->IsNormal() || !nextDot) { if ((result == kWildcardRule || result != kExceptionRule) || !nextDot) {
// specific match, or we've hit the top domain level // specific match, or we've hit the top domain level
eTLD = currDomain; eTLD = currDomain;
break; break;
} }
if (entry->IsException()) { if (result == kExceptionRule) {
// exception rules imply an eTLD one level superior to the match. // exception rules imply an eTLD one level superior to the match.
eTLD = nextDot + 1; eTLD = nextDot + 1;
break; break;

Просмотреть файл

@ -12,67 +12,11 @@
#include "nsString.h" #include "nsString.h"
#include "nsCOMPtr.h" #include "nsCOMPtr.h"
#include "mozilla/Attributes.h" #include "mozilla/Attributes.h"
#include "mozilla/BinarySearch.h" #include "mozilla/Dafsa.h"
#include "mozilla/MemoryReporting.h" #include "mozilla/MemoryReporting.h"
class nsIIDNService; class nsIIDNService;
// struct for static data generated from effective_tld_names.dat
//
// Each entry packs an offset into a shared string table together with two
// one-bit flags into a single 32-bit bitfield group. The entries and the
// string table are static members defined out of line.
struct ETLDEntry {
  friend class nsEffectiveTLDService;

public:
  // True unless the entry is flagged as an exception without also being
  // flagged as a wildcard.
  bool IsNormal() const { return wild || !exception; }
  // True when the exception flag is set.
  bool IsException() const { return exception; }
  // True when the wildcard flag is set.
  bool IsWild() const { return wild; }

  // Returns the entry's name, which lives in the packed string table at
  // offset strtab_index.
  const char* GetEffectiveTLDName() const
  {
    return strings.strtab + strtab_index;
  }

  // Looks aDomain up in the entries table (defined out of line).
  static const ETLDEntry* GetEntry(const char* aDomain);

  // Number of bits available for the string-table offset.
  static const size_t ETLD_ENTRY_N_INDEX_BITS = 30;

  // These fields must be public to allow static construction.
  uint32_t strtab_index : ETLD_ENTRY_N_INDEX_BITS;
  uint32_t exception : 1;
  uint32_t wild : 1;

private:
  // Comparator for the binary search: orders the wanted name against an
  // entry's name with strcmp.
  struct Cmp {
    int operator()(const ETLDEntry aEntry) const
    {
      return strcmp(mName, aEntry.GetEffectiveTLDName());
    }
    explicit Cmp(const char* aName) : mName(aName) {}
    const char* mName;
  };

  // ETLD_STR_NUM(__LINE__) yields a distinct member name (str<line>) for each
  // line of the generated include; the two-level expansion is what lets
  // __LINE__ be substituted before token pasting.
#define ETLD_STR_NUM_1(line) str##line
#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
  // One char array per generated entry, each sized to hold its name
  // including the terminating NUL.
  struct etld_string_list {
#define ETLD_ENTRY(name, ex, wild) char ETLD_STR_NUM(__LINE__)[sizeof(name)];
#include "etld_data.inc"
#undef ETLD_ENTRY
  };

  // This static string table is all the eTLD domain names packed together.
  static const union etld_strings {
    struct etld_string_list list;
    char strtab[1];
  } strings;

  // This is the static entries table. Each entry has an index into the string
  // table. The entries are in sorted order so that binary search can be used.
  static const ETLDEntry entries[];

  void FuncForStaticAsserts(void);
  // Undefine both helper macros under the names they were defined with
  // (ETLD_STR_NUM_1 carries an underscore before the digit).
#undef ETLD_STR_NUM
#undef ETLD_STR_NUM_1
};
class nsEffectiveTLDService final class nsEffectiveTLDService final
: public nsIEffectiveTLDService : public nsIEffectiveTLDService
, public nsIMemoryReporter , public nsIMemoryReporter
@ -93,6 +37,7 @@ private:
~nsEffectiveTLDService(); ~nsEffectiveTLDService();
nsCOMPtr<nsIIDNService> mIDNService; nsCOMPtr<nsIIDNService> mIDNService;
mozilla::Dafsa mGraph;
}; };
#endif // EffectiveTLDService_h #endif // EffectiveTLDService_h

Просмотреть файл

@ -4,6 +4,8 @@
import codecs import codecs
import encodings.idna import encodings.idna
import imp
import os
import re import re
import sys import sys
@ -34,12 +36,7 @@ def getEffectiveTLDs(path):
assert domain not in domains, \ assert domain not in domains, \
"repeating domain %s makes no sense" % domain "repeating domain %s makes no sense" % domain
domains.add(domain) domains.add(domain)
entries.append(entry) yield entry
# Sort the entries so we can use binary search on them.
entries.sort(key=EffectiveTLDEntry.domain)
return entries
def _normalizeHostname(domain): def _normalizeHostname(domain):
""" """
@ -103,19 +100,37 @@ class EffectiveTLDEntry:
def main(output, effective_tld_filename): def main(output, effective_tld_filename):
""" """
effective_tld_filename is the effective TLD file to parse. effective_tld_filename is the effective TLD file to parse.
A C++ array of { domain, exception, wild } entries representing the A C++ array of a binary representation of a DAFSA representing the
eTLD file is then printed to output. eTLD file is then printed to output.
""" """
def boolStr(b): # Find and load the `make_dafsa.py` script under xpcom/ds.
if b: tld_dir = os.path.dirname(effective_tld_filename)
return "true" make_dafsa_py = os.path.join(tld_dir, '../../xpcom/ds/make_dafsa.py')
return "false" sys.path.append(os.path.dirname(make_dafsa_py))
with open(make_dafsa_py, 'r') as fh:
make_dafsa = imp.load_module('script', fh, make_dafsa_py,
('.py', 'r', imp.PY_SOURCE))
def typeEnum(etld):
"""
Maps the flags to the DAFSA's enum types.
"""
if etld.exception():
return 1
elif etld.wild():
return 2
else:
return 0
def dafsa_words():
"""
make_dafsa expects lines of the form "<domain_name><enum_value>"
"""
for etld in getEffectiveTLDs(effective_tld_filename): for etld in getEffectiveTLDs(effective_tld_filename):
exception = boolStr(etld.exception()) yield "%s%d" % (etld.domain(), typeEnum(etld))
wild = boolStr(etld.wild())
output.write('ETLD_ENTRY("%s", %s, %s)\n' % (etld.domain(), exception, wild)) output.write(make_dafsa.words_to_cxx(dafsa_words()))
if __name__ == '__main__': if __name__ == '__main__':
main(sys.stdout, sys.argv[1]) main(sys.stdout, sys.argv[1])

Просмотреть файл

@ -32,7 +32,7 @@ public:
/** /**
* Initializes the DAFSA with a binary encoding generated by `make_dafsa.py`. * Initializes the DAFSA with a binary encoding generated by `make_dafsa.py`.
*/ */
explicit constexpr Dafsa(const Graph& aData) : mData(aData) {} explicit Dafsa(const Graph& aData) : mData(aData) {}
~Dafsa() = default; ~Dafsa() = default;