From a750d8eb2287ce3d4b86fd61674a65002779b5e4 Mon Sep 17 00:00:00 2001 From: Eric Rahm Date: Mon, 17 Jul 2017 16:10:18 -0700 Subject: [PATCH] Bug 1380154 - Part 2: Generate a DAFSA and use it for eTLDs. r=jduell This replaces our giant sorted array of eTLD names with a more compact DAFSA. MozReview-Commit-ID: 3zMBzUM9QUg --- netwerk/dns/nsEffectiveTLDService.cpp | 87 ++++++--------------------- netwerk/dns/nsEffectiveTLDService.h | 59 +----------------- netwerk/dns/prepare_tlds.py | 45 +++++++++----- xpcom/ds/Dafsa.h | 2 +- 4 files changed, 50 insertions(+), 143 deletions(-) diff --git a/netwerk/dns/nsEffectiveTLDService.cpp b/netwerk/dns/nsEffectiveTLDService.cpp index 745803b6fae4..91e501adbca4 100644 --- a/netwerk/dns/nsEffectiveTLDService.cpp +++ b/netwerk/dns/nsEffectiveTLDService.cpp @@ -19,6 +19,13 @@ #include "nsNetCID.h" #include "nsServiceManagerUtils.h" +namespace etld_dafsa { + +// Generated file that includes kDafsa +#include "etld_data.inc" + +} // namespace etld_dafsa + using namespace mozilla; NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService, @@ -26,56 +33,11 @@ NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService, // ---------------------------------------------------------------------- -#define ETLD_STR_NUM_1(line) str##line -#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line) -#define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__)) - -const ETLDEntry ETLDEntry::entries[] = { -#define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild }, -#include "etld_data.inc" -#undef ETLD_ENTRY -}; - -const union ETLDEntry::etld_strings ETLDEntry::strings = { - { -#define ETLD_ENTRY(name, ex, wild) name, -#include "etld_data.inc" -#undef ETLD_ENTRY - } -}; - -/* static */ const ETLDEntry* -ETLDEntry::GetEntry(const char* aDomain) -{ - size_t i; - if (BinarySearchIf(entries, 0, ArrayLength(ETLDEntry::entries), - Cmp(aDomain), &i)) { - return &entries[i]; - } - return nullptr; -} - -// Dummy 
function to statically ensure that our indices don't overflow -// the storage provided for them. -void -ETLDEntry::FuncForStaticAsserts(void) -{ -#define ETLD_ENTRY(name, ex, wild) \ - static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \ - "invalid strtab index"); -#include "etld_data.inc" -#undef ETLD_ENTRY -} - -#undef ETLD_ENTRY_OFFSET -#undef ETLD_STR_NUM -#undef ETLD_STR_NUM1 - -// ---------------------------------------------------------------------- - static nsEffectiveTLDService *gService = nullptr; nsEffectiveTLDService::nsEffectiveTLDService() + : mIDNService() + , mGraph(etld_dafsa::kDafsa) { } @@ -86,24 +48,6 @@ nsEffectiveTLDService::Init() mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv); if (NS_FAILED(rv)) return rv; -#ifdef DEBUG - // Sanity-check the eTLD entries. - for (uint32_t i = 0; i < ArrayLength(ETLDEntry::entries); i++) { - const char* domain = ETLDEntry::entries[i].GetEffectiveTLDName(); - nsDependentCString name(domain); - nsAutoCString normalizedName(domain); - MOZ_ASSERT(NS_SUCCEEDED(NormalizeHostname(normalizedName)), - "normalization failure!"); - MOZ_ASSERT(name.Equals(normalizedName), "domain not normalized!"); - - // Domains must be in sorted order for binary search to work. - if (i > 0) { - const char* domain0 = ETLDEntry::entries[i - 1].GetEffectiveTLDName(); - MOZ_ASSERT(strcmp(domain0, domain) < 0, "domains not in sorted order!"); - } - } -#endif - MOZ_ASSERT(!gService); gService = this; RegisterWeakMemoryReporter(this); @@ -244,6 +188,9 @@ nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname, int32_t aAdditionalParts, nsACString &aBaseDomain) { + const int kExceptionRule = 1; + const int kWildcardRule = 2; + if (aHostname.IsEmpty()) return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; @@ -280,19 +227,19 @@ nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname, return NS_ERROR_INVALID_ARG; // Perform the lookup. 
- const ETLDEntry* entry = ETLDEntry::GetEntry(currDomain); - if (entry) { - if (entry->IsWild() && prevDomain) { + const int result = mGraph.Lookup(Substring(currDomain, end)); + if (result != Dafsa::kKeyNotFound) { + if (result == kWildcardRule && prevDomain) { // wildcard rules imply an eTLD one level inferior to the match. eTLD = prevDomain; break; } - if (entry->IsNormal() || !nextDot) { + if ((result == kWildcardRule || result != kExceptionRule) || !nextDot) { // specific match, or we've hit the top domain level eTLD = currDomain; break; } - if (entry->IsException()) { + if (result == kExceptionRule) { // exception rules imply an eTLD one level superior to the match. eTLD = nextDot + 1; break; diff --git a/netwerk/dns/nsEffectiveTLDService.h b/netwerk/dns/nsEffectiveTLDService.h index 92e7abe059d6..940a987e9bae 100644 --- a/netwerk/dns/nsEffectiveTLDService.h +++ b/netwerk/dns/nsEffectiveTLDService.h @@ -12,67 +12,11 @@ #include "nsString.h" #include "nsCOMPtr.h" #include "mozilla/Attributes.h" -#include "mozilla/BinarySearch.h" +#include "mozilla/Dafsa.h" #include "mozilla/MemoryReporting.h" class nsIIDNService; -// struct for static data generated from effective_tld_names.dat -struct ETLDEntry { - friend class nsEffectiveTLDService; - -public: - bool IsNormal() const { return wild || !exception; } - bool IsException() const { return exception; } - bool IsWild() const { return wild; } - - const char* GetEffectiveTLDName() const - { - return strings.strtab + strtab_index; - } - - static const ETLDEntry* GetEntry(const char* aDomain); - - static const size_t ETLD_ENTRY_N_INDEX_BITS = 30; - - // These fields must be public to allow static construction. 
- uint32_t strtab_index : ETLD_ENTRY_N_INDEX_BITS; - uint32_t exception : 1; - uint32_t wild : 1; - -private: - struct Cmp { - int operator()(const ETLDEntry aEntry) const - { - return strcmp(mName, aEntry.GetEffectiveTLDName()); - } - explicit Cmp(const char* aName) : mName(aName) {} - const char* mName; - }; - -#define ETLD_STR_NUM_1(line) str##line -#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line) - struct etld_string_list { -#define ETLD_ENTRY(name, ex, wild) char ETLD_STR_NUM(__LINE__)[sizeof(name)]; -#include "etld_data.inc" -#undef ETLD_ENTRY - }; - - // This static string table is all the eTLD domain names packed together. - static const union etld_strings { - struct etld_string_list list; - char strtab[1]; - } strings; - - // This is the static entries table. Each entry has an index into the string - // table. The entries are in sorted order so that binary search can be used. - static const ETLDEntry entries[]; - - void FuncForStaticAsserts(void); -#undef ETLD_STR_NUM -#undef ETLD_STR_NUM1 -}; - class nsEffectiveTLDService final : public nsIEffectiveTLDService , public nsIMemoryReporter @@ -93,6 +37,7 @@ private: ~nsEffectiveTLDService(); nsCOMPtr<nsIIDNService> mIDNService; + mozilla::Dafsa mGraph; }; #endif // EffectiveTLDService_h diff --git a/netwerk/dns/prepare_tlds.py b/netwerk/dns/prepare_tlds.py index a97b20948c42..c2935c6f7b8e 100644 --- a/netwerk/dns/prepare_tlds.py +++ b/netwerk/dns/prepare_tlds.py @@ -4,6 +4,8 @@ import codecs import encodings.idna +import imp +import os import re import sys @@ -34,12 +36,7 @@ def getEffectiveTLDs(path): assert domain not in domains, \ "repeating domain %s makes no sense" % domain domains.add(domain) - entries.append(entry) - - # Sort the entries so we can use binary search on them.
- entries.sort(key=EffectiveTLDEntry.domain) - - return entries + yield entry def _normalizeHostname(domain): """ @@ -103,19 +100,37 @@ class EffectiveTLDEntry: def main(output, effective_tld_filename): """ effective_tld_filename is the effective TLD file to parse. - A C++ array of { domain, exception, wild } entries representing the + A C++ array of a binary representation of a DAFSA representing the eTLD file is then printed to output. """ - def boolStr(b): - if b: - return "true" - return "false" + # Find and load the `make_dafsa.py` script under xpcom/ds. + tld_dir = os.path.dirname(effective_tld_filename) + make_dafsa_py = os.path.join(tld_dir, '../../xpcom/ds/make_dafsa.py') + sys.path.append(os.path.dirname(make_dafsa_py)) + with open(make_dafsa_py, 'r') as fh: + make_dafsa = imp.load_module('script', fh, make_dafsa_py, + ('.py', 'r', imp.PY_SOURCE)) - for etld in getEffectiveTLDs(effective_tld_filename): - exception = boolStr(etld.exception()) - wild = boolStr(etld.wild()) - output.write('ETLD_ENTRY("%s", %s, %s)\n' % (etld.domain(), exception, wild)) + def typeEnum(etld): + """ + Maps the flags to the DAFSA's enum types. + """ + if etld.exception(): + return 1 + elif etld.wild(): + return 2 + else: + return 0 + + def dafsa_words(): + """ + make_dafsa expects lines of the form "<domain_name><enum_value>" + """ + for etld in getEffectiveTLDs(effective_tld_filename): + yield "%s%d" % (etld.domain(), typeEnum(etld)) + + output.write(make_dafsa.words_to_cxx(dafsa_words())) if __name__ == '__main__': main(sys.stdout, sys.argv[1]) diff --git a/xpcom/ds/Dafsa.h b/xpcom/ds/Dafsa.h index 4573e5f0054e..04077a39aeff 100644 --- a/xpcom/ds/Dafsa.h +++ b/xpcom/ds/Dafsa.h @@ -32,7 +32,7 @@ public: /** * Initializes the DAFSA with a binary encoding generated by `make_dafsa.py`. */ - explicit constexpr Dafsa(const Graph& aData) : mData(aData) {} + explicit Dafsa(const Graph& aData) : mData(aData) {} ~Dafsa() = default;