2007-07-26 10:31:49 +04:00
|
|
|
//* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
2006-06-09 22:23:10 +04:00
|
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
|
|
* the License. You may obtain a copy of the License at
|
|
|
|
* http://www.mozilla.org/MPL/
|
|
|
|
*
|
|
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
|
|
* for the specific language governing rights and limitations under the
|
|
|
|
* License.
|
|
|
|
*
|
|
|
|
* The Original Code is Mozilla Effective-TLD Service
|
|
|
|
*
|
|
|
|
* The Initial Developer of the Original Code is
|
|
|
|
* Google Inc.
|
|
|
|
* Portions created by the Initial Developer are Copyright (C) 2006
|
|
|
|
* the Initial Developer. All Rights Reserved.
|
|
|
|
*
|
|
|
|
* Contributor(s):
|
|
|
|
* Pamela Greene <pamg.bugs@gmail.com> (original author)
|
2007-07-26 10:31:49 +04:00
|
|
|
* Daniel Witte <dwitte@stanford.edu>
|
2008-02-15 01:57:20 +03:00
|
|
|
* Jeff Walden <jwalden+code@mit.edu>
|
2006-06-09 22:23:10 +04:00
|
|
|
*
|
|
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
|
|
* the provisions above, a recipient may use your version of this file under
|
|
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
|
|
*
|
|
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
|
|
|
|
// This service reads a file of rules describing TLD-like domain names. For a
|
|
|
|
// complete description of the expected file format and parsing rules, see
|
|
|
|
// http://wiki.mozilla.org/Gecko:Effective_TLD_Service
|
|
|
|
|
2011-10-11 09:50:08 +04:00
|
|
|
#include "mozilla/Util.h"
|
|
|
|
|
2006-06-09 22:23:10 +04:00
|
|
|
#include "nsEffectiveTLDService.h"
|
|
|
|
#include "nsIIDNService.h"
|
|
|
|
#include "nsNetUtil.h"
|
2007-10-25 12:14:26 +04:00
|
|
|
#include "prnetdb.h"
|
2007-03-23 02:01:14 +03:00
|
|
|
|
2010-05-20 03:22:19 +04:00
|
|
|
#include "mozilla/FunctionTimer.h"
|
|
|
|
|
2011-10-11 09:50:08 +04:00
|
|
|
using namespace mozilla;
|
|
|
|
|
2006-06-09 22:23:10 +04:00
|
|
|
// nsISupports implementation macro for nsEffectiveTLDService; the second
// argument names the single interface listed for the class,
// nsIEffectiveTLDService.
NS_IMPL_ISUPPORTS1(nsEffectiveTLDService, nsIEffectiveTLDService)
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------
|
|
|
|
|
2008-02-15 01:57:20 +03:00
|
|
|
// Static table of eTLD rules. The array initializer is supplied verbatim by
// etld_data.inc. Init() inserts every element except the last one into mHash,
// keyed by the element's `domain` string.
// NOTE(review): the skipped final element is presumably a terminator emitted
// by whatever generates etld_data.inc -- confirm against the generator.
static const ETLDEntry gEntries[] =
|
|
|
|
#include "etld_data.inc"
|
|
|
|
;
|
2006-06-09 22:23:10 +04:00
|
|
|
|
|
|
|
// ----------------------------------------------------------------------
|
|
|
|
|
|
|
|
// Prepares the service for lookups: sizes the rule hash, acquires the IDN
// service used by NormalizeHostname(), and copies the static gEntries table
// into mHash.
//
// Returns NS_ERROR_OUT_OF_MEMORY if the hash cannot be initialised or an
// entry cannot be added, the failure code from do_GetService() if the IDN
// service is unavailable, and NS_OK otherwise.
nsresult
nsEffectiveTLDService::Init()
{
  NS_TIME_FUNCTION;

  // The last element of gEntries is deliberately left out of the hash.
  const PRUint32 ruleCount = ArrayLength(gEntries) - 1;

  // Size the table for the full rule set up front. nsTHashtable is not a
  // perfect hash, so a rehash is still likely, but this avoids several of
  // them. Precomputing the hash (e.g. with gperf) would be the next step.
  if (!mHash.Init(ruleCount)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  nsresult rv;
  mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
  if (NS_FAILED(rv)) {
    return rv;
  }

  // Load every static rule into the hash, keyed by its domain string.
  const ETLDEntry *const rulesEnd = gEntries + ruleCount;
  for (const ETLDEntry *rule = gEntries; rule != rulesEnd; ++rule) {
#ifdef DEBUG
    // Debug builds check that the table already holds normalized names, so
    // that lookups with normalized hostnames can match.
    nsDependentCString name(rule->domain);
    nsCAutoString normalizedName(rule->domain);
    NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
                 "normalization failure!");
    NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!");
#endif
    nsDomainEntry *entry = mHash.PutEntry(rule->domain);
    NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY);
    entry->SetData(rule);
  }

  return NS_OK;
}
|
|
|
|
|
2007-10-25 12:14:26 +04:00
|
|
|
// External function for dealing with URI's correctly.
|
|
|
|
// Pulls out the host portion from an nsIURI, and calls through to
|
|
|
|
// GetPublicSuffixFromHost().
|
|
|
|
NS_IMETHODIMP
|
|
|
|
nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI,
|
|
|
|
nsACString &aPublicSuffix)
|
|
|
|
{
|
|
|
|
NS_ENSURE_ARG_POINTER(aURI);
|
|
|
|
|
|
|
|
nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
|
|
|
|
NS_ENSURE_ARG_POINTER(innerURI);
|
|
|
|
|
|
|
|
nsCAutoString host;
|
2007-12-05 00:57:31 +03:00
|
|
|
nsresult rv = innerURI->GetAsciiHost(host);
|
|
|
|
if (NS_FAILED(rv)) return rv;
|
2007-10-25 12:14:26 +04:00
|
|
|
|
|
|
|
return GetBaseDomainInternal(host, 0, aPublicSuffix);
|
|
|
|
}
|
|
|
|
|
|
|
|
// External function for dealing with URI's correctly.
|
|
|
|
// Pulls out the host portion from an nsIURI, and calls through to
|
|
|
|
// GetBaseDomainFromHost().
|
|
|
|
NS_IMETHODIMP
|
|
|
|
nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI,
|
|
|
|
PRUint32 aAdditionalParts,
|
|
|
|
nsACString &aBaseDomain)
|
|
|
|
{
|
|
|
|
NS_ENSURE_ARG_POINTER(aURI);
|
|
|
|
|
|
|
|
nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
|
|
|
|
NS_ENSURE_ARG_POINTER(innerURI);
|
|
|
|
|
|
|
|
nsCAutoString host;
|
2007-12-05 00:57:31 +03:00
|
|
|
nsresult rv = innerURI->GetAsciiHost(host);
|
|
|
|
if (NS_FAILED(rv)) return rv;
|
2007-10-25 12:14:26 +04:00
|
|
|
|
|
|
|
return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain);
|
|
|
|
}
|
|
|
|
|
|
|
|
// External function for dealing with a host string directly: finds the public
|
|
|
|
// suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
|
|
|
|
NS_IMETHODIMP
|
|
|
|
nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname,
|
|
|
|
nsACString &aPublicSuffix)
|
|
|
|
{
|
2007-12-05 00:57:31 +03:00
|
|
|
// Create a mutable copy of the hostname and normalize it to ACE.
|
|
|
|
// This will fail if the hostname includes invalid characters.
|
|
|
|
nsCAutoString normHostname(aHostname);
|
|
|
|
nsresult rv = NormalizeHostname(normHostname);
|
|
|
|
if (NS_FAILED(rv)) return rv;
|
|
|
|
|
|
|
|
return GetBaseDomainInternal(normHostname, 0, aPublicSuffix);
|
2007-10-25 12:14:26 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
// External function for dealing with a host string directly: finds the base
|
|
|
|
// domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
|
|
|
|
// requested. See GetBaseDomainInternal().
|
2007-10-25 12:14:27 +04:00
|
|
|
NS_IMETHODIMP
|
2007-10-25 12:14:26 +04:00
|
|
|
nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname,
|
|
|
|
PRUint32 aAdditionalParts,
|
|
|
|
nsACString &aBaseDomain)
|
2007-10-25 12:14:27 +04:00
|
|
|
{
|
2007-12-05 00:57:31 +03:00
|
|
|
// Create a mutable copy of the hostname and normalize it to ACE.
|
|
|
|
// This will fail if the hostname includes invalid characters.
|
|
|
|
nsCAutoString normHostname(aHostname);
|
|
|
|
nsresult rv = NormalizeHostname(normHostname);
|
|
|
|
if (NS_FAILED(rv)) return rv;
|
|
|
|
|
|
|
|
return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain);
|
2007-10-25 12:14:26 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
// Finds the base domain for a host, with requested number of additional parts.
|
|
|
|
// This will fail, generating an error, if the host is an IPv4/IPv6 address,
|
|
|
|
// if more subdomain parts are requested than are available, or if the hostname
|
|
|
|
// includes characters that are not valid in a URL. Normalization is performed
|
|
|
|
// on the host string and the result will be in UTF8.
|
|
|
|
nsresult
|
2007-12-05 00:57:31 +03:00
|
|
|
nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
|
|
|
|
PRUint32 aAdditionalParts,
|
|
|
|
nsACString &aBaseDomain)
|
2007-10-25 12:14:26 +04:00
|
|
|
{
|
|
|
|
if (aHostname.IsEmpty())
|
2010-01-12 21:29:20 +03:00
|
|
|
return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
|
2007-10-25 12:14:26 +04:00
|
|
|
|
|
|
|
// chomp any trailing dot, and keep track of it for later
|
2011-09-29 10:19:26 +04:00
|
|
|
bool trailingDot = aHostname.Last() == '.';
|
2007-07-26 10:31:49 +04:00
|
|
|
if (trailingDot)
|
2007-12-05 00:57:31 +03:00
|
|
|
aHostname.Truncate(aHostname.Length() - 1);
|
2007-07-26 10:31:49 +04:00
|
|
|
|
2010-01-12 21:29:20 +03:00
|
|
|
// check the edge cases of the host being '.' or having a second trailing '.',
|
|
|
|
// since subsequent checks won't catch it.
|
|
|
|
if (aHostname.IsEmpty() || aHostname.Last() == '.')
|
|
|
|
return NS_ERROR_INVALID_ARG;
|
|
|
|
|
2007-10-25 12:14:26 +04:00
|
|
|
// Check if we're dealing with an IPv4/IPv6 hostname, and return
|
|
|
|
PRNetAddr addr;
|
2007-12-05 00:57:31 +03:00
|
|
|
PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr);
|
2007-10-25 12:14:26 +04:00
|
|
|
if (result == PR_SUCCESS)
|
|
|
|
return NS_ERROR_HOST_IS_IP_ADDRESS;
|
|
|
|
|
2008-02-15 01:57:20 +03:00
|
|
|
// Walk up the domain tree, most specific to least specific,
|
|
|
|
// looking for matches at each level. Note that a given level may
|
2007-07-26 10:31:49 +04:00
|
|
|
// have multiple attributes (e.g. IsWild() and IsNormal()).
|
|
|
|
const char *prevDomain = nsnull;
|
2007-12-05 00:57:31 +03:00
|
|
|
const char *currDomain = aHostname.get();
|
2007-07-26 10:31:49 +04:00
|
|
|
const char *nextDot = strchr(currDomain, '.');
|
2007-12-05 00:57:31 +03:00
|
|
|
const char *end = currDomain + aHostname.Length();
|
2007-10-25 12:14:26 +04:00
|
|
|
const char *eTLD = currDomain;
|
2007-07-26 10:31:49 +04:00
|
|
|
while (1) {
|
2010-01-12 21:29:20 +03:00
|
|
|
// sanity check the string we're about to look up: it should not begin with
|
|
|
|
// a '.'; this would mean the hostname began with a '.' or had an
|
|
|
|
// embedded '..' sequence.
|
|
|
|
if (*currDomain == '.')
|
|
|
|
return NS_ERROR_INVALID_ARG;
|
|
|
|
|
|
|
|
// perform the hash lookup.
|
2007-07-26 10:31:49 +04:00
|
|
|
nsDomainEntry *entry = mHash.GetEntry(currDomain);
|
|
|
|
if (entry) {
|
|
|
|
if (entry->IsWild() && prevDomain) {
|
|
|
|
// wildcard rules imply an eTLD one level inferior to the match.
|
2007-10-25 12:14:26 +04:00
|
|
|
eTLD = prevDomain;
|
2007-07-26 10:31:49 +04:00
|
|
|
break;
|
|
|
|
|
2007-09-03 07:03:41 +04:00
|
|
|
} else if (entry->IsNormal() || !nextDot) {
|
|
|
|
// specific match, or we've hit the top domain level
|
2007-10-25 12:14:26 +04:00
|
|
|
eTLD = currDomain;
|
2007-07-26 10:31:49 +04:00
|
|
|
break;
|
|
|
|
|
|
|
|
} else if (entry->IsException()) {
|
|
|
|
// exception rules imply an eTLD one level superior to the match.
|
2007-10-25 12:14:26 +04:00
|
|
|
eTLD = nextDot + 1;
|
2007-07-26 10:31:49 +04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2007-10-25 12:14:27 +04:00
|
|
|
|
2007-07-26 12:55:53 +04:00
|
|
|
if (!nextDot) {
|
2007-10-25 12:14:26 +04:00
|
|
|
// we've hit the top domain level; use it by default.
|
|
|
|
eTLD = currDomain;
|
2007-07-26 12:55:53 +04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2007-07-26 10:31:49 +04:00
|
|
|
prevDomain = currDomain;
|
|
|
|
currDomain = nextDot + 1;
|
|
|
|
nextDot = strchr(currDomain, '.');
|
2007-03-23 02:01:14 +03:00
|
|
|
}
|
2007-10-25 12:14:27 +04:00
|
|
|
|
2007-10-25 12:14:26 +04:00
|
|
|
// count off the number of requested domains.
|
2007-12-05 00:57:31 +03:00
|
|
|
const char *begin = aHostname.get();
|
2007-10-25 12:14:26 +04:00
|
|
|
const char *iter = eTLD;
|
|
|
|
while (1) {
|
|
|
|
if (iter == begin)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (*(--iter) == '.' && aAdditionalParts-- == 0) {
|
|
|
|
++iter;
|
|
|
|
++aAdditionalParts;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (aAdditionalParts != 0)
|
|
|
|
return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
|
|
|
|
|
|
|
|
aBaseDomain = Substring(iter, end);
|
2007-07-26 10:31:49 +04:00
|
|
|
// add on the trailing dot, if applicable
|
2007-10-25 12:14:26 +04:00
|
|
|
if (trailingDot)
|
|
|
|
aBaseDomain.Append('.');
|
2007-10-25 12:14:27 +04:00
|
|
|
|
2007-07-26 10:31:49 +04:00
|
|
|
return NS_OK;
|
2007-03-23 02:01:14 +03:00
|
|
|
}
|
2007-10-25 12:14:27 +04:00
|
|
|
|
2008-02-15 01:57:20 +03:00
|
|
|
// Normalizes the given hostname, component by component. ASCII/ACE
|
|
|
|
// components are lower-cased, and UTF-8 components are normalized per
|
|
|
|
// RFC 3454 and converted to ACE.
|
2007-07-26 10:31:49 +04:00
|
|
|
nsresult
|
|
|
|
nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname)
|
2007-03-23 02:01:14 +03:00
|
|
|
{
|
2008-02-15 01:57:20 +03:00
|
|
|
if (!IsASCII(aHostname)) {
|
|
|
|
nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
|
2006-06-09 22:23:10 +04:00
|
|
|
if (NS_FAILED(rv))
|
|
|
|
return rv;
|
|
|
|
}
|
2007-03-23 02:01:14 +03:00
|
|
|
|
2008-02-15 01:57:20 +03:00
|
|
|
ToLowerCase(aHostname);
|
2007-07-26 10:31:49 +04:00
|
|
|
return NS_OK;
|
2007-03-23 02:01:14 +03:00
|
|
|
}
|