2012-12-12 08:58:52 +04:00
|
|
|
// Copyright 2012 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package publicsuffix
|
|
|
|
|
|
|
|
import (
|
|
|
|
"sort"
|
|
|
|
"strings"
|
|
|
|
"testing"
|
|
|
|
)
|
|
|
|
|
|
|
|
func TestNodeLabel(t *testing.T) {
|
|
|
|
for i, want := range nodeLabels {
|
|
|
|
got := nodeLabel(uint32(i))
|
|
|
|
if got != want {
|
|
|
|
t.Errorf("%d: got %q, want %q", i, got, want)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestFind(t *testing.T) {
|
|
|
|
testCases := []string{
|
|
|
|
"",
|
|
|
|
"a",
|
|
|
|
"a0",
|
|
|
|
"aaaa",
|
|
|
|
"ao",
|
|
|
|
"ap",
|
|
|
|
"ar",
|
|
|
|
"aro",
|
|
|
|
"arp",
|
|
|
|
"arpa",
|
|
|
|
"arpaa",
|
|
|
|
"arpb",
|
|
|
|
"az",
|
|
|
|
"b",
|
|
|
|
"b0",
|
|
|
|
"ba",
|
|
|
|
"z",
|
|
|
|
"zu",
|
|
|
|
"zv",
|
|
|
|
"zw",
|
|
|
|
"zx",
|
|
|
|
"zy",
|
|
|
|
"zz",
|
|
|
|
"zzzz",
|
|
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
|
|
got := find(tc, 0, numTLD)
|
|
|
|
want := notFound
|
|
|
|
for i := uint32(0); i < numTLD; i++ {
|
|
|
|
if tc == nodeLabel(i) {
|
|
|
|
want = i
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if got != want {
|
|
|
|
t.Errorf("%q: got %d, want %d", tc, got, want)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-01-09 15:10:50 +04:00
|
|
|
func TestICANN(t *testing.T) {
|
|
|
|
testCases := map[string]bool{
|
|
|
|
"foo.org": true,
|
|
|
|
"foo.co.uk": true,
|
|
|
|
"foo.dyndns.org": false,
|
|
|
|
"foo.go.dyndns.org": false,
|
|
|
|
"foo.blogspot.co.uk": false,
|
|
|
|
"foo.intranet": false,
|
|
|
|
}
|
|
|
|
for domain, want := range testCases {
|
|
|
|
_, got := PublicSuffix(domain)
|
|
|
|
if got != want {
|
|
|
|
t.Errorf("%q: got %v, want %v", domain, got, want)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-12 08:58:52 +04:00
|
|
|
// publicSuffixTestCases is the golden table shared by BenchmarkPublicSuffix,
// TestPublicSuffix and TestSlowPublicSuffix. Each entry pairs a domain with
// the public suffix (wantPS) and the ICANN flag (wantICANN) that the lookup
// is expected to return for it.
var publicSuffixTestCases = []struct {
	domain    string
	wantPS    string
	wantICANN bool
}{
	// Empty string.
	{"", "", false},

	// The .ao rules are:
	//	ao
	//	ed.ao
	//	gv.ao
	//	og.ao
	//	co.ao
	//	pb.ao
	//	it.ao
	{"ao", "ao", true},
	{"www.ao", "ao", true},
	{"pb.ao", "pb.ao", true},
	{"www.pb.ao", "pb.ao", true},
	{"www.xxx.yyy.zzz.pb.ao", "pb.ao", true},

	// The .ar rules are:
	//	ar
	//	com.ar
	//	edu.ar
	//	gob.ar
	//	gov.ar
	//	int.ar
	//	mil.ar
	//	net.ar
	//	org.ar
	//	tur.ar
	//	blogspot.com.ar (in the PRIVATE DOMAIN section).
	{"ar", "ar", true},
	{"www.ar", "ar", true},
	{"nic.ar", "ar", true},
	{"www.nic.ar", "ar", true},
	{"com.ar", "com.ar", true},
	{"www.com.ar", "com.ar", true},
	{"blogspot.com.ar", "blogspot.com.ar", false},                 // PRIVATE DOMAIN.
	{"www.blogspot.com.ar", "blogspot.com.ar", false},             // PRIVATE DOMAIN.
	{"www.xxx.yyy.zzz.blogspot.com.ar", "blogspot.com.ar", false}, // PRIVATE DOMAIN.
	{"logspot.com.ar", "com.ar", true},
	{"zlogspot.com.ar", "com.ar", true},
	{"zblogspot.com.ar", "com.ar", true},

	// The .arpa rules are:
	//	arpa
	//	e164.arpa
	//	in-addr.arpa
	//	ip6.arpa
	//	iris.arpa
	//	uri.arpa
	//	urn.arpa
	{"arpa", "arpa", true},
	{"www.arpa", "arpa", true},
	{"urn.arpa", "urn.arpa", true},
	{"www.urn.arpa", "urn.arpa", true},
	{"www.xxx.yyy.zzz.urn.arpa", "urn.arpa", true},

	// The relevant {kobe,kyoto}.jp rules are:
	//	jp
	//	*.kobe.jp
	//	!city.kobe.jp
	//	kyoto.jp
	//	ide.kyoto.jp
	{"jp", "jp", true},
	{"kobe.jp", "jp", true},
	{"c.kobe.jp", "c.kobe.jp", true},
	{"b.c.kobe.jp", "c.kobe.jp", true},
	{"a.b.c.kobe.jp", "c.kobe.jp", true},
	{"city.kobe.jp", "kobe.jp", true},
	{"www.city.kobe.jp", "kobe.jp", true},
	{"kyoto.jp", "kyoto.jp", true},
	{"test.kyoto.jp", "kyoto.jp", true},
	{"ide.kyoto.jp", "ide.kyoto.jp", true},
	{"b.ide.kyoto.jp", "ide.kyoto.jp", true},
	{"a.b.ide.kyoto.jp", "ide.kyoto.jp", true},

	// The .tw rules are:
	//	tw
	//	edu.tw
	//	gov.tw
	//	mil.tw
	//	com.tw
	//	net.tw
	//	org.tw
	//	idv.tw
	//	game.tw
	//	ebiz.tw
	//	club.tw
	//	網路.tw (xn--zf0ao64a.tw)
	//	組織.tw (xn--uc0atv.tw)
	//	商業.tw (xn--czrw28b.tw)
	//	blogspot.tw
	{"tw", "tw", true},
	{"aaa.tw", "tw", true},
	{"www.aaa.tw", "tw", true},
	{"xn--czrw28b.aaa.tw", "tw", true},
	{"edu.tw", "edu.tw", true},
	{"www.edu.tw", "edu.tw", true},
	{"xn--czrw28b.edu.tw", "edu.tw", true},
	{"xn--czrw28b.tw", "xn--czrw28b.tw", true},
	{"www.xn--czrw28b.tw", "xn--czrw28b.tw", true},
	{"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw", true},
	{"xn--kpry57d.tw", "tw", true},

	// The .uk rules are:
	//	uk
	//	ac.uk
	//	co.uk
	//	gov.uk
	//	ltd.uk
	//	me.uk
	//	net.uk
	//	nhs.uk
	//	org.uk
	//	plc.uk
	//	police.uk
	//	*.sch.uk
	//	blogspot.co.uk (in the PRIVATE DOMAIN section).
	{"uk", "uk", true},
	{"aaa.uk", "uk", true},
	{"www.aaa.uk", "uk", true},
	{"mod.uk", "uk", true},
	{"www.mod.uk", "uk", true},
	{"sch.uk", "uk", true},
	{"mod.sch.uk", "mod.sch.uk", true},
	{"www.sch.uk", "www.sch.uk", true},
	{"co.uk", "co.uk", true},
	{"www.co.uk", "co.uk", true},
	{"blogspot.co.uk", "blogspot.co.uk", false}, // PRIVATE DOMAIN.
	{"blogspot.nic.uk", "uk", true},
	{"blogspot.sch.uk", "blogspot.sch.uk", true},

	// The .рф rules are:
	//	рф (xn--p1ai)
	{"xn--p1ai", "xn--p1ai", true},
	{"aaa.xn--p1ai", "xn--p1ai", true},
	{"www.xxx.yyy.xn--p1ai", "xn--p1ai", true},

	// The .bd rules are:
	//	*.bd
	{"bd", "bd", false}, // The catch-all "*" rule is not in the ICANN DOMAIN section. See footnote (†).
	{"www.bd", "www.bd", true},
	{"xxx.www.bd", "www.bd", true},
	{"zzz.bd", "zzz.bd", true},
	{"www.zzz.bd", "zzz.bd", true},
	{"www.xxx.yyy.zzz.bd", "zzz.bd", true},

	// The .ck rules are:
	//	*.ck
	//	!www.ck
	{"ck", "ck", false}, // The catch-all "*" rule is not in the ICANN DOMAIN section. See footnote (†).
	{"www.ck", "ck", true},
	{"xxx.www.ck", "ck", true},
	{"zzz.ck", "zzz.ck", true},
	{"www.zzz.ck", "zzz.ck", true},
	{"www.xxx.yyy.zzz.ck", "zzz.ck", true},

	// The .myjino.ru rules (in the PRIVATE DOMAIN section) are:
	//	myjino.ru
	//	*.hosting.myjino.ru
	//	*.landing.myjino.ru
	//	*.spectrum.myjino.ru
	//	*.vps.myjino.ru
	{"myjino.ru", "myjino.ru", false},
	{"aaa.myjino.ru", "myjino.ru", false},
	{"bbb.ccc.myjino.ru", "myjino.ru", false},
	{"hosting.ddd.myjino.ru", "myjino.ru", false},
	{"landing.myjino.ru", "myjino.ru", false},
	{"www.landing.myjino.ru", "www.landing.myjino.ru", false},
	{"spectrum.vps.myjino.ru", "spectrum.vps.myjino.ru", false},

	// The .uberspace.de rules (in the PRIVATE DOMAIN section) are:
	//	*.uberspace.de
	{"uberspace.de", "de", true}, // "de" is in the ICANN DOMAIN section. See footnote (†).
	{"aaa.uberspace.de", "aaa.uberspace.de", false},
	{"bbb.ccc.uberspace.de", "ccc.uberspace.de", false},

	// There are no .nosuchtld rules.
	{"nosuchtld", "nosuchtld", false},
	{"foo.nosuchtld", "nosuchtld", false},
	{"bar.foo.nosuchtld", "nosuchtld", false},

	// (†) There is some disagreement on how wildcards behave: what should the
	// public suffix of "platform.sh" be when both "*.platform.sh" and "sh" are
	// in the PSL, but "platform.sh" is not? Two possible answers are
	// "platform.sh" and "sh", there are valid arguments for either behavior,
	// and different browsers have implemented different behaviors.
	//
	// This implementation, Go's golang.org/x/net/publicsuffix, returns "sh",
	// the same as a literal interpretation of the "Formal Algorithm" section
	// of https://publicsuffix.org/list/
	//
	// Together, the TestPublicSuffix and TestSlowPublicSuffix tests check that
	// the Go implementation (func PublicSuffix in list.go) and the literal
	// interpretation (func slowPublicSuffix in list_test.go) produce the same
	// (golden) results on every test case in this publicSuffixTestCases slice,
	// including some "platform.sh" style cases.
	//
	// More discussion of "the platform.sh problem" is at:
	//	- https://github.com/publicsuffix/list/issues/694
	//	- https://bugzilla.mozilla.org/show_bug.cgi?id=1124625#c6
	//	- https://wiki.mozilla.org/Public_Suffix_List/platform.sh_Problem
}
|
|
|
|
|
go.net/publicsuffix: tighten the encoding from 8 bytes per node to 4.
On the full list (running gen.go with -subset=false):
Before, there were 6086 nodes (at 8 bytes per node). After,
there were 6086 nodes (at 4 bytes per node) plus 354 children entries
(at 4 bytes per node). The difference is 22928 bytes.
In comparison, the (crushed) text is 21082 bytes, and for the curious,
the longest label is 36 bytes: "xn--correios-e-telecomunicaes-ghc29a".
All 32 bits in the nodes table are used, but there's wiggle room to
accommodate future changes to effective_tld_names.dat:
The largest children index is 353 (in 9 bits, so max is 511).
The largest node type is 2 (in 2 bits, so max is 3).
The largest text offset is 21080 (in 15 bits, so max is 32767).
The largest text length is 36 (in 6 bits, so max is 63).
benchmark old ns/op new ns/op delta
BenchmarkPublicSuffix 19948 19744 -1.02%
R=dr.volker.dobler
CC=golang-dev
https://golang.org/cl/6999045
2012-12-22 05:09:13 +04:00
|
|
|
func BenchmarkPublicSuffix(b *testing.B) {
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
for _, tc := range publicSuffixTestCases {
|
|
|
|
List.PublicSuffix(tc.domain)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-12 08:58:52 +04:00
|
|
|
func TestPublicSuffix(t *testing.T) {
|
|
|
|
for _, tc := range publicSuffixTestCases {
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
gotPS, gotICANN := PublicSuffix(tc.domain)
|
|
|
|
if gotPS != tc.wantPS || gotICANN != tc.wantICANN {
|
|
|
|
t.Errorf("%q: got (%q, %t), want (%q, %t)", tc.domain, gotPS, gotICANN, tc.wantPS, tc.wantICANN)
|
2012-12-12 08:58:52 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestSlowPublicSuffix(t *testing.T) {
|
|
|
|
for _, tc := range publicSuffixTestCases {
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
gotPS, gotICANN := slowPublicSuffix(tc.domain)
|
|
|
|
if gotPS != tc.wantPS || gotICANN != tc.wantICANN {
|
|
|
|
t.Errorf("%q: got (%q, %t), want (%q, %t)", tc.domain, gotPS, gotICANN, tc.wantPS, tc.wantICANN)
|
2012-12-12 08:58:52 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRIVATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
func TestNumICANNRules(t *testing.T) {
|
|
|
|
if numICANNRules <= 0 {
|
|
|
|
t.Fatal("no ICANN rules")
|
|
|
|
}
|
|
|
|
if numICANNRules >= len(rules) {
|
|
|
|
t.Fatal("no Private rules")
|
|
|
|
}
|
|
|
|
// Check the last ICANN and first Private rules. If the underlying public
|
|
|
|
// suffix list changes, we may need to update these hard-coded checks.
|
|
|
|
if got, want := rules[numICANNRules-1], "zuerich"; got != want {
|
|
|
|
t.Errorf("last ICANN rule: got %q, wawnt %q", got, want)
|
|
|
|
}
|
|
|
|
if got, want := rules[numICANNRules], "cc.ua"; got != want {
|
|
|
|
t.Errorf("first Private rule: got %q, wawnt %q", got, want)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// slowPublicSuffixRule is one rule that slowPublicSuffix found to match the
// domain under test.
type slowPublicSuffixRule struct {
	// ruleParts holds the rule split on ".", leftmost label first. The first
	// element may start with '!' (exception rule) or '*' (wildcard rule).
	ruleParts []string
	// icann is reported as the second result of slowPublicSuffix when this
	// rule prevails.
	icann bool
}
|
|
|
|
|
2012-12-12 08:58:52 +04:00
|
|
|
// slowPublicSuffix implements the canonical (but O(number of rules)) public
|
|
|
|
// suffix algorithm described at http://publicsuffix.org/list/.
|
|
|
|
//
|
|
|
|
// 1. Match domain against all rules and take note of the matching ones.
|
|
|
|
// 2. If no rules match, the prevailing rule is "*".
|
|
|
|
// 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
|
|
|
|
// 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
|
|
|
|
// 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
|
|
|
|
// 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots).
|
|
|
|
// 7. The registered or registrable domain is the public suffix plus one additional label.
|
|
|
|
//
|
|
|
|
// This function returns the public suffix, not the registrable domain, and so
|
|
|
|
// it stops after step 6.
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
func slowPublicSuffix(domain string) (string, bool) {
|
2012-12-12 08:58:52 +04:00
|
|
|
match := func(rulePart, domainPart string) bool {
|
|
|
|
switch rulePart[0] {
|
|
|
|
case '*':
|
|
|
|
return true
|
|
|
|
case '!':
|
|
|
|
return rulePart[1:] == domainPart
|
|
|
|
}
|
|
|
|
return rulePart == domainPart
|
|
|
|
}
|
|
|
|
|
|
|
|
domainParts := strings.Split(domain, ".")
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
var matchingRules []slowPublicSuffixRule
|
2012-12-12 08:58:52 +04:00
|
|
|
|
|
|
|
loop:
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
for i, rule := range rules {
|
2012-12-12 08:58:52 +04:00
|
|
|
ruleParts := strings.Split(rule, ".")
|
|
|
|
if len(domainParts) < len(ruleParts) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
for i := range ruleParts {
|
|
|
|
rulePart := ruleParts[len(ruleParts)-1-i]
|
|
|
|
domainPart := domainParts[len(domainParts)-1-i]
|
|
|
|
if !match(rulePart, domainPart) {
|
|
|
|
continue loop
|
|
|
|
}
|
|
|
|
}
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
matchingRules = append(matchingRules, slowPublicSuffixRule{
|
|
|
|
ruleParts: ruleParts,
|
|
|
|
icann: i < numICANNRules,
|
|
|
|
})
|
2012-12-12 08:58:52 +04:00
|
|
|
}
|
|
|
|
if len(matchingRules) == 0 {
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
matchingRules = append(matchingRules, slowPublicSuffixRule{
|
|
|
|
ruleParts: []string{"*"},
|
|
|
|
icann: false,
|
|
|
|
})
|
2012-12-12 08:58:52 +04:00
|
|
|
} else {
|
|
|
|
sort.Sort(byPriority(matchingRules))
|
|
|
|
}
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
|
2012-12-12 08:58:52 +04:00
|
|
|
prevailing := matchingRules[0]
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
if prevailing.ruleParts[0][0] == '!' {
|
|
|
|
prevailing.ruleParts = prevailing.ruleParts[1:]
|
2012-12-12 08:58:52 +04:00
|
|
|
}
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
if prevailing.ruleParts[0][0] == '*' {
|
|
|
|
replaced := domainParts[len(domainParts)-len(prevailing.ruleParts)]
|
|
|
|
prevailing.ruleParts = append([]string{replaced}, prevailing.ruleParts[1:]...)
|
2012-12-12 08:58:52 +04:00
|
|
|
}
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
return strings.Join(prevailing.ruleParts, "."), prevailing.icann
|
2012-12-12 08:58:52 +04:00
|
|
|
}
|
|
|
|
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRIVATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
type byPriority []slowPublicSuffixRule
|
2012-12-12 08:58:52 +04:00
|
|
|
|
|
|
|
func (b byPriority) Len() int { return len(b) }
|
|
|
|
func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
|
|
|
func (b byPriority) Less(i, j int) bool {
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
if b[i].ruleParts[0][0] == '!' {
|
2012-12-12 08:58:52 +04:00
|
|
|
return true
|
|
|
|
}
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
if b[j].ruleParts[0][0] == '!' {
|
2012-12-12 08:58:52 +04:00
|
|
|
return false
|
|
|
|
}
|
publicsuffix: hold icann-ness until wildcards fully match
Consider the "uberspace.de" domain. The relevant rules in the Public
Suffix List are "*.uberspace.de" (in the PRVIATE DOMAIN section) and
"de" (in the ICANN DOMAIN section).
The PublicSuffix function returns a string and a bool. Both before and
after this commit, the string returned is "de", which is correct
according to a literal interpretation of the formal algorithm. But the
bool returned, icann-ness, is false before and true after. The correct
answer is true, since the matching rule, "de", is in the ICANN DOMAIN
section of the PSL.
Before this commit, the two-stage match for "*.uberspace" set the icann
bit when matching the back part, "uberspace", before checking that the
front part, the "*" wildcard, also matched.
A couple more examples, for the "bd" and "ck" domains. The relevant
rules are "*.bd" and "*.ck", with no non-wildcard "bd" or "ck" rule.
Before this commit, the PublicSuffix function would return (icann ==
true), when the correct result is (icann == false), the same as for
"nosuchtld".
Benchmarks get worse, but correctness trumps performance. Future commits
may be able to recover some of the loss. In any case, in absolute terms,
15µs is still pretty fast.
name old time/op new time/op delta
PublicSuffix-56 11.0µs ± 0% 14.8µs ± 2% +34.57% (p=0.000 n=9+10)
Change-Id: I85ca6ab57a31308af5a29c46313197897eab5ab6
Reviewed-on: https://go-review.googlesource.com/c/154977
Reviewed-by: Nigel Tao <nigeltao@golang.org>
2018-12-19 09:27:23 +03:00
|
|
|
return len(b[i].ruleParts) > len(b[j].ruleParts)
|
2012-12-12 08:58:52 +04:00
|
|
|
}
|
|
|
|
|
2013-01-22 14:23:30 +04:00
|
|
|
// eTLDPlusOneTestCases lists domains paired with the string that
// TestEffectiveTLDPlusOne expects EffectiveTLDPlusOne to return for them; an
// empty want means no eTLD+1 string is expected. The cases come from
// https://github.com/publicsuffix/list/blob/master/tests/test_psl.txt
var eTLDPlusOneTestCases = []struct {
	domain, want string
}{
	// Empty input.
	{domain: "", want: ""},

	// Unlisted TLD.
	{domain: "example", want: ""},
	{domain: "example.example", want: "example.example"},
	{domain: "b.example.example", want: "example.example"},
	{domain: "a.b.example.example", want: "example.example"},

	// TLD with only 1 rule.
	{domain: "biz", want: ""},
	{domain: "domain.biz", want: "domain.biz"},
	{domain: "b.domain.biz", want: "domain.biz"},
	{domain: "a.b.domain.biz", want: "domain.biz"},

	// TLD with some 2-level rules.
	{domain: "com", want: ""},
	{domain: "example.com", want: "example.com"},
	{domain: "b.example.com", want: "example.com"},
	{domain: "a.b.example.com", want: "example.com"},
	{domain: "uk.com", want: ""},
	{domain: "example.uk.com", want: "example.uk.com"},
	{domain: "b.example.uk.com", want: "example.uk.com"},
	{domain: "a.b.example.uk.com", want: "example.uk.com"},
	{domain: "test.ac", want: "test.ac"},

	// TLD with only 1 (wildcard) rule.
	{domain: "mm", want: ""},
	{domain: "c.mm", want: ""},
	{domain: "b.c.mm", want: "b.c.mm"},
	{domain: "a.b.c.mm", want: "b.c.mm"},

	// More complex TLD.
	{domain: "jp", want: ""},
	{domain: "test.jp", want: "test.jp"},
	{domain: "www.test.jp", want: "test.jp"},
	{domain: "ac.jp", want: ""},
	{domain: "test.ac.jp", want: "test.ac.jp"},
	{domain: "www.test.ac.jp", want: "test.ac.jp"},
	{domain: "kyoto.jp", want: ""},
	{domain: "test.kyoto.jp", want: "test.kyoto.jp"},
	{domain: "ide.kyoto.jp", want: ""},
	{domain: "b.ide.kyoto.jp", want: "b.ide.kyoto.jp"},
	{domain: "a.b.ide.kyoto.jp", want: "b.ide.kyoto.jp"},
	{domain: "c.kobe.jp", want: ""},
	{domain: "b.c.kobe.jp", want: "b.c.kobe.jp"},
	{domain: "a.b.c.kobe.jp", want: "b.c.kobe.jp"},
	{domain: "city.kobe.jp", want: "city.kobe.jp"},
	{domain: "www.city.kobe.jp", want: "city.kobe.jp"},

	// TLD with a wildcard rule and exceptions.
	{domain: "ck", want: ""},
	{domain: "test.ck", want: ""},
	{domain: "b.test.ck", want: "b.test.ck"},
	{domain: "a.b.test.ck", want: "b.test.ck"},
	{domain: "www.ck", want: "www.ck"},
	{domain: "www.www.ck", want: "www.ck"},

	// US K12.
	{domain: "us", want: ""},
	{domain: "test.us", want: "test.us"},
	{domain: "www.test.us", want: "test.us"},
	{domain: "ak.us", want: ""},
	{domain: "test.ak.us", want: "test.ak.us"},
	{domain: "www.test.ak.us", want: "test.ak.us"},
	{domain: "k12.ak.us", want: ""},
	{domain: "test.k12.ak.us", want: "test.k12.ak.us"},
	{domain: "www.test.k12.ak.us", want: "test.k12.ak.us"},

	// Punycoded IDN labels.
	{domain: "xn--85x722f.com.cn", want: "xn--85x722f.com.cn"},
	{domain: "xn--85x722f.xn--55qx5d.cn", want: "xn--85x722f.xn--55qx5d.cn"},
	{domain: "www.xn--85x722f.xn--55qx5d.cn", want: "xn--85x722f.xn--55qx5d.cn"},
	{domain: "shishi.xn--55qx5d.cn", want: "shishi.xn--55qx5d.cn"},
	{domain: "xn--55qx5d.cn", want: ""},
	{domain: "xn--85x722f.xn--fiqs8s", want: "xn--85x722f.xn--fiqs8s"},
	{domain: "www.xn--85x722f.xn--fiqs8s", want: "xn--85x722f.xn--fiqs8s"},
	{domain: "shishi.xn--fiqs8s", want: "shishi.xn--fiqs8s"},
	{domain: "xn--fiqs8s", want: ""},

	// Invalid input: leading, trailing or doubled dots.
	{domain: ".", want: ""},
	{domain: "de.", want: ""},
	{domain: ".de", want: ""},
	{domain: ".com.au", want: ""},
	{domain: "com.au.", want: ""},
	{domain: "com..au", want: ""},
}
|
|
|
|
|
|
|
|
func TestEffectiveTLDPlusOne(t *testing.T) {
|
|
|
|
for _, tc := range eTLDPlusOneTestCases {
|
|
|
|
got, _ := EffectiveTLDPlusOne(tc.domain)
|
|
|
|
if got != tc.want {
|
|
|
|
t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|