Bug 1583616 - Some numbers are searched and not autofilled. r=Standard8

The tokenizer currently thinks large numbers are "broken" IPs, thus it says they
can't be an origin. Unfortunately we use that same code path to identify possible
origin prefixes, and origins can start with a number. Thus we end up searching
rather than autofilling the origin.

For now, fix the heuristic for IPs; in the future we may evaluate splitting these
code paths.

Differential Revision: https://phabricator.services.mozilla.com/D48187

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Marco Bonardo 2019-10-07 14:16:02 +00:00
Родитель cd434a9bf5
Коммит 45b356558e
3 изменённых файлов: 37 добавлений и 2 удалений

Просмотреть файл

@ -32,7 +32,8 @@ var UrlbarTokenizer = {
REGEXP_LIKE_PROTOCOL: /^[A-Z+.-]+:\/*(?!\/)/i, REGEXP_LIKE_PROTOCOL: /^[A-Z+.-]+:\/*(?!\/)/i,
REGEXP_USERINFO_INVALID_CHARS: /[^\w.~%!$&'()*+,;=:-]/, REGEXP_USERINFO_INVALID_CHARS: /[^\w.~%!$&'()*+,;=:-]/,
REGEXP_HOSTPORT_INVALID_CHARS: /[^\[\]A-Z0-9.:-]/i, REGEXP_HOSTPORT_INVALID_CHARS: /[^\[\]A-Z0-9.:-]/i,
REGEXP_HOSTPORT_IP_LIKE: /^[a-f0-9\.\[\]:]+$/i, REGEXP_SINGLE_WORD_HOST: /^[^.:]$/i,
REGEXP_HOSTPORT_IP_LIKE: /^(?=(.*[.:].*){2})[a-f0-9\.\[\]:]+$/i,
// This accepts partial IPv4. // This accepts partial IPv4.
REGEXP_HOSTPORT_INVALID_IP: /\.{2,}|\d{5,}|\d{4,}(?![:\]])|^\.|^(\d+\.){4,}\d+$|^\d{4,}$/, REGEXP_HOSTPORT_INVALID_IP: /\.{2,}|\d{5,}|\d{4,}(?![:\]])|^\.|^(\d+\.){4,}\d+$|^\d{4,}$/,
// This only accepts complete IPv4. // This only accepts complete IPv4.
@ -182,7 +183,8 @@ var UrlbarTokenizer = {
!this.REGEXP_LIKE_PROTOCOL.test(hostPort) && !this.REGEXP_LIKE_PROTOCOL.test(hostPort) &&
!this.REGEXP_USERINFO_INVALID_CHARS.test(userinfo) && !this.REGEXP_USERINFO_INVALID_CHARS.test(userinfo) &&
!this.REGEXP_HOSTPORT_INVALID_CHARS.test(hostPort) && !this.REGEXP_HOSTPORT_INVALID_CHARS.test(hostPort) &&
(!this.REGEXP_HOSTPORT_IP_LIKE.test(hostPort) || (this.REGEXP_SINGLE_WORD_HOST.test(hostPort) ||
!this.REGEXP_HOSTPORT_IP_LIKE.test(hostPort) ||
!this.REGEXP_HOSTPORT_INVALID_IP.test(hostPort)) !this.REGEXP_HOSTPORT_INVALID_IP.test(hostPort))
); );
}, },

Просмотреть файл

@ -375,6 +375,18 @@ add_task(async function test_tokenizer() {
{ value: "eXaMpLe", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN }, { value: "eXaMpLe", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
], ],
}, },
// This is not strictly correct: an origin cannot be completely numeric.
// However, we use this check to decide whether we should match against
// origins, i.e. whether an origin could start with this string.
// In the future we may evaluate reporting this as TEXT and instead
// introduce a "looksLikeStartOfOrigin".
{
desc: "plain number",
searchString: "1001",
expectedTokens: [
{ value: "1001", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
],
},
]; ];
for (let queryContext of testContexts) { for (let queryContext of testContexts) {

Просмотреть файл

@ -234,6 +234,27 @@ add_task(async function test_ip() {
} }
}); });
// A host whose name begins with a large number must still be autofilled.
add_task(async function large_number_host() {
  // Seed history with a single visit to an origin starting with digits.
  const visit = { uri: "http://12345example.it:8888/" };
  await PlacesTestUtils.addVisits([visit]);

  // The only expected result is the autofilled heuristic match for the origin.
  const expectedMatch = {
    value: "12345example.it:8888/",
    comment: "12345example.it:8888",
    style: ["autofill", "heuristic"],
  };

  // Searching for the leading digits should complete to the full origin.
  await check_autocomplete({
    search: "1234",
    completed: "http://12345example.it:8888/",
    matches: [expectedMatch],
  });

  await cleanup();
});
// When determining which origins should be autofilled, all the origins sharing // When determining which origins should be autofilled, all the origins sharing
// a host should be added together to get their combined frecency -- i.e., // a host should be added together to get their combined frecency -- i.e.,
// prefixes should be collapsed. And then from that list, the origin with the // prefixes should be collapsed. And then from that list, the origin with the