Bug 1398567 - Invert URIFixup default behavior to search unless the string looks like a URI. r=Gijs

With recent fixes that can properly identify whitelisted domains, whitelisted
domain suffixed, valid known public suffixes, and forcing to visit URI-like
strings that end with a slash, it's time to re-evaluate the URIFixup behavior.
Until now URIFixup considered everything a URI unless it had specific search
characteristics, this patch inverts that behavior.
The scope of this change is to improve the urlbar behavior as the main Search
Access Point, since that's the direction we're moving towards.

This lands with a temporary hidden feature pref browser.fixup.defaultToSearch,
that will be removed once the feature has been released.

Differential Revision: https://phabricator.services.mozilla.com/D76852
This commit is contained in:
Marco Bonardo 2020-05-27 16:55:14 +00:00
Родитель bec092afba
Коммит e8fd1b7c50
8 изменённых файлов: 252 добавлений и 58 удалений

Просмотреть файл

@ -115,8 +115,8 @@ function test() {
testVal("<foo.bar@:ba:z@>mozilla.org"); testVal("<foo.bar@:ba:z@>mozilla.org");
testVal("<foo.:bar:@baz@>mozilla.org"); testVal("<foo.:bar:@baz@>mozilla.org");
testVal( testVal(
"foopy:\\blah@somewhere.com//whatever", "foopy:\\blah@somewhere.com//whatever/",
"foopy</blah@somewhere.com//whatever>" "foopy</blah@somewhere.com//whatever/>"
); );
testVal("<https://sub.>mozilla.org<:666/file.ext>"); testVal("<https://sub.>mozilla.org<:666/file.ext>");
@ -151,10 +151,10 @@ function test() {
testVal(IP + "</file.ext>"); testVal(IP + "</file.ext>");
testVal(IP + "<:666/file.ext>"); testVal(IP + "<:666/file.ext>");
testVal("<https://>" + IP); testVal("<https://>" + IP);
testVal("<https://>" + IP + "</file.ext>"); testVal(`<https://>${IP}</file.ext>`);
testVal("<https://user:pass@>" + IP + "<:666/file.ext>"); testVal(`<https://user:pass@>${IP}<:666/file.ext>`);
testVal("<user:pass@>" + IP + "<:666/file.ext>"); testVal(`<user:pass@>${IP}<:666/file.ext>`);
testVal("user:\\pass@" + IP, "user</pass@" + IP + ">"); testVal(`user:\\pass@${IP}/`, `user</pass@${IP}/>`);
}); });
testVal("mailto:admin@mozilla.org"); testVal("mailto:admin@mozilla.org");

Просмотреть файл

@ -67,7 +67,12 @@ add_task(async function() {
testVal("http:// invalid url"); testVal("http:// invalid url");
testVal("http://someotherhostwithnodots"); testVal("http://someotherhostwithnodots");
testVal("http://localhost/ foo bar baz"); if (Services.prefs.getBoolPref("browser.fixup.defaultToSearch", true)) {
// This host is whitelisted, it can be trimmed.
testVal("http://localhost/ foo bar baz", "localhost/ foo bar baz");
} else {
testVal("http://localhost/ foo bar baz");
}
// This is not trimmed because it's not in the domain whitelist. // This is not trimmed because it's not in the domain whitelist.
testVal( testVal(
"http://localhost.localdomain/ foo bar baz", "http://localhost.localdomain/ foo bar baz",

Просмотреть файл

@ -44,5 +44,7 @@ add_task(async function test() {
await testVal(" test", true); await testVal(" test", true);
await testVal("test.test", true); await testVal("test.test", true);
await testVal("test test", false); await testVal("test test", false);
await testVal("test/test", false); // This may change in bug 1398567. // This is not a single word host, though it contains one. At a certain point
// we may evaluate to increase coverage of the feature to also ask for this.
await testVal("test/test", false);
}); });

Просмотреть файл

@ -798,18 +798,20 @@ add_task(async function prohibit_suggestions() {
], ],
}); });
context = createContext("test/test", { isPrivate: false }); if (!Services.prefs.getBoolPref("browser.fixup.defaultToSearch", true)) {
await check_results({ context = createContext("test/test", { isPrivate: false });
context, await check_results({
matches: [ context,
makeVisitResult(context, { matches: [
uri: "http://test/test", makeVisitResult(context, {
title: "http://test/test", uri: "http://test/test",
iconUri: "page-icon:http://test/", title: "http://test/test",
heuristic: true, iconUri: "page-icon:http://test/",
}), heuristic: true,
], }),
}); ],
});
}
context = createContext("data:text/plain,Content", { isPrivate: false }); context = createContext("data:text/plain,Content", { isPrivate: false });
await check_results({ await check_results({

Просмотреть файл

@ -12,9 +12,9 @@
* http://www.faqs.org/rfcs/rfc2396.html * http://www.faqs.org/rfcs/rfc2396.html
*/ */
// getFixupURIInfo has a complex logic, that likely could be simplified, but // TODO (Bug 1641220) getFixupURIInfo has a complex logic, that likely could be
// the risk of regressions is high, thus that should be done with care. // simplified, but the risk of regressing its behavior is high.
/* eslint complexity: ["error", 40] */ /* eslint complexity: ["error", 43] */
var EXPORTED_SYMBOLS = ["URIFixup", "URIFixupInfo"]; var EXPORTED_SYMBOLS = ["URIFixup", "URIFixupInfo"];
@ -58,13 +58,22 @@ XPCOMUtils.defineLazyPreferenceGetter(
"keyword.enabled", "keyword.enabled",
true true
); );
XPCOMUtils.defineLazyPreferenceGetter( XPCOMUtils.defineLazyPreferenceGetter(
this, this,
"alternateEnabled", "alternateEnabled",
"browser.fixup.alternate.enabled", "browser.fixup.alternate.enabled",
true true
); );
// This is a feature preference that inverts the keyword fixup behavior to
// search by default, unless the string has URI characteristics.
// When set to false, we'll consider most strings URIs, unless they have search
// characteristics.
XPCOMUtils.defineLazyPreferenceGetter(
this,
"defaultToSearch",
"browser.fixup.defaultToSearch",
true
);
const { const {
FIXUP_FLAG_NONE, FIXUP_FLAG_NONE,
@ -88,6 +97,13 @@ XPCOMUtils.defineLazyGetter(
// Regex used to look for ascii alphabetical characters. // Regex used to look for ascii alphabetical characters.
XPCOMUtils.defineLazyGetter(this, "asciiAlphaRegex", () => /[a-z]/i); XPCOMUtils.defineLazyGetter(this, "asciiAlphaRegex", () => /[a-z]/i);
// Regex used to identify specific URI characteristics to disallow searching.
XPCOMUtils.defineLazyGetter(
this,
"uriLikeRegex",
() => /(:\d{1,5}([?#/]|$)|\/.*[?#])/
);
// Regex used to identify numbers. // Regex used to identify numbers.
XPCOMUtils.defineLazyGetter(this, "numberRegex", () => /^[0-9]+(\.[0-9]+)?$/); XPCOMUtils.defineLazyGetter(this, "numberRegex", () => /^[0-9]+(\.[0-9]+)?$/);
@ -397,11 +413,24 @@ URIFixup.prototype = {
} }
} }
// Memoize the public suffix check, since it may be expensive and should
// only run once when necessary.
let suffixInfo;
function checkSuffix(info) {
if (!suffixInfo) {
suffixInfo = checkAndFixPublicSuffix(info);
}
return suffixInfo;
}
// See if it is a keyword and whether a keyword must be fixed up. // See if it is a keyword and whether a keyword must be fixed up.
if ( if (
keywordEnabled && keywordEnabled &&
fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP && fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP &&
!inputHadDuffProtocol && !inputHadDuffProtocol &&
// When defaultToSearch is false, we are conservative about always going
// through keywordURIFixup.
(!defaultToSearch || !checkSuffix(info).suffix) &&
keywordURIFixup(uriString, info, isPrivateContext, postData) keywordURIFixup(uriString, info, isPrivateContext, postData)
) { ) {
return info; return info;
@ -409,13 +438,13 @@ URIFixup.prototype = {
if ( if (
info.fixedURI && info.fixedURI &&
(!info.fixupChangedProtocol || checkAndFixPublicSuffix(info)) (!info.fixupChangedProtocol || !checkSuffix(info).hasUnknownSuffix)
) { ) {
return info; return info;
} }
// If we still haven't been able to construct a valid URI, try to force a // If we still haven't been able to construct a valid URI, try to force a
// keyword match. This catches search strings with '.' or ':' in them. // keyword match.
if (keywordEnabled && fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP) { if (keywordEnabled && fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP) {
tryKeywordFixupForURIInfo( tryKeywordFixupForURIInfo(
info.originalInput, info.originalInput,
@ -640,38 +669,49 @@ function isDomainWhitelisted(asciiHost) {
* If the suffix is unknown due to a typo this will try to fix it up. * If the suffix is unknown due to a typo this will try to fix it up.
* @param {URIFixupInfo} info about the uri to check. * @param {URIFixupInfo} info about the uri to check.
* @note this may modify the public suffix of info.fixedURI. * @note this may modify the public suffix of info.fixedURI.
* @returns {boolean} false if the public suffix is unknown, true in any other * @returns {object} result The lookup result.
* case, included if it's not present. * @returns {string} result.suffix The public suffix if one can be identified.
* @returns {boolean} result.hasUnknownSuffix True when the suffix is not in the
* Public Suffix List and it's not whitelisted. False in the other cases.
*/ */
function checkAndFixPublicSuffix(info) { function checkAndFixPublicSuffix(info) {
let uri = info.fixedURI; let uri = info.fixedURI;
let asciiHost = uri.asciiHost; let asciiHost = uri?.asciiHost;
if ( if (
!asciiHost || !asciiHost ||
// Quick bailouts for most common cases, according to Alexa Top 1 million.
asciiHost.endsWith(".com") ||
asciiHost.endsWith(".net") ||
asciiHost.endsWith(".org") ||
asciiHost.endsWith(".ru") ||
asciiHost.endsWith(".de") ||
!asciiHost.includes(".") || !asciiHost.includes(".") ||
asciiHost.endsWith(".") || asciiHost.endsWith(".") ||
isDomainWhitelisted(asciiHost) isDomainWhitelisted(asciiHost)
) { ) {
return true; return { suffix: "", hasUnknownSuffix: false };
}
// Quick bailouts for most common cases, according to Alexa Top 1 million.
if (
asciiHost.endsWith(".com") ||
asciiHost.endsWith(".net") ||
asciiHost.endsWith(".org") ||
asciiHost.endsWith(".ru") ||
asciiHost.endsWith(".de")
) {
return {
suffix: asciiHost.substring(asciiHost.lastIndexOf(".") + 1),
hasUnknownSuffix: false,
};
} }
try { try {
if (Services.eTLD.getKnownPublicSuffix(uri)) { let suffix = Services.eTLD.getKnownPublicSuffix(uri);
return true; if (suffix) {
return { suffix, hasUnknownSuffix: false };
} }
} catch (ex) { } catch (ex) {
return true; return { suffix: "", hasUnknownSuffix: false };
} }
// Suffix is unknown, try to fix most common 3 chars TLDs typos. // Suffix is unknown, try to fix most common 3 chars TLDs typos.
// .com is the most commonly mistyped tld, so it has more cases. // .com is the most commonly mistyped tld, so it has more cases.
let suffix = Services.eTLD.getPublicSuffix(uri); let suffix = Services.eTLD.getPublicSuffix(uri);
if (!suffix || numberRegex.test(suffix)) { if (!suffix || numberRegex.test(suffix)) {
return true; return { suffix: "", hasUnknownSuffix: false };
} }
for (let [typo, fixed] of [ for (let [typo, fixed] of [
["ocm", "com"], ["ocm", "com"],
@ -700,10 +740,10 @@ function checkAndFixPublicSuffix(info) {
if (updatePreferredURI) { if (updatePreferredURI) {
info.preferredURI = info.fixedURI; info.preferredURI = info.fixedURI;
} }
return true; return { suffix: fixed, hasUnknownSuffix: false };
} }
} }
return false; return { suffix: "", hasUnknownSuffix: true };
} }
function tryKeywordFixupForURIInfo( function tryKeywordFixupForURIInfo(
@ -864,6 +904,91 @@ function fixupURIProtocol(uriString) {
* @returns {boolean} Whether the keyword fixup was succesful. * @returns {boolean} Whether the keyword fixup was succesful.
*/ */
function keywordURIFixup(uriString, fixupInfo, isPrivateContext, postData) { function keywordURIFixup(uriString, fixupInfo, isPrivateContext, postData) {
if (!defaultToSearch) {
return keywordURIFixupLegacy(
uriString,
fixupInfo,
isPrivateContext,
postData
);
}
// Here is a few examples of strings that should be searched:
// "what is mozilla"
// "what is mozilla?"
// "docshell site:mozilla.org" - has a space in the origin part
// "?site:mozilla.org - anything that begins with a question mark
// "mozilla'.org" - Things that have a quote before the first dot/colon
// "mozilla/test" - non whiteliste host
// ".mozilla", "mozilla." - starts or ends with a dot ()
// These other strings should not be searched, because they could be URIs:
// "www.blah.com" - Domain with a known or whitelisted suffix
// "whitelisted" - Whitelisted domain
// "nonQualifiedHost:8888?something" - has a port
// "user@nonQualifiedHost"
// "blah.com."
// We do keyword lookups if the input starts with a question mark.
if (uriString.startsWith("?")) {
return tryKeywordFixupForURIInfo(
fixupInfo.originalInput,
fixupInfo,
isPrivateContext,
postData
);
}
// Check for IPs.
if (IPv4LikeRegex.test(uriString) || IPv6LikeRegex.test(uriString)) {
return false;
}
// Avoid lookup if we can identify a host and it's whitelisted, or ends with
// a dot and has some path.
// Note that if dnsFirstForSingleWords is true isDomainWhitelisted will always
// return true, so we can avoid checking dnsFirstForSingleWords after this.
let asciiHost = fixupInfo.fixedURI?.asciiHost;
if (
asciiHost &&
(isDomainWhitelisted(asciiHost) ||
(asciiHost.endsWith(".") &&
asciiHost.indexOf(".") != asciiHost.length - 1))
) {
return false;
}
// Even if the host is invalid, avoid lookup if the string has uri-like
// characteristics.
// Also avoid lookup if there's a valid userPass. We only check for spaces,
// the URI parser has encoded any disallowed chars at this point, but if the
// user typed spaces before the first @, it's unlikely a valid userPass, plus
// some urlbar features use the @ char and we don't want to break them.
let userPass = fixupInfo.fixedURI?.userPass;
if (
!uriLikeRegex.test(uriString) &&
!(userPass && /^[^\s@]+@/.test(uriString))
) {
return tryKeywordFixupForURIInfo(
fixupInfo.originalInput,
fixupInfo,
isPrivateContext,
postData
);
}
return false;
}
/**
* This is the old version of keywordURIFixup, used when
* browser.fixup.defaultToSearch is false
*/
function keywordURIFixupLegacy(
uriString,
fixupInfo,
isPrivateContext,
postData
) {
// These are keyword formatted strings // These are keyword formatted strings
// "what is mozilla" // "what is mozilla"
// "what is mozilla?" // "what is mozilla?"
@ -902,7 +1027,7 @@ function keywordURIFixup(uriString, fixupInfo, isPrivateContext, postData) {
); );
} }
// Avoid lookup if we can identify an host and it's whitelisted. // Avoid lookup if we can identify a host and it's whitelisted.
// Note that if dnsFirstForSingleWords is true isDomainWhitelisted will always // Note that if dnsFirstForSingleWords is true isDomainWhitelisted will always
// return true, so we can avoid checking dnsFirstForSingleWords after this. // return true, so we can avoid checking dnsFirstForSingleWords after this.
let asciiHost = fixupInfo.fixedURI?.asciiHost; let asciiHost = fixupInfo.fixedURI?.asciiHost;
@ -949,7 +1074,7 @@ function keywordURIFixup(uriString, fixupInfo, isPrivateContext, postData) {
} }
// Keyword lookup if there is no dot and the string doesn't include a slash, // Keyword lookup if there is no dot and the string doesn't include a slash,
// or any alphabetical character or an host. // or any alphabetical character or a host.
if ( if (
firstDotIndex == -1 && firstDotIndex == -1 &&
(!uriString.includes("/") || !hasAsciiAlpha || !asciiHost) (!uriString.includes("/") || !hasAsciiAlpha || !asciiHost)

Просмотреть файл

@ -55,6 +55,10 @@ var data = [
var len = data.length; var len = data.length;
add_task(async function setup() {
await Services.search.init();
});
// Make sure we fix what needs fixing when there is no pref set. // Make sure we fix what needs fixing when there is no pref set.
add_task(function test_unset_pref_fixes_typos() { add_task(function test_unset_pref_fixes_typos() {
Services.prefs.clearUserPref(pref); Services.prefs.clearUserPref(pref);

Просмотреть файл

@ -33,6 +33,10 @@ var flagInputs = [
Services.uriFixup.FIXUP_FLAG_PRIVATE_CONTEXT, Services.uriFixup.FIXUP_FLAG_PRIVATE_CONTEXT,
]; ];
const kDefaultToSearch = Services.prefs.getBoolPref(
"browser.fixup.defaultToSearch",
true
);
/* /*
The following properties are supported for these test cases: The following properties are supported for these test cases:
{ {
@ -248,9 +252,9 @@ var testcases = [
affectedByDNSForSingleWordHosts: true, affectedByDNSForSingleWordHosts: true,
}, },
{ {
input: "host/foo.txt", input: "whitelisted/foo.txt",
fixedURI: "http://host/foo.txt", fixedURI: "http://whitelisted/foo.txt",
alternateURI: "http://www.host.com/foo.txt", alternateURI: "http://www.whitelisted.com/foo.txt",
protocolChange: true, protocolChange: true,
}, },
{ {
@ -512,11 +516,12 @@ var testcases = [
alternateURI: "http://www.'.com/?", alternateURI: "http://www.'.com/?",
keywordLookup: true, keywordLookup: true,
protocolChange: true, protocolChange: true,
affectedByDNSForSingleWordHosts: kDefaultToSearch,
}, },
{ {
input: "a?.com", input: "whitelisted?.com",
fixedURI: "http://a/?.com", fixedURI: "http://whitelisted/?.com",
alternateURI: "http://www.a.com/?.com", alternateURI: "http://www.whitelisted.com/?.com",
protocolChange: true, protocolChange: true,
}, },
{ {
@ -554,12 +559,16 @@ var testcases = [
fixedURI: "http://mozilla5/2", fixedURI: "http://mozilla5/2",
alternateURI: "http://www.mozilla5.com/2", alternateURI: "http://www.mozilla5.com/2",
protocolChange: true, protocolChange: true,
keywordLookup: kDefaultToSearch,
affectedByDNSForSingleWordHosts: kDefaultToSearch,
}, },
{ {
input: "mozilla/foo", input: "mozilla/foo",
fixedURI: "http://mozilla/foo", fixedURI: "http://mozilla/foo",
alternateURI: "http://www.mozilla.com/foo", alternateURI: "http://www.mozilla.com/foo",
protocolChange: true, protocolChange: true,
keywordLookup: kDefaultToSearch,
affectedByDNSForSingleWordHosts: kDefaultToSearch,
}, },
{ {
input: "mozilla\\", input: "mozilla\\",
@ -586,6 +595,28 @@ var testcases = [
fixedURI: "http://plonk:8080/", fixedURI: "http://plonk:8080/",
protocolChange: true, protocolChange: true,
}, },
{
input: "plonk:8080?test",
fixedURI: "http://plonk:8080/?test",
protocolChange: true,
},
{
input: "plonk:8080#test",
fixedURI: "http://plonk:8080/#test",
protocolChange: true,
},
{
input: "plonk/ #",
fixedURI: "http://plonk/%20#",
alternateURI: "http://www.plonk.com/%20#",
protocolChange: true,
keywordLookup: !kDefaultToSearch,
},
{
input: "blah.com.",
fixedURI: "http://blah.com./",
protocolChange: true,
},
{ {
input: input:
"\u10E0\u10D4\u10D2\u10D8\u10E1\u10E2\u10E0\u10D0\u10EA\u10D8\u10D0.\u10D2\u10D4", "\u10E0\u10D4\u10D2\u10D8\u10E1\u10E2\u10E0\u10D0\u10EA\u10D8\u10D0.\u10D2\u10D4",
@ -633,6 +664,16 @@ if (AppConstants.platform == "win") {
alternateURI: "http://www.mozilla.com/", alternateURI: "http://www.mozilla.com/",
protocolChange: true, protocolChange: true,
}); });
if (kDefaultToSearch) {
testcases.push({
input: "/a",
fixedURI: "http://a/",
alternateURI: "http://www.a.com/",
keywordLookup: true,
protocolChange: true,
affectedByDNSForSingleWordHosts: true,
});
}
} else { } else {
testcases.push({ testcases.push({
input: "/some/file.txt", input: "/some/file.txt",
@ -644,6 +685,11 @@ if (AppConstants.platform == "win") {
fixedURI: "file:////mozilla", fixedURI: "file:////mozilla",
protocolChange: true, protocolChange: true,
}); });
testcases.push({
input: "/a",
fixedURI: "file:///a",
protocolChange: true,
});
} }
function sanitize(input) { function sanitize(input) {

Просмотреть файл

@ -182,14 +182,24 @@ add_task(async function() {
matches: [makeSearchMatch("firefox", { heuristic: true })], matches: [makeSearchMatch("firefox", { heuristic: true })],
}); });
info("url with non-whitelisted host"); info("string with non-whitelisted host");
await check_autocomplete({ if (Services.prefs.getBoolPref("browser.fixup.defaultToSearch", true)) {
search: "firefox/get", await check_autocomplete({
searchParam: "enable-actions", search: "firefox/get",
matches: [ searchParam: "enable-actions",
makeVisitMatch("firefox/get", "http://firefox/get", { heuristic: true }), matches: [makeSearchMatch("firefox/get", { heuristic: true })],
], });
}); } else {
await check_autocomplete({
search: "firefox/get",
searchParam: "enable-actions",
matches: [
makeVisitMatch("firefox/get", "http://firefox/get", {
heuristic: true,
}),
],
});
}
Services.prefs.setBoolPref("browser.fixup.domainwhitelist.firefox", true); Services.prefs.setBoolPref("browser.fixup.domainwhitelist.firefox", true);
registerCleanupFunction(() => { registerCleanupFunction(() => {