Bug 1672533 - Dedupe SERPs in history whose query parameters are a subset of a generated search URL. r=adw

Differential Revision: https://phabricator.services.mozilla.com/D94396
This commit is contained in:
Harry Twyford 2020-10-23 16:01:42 +00:00
Родитель b2dda3f08d
Коммит 43ebd9bbc5
5 изменённых файлов: 134 добавлений и 60 удалений

Просмотреть файл

@ -17,6 +17,7 @@ XPCOMUtils.defineLazyModuleGetters(this, {
Services: "resource://gre/modules/Services.jsm",
UrlbarPrefs: "resource:///modules/UrlbarPrefs.jsm",
UrlbarMuxer: "resource:///modules/UrlbarUtils.jsm",
UrlbarSearchUtils: "resource:///modules/UrlbarSearchUtils.jsm",
UrlbarUtils: "resource:///modules/UrlbarUtils.jsm",
});
@ -384,7 +385,9 @@ class MuxerUnifiedComplete extends UrlbarMuxer {
submission.engine,
resultQuery
);
if (this._serpURLsHaveSameParams(newSerpURL, result.payload.url)) {
if (
UrlbarSearchUtils.serpsAreEquivalent(result.payload.url, newSerpURL)
) {
return false;
}
}
@ -504,40 +507,6 @@ class MuxerUnifiedComplete extends UrlbarMuxer {
state.canAddTabToSearch = false;
}
}
/**
* This is a helper for determining whether two SERP URLs are the same for the
* purpose of deduping them. This method checks only URL params, not domains.
*
* @param {string} url1
* The first URL.
* @param {string} url2
* The second URL.
* @returns {boolean}
* True if the two URLs have the same URL params for the purpose of deduping
* them.
*/
_serpURLsHaveSameParams(url1, url2) {
let params1 = new URL(url1).searchParams;
let params2 = new URL(url2).searchParams;
// Currently we are conservative, and the two URLs must have exactly the
// same params except for "client" for us to consider them the same.
for (let params of [params1, params2]) {
params.delete("client");
}
// Check that each remaining url1 param is in url2, and vice versa.
for (let [p1, p2] of [
[params1, params2],
[params2, params1],
]) {
for (let [key, value] of p1) {
if (!p2.getAll(key).includes(value)) {
return false;
}
}
}
return true;
}
}
var UrlbarMuxerUnifiedComplete = new MuxerUnifiedComplete();

Просмотреть файл

@ -161,6 +161,43 @@ class SearchUtils {
}
}
/**
* Compares the query parameters of two SERPs to see if one is equivalent to
* the other. URL `x` is equivalent to URL `y` if
* (a) `y` contains at least all the query parameters contained in `x`, and
* (b) The values of the query parameters contained in both `x` and `y `are
* the same.
*
* @param {string} historySerp
* The SERP from history whose params should be contained in
* `generatedSerp`.
* @param {string} generatedSerp
* The search URL we would generate for a search result with the same search
* string used in `historySerp`.
* @param {array} [ignoreParams]
* A list of params to ignore in the matching, i.e. params that can be
* contained in `historySerp` but not be in `generatedSerp`.
* @returns {boolean} True if `historySerp` can be deduped by `generatedSerp`.
*
* @note This function does not compare the SERPs' origins or pathnames.
* `historySerp` can have a different origin and/or pathname than
* `generatedSerp` and still be considered equivalent.
*/
serpsAreEquivalent(historySerp, generatedSerp, ignoreParams = []) {
let historyParams = new URL(historySerp).searchParams;
let generatedParams = new URL(generatedSerp).searchParams;
if (
!Array.from(historyParams.entries()).every(
([key, value]) =>
ignoreParams.includes(key) || value === generatedParams.get(key)
)
) {
return false;
}
return true;
}
/**
* Gets the aliases of an engine. For the user's convenience, we recognize
* token versions of all non-token aliases. For example, if the user has an

Просмотреть файл

@ -189,6 +189,36 @@ add_task(async function test_builtin_aliased_search_engine_match() {
Assert.ok(engine);
});
add_task(async function test_serps_are_equivalent() {
info("Subset URL has extraneous parameters.");
let url1 = "https://example.com/search?q=test&type=images";
let url2 = "https://example.com/search?q=test";
Assert.ok(!UrlbarSearchUtils.serpsAreEquivalent(url1, url2));
info("Superset URL has extraneous parameters.");
Assert.ok(UrlbarSearchUtils.serpsAreEquivalent(url2, url1));
info("Same keys, different values.");
url1 = "https://example.com/search?q=test&type=images";
url2 = "https://example.com/search?q=test123&type=maps";
Assert.ok(!UrlbarSearchUtils.serpsAreEquivalent(url1, url2));
Assert.ok(!UrlbarSearchUtils.serpsAreEquivalent(url2, url1));
info("Subset matching isn't strict (URL is subset of itself).");
Assert.ok(UrlbarSearchUtils.serpsAreEquivalent(url1, url1));
info("Origin and pathname are ignored.");
url1 = "https://example.com/search?q=test";
url2 = "https://example-1.com/maps?q=test";
Assert.ok(UrlbarSearchUtils.serpsAreEquivalent(url1, url2));
Assert.ok(UrlbarSearchUtils.serpsAreEquivalent(url2, url1));
info("Params can be optionally ignored");
url1 = "https://example.com/search?q=test&abc=123&foo=bar";
url2 = "https://example.com/search?q=test";
Assert.ok(!UrlbarSearchUtils.serpsAreEquivalent(url1, url2));
Assert.ok(UrlbarSearchUtils.serpsAreEquivalent(url1, url2, ["abc", "foo"]));
});
function promiseSearchTopic(expectedVerb) {
return new Promise(resolve => {
Services.obs.addObserver(function observe(subject, topic, verb) {

Просмотреть файл

@ -1229,9 +1229,27 @@ Search.prototype = {
}
},
/**
* Maybe restyle a SERP in history as a search-type result. To do this,
* we extract the search term from the SERP in history then generate a search
* URL with that search term. We restyle the SERP in history if its query
* parameters are a subset of those of the generated SERP. We check for a
* subset instead of exact equivalence since the generated URL may contain
* attribution parameters while a SERP in history from an organic search would
* not. We don't allow extra params in the history URL since they might
* indicate the search is not a first-page web SERP (as opposed to a image or
* other non-web SERP).
*
* @note We will mistakenly dedupe SERPs for engines that have the same
* hostname as another engine. One example is if the user installed a
* Google Image Search engine. That engine's search URLs might only be
* distinguished by query params from search URLs from the default Google
* engine.
*/
_maybeRestyleSearchMatch(match) {
// Return if the URL does not represent a search result.
let parseResult = Services.search.parseSubmissionURL(match.value);
let historyUrl = match.value;
let parseResult = Services.search.parseSubmissionURL(historyUrl);
if (!parseResult?.engine) {
return false;
}
@ -1247,29 +1265,19 @@ Search.prototype = {
}
// The URL for the search suggestion formed by the user's typed query.
let [typedSuggestionUrl] = UrlbarUtils.getSearchQueryUrl(
let [generatedSuggestionUrl] = UrlbarUtils.getSearchQueryUrl(
parseResult.engine,
this._searchTokens.map(t => t.value).join(" ")
);
let historyParams = new URL(match.value).searchParams;
let typedParams = new URL(typedSuggestionUrl).searchParams;
// Checking the two URLs have the same query parameters with the same
// values, or a subset value in the case of the query parameter.
// Parameter order doesn't matter.
// We ignore termsParameterName when checking for a subset because we
// already checked that the typed query is a subset of the search history
// query above with this._searchTokens.every(...).
if (
Array.from(historyParams).length != Array.from(typedParams).length ||
!Array.from(historyParams.entries()).every(
([key, value]) =>
// We want to match all non-search-string GET parameters exactly, to avoid
// restyling non-first pages of search results, or image results as web
// results.
// We let termsParameterName through because we already checked that the
// typed query is a subset of the search history query above with
// this._searchTokens.every(...).
key == parseResult.termsParameterName ||
value === typedParams.get(key)
!UrlbarSearchUtils.serpsAreEquivalent(
historyUrl,
generatedSuggestionUrl,
[parseResult.termsParameterName]
)
) {
return false;

Просмотреть файл

@ -2,22 +2,28 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
add_task(async function test_searchEngine() {
const engineDomain = "s.example.com";
add_task(async function setup() {
let engine = await Services.search.addEngineWithDetails("SearchEngine", {
method: "GET",
template: "http://s.example.com/search",
template: `http://${engineDomain}/search`,
searchGetParams: "q={searchTerms}",
});
registerCleanupFunction(async () => Services.search.removeEngine(engine));
let uri = NetUtil.newURI("http://s.example.com/search?q=Terms");
Services.prefs.setBoolPref("browser.urlbar.restyleSearches", true);
registerCleanupFunction(async () => {
Services.prefs.clearUserPref("browser.urlbar.restyleSearches");
Services.search.removeEngine(engine);
});
});
add_task(async function test_searchEngine() {
let uri = Services.io.newURI(`http://${engineDomain}/search?q=Terms`);
await PlacesTestUtils.addVisits({
uri,
title: "Terms - SearchEngine Search",
});
info("Past search terms should be styled.");
Services.prefs.setBoolPref("browser.urlbar.restyleSearches", true);
await check_autocomplete({
search: "term",
matches: [
@ -30,6 +36,13 @@ add_task(async function test_searchEngine() {
],
});
info(
"Searching for a superset of the search string in history should not restyle."
);
await check_autocomplete({
search: "Terms Foo",
});
info("Bookmarked past searches should not be restyled");
await PlacesTestUtils.addBookmarkWithDetails({
uri,
@ -55,6 +68,23 @@ add_task(async function test_searchEngine() {
search: "term",
matches: [{ uri, title: "Terms - SearchEngine Search" }],
});
Services.prefs.setBoolPref("browser.urlbar.restyleSearches", true);
await cleanup();
});
add_task(async function test_extraneousParameters() {
info("SERPs in history with extraneous parameters should not be restyled.");
let uri = Services.io.newURI(
`http://${engineDomain}/search?q=Terms&p=2&type=img`
);
await PlacesTestUtils.addVisits({
uri,
title: "Terms - SearchEngine Search",
});
await check_autocomplete({
search: "term",
matches: [{ uri, title: "Terms - SearchEngine Search" }],
});
});