зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1716025 - Part 1 - Add hardcoded blocklist of sites to exclude from history metadata. r=mak
This patch creates a blocklist of sites that should not be saved as keyframes. The blocklist maps hostnames to regular expressions. While the map is a bit of an awkward structure, it lets us avoid checking every URL against a list of regular expressions. This performance gain will be more apparent as the list expands. Differential Revision: https://phabricator.services.mozilla.com/D117543
This commit is contained in:
Родитель
22900508de
Коммит
699c2f666c
|
@ -12,6 +12,7 @@ const { XPCOMUtils } = ChromeUtils.import(
|
|||
|
||||
XPCOMUtils.defineLazyModuleGetters(this, {
|
||||
BrowserWindowTracker: "resource:///modules/BrowserWindowTracker.jsm",
|
||||
InteractionsBlocklist: "resource:///modules/InteractionsBlocklist.jsm",
|
||||
PrivateBrowsingUtils: "resource://gre/modules/PrivateBrowsingUtils.jsm",
|
||||
Services: "resource://gre/modules/Services.jsm",
|
||||
});
|
||||
|
@ -373,6 +374,11 @@ class _Interactions {
|
|||
this.registerEndOfInteraction(browser);
|
||||
}
|
||||
|
||||
if (InteractionsBlocklist.isUrlBlocklisted(docInfo.url)) {
|
||||
logConsole.debug("URL is blocklisted", docInfo);
|
||||
return;
|
||||
}
|
||||
|
||||
logConsole.debug("New interaction", docInfo);
|
||||
interaction = {
|
||||
url: docInfo.url,
|
||||
|
|
|
@ -0,0 +1,142 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
"use strict";
|
||||
|
||||
var EXPORTED_SYMBOLS = ["InteractionsBlocklist"];
|
||||
|
||||
const { XPCOMUtils } = ChromeUtils.import(
|
||||
"resource://gre/modules/XPCOMUtils.jsm"
|
||||
);
|
||||
|
||||
XPCOMUtils.defineLazyModuleGetters(this, {
|
||||
Services: "resource://gre/modules/Services.jsm",
|
||||
UrlbarUtils: "resource:///modules/UrlbarUtils.jsm",
|
||||
});
|
||||
|
||||
// Lazily builds the console used by this module. Messages are prefixed with
// "InteractionsBlocklist"; the maximum log level is "Debug" when the
// browser.places.interactions.log pref is true and "Warn" otherwise (the pref
// read defaults to false).
XPCOMUtils.defineLazyGetter(this, "logConsole", () => {
  const isDebugLoggingEnabled = Services.prefs.getBoolPref(
    "browser.places.interactions.log",
    false
  );
  const maxLogLevel = isDebugLoggingEnabled ? "Debug" : "Warn";
  return console.createInstance({
    prefix: "InteractionsBlocklist",
    maxLogLevel,
  });
});
|
||||
|
||||
// A blocklist of regular expressions. Maps base hostnames to a list of regular
// expressions for URLs with that base hostname. In this context, "base
// hostname" means the hostname without any subdomains or a public suffix. For
// example, the base hostname for "https://www.maps.google.com/a/place" is
// "google". We do this mapping to improve performance; otherwise we'd have to
// check all URLs against a long list of regular expressions. The regexes are
// defined as escaped strings so that we build them lazily.
// We may want to migrate this list to Remote Settings in the future.
let HOST_BLOCKLIST = {
  baidu: [
    // Baidu SERP
    "^(https?:\\/\\/)?(www\\.)?baidu\\.com\\/s.*(\\?|&)wd=.*",
  ],
  bing: [
    // Bing SERP
    "^(https?:\\/\\/)?(www\\.)?bing\\.com\\/search.*(\\?|&)q=.*",
  ],
  duckduckgo: [
    // DuckDuckGo SERP
    "^(https?:\\/\\/)?(www\\.)?duckduckgo\\.com\\/.*(\\?|&)q=.*",
  ],
  example: [
    // For testing. Removed in part 2 of this patch.
    "^(https?:\\/\\/)?example\\.com\\/browser",
  ],
  google: [
    // Google SERP
    "^(https?:\\/\\/)?(www\\.)?google\\.(\\w|\\.){2,}\\/search.*(\\?|&)q=.*",
  ],
  yandex: [
    // Yandex SERP
    "^(https?:\\/\\/)?(www\\.)?yandex\\.(\\w|\\.){2,}\\/search.*(\\?|&)text=.*",
  ],
  zoom: [
    // Zoom meeting interstitial
    "^(https?:\\/\\/)?(www\\.)?.*\\.zoom\\.us\\/j\\/\\d+",
  ],
};

// Wrap the raw table in a Proxy so that lookups hand back compiled RegExp
// objects. Reading a property returns:
//   - null when the underlying value is not an array (unknown base hostname,
//     or an inherited Object.prototype member such as "constructor");
//   - otherwise the array itself, after every string entry has been replaced
//     in place by a case-insensitive RegExp. Because the array is mutated, each
//     pattern is compiled at most once.
// An invalid pattern makes the RegExp constructor throw a SyntaxError, which
// propagates to the caller.
HOST_BLOCKLIST = new Proxy(HOST_BLOCKLIST, {
  get(target, property) {
    const regexes = target[property];
    if (!Array.isArray(regexes)) {
      return null;
    }

    regexes.forEach((entry, index) => {
      // Entries that are already RegExp objects were compiled by an earlier
      // lookup and are left untouched.
      if (typeof entry === "string") {
        regexes[index] = new RegExp(entry, "i");
      }
    });
    return regexes;
  },
});
|
||||
|
||||
/**
 * A class that maintains a blocklist of URLs. The class exposes a method to
 * check if a particular URL is contained on the blocklist.
 */
class _InteractionsBlocklist {
  /**
   * Checks a URL against a blocklist of URLs. If the URL is blocklisted, we
   * should not record an interaction.
   *
   * @param {string} urlToCheck
   *   The URL we are looking for on the blocklist.
   * @returns {boolean}
   *   True if `urlToCheck` is on a blocklist. False otherwise, including when
   *   `urlToCheck` cannot be parsed as a URL (a warning is logged in that
   *   case).
   */
  isUrlBlocklisted(urlToCheck) {
    // First, find the URL's base host: the hostname without any subdomains or a
    // public suffix.
    let url;
    try {
      url = new URL(urlToCheck);
    } catch (ex) {
      // `url` was never assigned because the constructor threw, so report the
      // raw input rather than the (undefined) parsed value.
      logConsole.warn(
        `Invalid URL passed to InteractionsBlocklist.isUrlBlocklisted: ${urlToCheck}`
      );
      return false;
    }

    // Use `hostname` rather than `host` so that an explicit port
    // ("example.com:8080") is never handed to the public-suffix lookup.
    let hostWithoutSuffix = UrlbarUtils.stripPublicSuffixFromHost(
      url.hostname
    );
    let [hostWithSubdomains] = UrlbarUtils.stripPrefixAndTrim(
      hostWithoutSuffix,
      {
        stripWww: true,
        trimTrailingDot: true,
      }
    );
    // Keep only the last dot-separated label, dropping any subdomains.
    let baseHost = hostWithSubdomains.substring(
      hostWithSubdomains.lastIndexOf(".") + 1
    );

    // Then fetch blocked regexes for that baseHost and compare them to the full
    // URL. The blocklist keys are ASCII, so the lookup key is lowercased
    // without applying locale-specific case rules.
    let regexes = HOST_BLOCKLIST[baseHost.toLowerCase()];
    if (!regexes) {
      return false;
    }

    return regexes.some(r => r.test(url.href));
  }
}

const InteractionsBlocklist = new _InteractionsBlocklist();
|
|
@ -15,6 +15,7 @@ JAR_MANIFESTS += ["jar.mn"]
|
|||
|
||||
EXTRA_JS_MODULES += [
|
||||
"Interactions.jsm",
|
||||
"InteractionsBlocklist.jsm",
|
||||
"PlacesUIUtils.jsm",
|
||||
]
|
||||
|
||||
|
|
|
@ -10,5 +10,6 @@ support-files =
|
|||
head.js
|
||||
../keyword_form.html
|
||||
|
||||
[browser_interactions_blocklist.js]
|
||||
[browser_interactions_view_time.js]
|
||||
[browser_interactions_typing.js]
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
/* Any copyright is dedicated to the Public Domain.
|
||||
* http://creativecommons.org/publicdomain/zero/1.0/ */
|
||||
|
||||
/**
|
||||
* Tests that interactions are not recorded for sites on the blocklist.
|
||||
*/
|
||||
|
||||
// A page that is not on the blocklist, and one that matches the "example"
// blocklist entry used for testing.
const ALLOWED_TEST_URL = "https://example.com/";
const BLOCKED_TEST_URL = "https://example.com/browser";

// Spy on Interactions._updateDatabase for the duration of this file so the
// tests can inspect what would have been written; the spy is removed again
// during cleanup.
add_task(async function setup() {
  sinon.spy(Interactions, "_updateDatabase");

  registerCleanupFunction(function restoreSpies() {
    sinon.restore();
  });
});
|
||||
|
||||
// Loads an allowed page, navigates to a blocklisted page, then navigates to
// about:blank, and checks that only the allowed page ever reaches
// Interactions._updateDatabase.
add_task(async function test() {
  await BrowserTestUtils.withNewTab(ALLOWED_TEST_URL, async tabBrowser => {
    // Only the allowed page is ever expected to be stored.
    const expectedInteractions = [
      {
        url: ALLOWED_TEST_URL,
        totalViewTime: 10000,
      },
    ];

    // Starts a load of `targetUrl` in the tab and waits for it to finish.
    const navigateTo = async targetUrl => {
      BrowserTestUtils.loadURI(tabBrowser, targetUrl);
      await BrowserTestUtils.browserLoaded(tabBrowser, false, targetUrl);
    };

    Interactions._pageViewStartTime = Cu.now() - 10000;

    await navigateTo(BLOCKED_TEST_URL);

    await assertDatabaseValues(expectedInteractions);

    Interactions._pageViewStartTime = Cu.now() - 20000;

    await navigateTo("about:blank");

    // We should not have updated the database with BLOCKED_TEST_URL because it
    // is blocklisted. We wait a little to make sure _updateDatabase is not
    // going to fire.
    // eslint-disable-next-line mozilla/no-arbitrary-setTimeout
    await new Promise(resolve => setTimeout(resolve, 500));
    await assertDatabaseValues(expectedInteractions);
  });
});
|
|
@ -21,43 +21,6 @@ add_task(async function setup() {
|
|||
});
|
||||
});
|
||||
|
||||
/**
 * Waits until Interactions._updateDatabase has been called exactly
 * `expected.length` times, then checks the first argument of each call
 * against the matching entry of `expected`.
 *
 * Interactions._updateDatabase must expose `callCount` and `args` (for
 * example because it has been wrapped in a sinon spy).
 *
 * @param {Array<object>} expected
 *   One entry per expected call, in call order.
 * @param {string} expected[].url
 *   The URL the call must have been made with.
 * @param {number} [expected[].exactTotalViewTime]
 *   If provided (including 0), the recorded totalViewTime must equal it.
 * @param {number} [expected[].totalViewTime]
 *   Used when exactTotalViewTime is not provided: the recorded totalViewTime
 *   must be greater than this value.
 * @param {number} [expected[].maxViewTime]
 *   If provided (including 0), the recorded totalViewTime must be less than
 *   this value.
 */
async function assertDatabaseValues(expected) {
  await BrowserTestUtils.waitForCondition(
    () => Interactions._updateDatabase.callCount == expected.length,
    "Should have saved to the database"
  );

  let args = Interactions._updateDatabase.args;
  for (let i = 0; i < expected.length; i++) {
    let actual = args[i][0];
    let { url, exactTotalViewTime, totalViewTime, maxViewTime } = expected[i];
    Assert.equal(
      actual.url,
      url,
      "Should have saved the page into the database"
    );
    // Compare against null/undefined rather than relying on truthiness so
    // that an expected value of 0 is still honoured.
    if (exactTotalViewTime != null) {
      Assert.equal(
        actual.totalViewTime,
        exactTotalViewTime,
        "Should have kept the exact time"
      );
    } else {
      Assert.greater(
        actual.totalViewTime,
        totalViewTime,
        "Should have stored the interaction time"
      );
    }
    if (maxViewTime != null) {
      Assert.less(
        actual.totalViewTime,
        maxViewTime,
        "Should have recorded an interaction below the maximum expected"
      );
    }
  }
}
|
||||
|
||||
add_task(async function test_interactions_simple_load_and_navigate_away() {
|
||||
await BrowserTestUtils.withNewTab(TEST_URL, async browser => {
|
||||
Interactions._pageViewStartTime = Cu.now() - 10000;
|
||||
|
|
|
@ -30,3 +30,40 @@ function disableIdleService() {
|
|||
idleService.addIdleObserver(Interactions, pageViewIdleTime);
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Waits until Interactions._updateDatabase has been called exactly
 * `expected.length` times, then checks the first argument of each call
 * against the matching entry of `expected`.
 *
 * Interactions._updateDatabase must expose `callCount` and `args` (for
 * example because it has been wrapped in a sinon spy).
 *
 * @param {Array<object>} expected
 *   One entry per expected call, in call order.
 * @param {string} expected[].url
 *   The URL the call must have been made with.
 * @param {number} [expected[].exactTotalViewTime]
 *   If provided (including 0), the recorded totalViewTime must equal it.
 * @param {number} [expected[].totalViewTime]
 *   Used when exactTotalViewTime is not provided: the recorded totalViewTime
 *   must be greater than this value.
 * @param {number} [expected[].maxViewTime]
 *   If provided (including 0), the recorded totalViewTime must be less than
 *   this value.
 */
async function assertDatabaseValues(expected) {
  await BrowserTestUtils.waitForCondition(
    () => Interactions._updateDatabase.callCount == expected.length,
    "Should have saved to the database"
  );

  let args = Interactions._updateDatabase.args;
  for (let i = 0; i < expected.length; i++) {
    let actual = args[i][0];
    let { url, exactTotalViewTime, totalViewTime, maxViewTime } = expected[i];
    Assert.equal(
      actual.url,
      url,
      "Should have saved the page into the database"
    );
    // Compare against null/undefined rather than relying on truthiness so
    // that an expected value of 0 is still honoured.
    if (exactTotalViewTime != null) {
      Assert.equal(
        actual.totalViewTime,
        exactTotalViewTime,
        "Should have kept the exact time"
      );
    } else {
      Assert.greater(
        actual.totalViewTime,
        totalViewTime,
        "Should have stored the interaction time"
      );
    }
    if (maxViewTime != null) {
      Assert.less(
        actual.totalViewTime,
        maxViewTime,
        "Should have recorded an interaction below the maximum expected"
      );
    }
  }
}
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
/* Any copyright is dedicated to the Public Domain.
|
||||
* http://creativecommons.org/publicdomain/zero/1.0/ */
|
||||
|
||||
/**
|
||||
* Tests that blocked sites are caught by InteractionsBlocklist.
|
||||
*/
|
||||
|
||||
const { InteractionsBlocklist } = ChromeUtils.import(
|
||||
"resource:///modules/InteractionsBlocklist.jsm"
|
||||
);
|
||||
|
||||
// URLs that InteractionsBlocklist.isUrlBlocklisted is expected to report as
// blocklisted.
let BLOCKED_URLS = [
  "https://www.bing.com/search?q=mozilla",
  "https://duckduckgo.com/?q=a+test&kp=1&t=ffab",
  "https://www.google.com/search?q=mozilla",
  "https://www.google.ca/search?q=test",
  "https://mozilla.zoom.us/j/123456789",
  "https://yandex.az/search/?text=mozilla",
  "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=mozilla&rn=&fenlei=256&oq=&rsv_pq=970f2b8f001757b9&rsv_t=1f5d2V2o80HPdZtZnhodwkc7nZXTvDI1zwdPy%2FAeomnvFFGIrU1F3D9WoK4&rqlang=cn",
];

// URLs that InteractionsBlocklist.isUrlBlocklisted is expected to report as
// not blocklisted.
let ALLOWED_URLS = [
  "https://example.com",
  "https://zoom.us/pricing",
  "https://www.google.ca/maps/place/Toronto,+ON/@43.7181557,-79.5181414,11z/data=!3m1!4b1!4m5!3m4!1s0x89d4cb90d7c63ba5:0x323555502ab4c477!8m2!3d43.653226!4d-79.3831843",
];

// Runs every URL from both lists through the blocklist and asserts the
// expected verdict for each one.
add_task(async function test() {
  BLOCKED_URLS.forEach(blockedUrl => {
    const isBlocked = InteractionsBlocklist.isUrlBlocklisted(blockedUrl);
    Assert.ok(isBlocked, `${blockedUrl} is blocklisted.`);
  });

  ALLOWED_URLS.forEach(allowedUrl => {
    const isBlocked = InteractionsBlocklist.isUrlBlocklisted(allowedUrl);
    Assert.ok(!isBlocked, `${allowedUrl} is not blocklisted.`);
  });
});
|
|
@ -17,5 +17,6 @@ support-files =
|
|||
[test_browserGlue_prefs.js]
|
||||
[test_browserGlue_restore.js]
|
||||
[test_clearHistory_shutdown.js]
|
||||
[test_interactions_blocklist.js]
|
||||
[test_PUIU_batchUpdatesForNode.js]
|
||||
[test_PUIU_setCharsetForPage.js]
|
||||
|
|
|
@ -758,6 +758,8 @@ var UrlbarUtils = {
|
|||
* Whether to trim a trailing `?`.
|
||||
* @param {boolean} options.trimEmptyHash
|
||||
* Whether to trim a trailing `#`.
|
||||
* @param {boolean} options.trimTrailingDot
|
||||
* Whether to trim a trailing '.'.
|
||||
* @returns {array} [modified, prefix, suffix]
|
||||
* modified: {string} The modified spec.
|
||||
* prefix: {string} The parts stripped from the prefix, if any.
|
||||
|
@ -789,6 +791,10 @@ var UrlbarUtils = {
|
|||
spec = spec.slice(0, -1);
|
||||
suffix = "/" + suffix;
|
||||
}
|
||||
if (options.trimTrailingDot && spec.endsWith(".")) {
|
||||
spec = spec.slice(0, -1);
|
||||
suffix = "." + suffix;
|
||||
}
|
||||
return [spec, prefix, suffix];
|
||||
},
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче