Bug 1511065 - Switch Search Provider telemetry probes to Remote Settings. r=daleharvey

Differential Revision: https://phabricator.services.mozilla.com/D92202
This commit is contained in:
Mark Banner 2020-10-09 16:41:34 +00:00
Родитель d92de51aae
Коммит 633ab11df1
10 изменённых файлов: 315 добавлений и 163 удалений

Просмотреть файл

@ -50,11 +50,17 @@ class SearchProviders {
// Filter-out non-ad providers so that we're not trying to match against
// those unnecessarily.
for (let [providerName, info] of Object.entries(this._searchProviderInfo)) {
if (!("extraAdServersRegexps" in info)) {
delete this._searchProviderInfo[providerName];
}
}
this._searchProviderInfo = this._searchProviderInfo
.filter(p => "extraAdServersRegexps" in p)
.map(p => {
return {
...p,
searchPageRegexp: new RegExp(p.searchPageRegexp),
extraAdServersRegexps: p.extraAdServersRegexps.map(
r => new RegExp(r)
),
};
});
return this._searchProviderInfo;
}
@ -99,9 +105,7 @@ class SearchTelemetryChild extends JSWindowActorChild {
* of provider name and the provider information.
*/
_getProviderInfoForUrl(url) {
return Object.entries(searchProviders.info || []).find(([_, info]) =>
info.searchPageRegexp.test(url)
);
return searchProviders.info?.find(info => info.searchPageRegexp.test(url));
}
/**
@ -125,7 +129,7 @@ class SearchTelemetryChild extends JSWindowActorChild {
return;
}
let regexps = providerInfo[1].extraAdServersRegexps;
let regexps = providerInfo.extraAdServersRegexps;
let anchors = doc.getElementsByTagName("a");
let hasAds = false;
for (let anchor of anchors) {

Просмотреть файл

@ -11,6 +11,7 @@ const { XPCOMUtils } = ChromeUtils.import(
);
XPCOMUtils.defineLazyModuleGetters(this, {
RemoteSettings: "resource://services-settings/remote-settings.js",
SearchUtils: "resource://gre/modules/SearchUtils.jsm",
Services: "resource://gre/modules/Services.jsm",
});
@ -22,98 +23,7 @@ const SEARCH_AD_CLICKS_SCALAR = "browser.search.ad_clicks";
const SEARCH_DATA_TRANSFERRED_SCALAR = "browser.search.data_transferred";
const SEARCH_TELEMETRY_PRIVATE_BROWSING_KEY_SUFFIX = "pb";
/**
* Used to identify various parameters used with partner search providers. This
* consists of the following structure:
* - {<string>} name
* Details for a particular provider with the string name.
* - {regexp} <string>.regexp
* The regular expression used to match the url for the search providers
* main page.
* - {string} <string>.queryParam
* The query parameter name that indicates a search has been made.
* - {string} [<string>.codeParam]
* The query parameter name that indicates a search provider's code.
* - {array} [<string>.codePrefixes]
* An array of the possible string prefixes for a codeParam, indicating a
* partner search.
* - {array} [<string>.followonParams]
* An array of parameter names that indicate this is a follow-on search.
* - {array} [<string>.extraAdServersRegexps]
* An array of regular expressions used to determine if a link on a search
* page might be an advert.
* - {array} [<object>.followonCookies]
* An array of cookie details, which should look like:
* - {string} [extraCodeParam]
* The query parameter name that indicates an extra search provider's
* code.
* - {array} [<string>.extraCodePrefixes]
* An array of the possible string prefixes for a codeParam, indicating
* a partner search.
* - {string} host
* Host name to which the cookie is linked.
* - {string} name
* Name of the cookie to look for that should contain the search
* provider's code.
* - {string} codeParam
* The cookie parameter name that indicates a search provider's code.
* - {array} <string>.codePrefixes
* An array of the possible string prefixes for a codeParam, indicating
* a partner search.
*/
const SEARCH_PROVIDER_INFO = {
google: {
searchPageRegexp: /^https:\/\/www\.google\.(?:.+)\/search/,
queryParamName: "q",
codeParamName: "client",
codePrefixes: ["firefox"],
followOnParamNames: ["oq", "ved", "ei"],
extraAdServersRegexps: [
/^https:\/\/www\.google(?:adservices)?\.com\/(?:pagead\/)?aclk/,
],
},
duckduckgo: {
searchPageRegexp: /^https:\/\/duckduckgo\.com\//,
queryParamName: "q",
codeParamName: "t",
codePrefixes: ["ff", "newext"],
extraAdServersRegexps: [
/^https:\/\/duckduckgo.com\/y\.js?.*ad_provider\=/,
/^https:\/\/www\.amazon\.(?:[a-z.]{2,24}).*(?:tag=duckduckgo-)/,
],
},
yahoo: {
searchPageRegexp: /^https:\/\/(?:.*)search\.yahoo\.com\/search/,
queryParamName: "p",
},
baidu: {
searchPageRegexp: /^https:\/\/www\.baidu\.com\/(?:s|baidu)/,
queryParamName: "wd",
codeParamName: "tn",
codePrefixes: ["34046034_", "monline_"],
followOnParamNames: ["oq"],
},
bing: {
searchPageRegexp: /^https:\/\/www\.bing\.com\/search/,
queryParamName: "q",
codeParamName: "pc",
codePrefixes: ["MOZ", "MZ"],
followOnCookies: [
{
extraCodeParamName: "form",
extraCodePrefixes: ["QBRE"],
host: "www.bing.com",
name: "SRCHS",
codeParamName: "PC",
codePrefixes: ["MOZ", "MZ"],
},
],
extraAdServersRegexps: [
/^https:\/\/www\.bing\.com\/acli?c?k/,
/^https:\/\/www\.bing\.com\/fd\/ls\/GLinkPingPost\.aspx.*acli?c?k/,
],
},
};
const TELEMETRY_SETTINGS_KEY = "search-telemetry";
XPCOMUtils.defineLazyGetter(this, "logConsole", () => {
return console.createInstance({
@ -129,23 +39,37 @@ XPCOMUtils.defineLazyGetter(this, "logConsole", () => {
* It handles the *in-content:sap* keys of the SEARCH_COUNTS histogram.
*/
class TelemetryHandler {
// Whether or not this class is initialised.
_initialized = false;
// An instance of ContentHandler.
_contentHandler;
// The original provider information, mainly used for tests.
_originalProviderInfo = null;
// The current search provider info.
_searchProviderInfo = null;
// An instance of remote settings that is used to access the provider info.
_telemetrySettings;
// _browserInfoByURL is a map of tracked search urls to objects containing:
// * {object} info
// the search provider information associated with the url.
// * {WeakSet} browsers
// a weak set of browsers that have the url loaded.
// * {integer} count
// a manual count of browsers logged.
// We keep a weak set of browsers, in case we miss something on our counts
// and cause a memory leak - worst case our map is slightly bigger than it
// needs to be.
// The manual count is because WeakSet doesn't give us size/length
// information, but we want to know when we can clean up our associated
// entry.
_browserInfoByURL = new Map();
constructor() {
// _browserInfoByURL is a map of tracked search urls to objects containing:
// * {object} info
// the search provider information associated with the url.
// * {WeakSet} browsers
// a weak set of browsers that have the url loaded.
// * {integer} count
// a manual count of browsers logged.
// We keep a weak set of browsers, in case we miss something on our counts
// and cause a memory leak - worst case our map is slightly bigger than it
// needs to be.
// The manual count is because WeakSet doesn't give us size/length
// information, but we want to know when we can clean up our associated
// entry.
this._browserInfoByURL = new Map();
this._initialized = false;
this.__searchProviderInfo = null;
this._contentHandler = new ContentHandler({
browserInfoByURL: this._browserInfoByURL,
findBrowserItemForURL: (...args) => this._findBrowserItemForURL(...args),
@ -159,12 +83,25 @@ class TelemetryHandler {
* appropriate listeners to the window so that window opening and closing
* can be tracked.
*/
init() {
async init() {
if (this._initialized) {
return;
}
this._contentHandler.init();
this._telemetrySettings = RemoteSettings(TELEMETRY_SETTINGS_KEY);
let rawProviderInfo = [];
try {
rawProviderInfo = await this._telemetrySettings.get();
} catch (ex) {
logConsole.error("Could not get settings:", ex);
}
// Send the provider info to the child handler.
this._contentHandler.init(rawProviderInfo);
this._originalProviderInfo = rawProviderInfo;
// Now convert the regexps into RegExp objects for the local copy.
this._setSearchProviderInfo(rawProviderInfo);
for (let win of Services.wm.getEnumerator("navigator:browser")) {
this._registerWindow(win);
@ -210,20 +147,35 @@ class TelemetryHandler {
* Test-only function, used to override the provider information, so that
* unit tests can set it to easy to test values.
*
* @param {object} infoByProvider @see SEARCH_PROVIDER_INFO for type information.
* @param {array} providerInfo @see search-telemetry-schema.json for type information.
*/
overrideSearchTelemetryForTests(infoByProvider) {
if (infoByProvider) {
for (let info of Object.values(infoByProvider)) {
info.regexp = new RegExp(info.regexp);
overrideSearchTelemetryForTests(providerInfo) {
let info = providerInfo ? providerInfo : this._originalProviderInfo;
this._contentHandler.overrideSearchTelemetryForTests(info);
this._setSearchProviderInfo(info);
}
/**
* Used to set the local version of the search provider information.
* This automatically maps the regexps to RegExp objects so that
* we don't have to create a new instance each time.
*
* @param {array} providerInfo
* A raw array of provider information to set.
*/
_setSearchProviderInfo(providerInfo) {
this._searchProviderInfo = providerInfo.map(provider => {
let newProvider = {
...provider,
searchPageRegexp: new RegExp(provider.searchPageRegexp),
};
if (provider.extraAdServersRegexps) {
newProvider.extraAdServersRegexps = provider.extraAdServersRegexps.map(
r => new RegExp(r)
);
}
this.__searchProviderInfo = infoByProvider;
} else {
this.__searchProviderInfo = SEARCH_PROVIDER_INFO;
}
this._contentHandler.overrideSearchTelemetryForTests(
this.__searchProviderInfo
);
return newProvider;
});
}
reportPageWithAds(info) {
@ -441,7 +393,7 @@ class TelemetryHandler {
*/
_getProviderInfoForURL(url, useOnlyExtraAdServers = false) {
if (useOnlyExtraAdServers) {
return Object.entries(this._searchProviderInfo).find(([_, info]) => {
return this._searchProviderInfo.find(info => {
if (info.extraAdServersRegexps) {
for (let regexp of info.extraAdServersRegexps) {
if (regexp.test(url)) {
@ -453,7 +405,7 @@ class TelemetryHandler {
});
}
return Object.entries(this._searchProviderInfo).find(([_, info]) =>
return this._searchProviderInfo.find(info =>
info.searchPageRegexp.test(url)
);
}
@ -467,11 +419,10 @@ class TelemetryHandler {
* returns an object of strings for provider, code and type.
*/
_checkURLForSerpMatch(url) {
let info = this._getProviderInfoForURL(url);
if (!info) {
let searchProviderInfo = this._getProviderInfoForURL(url);
if (!searchProviderInfo) {
return null;
}
let [provider, searchProviderInfo] = info;
let queries = new URLSearchParams(url.split("#")[0].split("?")[1]);
if (!queries.get(searchProviderInfo.queryParamName)) {
return null;
@ -533,7 +484,7 @@ class TelemetryHandler {
}
}
}
return { provider, type, code };
return { provider: searchProviderInfo.telemetryId, type, code };
}
/**
@ -554,17 +505,6 @@ class TelemetryHandler {
histogram.add(payload);
logConsole.debug("Counting", payload, "for", url);
}
/**
* Returns the current search provider information in use.
* @see SEARCH_PROVIDER_INFO
*/
get _searchProviderInfo() {
if (!this.__searchProviderInfo) {
this.__searchProviderInfo = SEARCH_PROVIDER_INFO;
}
return this.__searchProviderInfo;
}
}
/**
@ -593,12 +533,12 @@ class ContentHandler {
/**
* Initializes the content handler. This will also set up the shared data that is
* shared with the SearchTelemetryChild actor.
*
* @param {array} providerInfo
* The provider information for the search telemetry to record.
*/
init() {
Services.ppmm.sharedData.set(
"SearchTelemetry:ProviderInfo",
SEARCH_PROVIDER_INFO
);
init(providerInfo) {
Services.ppmm.sharedData.set("SearchTelemetry:ProviderInfo", providerInfo);
Cc["@mozilla.org/network/http-activity-distributor;1"]
.getService(Ci.nsIHttpActivityDistributor)
@ -771,11 +711,15 @@ class ContentHandler {
}
try {
Services.telemetry.keyedScalarAdd(SEARCH_AD_CLICKS_SCALAR, info[0], 1);
Services.telemetry.keyedScalarAdd(
SEARCH_AD_CLICKS_SCALAR,
info.telemetryId,
1
);
channel._adClickRecorded = true;
logConsole.debug(
"Counting ad click in page for",
info[0],
info.telemetryId,
originURL,
URL
);

Просмотреть файл

@ -0,0 +1,7 @@
The schemas in this directory are the primary source for the schemas they represent.
They are uploaded to the RemoteSettings server to validate new configurations.
Any changes should be validated by the Search team.
See the documentation for more information: https://firefox-source-docs.mozilla.org/

Просмотреть файл

@ -0,0 +1,103 @@
{
"type": "object",
"required": [
"telemetryId",
"searchPageRegexp",
"queryParamName"
],
"properties": {
"telemetryId": {
"type": "string",
"title": "Telemetry Id",
"description": "The telemetry identifier for the provider.",
"pattern": "^[a-z0-9-._]*$"
},
"searchPageRegexp": {
"type": "string",
"title": "Search Page Regular Expression",
"description": "A regular expression which matches the search page of the provider."
},
"queryParamName": {
"type": "string",
"title": "Search Query Parameter Name",
"description": "The name of the query parameter for the user's search string."
},
"codeParamName": {
"type": "string",
"title": "Partner Code Parameter Name",
"description": "The name of the query parameter for the partner code."
},
"codePrefixes": {
"type": "array",
"title": "Partner Code Prefixes",
"description": "An array of prefixes (or complete values) to match against the partner code parameters in the url.",
"items": {
"type": "string",
"pattern": "^[a-zA-Z0-9-._]*$"
}
},
"followOnParamNames": {
"type": "array",
"title": "Follow-on Search Parameter Names",
"description": "An array of query parameter names that are used when a follow-on search occurs.",
"items": {
"type": "string",
"pattern": "^[a-z0-9-._]*$"
}
},
"followOnCookies": {
"type": "array",
"title": "Follow-on Cookies",
"description": "An array of cookie details that are used to identify follow-on searches.",
"items": {
"type": "object",
"properties": {
"extraCodeParamName": {
"type": "string",
"description": "The query parameter name in the URL that indicates this might be a follow-on search.",
"pattern": "^[a-z0-9-._]*$"
},
"extraCodePrefixes": {
"type": "array",
"description": "Possible values for the query parameter in the URL that indicates this might be a follow-on search.",
"items": {
"type": "string",
"pattern": "^[a-zA-Z0-9-._]*$"
}
},
"host": {
"type": "string",
"description": "The hostname on which the cookie is stored.",
"pattern": "^[a-z0-9-._]*$"
},
"name": {
"type": "string",
"description": "The name of the cookie to check.",
"pattern": "^[a-zA-Z0-9-._]*$"
},
"codeParamName": {
"type": "string",
"description": "The name of parameter within the cookie.",
"pattern": "^[a-zA-Z0-9-._]*$"
},
"codePrefixes": {
"type": "array",
"description": "Possible values for the parameter within the cookie.",
"items": {
"type": "string",
"pattern": "^[a-zA-Z0-9-._]*$"
}
}
}
}
},
"extraAdServersRegexps": {
"type": "array",
"title": "Extra Ad Server Regular Expressions",
"description": "An array of regular expressions that match URLs of potential ad servers.",
"items": {
"type": "string"
}
}
}
}

Просмотреть файл

@ -0,0 +1,12 @@
{
"ui:order": [
"telemetryId",
"searchPageRegexp",
"queryParamName",
"codeParamName",
"codePrefixes",
"followOnParamNames",
"followOnCookies",
"extraAdServersRegexps"
]
}

Просмотреть файл

@ -14,8 +14,9 @@ const { ADLINK_CHECK_TIMEOUT_MS } = ChromeUtils.import(
"resource:///actors/SearchTelemetryChild.jsm"
);
const TEST_PROVIDER_INFO = {
example: {
const TEST_PROVIDER_INFO = [
{
telemetryId: "example",
searchPageRegexp: /^http:\/\/mochi.test:.+\/browser\/browser\/components\/search\/test\/browser\/searchTelemetry(?:Ad)?.html/,
queryParamName: "s",
codeParamName: "abc",
@ -23,7 +24,7 @@ const TEST_PROVIDER_INFO = {
followOnParamNames: ["a"],
extraAdServersRegexps: [/^https:\/\/example\.com\/ad2?/],
},
};
];
const SEARCH_AD_CLICK_SCALARS = [
"browser.search.with_ads",

Просмотреть файл

@ -201,6 +201,7 @@ async function testAdUrlClicked(serpUrl, adUrl, expectedAdKey) {
// Registered as the first task: sets the `<BROWSER_SEARCH_PREF>log` boolean
// pref to true, then waits for SearchTelemetry.init() to settle so that the
// tasks that follow run against an initialised SearchTelemetry.
add_task(async function setup() {
  Services.prefs.setBoolPref(`${SearchUtils.BROWSER_SEARCH_PREF}log`, true);
  await SearchTelemetry.init();
});
add_task(async function test_parsing_search_urls() {

Просмотреть файл

@ -1509,12 +1509,13 @@ add_task(async function test_formHistory_enterSelection() {
add_task(async function test_privateWindow() {
// Override the search telemetry search provider info to
// count in-content SEARCH_COUNTs telemetry for our test engine.
SearchTelemetry.overrideSearchTelemetryForTests({
example: {
regexp: "^http://example\\.com/",
queryParam: "q",
SearchTelemetry.overrideSearchTelemetryForTests([
{
telemetryId: "example",
searchPageRegexp: "^http://example\\.com/",
queryParamName: "q",
},
});
]);
let search_hist = TelemetryTestUtils.getAndClearKeyedHistogram(
"SEARCH_COUNTS"

Просмотреть файл

@ -10,6 +10,7 @@ FINAL_TARGET_FILES.defaults.settings.main += [
'password-recipes.json',
'search-config.json',
'search-default-override-allowlist.json',
'search-telemetry.json',
'sites-classification.json',
'top-sites.json',
'url-classifier-skip-urls.json',

Просмотреть файл

@ -0,0 +1,78 @@
{
"data": [
{
"codeParamName": "client",
"codePrefixes": [
"firefox"
],
"extraAdServersRegexps": [
"^https://www\\.google(?:adservices)?\\.com/(?:pagead/)?aclk"
],
"followOnParamNames": [
"oq", "ved", "ei"
],
"queryParamName": "q",
"searchPageRegexp": "^https://www\\.google\\.(?:.+)/search",
"telemetryId": "google"
},
{
"codeParamName": "t",
"codePrefixes": [
"ff", "newext"
],
"extraAdServersRegexps": [
"^https://duckduckgo.com/y\\.js?.*ad_provider\\=",
"^https://www\\.amazon\\.(?:[a-z.]{2,24}).*(?:tag=duckduckgo-)"
],
"queryParamName": "q",
"searchPageRegexp": "^https://duckduckgo\\.com/",
"telemetryId": "duckduckgo"
},
{
"queryParamName": "p",
"searchPageRegexp": "^https://(?:.*)search\\.yahoo\\.com/search",
"telemetryId": "yahoo"
},
{
"codeParamName": "tn",
"codePrefixes": [
"34046034_", "monline_"
],
"followOnParamNames": [
"oq"
],
"queryParamName": "wd",
"searchPageRegexp": "^https://www\\.baidu\\.com/(?:s|baidu)",
"telemetryId": "baidu"
},
{
"codeParamName": "pc",
"codePrefixes": [
"MOZ",
"MZ"
],
"extraAdServersRegexps": [
"^https://www\\.bing\\.com/acli?c?k",
"^https://www\\.bing\\.com/fd/ls/GLinkPingPost\\.aspx.*acli?c?k"
],
"followOnCookies": [
{
"extraCodePrefixes": [
"QBRE"
],
"codePrefixes": [
"MOZ",
"MZ"
],
"extraCodeParamName": "form",
"host": "www.bing.com",
"name": "SRCHS",
"codeParamName": "PC"
}
],
"queryParamName": "q",
"searchPageRegexp": "^https://www\\.bing\\.com/search",
"telemetryId": "bing"
}
]
}