From 6bd56eddebe8ea574c8223e58c5530141435d9ed Mon Sep 17 00:00:00 2001 From: Stephanie Cunnane Date: Tue, 5 Sep 2023 16:49:53 +0000 Subject: [PATCH] Bug 1846357 - Extract domains for basic SERP links. r=jteow Differential Revision: https://phabricator.services.mozilla.com/D185609 --- .../actors/SearchSERPTelemetryChild.sys.mjs | 183 ++++++++++++++ .../actors/SearchSERPTelemetryParent.sys.mjs | 4 + browser/app/profile/firefox.js | 3 + .../search/SearchSERPTelemetry.sys.mjs | 143 +++++++++++ .../search/test/browser/telemetry/browser.ini | 6 + ...emetry_domain_categorization_extraction.js | 224 ++++++++++++++++++ ...lemetry_domain_categorization_reporting.js | 108 +++++++++ .../search/test/browser/telemetry/head.js | 16 ++ .../searchTelemetryDomainCategorization.html | 45 ++++ ...elemetryDomainCategorizationReporting.html | 45 ++++ .../searchTelemetryDomainExtraction.html | 72 ++++++ ...elemetry_categorization_process_domains.js | 90 +++++++ .../components/search/test/unit/xpcshell.ini | 1 + 13 files changed, 940 insertions(+) create mode 100644 browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js create mode 100644 browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_reporting.js create mode 100644 browser/components/search/test/browser/telemetry/searchTelemetryDomainCategorization.html create mode 100644 browser/components/search/test/browser/telemetry/searchTelemetryDomainCategorizationReporting.html create mode 100644 browser/components/search/test/browser/telemetry/searchTelemetryDomainExtraction.html create mode 100644 browser/components/search/test/unit/test_search_telemetry_categorization_process_domains.js diff --git a/browser/actors/SearchSERPTelemetryChild.sys.mjs b/browser/actors/SearchSERPTelemetryChild.sys.mjs index 93d4e32a7dd9..5d0c424b5305 100644 --- a/browser/actors/SearchSERPTelemetryChild.sys.mjs +++ b/browser/actors/SearchSERPTelemetryChild.sys.mjs 
@@ -18,6 +18,13 @@ XPCOMUtils.defineLazyPreferenceGetter( false ); +XPCOMUtils.defineLazyPreferenceGetter( + lazy, + "serpEventTelemetryCategorization", + "browser.search.serpEventTelemetryCategorization.enabled", + false +); + const SHARED_DATA_KEY = "SearchTelemetry:ProviderInfo"; export const ADLINK_CHECK_TIMEOUT_MS = 1000; @@ -848,6 +855,154 @@ class SearchAdImpression { } } +/** + * An object indicating which elements to examine for domains to extract and + * which heuristic technique to use to extract that element's domain. + * + * @typedef {object} ExtractorInfo + * @property {string} selectors + * A string representing the CSS selector that targets the elements on the + * page that contain domains we want to extract. + * @property {string} method + * A string representing which domain extraction heuristic to use. + * One of: "href" or "data-attribute". + * @property {object | null} options + * Options related to the domain extraction heuristic used. + * @property {string | null} options.dataAttributeKey + * The key name of the data attribute to lookup. + * @property {string | null} options.queryParamKey + * The key name of the query param value to lookup. + */ + +/** + * DomainExtractor examines elements on a page to retrieve the domains. + */ +class DomainExtractor { + /** + * Extract domains from the page using an array of information pertaining to + * the SERP. + * + * @param {Document} document + * The document for the SERP we are extracting domains from. + * @param {Array} extractorInfos + * Information used to target the domains we need to extract. + * @return {Set} + * A set of the domains extracted from the page. 
+ */ + extractDomainsFromDocument(document, extractorInfos) { + let extractedDomains = new Set(); + if (!extractorInfos?.length) { + return extractedDomains; + } + + for (let extractorInfo of extractorInfos) { + if (!extractorInfo.selectors) { + continue; + } + + let elements = document.querySelectorAll(extractorInfo.selectors); + if (!elements) { + continue; + } + + switch (extractorInfo.method) { + case "href": { + // Origin is used in case a URL needs to be made absolute. + let origin = new URL(document.documentURI).origin; + this.#fromElementsConvertHrefsIntoDomains( + elements, + origin, + extractedDomains, + extractorInfo.options?.queryParamKey + ); + break; + } + case "data-attribute": { + this.#fromElementsRetrieveDataAttributeValues( + elements, + extractorInfo.options?.dataAttributeKey, + extractedDomains + ); + break; + } + } + } + + return extractedDomains; + } + + /** + * Given a list of elements, extract domains using href attributes. If the + * URL in the href includes the specified query param, the domain will be + * that query param's value. Otherwise it will be the hostname of the href + * attribute's URL. + * + * @param {NodeList} elements + * A list of elements from the page whose href attributes we want to + * inspect. + * @param {string} origin + * Origin of the current page. + * @param {Set} extractedDomains + * The result set of domains extracted from the page. + * @param {string | null} queryParam + * An optional query param to search for in an element's href attribute. + */ + #fromElementsConvertHrefsIntoDomains( + elements, + origin, + extractedDomains, + queryParam + ) { + for (let element of elements) { + let href = element.getAttribute("href"); + + let url; + try { + url = new URL(href, origin); + } catch (ex) { + continue; + } + + // Ignore non-standard protocols. + if (url.protocol != "https:" && url.protocol != "http:") { + continue; + } + + let domain = queryParam ? 
url.searchParams.get(queryParam) : url.hostname; + if (domain && !extractedDomains.has(domain)) { + extractedDomains.add(domain); + } + } + } + + /** + * Given a list of elements, examine each for the specified data attribute. + * If found, add that data attribute's value to the result set of extracted + * domains as is. + * + * @param {NodeList} elements + * A list of elements from the page whose data attributes we want to + * inspect. + * @param {string} attribute + * The name of a data attribute to search for within an element. + * @param {Set} extractedDomains + * The result set of domains extracted from the page. + */ + #fromElementsRetrieveDataAttributeValues( + elements, + attribute, + extractedDomains + ) { + for (let element of elements) { + let value = element.dataset[attribute]; + if (value && !extractedDomains.has(value)) { + extractedDomains.add(value); + } + } + } +} + +export const domainExtractor = new DomainExtractor(); const searchProviders = new SearchProviders(); const searchAdImpression = new SearchAdImpression(); @@ -967,6 +1122,34 @@ export class SearchSERPTelemetryChild extends JSWindowActorChild { }); } } + + if ( + lazy.serpEventTelemetryCategorization && + providerInfo.domainExtraction && + (eventType == "load" || eventType == "pageshow") + ) { + let start = Cu.now(); + let nonAdDomains = domainExtractor.extractDomainsFromDocument( + doc, + providerInfo.domainExtraction.nonAds + ); + let adDomains = domainExtractor.extractDomainsFromDocument( + doc, + providerInfo.domainExtraction.ads + ); + + this.sendAsyncMessage("SearchTelemetry:Domains", { + url, + nonAdDomains, + adDomains, + }); + + ChromeUtils.addProfilerMarker( + "SearchSERPTelemetryChild._checkForAdLink", + start, + "Extract domains from elements" + ); + } } /** diff --git a/browser/actors/SearchSERPTelemetryParent.sys.mjs b/browser/actors/SearchSERPTelemetryParent.sys.mjs index 59c88f186655..4e4011b1f835 100644 --- a/browser/actors/SearchSERPTelemetryParent.sys.mjs +++ 
b/browser/actors/SearchSERPTelemetryParent.sys.mjs @@ -29,6 +29,10 @@ export class SearchSERPTelemetryParent extends JSWindowActorParent { lazy.SearchSERPTelemetry.reportPageImpression(msg.data, browser); break; } + case "SearchTelemetry:Domains": { + lazy.SearchSERPTelemetry.reportPageDomains(msg.data, browser); + break; + } } } } diff --git a/browser/app/profile/firefox.js b/browser/app/profile/firefox.js index 6ded963bd3ba..6a07a4f6921c 100644 --- a/browser/app/profile/firefox.js +++ b/browser/app/profile/firefox.js @@ -724,6 +724,9 @@ pref("browser.search.serpEventTelemetry.enabled", true); pref("browser.search.serpEventTelemetry.enabled", false); #endif +// Enables search SERP telemetry page categorization. +pref("browser.search.serpEventTelemetryCategorization.enabled", false); + // Enable new experimental shopping features. This is solely intended as a // rollout/"emergency stop" button - it will go away once the feature has // rolled out. There will be separate controls for user opt-in/opt-out. 
diff --git a/browser/components/search/SearchSERPTelemetry.sys.mjs b/browser/components/search/SearchSERPTelemetry.sys.mjs index 7051502d5798..3d6d4328dc2a 100644 --- a/browser/components/search/SearchSERPTelemetry.sys.mjs +++ b/browser/components/search/SearchSERPTelemetry.sys.mjs @@ -38,6 +38,13 @@ XPCOMUtils.defineLazyPreferenceGetter( false ); +XPCOMUtils.defineLazyPreferenceGetter( + lazy, + "serpEventTelemetryCategorization", + "browser.search.serpEventTelemetryCategorization.enabled", + false +); + export var SearchSERPTelemetryUtils = { ACTIONS: { CLICKED: "clicked", @@ -322,6 +329,10 @@ class TelemetryHandler { this._contentHandler._reportPageWithAdImpressions(info, browser); } + reportPageDomains(info, browser) { + this._contentHandler._reportPageDomains(info, browser); + } + reportPageImpression(info, browser) { this._contentHandler._reportPageImpression(info, browser); } @@ -1307,6 +1318,138 @@ class ContentHandler { lazy.logConsole.debug("Could not find an impression id."); } } + + /** + * Initiates the categorization and reporting of domains extracted from + * SERPs. + * + * @param {object} info + * The search provider infomation for the page. + * @param {Set} info.nonAdDomains + The non-ad domains extracted from the page. + * @param {Set} info.adDomains + The ad domains extracted from the page. + * @param {object} browser + * The browser associated with the page. + */ + _reportPageDomains(info, browser) { + let item = this._findBrowserItemForURL(info.url); + let telemetryState = item.browserTelemetryStateMap.get(browser); + if (lazy.serpEventTelemetryCategorization && telemetryState) { + let provider = item?.info.provider; + if (provider) { + SearchSERPCategorization.categorizeDomainsFromProvider( + info.nonAdDomains, + info.adDomains, + provider + ); + Services.obs.notifyObservers( + null, + "reported-page-with-categorized-domains" + ); + } + } + } +} + +/** + * Categorizes SERPs. 
+ */ +class DomainCategorizer { + /** + * Categorizes domains extracted from SERPs. + * + * @param {Set} nonAdDomains + * The non-ad domains extracted from the page. + * @param {Set} adDomains + * The ad domains extracted from the page. + * @param {string} provider + * The provider associated with the page. + */ + categorizeDomainsFromProvider(nonAdDomains, adDomains, provider) { + nonAdDomains = this.processDomains(nonAdDomains, provider); + this.applyCategorizationLogic(nonAdDomains, false); + this.logDomains(nonAdDomains, false); + + adDomains = this.processDomains(adDomains, provider); + this.applyCategorizationLogic(adDomains, true); + this.logDomains(adDomains, true); + } + + // TODO: insert logic from DS for reducing extracted domains to a single + // category for the SERP. + applyCategorizationLogic(domains, areAdDomains) {} + + // TODO: replace this method once we know where to send the categorized + // domains and overall SERP category. + logDomains(domains, areAdDomains) { + if (domains?.size) { + lazy.logConsole.debug( + areAdDomains ? "Ad Domains:" : "Domains:", + ...domains + ); + } + } + + /** + * Processes raw domains extracted from the SERP into their final form before + * categorization. + * + * @param {Set} domains + * The domains extracted from the page. + * @param {string} provider + * The provider associated with the page. + * @returns {Set} processedDomains + * The final set of processed domains for a page. + */ + processDomains(domains, provider) { + let processedDomains = new Set(); + + for (let domain of domains) { + // Don't include domains associated with the search provider. + if ( + domain.startsWith(`${provider}.`) || + domain.includes(`.${provider}.`) + ) { + continue; + } + let domainWithoutSubdomains = this.#stripDomainOfSubdomains(domain); + // We may have come across the same domain twice, once with www. prefixed + // and another time without. 
+ if ( + domainWithoutSubdomains && + !processedDomains.has(domainWithoutSubdomains) + ) { + processedDomains.add(domainWithoutSubdomains); + } + } + + return processedDomains; + } + + /** + * Helper to strip domains of any subdomains. + * + * @param {string} domain + * The domain to strip of any subdomains. + * @returns {string} + * The given domain with any subdomains removed, or "" if it can't be parsed. + */ + #stripDomainOfSubdomains(domain) { + let tld; + // Can throw an exception if the input has too few domain levels. + try { + tld = Services.eTLD.getKnownPublicSuffixFromHost(domain); + } catch (ex) { + return ""; + } + + let domainWithoutTLD = domain.substring(0, domain.length - tld.length); + let secondLevelDomain = domainWithoutTLD.split(".").at(-2); + + return secondLevelDomain ? `${secondLevelDomain}.${tld}` : ""; + } } export var SearchSERPTelemetry = new TelemetryHandler(); +export var SearchSERPCategorization = new DomainCategorizer(); diff --git a/browser/components/search/test/browser/telemetry/browser.ini b/browser/components/search/test/browser/telemetry/browser.ini index 1d9478637a2e..d8db428fbf7c 100644 --- a/browser/components/search/test/browser/telemetry/browser.ini +++ b/browser/components/search/test/browser/telemetry/browser.ini @@ -25,6 +25,12 @@ support-files = serp.css [browser_search_telemetry_categorization_timing.js] [browser_search_telemetry_content.js] +[browser_search_telemetry_domain_categorization_extraction.js] +support-files = + searchTelemetryDomainExtraction.html +[browser_search_telemetry_domain_categorization_reporting.js] +support-files = + searchTelemetryDomainCategorizationReporting.html [browser_search_telemetry_engagement_cached.js] support-files = cacheable.html diff --git a/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js b/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js new file mode 100644 index 
000000000000..0eca028f47d5 --- /dev/null +++ b/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_extraction.js @@ -0,0 +1,224 @@ +/* Any copyright is dedicated to the Public Domain. + http://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +/* + * This test ensures we are correctly extracting domains from a SERP. + */ + +ChromeUtils.defineESModuleGetters(this, { + SearchSERPCategorization: "resource:///modules/SearchSERPTelemetry.sys.mjs", + SearchUtils: "resource://gre/modules/SearchUtils.sys.mjs", +}); + +const TESTS = [ + { + title: "Extract domain from href (absolute URL) - one link.", + extractorInfos: [ + { + selectors: + '#test1 [data-layout="organic"] a[data-testid="result-title-a"]', + method: "href", + }, + ], + expectedDomains: ["foobar.com"], + }, + { + title: "Extract domain from href (absolute URL) - multiple links.", + extractorInfos: [ + { + selectors: + '#test2 [data-layout="organic"] a[data-testid="result-title-a"]', + method: "href", + }, + ], + expectedDomains: ["foo.com", "bar.com", "baz.com", "qux.com"], + }, + { + title: "Extract domain from href (relative URL).", + extractorInfos: [ + { + selectors: + '#test3 [data-layout="organic"] a[data-testid="result-title-a"]', + method: "href", + }, + ], + expectedDomains: ["example.org"], + }, + { + title: "Extract domain from data attribute - one link.", + extractorInfos: [ + { + selectors: "#test4 [data-dtld]", + method: "data-attribute", + options: { + dataAttributeKey: "dtld", + }, + }, + ], + expectedDomains: ["www.abc.com"], + }, + { + title: "Extract domain from data attribute - multiple links.", + extractorInfos: [ + { + selectors: "#test5 [data-dtld]", + method: "data-attribute", + options: { + dataAttributeKey: "dtld", + }, + }, + ], + expectedDomains: [ + "www.foo.com", + "www.bar.com", + "www.baz.com", + "www.qux.com", + ], + }, + { + title: "Extract domain from an href's query param value.", + extractorInfos: [ + { + selectors: 
+ '#test6 .js-carousel-item-title, #test6 [data-layout="ad"] [data-testid="result-title-a"]', + method: "href", + options: { + queryParamKey: "ad_domain", + }, + }, + ], + expectedDomains: ["def.com"], + }, + { + title: "Extraction preserves order of domains within the page.", + extractorInfos: [ + { + selectors: + '#test7 [data-layout="organic"] a[data-testid="result-title-a"]', + method: "href", + }, + { + selectors: "#test7 [data-dtld]", + method: "data-attribute", + options: { + dataAttributeKey: "dtld", + }, + }, + { + selectors: + '#test7 .js-carousel-item-title, #test7 [data-layout="ad"] [data-testid="result-title-a"]', + method: "href", + options: { + queryParamKey: "ad_domain", + }, + }, + ], + expectedDomains: ["foobar.com", "www.abc.com", "def.com"], + }, + { + title: "No elements match the selectors.", + extractorInfos: [ + { + selectors: + '#test8 [data-layout="organic"] a[data-testid="result-title-a"]', + method: "href", + }, + ], + expectedDomains: [], + }, + { + title: "Data attribute is present, but value is missing.", + extractorInfos: [ + { + selectors: "#test9 [data-dtld]", + method: "data-attribute", + options: { + dataAttributeKey: "dtld", + }, + }, + ], + expectedDomains: [], + }, + { + title: "Query param is present, but value is missing.", + extractorInfos: [ + { + selectors: '#test10 [data-layout="ad"] [data-testid="result-title-a"]', + method: "href", + options: { + queryParamKey: "ad_domain", + }, + }, + ], + expectedDomains: [], + }, + { + title: "Non-standard URL scheme.", + extractorInfos: [ + { + selectors: + '#test11 [data-layout="organic"] a[data-testid="result-title-a"]', + method: "href", + }, + ], + expectedDomains: [], + }, +]; + +add_setup(async function () { + await waitForIdle(); + + await SpecialPowers.pushPrefEnv({ + set: [ + ["browser.search.log", true], + ["browser.search.serpEventTelemetry.enabled", true], + ["browser.search.serpEventTelemetryCategorization.enabled", true], + ], + }); + + await 
SearchSERPTelemetry.init(); + + registerCleanupFunction(async () => { + SearchSERPTelemetry.overrideSearchTelemetryForTests(); + resetTelemetry(); + }); +}); + +add_task(async function test_domain_extraction_heuristics() { + resetTelemetry(); + let url = getSERPUrl("searchTelemetryDomainExtraction.html"); + info( + "Load a sample SERP where domains need to be extracted in different ways." + ); + let tab = await BrowserTestUtils.openNewForegroundTab(gBrowser, url); + + for (let currentTest of TESTS) { + if (currentTest.title) { + info(currentTest.title); + } + let expectedDomains = new Set(currentTest.expectedDomains); + let actualDomains = await SpecialPowers.spawn( + gBrowser.selectedBrowser, + [currentTest.extractorInfos], + extractorInfos => { + const { domainExtractor } = ChromeUtils.importESModule( + "resource:///actors/SearchSERPTelemetryChild.sys.mjs" + ); + return domainExtractor.extractDomainsFromDocument( + content.document, + extractorInfos + ); + } + ); + + Assert.deepEqual( + Array.from(actualDomains), + Array.from(expectedDomains), + "Domains should have been extracted correctly." + ); + } + + BrowserTestUtils.removeTab(tab); +}); diff --git a/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_reporting.js b/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_reporting.js new file mode 100644 index 000000000000..7c17031f151d --- /dev/null +++ b/browser/components/search/test/browser/telemetry/browser_search_telemetry_domain_categorization_reporting.js @@ -0,0 +1,108 @@ +/* Any copyright is dedicated to the Public Domain. + http://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +/* + * This test ensures we are correctly reporting categorized domains from a SERP. 
+ */ + +ChromeUtils.defineESModuleGetters(this, { + SearchSERPCategorization: "resource:///modules/SearchSERPTelemetry.sys.mjs", + SearchUtils: "resource://gre/modules/SearchUtils.sys.mjs", + sinon: "resource://testing-common/Sinon.sys.mjs", +}); + +const TEST_PROVIDER_INFO = [ + { + telemetryId: "example", + searchPageRegexp: + /^https:\/\/example.org\/browser\/browser\/components\/search\/test\/browser\/telemetry\/searchTelemetry/, + queryParamName: "s", + codeParamName: "abc", + taggedCodes: ["ff"], + adServerAttributes: ["mozAttr"], + nonAdsLinkRegexps: [/^https:\/\/example.com/], + extraAdServersRegexps: [/^https:\/\/example\.com\/ad/], + // The search telemetry entry responsible for targeting the specific results. + domainExtraction: { + ads: [ + { + selectors: "[data-ad-domain]", + method: "data-attribute", + options: { + dataAttributeKey: "adDomain", + }, + }, + { + selectors: ".ad", + method: "href", + options: { + queryParamKey: "ad_domain", + }, + }, + ], + nonAds: [ + { + selectors: "#results .organic a", + method: "href", + }, + ], + }, + components: [ + { + type: SearchSERPTelemetryUtils.COMPONENTS.AD_LINK, + default: true, + }, + ], + }, +]; + +let stub; +add_setup(async function () { + SearchSERPTelemetry.overrideSearchTelemetryForTests(TEST_PROVIDER_INFO); + await waitForIdle(); + + await SpecialPowers.pushPrefEnv({ + set: [ + ["browser.search.log", true], + ["browser.search.serpEventTelemetry.enabled", true], + ["browser.search.serpEventTelemetryCategorization.enabled", true], + ], + }); + + await SearchSERPTelemetry.init(); + + stub = sinon.stub(SearchSERPCategorization, "logDomains"); + + registerCleanupFunction(async () => { + stub.restore(); + SearchSERPTelemetry.overrideSearchTelemetryForTests(); + resetTelemetry(); + }); +}); + +add_task(async function test_categorization_reporting() { + resetTelemetry(); + let url = getSERPUrl("searchTelemetryDomainCategorizationReporting.html"); + info("Load a sample SERP with organic results."); + let 
promise = waitForPageWithCategorizedDomains(); + let tab = await BrowserTestUtils.openNewForegroundTab(gBrowser, url); + await promise; + + // TODO: This needs to be refactored to actually test the reporting of the + // categorization. + Assert.deepEqual( + Array.from(stub.getCall(0).args[0]), + ["foobar.org"], + "Categorization of non-ads should match." + ); + + Assert.deepEqual( + Array.from(stub.getCall(1).args[0]), + ["abc.org", "def.org"], + "Categorization of ads should match." + ); + + BrowserTestUtils.removeTab(tab); +}); diff --git a/browser/components/search/test/browser/telemetry/head.js b/browser/components/search/test/browser/telemetry/head.js index a05441642e43..c66baafad3af 100644 --- a/browser/components/search/test/browser/telemetry/head.js +++ b/browser/components/search/test/browser/telemetry/head.js @@ -340,6 +340,22 @@ async function waitForPageWithAdImpressions() { }); } +async function waitForPageWithCategorizedDomains() { + return new Promise(resolve => { + let listener = win => { + Services.obs.removeObserver( + listener, + "reported-page-with-categorized-domains" + ); + resolve(); + }; + Services.obs.addObserver( + listener, + "reported-page-with-categorized-domains" + ); + }); +} + async function promiseImpressionReceived() { return TestUtils.waitForCondition(() => { let adImpressions = Glean.serp.adImpression.testGetValue() ?? []; diff --git a/browser/components/search/test/browser/telemetry/searchTelemetryDomainCategorization.html b/browser/components/search/test/browser/telemetry/searchTelemetryDomainCategorization.html new file mode 100644 index 000000000000..b9569ba2d637 --- /dev/null +++ b/browser/components/search/test/browser/telemetry/searchTelemetryDomainCategorization.html @@ -0,0 +1,45 @@ + + + + + + Document + + + + +
+ + +
+ + diff --git a/browser/components/search/test/browser/telemetry/searchTelemetryDomainCategorizationReporting.html b/browser/components/search/test/browser/telemetry/searchTelemetryDomainCategorizationReporting.html new file mode 100644 index 000000000000..b9569ba2d637 --- /dev/null +++ b/browser/components/search/test/browser/telemetry/searchTelemetryDomainCategorizationReporting.html @@ -0,0 +1,45 @@ + + + + + + Document + + + + +
+ + +
+ + diff --git a/browser/components/search/test/browser/telemetry/searchTelemetryDomainExtraction.html b/browser/components/search/test/browser/telemetry/searchTelemetryDomainExtraction.html new file mode 100644 index 000000000000..3e359fc455e8 --- /dev/null +++ b/browser/components/search/test/browser/telemetry/searchTelemetryDomainExtraction.html @@ -0,0 +1,72 @@ + + + + + + Document + + + + + diff --git a/browser/components/search/test/unit/test_search_telemetry_categorization_process_domains.js b/browser/components/search/test/unit/test_search_telemetry_categorization_process_domains.js new file mode 100644 index 000000000000..643c20eb7d80 --- /dev/null +++ b/browser/components/search/test/unit/test_search_telemetry_categorization_process_domains.js @@ -0,0 +1,90 @@ +/* Any copyright is dedicated to the Public Domain. + http://creativecommons.org/publicdomain/zero/1.0/ */ + +/* + * This test ensures we are correctly processing the domains that have been + * extracted from a SERP. + */ + +ChromeUtils.defineESModuleGetters(this, { + BrowserSearchTelemetry: "resource:///modules/BrowserSearchTelemetry.sys.mjs", + SearchSERPCategorization: "resource:///modules/SearchSERPTelemetry.sys.mjs", + SearchSERPTelemetry: "resource:///modules/SearchSERPTelemetry.sys.mjs", + SearchUtils: "resource://gre/modules/SearchUtils.sys.mjs", + sinon: "resource://testing-common/Sinon.sys.mjs", +}); + +// Links including the provider name are not extracted. 
+const PROVIDER = "example"; + +const TESTS = [ + { + title: "Domains matching the provider.", + domains: ["example.com", "www.example.com", "www.foobar.com"], + expected: ["foobar.com"], + }, + { + title: "Second-level domains to a top-level domain.", + domains: [ + "www.foobar.gc.ca", + "www.foobar.gov.uk", + "foobar.co.uk", + "www.foobar.co.il", + ], + expected: ["foobar.gc.ca", "foobar.gov.uk", "foobar.co.uk", "foobar.co.il"], + }, + { + title: "Long subdomain.", + domains: ["ab.cd.ef.gh.foobar.com"], + expected: ["foobar.com"], + }, + { + title: "Same top-level domain.", + domains: ["foobar.com", "www.foobar.com", "abc.def.foobar.com"], + expected: ["foobar.com"], + }, + { + title: "Empty input.", + domains: [""], + expected: [], + }, +]; + +add_setup(async function () { + Services.prefs.setBoolPref(SearchUtils.BROWSER_SEARCH_PREF + "log", true); + Services.prefs.setBoolPref( + SearchUtils.BROWSER_SEARCH_PREF + "serpEventTelemetry.enabled", + true + ); + Services.prefs.setBoolPref( + SearchUtils.BROWSER_SEARCH_PREF + + "serpEventTelemetryCategorization.enabled", + true + ); + + // Required or else BrowserSearchTelemetry will throw. + sinon.stub(BrowserSearchTelemetry, "shouldRecordSearchCount").returns(true); + await SearchSERPTelemetry.init(); +}); + +add_task(async function test_parsing_extracted_urls() { + for (let i = 0; i < TESTS.length; i++) { + let currentTest = TESTS[i]; + let domains = new Set(currentTest.domains); + + if (currentTest.title) { + info(currentTest.title); + } + let expectedDomains = new Set(currentTest.expected); + let actualDomains = SearchSERPCategorization.processDomains( + domains, + PROVIDER + ); + + Assert.deepEqual( + Array.from(actualDomains), + Array.from(expectedDomains), + "Domains should have been parsed correctly." 
+ ); + } +}); diff --git a/browser/components/search/test/unit/xpcshell.ini b/browser/components/search/test/unit/xpcshell.ini index 7feeb6d38c22..c91061a4a4d9 100644 --- a/browser/components/search/test/unit/xpcshell.ini +++ b/browser/components/search/test/unit/xpcshell.ini @@ -2,6 +2,7 @@ skip-if = toolkit == 'android' # bug 1730213 firefox-appdir = browser +[test_search_telemetry_categorization_process_domains.js] [test_search_telemetry_config_validation.js] support-files = ../../schema/search-telemetry-schema.json