diff --git a/browser/components/search/SearchSERPTelemetry.sys.mjs b/browser/components/search/SearchSERPTelemetry.sys.mjs index 5b7227f22b23..2659cad3e7d4 100644 --- a/browser/components/search/SearchSERPTelemetry.sys.mjs +++ b/browser/components/search/SearchSERPTelemetry.sys.mjs @@ -45,7 +45,10 @@ export const SEARCH_TELEMETRY_SHARED = { const impressionIdsWithoutEngagementsSet = new Set(); export const CATEGORIZATION_SETTINGS = { + HIGHEST_SCORE_THRESHOLD: 50, MAX_DOMAINS_TO_CATEGORIZE: 10, + MINIMUM_SCORE: 0, + STARTING_RANK: 2, }; ChromeUtils.defineLazyGetter(lazy, "logConsole", () => { @@ -1663,7 +1666,6 @@ class SERPCategorizer { return resultsToReport; } - // TODO: check with DS to get the final aggregation logic. (Bug 1854196) /** * Applies the logic for reducing extracted domains to a single category for * the SERP. @@ -1675,13 +1677,13 @@ class SERPCategorizer { * "num_unknown" and "num_inconclusive". */ applyCategorizationLogic(domains) { - let totalScoresPerCategory = {}; + let domainInfo = {}; let domainsCount = 0; let unknownsCount = 0; let inconclusivesCount = 0; // Per a request from Data Science, we need to limit the number of domains - // categorized to 10 non ad domains and 10 ad domains. + // categorized to 10 non-ad domains and 10 ad domains. domains = new Set( [...domains].slice(0, CATEGORIZATION_SETTINGS.MAX_DOMAINS_TO_CATEGORIZE) ); @@ -1690,40 +1692,48 @@ class SERPCategorizer { domainsCount++; let categoryCandidates = SearchSERPDomainToCategoriesMap.get(domain); + if (!categoryCandidates.length) { unknownsCount++; continue; } - for (let candidate of categoryCandidates) { - if ( - candidate.category == - SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE - ) { - inconclusivesCount++; - continue; - } - - if (totalScoresPerCategory[candidate.category]) { - totalScoresPerCategory[candidate.category] += candidate.score; - } else { - totalScoresPerCategory[candidate.category] = candidate.score; - } + let isInconclusive = + (categoryCandidates.length == 1 && + categoryCandidates[0].category == + SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE) || + categoryCandidates.some( + c => + c.category == + SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE && + c.score >= CATEGORIZATION_SETTINGS.HIGHEST_SCORE_THRESHOLD + ); + if (isInconclusive) { + inconclusivesCount++; + continue; } + + domainInfo[domain] = categoryCandidates; } let finalCategory; + let topCategories = []; // Determine if all domains were unknown or inconclusive. if (unknownsCount + inconclusivesCount == domainsCount) { finalCategory = SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE; } else { - let maxScore = Math.max(...Object.values(totalScoresPerCategory)); - // Handles ties by randomly returning one of the categories with the - // maximum score. - let topCategories = []; - for (let category in totalScoresPerCategory) { - if (totalScoresPerCategory[category] == maxScore) { - topCategories.push(Number(category)); + let maxScore = CATEGORIZATION_SETTINGS.MINIMUM_SCORE; + let rank = CATEGORIZATION_SETTINGS.STARTING_RANK; + for (let categoryCandidates of Object.values(domainInfo)) { + for (let { category, score } of categoryCandidates) { + let adjustedScore = score / Math.log2(rank); + if (adjustedScore > maxScore) { + maxScore = adjustedScore; + topCategories = [category]; + } else if (adjustedScore == maxScore) { + topCategories.push(Number(category)); + } + rank++; } } finalCategory = diff --git a/browser/components/search/test/unit/test_search_telemetry_categorization_logic.js b/browser/components/search/test/unit/test_search_telemetry_categorization_logic.js index 84d4a46ea3a2..307c973ed4a0 100644 --- a/browser/components/search/test/unit/test_search_telemetry_categorization_logic.js +++ b/browser/components/search/test/unit/test_search_telemetry_categorization_logic.js @@ -72,6 +72,28 @@ const TEST_DOMAIN_TO_CATEGORIES_MAP_TIE = { "+gl+dBhWE0nx0AM69m2g5w==": [11, 50, 12, 50], }; +const TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_1 = { + "VSXaqgDKYWrJ/yjsFomUdg==": [1, 45], + "6re74Kk34n2V6VCdLmCD5w==": [2, 45], + "s8gOGIaFnly5hHX7nPncnw==": [3, 45], + "zfRJyKV+2jd1RKNsSHm9pw==": [4, 45], + "zcW+KbRfLRO6Dljf5qnuwQ==": [5, 45], + "Rau9mfbBcIRiRQIliUxkow==": [6, 45], + "4AFhUOmLQ8804doOsI4jBA==": [7, 45], + "YZ3aEL73MR+Cjog0D7A24w==": [8, 45], + "crMclD9rwInEQ30DpZLg+g==": [9, 45], + "/r7oPRoE6LJAE95nuwmu7w==": [10, 45], +}; + +const TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_2 = { + "sHWSmFwSYL3snycBZCY8Kg==": [1, 35, 2, 4], + "FZ5zPYh6ByI0KGWKkmpDoA==": [1, 5, 2, 94], +}; + +const TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_3 = { + "WvodmXTKbmLPVwFSai5uMQ==": [0, 52, 3, 45], +}; + add_setup(async () => { Services.prefs.setBoolPref("browser.search.log", true); Services.prefs.setBoolPref( @@ -282,3 +304,70 @@ add_task(async function test_categorization_tie() { "Should report the correct counts for the various domain types." ); }); + +add_task(async function test_rank_penalization_equal_scores() { + SearchSERPDomainToCategoriesMap.overrideMapForTests( + TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_1 + ); + + let domains = new Set([ + "test51.com", + "test52.com", + "test53.com", + "test54.com", + "test55.com", + "test56.com", + "test57.com", + "test58.com", + "test59.com", + "test60.com", + ]); + + let resultsToReport = + SearchSERPCategorization.applyCategorizationLogic(domains); + + Assert.deepEqual( + resultsToReport, + { category: "1", num_domains: 10, num_inconclusive: 0, num_unknown: 0 }, + "Should report the correct values for categorizing the SERP." + ); +}); + +add_task(async function test_rank_penalization_highest_score_lower_on_page() { + SearchSERPDomainToCategoriesMap.overrideMapForTests( + TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_2 + ); + + let domains = new Set(["test61.com", "test62.com"]); + + let resultsToReport = + SearchSERPCategorization.applyCategorizationLogic(domains); + + Assert.deepEqual( + resultsToReport, + { category: "2", num_domains: 2, num_inconclusive: 0, num_unknown: 0 }, + "Should report the correct values for categorizing the SERP." + ); +}); + +add_task(async function test_high_inconclusive_causes_domain_to_be_ignored() { + SearchSERPDomainToCategoriesMap.overrideMapForTests( + TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_3 + ); + + let domains = new Set(["test63.com"]); + + let resultsToReport = + SearchSERPCategorization.applyCategorizationLogic(domains); + + Assert.deepEqual( + resultsToReport, + { + category: SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE, + num_domains: 1, + num_inconclusive: 1, + num_unknown: 0, + }, + "Should report the correct values for categorizing the SERP." + ); +});