Bug 1854196 - Modify categorization logic based on input from Data Science. r=jteow

Differential Revision: https://phabricator.services.mozilla.com/D197215
This commit is contained in:
Stephanie Cunnane 2023-12-28 22:37:41 +00:00
Родитель 77f14e48ab
Коммит 3c8e24ffbf
2 изменённых файлов: 123 добавлений и 24 удалений

Просмотреть файл

@ -45,7 +45,10 @@ export const SEARCH_TELEMETRY_SHARED = {
const impressionIdsWithoutEngagementsSet = new Set();
// Tunable constants read by SERPCategorizer.applyCategorizationLogic when
// reducing the domains extracted from a SERP to a single category.
export const CATEGORIZATION_SETTINGS = {
// A domain is counted as inconclusive when one of its category candidates is
// the inconclusive category with a score greater than or equal to this value.
HIGHEST_SCORE_THRESHOLD: 50,
// Only this many domains, taken from the front of the incoming set, are
// categorized; the rest are dropped before any counting happens.
MAX_DOMAINS_TO_CATEGORIZE: 10,
// Initial value of the running maximum adjusted score; a candidate has to
// score strictly above it to become the first top category.
MINIMUM_SCORE: 0,
// Initial rank used for the penalty divisor. Scores are divided by
// log2(rank), so starting at 2 makes the first divisor exactly 1.
STARTING_RANK: 2,
};
ChromeUtils.defineLazyGetter(lazy, "logConsole", () => {
@ -1663,7 +1666,6 @@ class SERPCategorizer {
return resultsToReport;
}
// TODO: check with DS to get the final aggregation logic. (Bug 1854196)
/**
* Applies the logic for reducing extracted domains to a single category for
* the SERP.
@ -1675,13 +1677,13 @@ class SERPCategorizer {
* "num_unknown" and "num_inconclusive".
*/
applyCategorizationLogic(domains) {
let totalScoresPerCategory = {};
let domainInfo = {};
let domainsCount = 0;
let unknownsCount = 0;
let inconclusivesCount = 0;
// Per a request from Data Science, we need to limit the number of domains
// categorized to 10 non ad domains and 10 ad domains.
// categorized to 10 non-ad domains and 10 ad domains.
domains = new Set(
[...domains].slice(0, CATEGORIZATION_SETTINGS.MAX_DOMAINS_TO_CATEGORIZE)
);
@ -1690,40 +1692,48 @@ class SERPCategorizer {
domainsCount++;
let categoryCandidates = SearchSERPDomainToCategoriesMap.get(domain);
if (!categoryCandidates.length) {
unknownsCount++;
continue;
}
for (let candidate of categoryCandidates) {
if (
candidate.category ==
SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE
) {
inconclusivesCount++;
continue;
}
if (totalScoresPerCategory[candidate.category]) {
totalScoresPerCategory[candidate.category] += candidate.score;
} else {
totalScoresPerCategory[candidate.category] = candidate.score;
}
let isInconclusive =
(categoryCandidates.length == 1 &&
categoryCandidates[0].category ==
SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE) ||
categoryCandidates.some(
c =>
c.category ==
SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE &&
c.score >= CATEGORIZATION_SETTINGS.HIGHEST_SCORE_THRESHOLD
);
if (isInconclusive) {
inconclusivesCount++;
continue;
}
domainInfo[domain] = categoryCandidates;
}
let finalCategory;
let topCategories = [];
// Determine if all domains were unknown or inconclusive.
if (unknownsCount + inconclusivesCount == domainsCount) {
finalCategory = SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE;
} else {
let maxScore = Math.max(...Object.values(totalScoresPerCategory));
// Handles ties by randomly returning one of the categories with the
// maximum score.
let topCategories = [];
for (let category in totalScoresPerCategory) {
if (totalScoresPerCategory[category] == maxScore) {
topCategories.push(Number(category));
let maxScore = CATEGORIZATION_SETTINGS.MINIMUM_SCORE;
let rank = CATEGORIZATION_SETTINGS.STARTING_RANK;
for (let categoryCandidates of Object.values(domainInfo)) {
for (let { category, score } of categoryCandidates) {
let adjustedScore = score / Math.log2(rank);
if (adjustedScore > maxScore) {
maxScore = adjustedScore;
topCategories = [category];
} else if (adjustedScore == maxScore) {
topCategories.push(Number(category));
}
rank++;
}
}
finalCategory =

Просмотреть файл

@ -72,6 +72,28 @@ const TEST_DOMAIN_TO_CATEGORIES_MAP_TIE = {
"+gl+dBhWE0nx0AM69m2g5w==": [11, 50, 12, 50],
};
// Fixture for test_rank_penalization_equal_scores: ten entries that all share
// the same second value (45) while the first value runs from 1 to 10.
// NOTE(review): values appear to be flat [category, score, ...] pairs and the
// keys appear to be hashed domain names (presumably test51.com - test60.com);
// confirm against SearchSERPDomainToCategoriesMap.
const TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_1 = {
"VSXaqgDKYWrJ/yjsFomUdg==": [1, 45],
"6re74Kk34n2V6VCdLmCD5w==": [2, 45],
"s8gOGIaFnly5hHX7nPncnw==": [3, 45],
"zfRJyKV+2jd1RKNsSHm9pw==": [4, 45],
"zcW+KbRfLRO6Dljf5qnuwQ==": [5, 45],
"Rau9mfbBcIRiRQIliUxkow==": [6, 45],
"4AFhUOmLQ8804doOsI4jBA==": [7, 45],
"YZ3aEL73MR+Cjog0D7A24w==": [8, 45],
"crMclD9rwInEQ30DpZLg+g==": [9, 45],
"/r7oPRoE6LJAE95nuwmu7w==": [10, 45],
};
// Fixture for test_rank_penalization_highest_score_lower_on_page: two entries
// that each list two candidates. The largest raw value (94) sits in the
// second entry, and the test expects its category ("2") to be reported.
// NOTE(review): values appear to be flat [category, score, ...] pairs and the
// keys appear to be hashed domain names (presumably test61.com and
// test62.com); confirm against SearchSERPDomainToCategoriesMap.
const TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_2 = {
"sHWSmFwSYL3snycBZCY8Kg==": [1, 35, 2, 4],
"FZ5zPYh6ByI0KGWKkmpDoA==": [1, 5, 2, 94],
};
// Fixture for test_high_inconclusive_causes_domain_to_be_ignored: a single
// entry with two candidates, which the test expects to be counted as
// inconclusive.
// NOTE(review): values appear to be flat [category, score, ...] pairs, with 0
// presumably being the inconclusive category and 52 exceeding
// HIGHEST_SCORE_THRESHOLD; the key is presumably the hash of test63.com.
// Confirm against SearchSERPTelemetryUtils.CATEGORIZATION.
const TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_3 = {
"WvodmXTKbmLPVwFSai5uMQ==": [0, 52, 3, 45],
};
add_setup(async () => {
Services.prefs.setBoolPref("browser.search.log", true);
Services.prefs.setBoolPref(
@ -282,3 +304,70 @@ add_task(async function test_categorization_tie() {
"Should report the correct counts for the various domain types."
);
});
/**
 * Categorizes ten domains using the RANK_PENALIZATION_1 fixture, in which
 * every entry carries the same raw score, and checks that category "1" is
 * reported with all ten domains counted and none unknown or inconclusive.
 */
add_task(async function test_rank_penalization_equal_scores() {
  SearchSERPDomainToCategoriesMap.overrideMapForTests(
    TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_1
  );

  // Set insertion order is the order the domains are handed to the
  // categorizer.
  const orderedDomains = [
    "test51.com",
    "test52.com",
    "test53.com",
    "test54.com",
    "test55.com",
    "test56.com",
    "test57.com",
    "test58.com",
    "test59.com",
    "test60.com",
  ];
  const actual = SearchSERPCategorization.applyCategorizationLogic(
    new Set(orderedDomains)
  );

  const expected = {
    category: "1",
    num_domains: 10,
    num_inconclusive: 0,
    num_unknown: 0,
  };
  Assert.deepEqual(
    actual,
    expected,
    "Should report the correct values for categorizing the SERP."
  );
});
/**
 * Categorizes two domains using the RANK_PENALIZATION_2 fixture and checks
 * that category "2" is reported even though its highest raw score belongs to
 * the domain that comes second in the set.
 */
add_task(async function test_rank_penalization_highest_score_lower_on_page() {
  SearchSERPDomainToCategoriesMap.overrideMapForTests(
    TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_2
  );

  const orderedDomains = ["test61.com", "test62.com"];
  const actual = SearchSERPCategorization.applyCategorizationLogic(
    new Set(orderedDomains)
  );

  const expected = {
    category: "2",
    num_domains: 2,
    num_inconclusive: 0,
    num_unknown: 0,
  };
  Assert.deepEqual(
    actual,
    expected,
    "Should report the correct values for categorizing the SERP."
  );
});
/**
 * Categorizes a single domain using the RANK_PENALIZATION_3 fixture and
 * checks that the SERP is reported as inconclusive, with that one domain
 * counted in num_inconclusive.
 */
add_task(async function test_high_inconclusive_causes_domain_to_be_ignored() {
  SearchSERPDomainToCategoriesMap.overrideMapForTests(
    TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_3
  );

  const actual = SearchSERPCategorization.applyCategorizationLogic(
    new Set(["test63.com"])
  );

  const expected = {
    category: SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE,
    num_domains: 1,
    num_inconclusive: 1,
    num_unknown: 0,
  };
  Assert.deepEqual(
    actual,
    expected,
    "Should report the correct values for categorizing the SERP."
  );
});