зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1854196 - Modify categorization logic based on input from Data Science. r=jteow
Differential Revision: https://phabricator.services.mozilla.com/D197215
This commit is contained in:
Родитель
77f14e48ab
Коммит
3c8e24ffbf
|
@ -45,7 +45,10 @@ export const SEARCH_TELEMETRY_SHARED = {
|
|||
const impressionIdsWithoutEngagementsSet = new Set();
|
||||
|
||||
/**
 * Tunable constants used by SERPCategorizer.applyCategorizationLogic.
 *
 * HIGHEST_SCORE_THRESHOLD: compared against the score of an INCONCLUSIVE
 *   candidate (`c.score >= HIGHEST_SCORE_THRESHOLD`) when deciding whether a
 *   domain is counted as inconclusive.
 * MAX_DOMAINS_TO_CATEGORIZE: upper bound used to slice the incoming set of
 *   domains before any of them are categorized.
 * MINIMUM_SCORE: initial value of the running maximum adjusted score.
 * STARTING_RANK: initial value of the rank counter; each score is divided by
 *   Math.log2(rank), and Math.log2(2) is 1, so a score at the starting rank is
 *   left unchanged.
 */
export const CATEGORIZATION_SETTINGS = {
|
||||
HIGHEST_SCORE_THRESHOLD: 50,
|
||||
MAX_DOMAINS_TO_CATEGORIZE: 10,
|
||||
MINIMUM_SCORE: 0,
|
||||
STARTING_RANK: 2,
|
||||
};
|
||||
|
||||
ChromeUtils.defineLazyGetter(lazy, "logConsole", () => {
|
||||
|
@ -1663,7 +1666,6 @@ class SERPCategorizer {
|
|||
return resultsToReport;
|
||||
}
|
||||
|
||||
// TODO: check with DS to get the final aggregation logic. (Bug 1854196)
|
||||
/**
|
||||
* Applies the logic for reducing extracted domains to a single category for
|
||||
* the SERP.
|
||||
|
@ -1675,13 +1677,13 @@ class SERPCategorizer {
|
|||
* "num_unknown" and "num_inconclusive".
|
||||
*/
|
||||
applyCategorizationLogic(domains) {
|
||||
let totalScoresPerCategory = {};
|
||||
let domainInfo = {};
|
||||
let domainsCount = 0;
|
||||
let unknownsCount = 0;
|
||||
let inconclusivesCount = 0;
|
||||
|
||||
// Per a request from Data Science, we need to limit the number of domains
|
||||
// categorized to 10 non ad domains and 10 ad domains.
|
||||
// categorized to 10 non-ad domains and 10 ad domains.
|
||||
domains = new Set(
|
||||
[...domains].slice(0, CATEGORIZATION_SETTINGS.MAX_DOMAINS_TO_CATEGORIZE)
|
||||
);
|
||||
|
@ -1690,40 +1692,48 @@ class SERPCategorizer {
|
|||
domainsCount++;
|
||||
|
||||
let categoryCandidates = SearchSERPDomainToCategoriesMap.get(domain);
|
||||
|
||||
if (!categoryCandidates.length) {
|
||||
unknownsCount++;
|
||||
continue;
|
||||
}
|
||||
|
||||
for (let candidate of categoryCandidates) {
|
||||
if (
|
||||
candidate.category ==
|
||||
SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE
|
||||
) {
|
||||
inconclusivesCount++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (totalScoresPerCategory[candidate.category]) {
|
||||
totalScoresPerCategory[candidate.category] += candidate.score;
|
||||
} else {
|
||||
totalScoresPerCategory[candidate.category] = candidate.score;
|
||||
}
|
||||
let isInconclusive =
|
||||
(categoryCandidates.length == 1 &&
|
||||
categoryCandidates[0].category ==
|
||||
SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE) ||
|
||||
categoryCandidates.some(
|
||||
c =>
|
||||
c.category ==
|
||||
SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE &&
|
||||
c.score >= CATEGORIZATION_SETTINGS.HIGHEST_SCORE_THRESHOLD
|
||||
);
|
||||
if (isInconclusive) {
|
||||
inconclusivesCount++;
|
||||
continue;
|
||||
}
|
||||
|
||||
domainInfo[domain] = categoryCandidates;
|
||||
}
|
||||
|
||||
let finalCategory;
|
||||
let topCategories = [];
|
||||
// Determine if all domains were unknown or inconclusive.
|
||||
if (unknownsCount + inconclusivesCount == domainsCount) {
|
||||
finalCategory = SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE;
|
||||
} else {
|
||||
let maxScore = Math.max(...Object.values(totalScoresPerCategory));
|
||||
// Handles ties by randomly returning one of the categories with the
|
||||
// maximum score.
|
||||
let topCategories = [];
|
||||
for (let category in totalScoresPerCategory) {
|
||||
if (totalScoresPerCategory[category] == maxScore) {
|
||||
topCategories.push(Number(category));
|
||||
let maxScore = CATEGORIZATION_SETTINGS.MINIMUM_SCORE;
|
||||
let rank = CATEGORIZATION_SETTINGS.STARTING_RANK;
|
||||
for (let categoryCandidates of Object.values(domainInfo)) {
|
||||
for (let { category, score } of categoryCandidates) {
|
||||
let adjustedScore = score / Math.log2(rank);
|
||||
if (adjustedScore > maxScore) {
|
||||
maxScore = adjustedScore;
|
||||
topCategories = [category];
|
||||
} else if (adjustedScore == maxScore) {
|
||||
topCategories.push(Number(category));
|
||||
}
|
||||
rank++;
|
||||
}
|
||||
}
|
||||
finalCategory =
|
||||
|
|
|
@ -72,6 +72,28 @@ const TEST_DOMAIN_TO_CATEGORIES_MAP_TIE = {
|
|||
"+gl+dBhWE0nx0AM69m2g5w==": [11, 50, 12, 50],
|
||||
};
|
||||
|
||||
// Fixture for test_rank_penalization_equal_scores: ten entries, each with a
// distinct first value (1 through 10) and the same second value (45).
// NOTE(review): values look like flat [category, score, ...] lists and keys
// look like hashed forms of the test domains - confirm against the map loader.
const TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_1 = {
|
||||
"VSXaqgDKYWrJ/yjsFomUdg==": [1, 45],
|
||||
"6re74Kk34n2V6VCdLmCD5w==": [2, 45],
|
||||
"s8gOGIaFnly5hHX7nPncnw==": [3, 45],
|
||||
"zfRJyKV+2jd1RKNsSHm9pw==": [4, 45],
|
||||
"zcW+KbRfLRO6Dljf5qnuwQ==": [5, 45],
|
||||
"Rau9mfbBcIRiRQIliUxkow==": [6, 45],
|
||||
"4AFhUOmLQ8804doOsI4jBA==": [7, 45],
|
||||
"YZ3aEL73MR+Cjog0D7A24w==": [8, 45],
|
||||
"crMclD9rwInEQ30DpZLg+g==": [9, 45],
|
||||
"/r7oPRoE6LJAE95nuwmu7w==": [10, 45],
|
||||
};
|
||||
|
||||
// Fixture for test_rank_penalization_highest_score_lower_on_page: two entries
// that both list values for 1 and 2, with the largest value (94) attached to 2
// in the second entry.
// NOTE(review): values look like flat [category, score, ...] lists and keys
// look like hashed forms of the test domains - confirm against the map loader.
const TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_2 = {
|
||||
"sHWSmFwSYL3snycBZCY8Kg==": [1, 35, 2, 4],
|
||||
"FZ5zPYh6ByI0KGWKkmpDoA==": [1, 5, 2, 94],
|
||||
};
|
||||
|
||||
// Fixture for test_high_inconclusive_causes_domain_to_be_ignored: a single
// entry whose first pair is (0, 52) and second pair is (3, 45).
// NOTE(review): 0 is presumably the INCONCLUSIVE category value, making 52 an
// inconclusive score above HIGHEST_SCORE_THRESHOLD (50) - confirm against
// SearchSERPTelemetryUtils.CATEGORIZATION.
const TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_3 = {
|
||||
"WvodmXTKbmLPVwFSai5uMQ==": [0, 52, 3, 45],
|
||||
};
|
||||
|
||||
add_setup(async () => {
|
||||
Services.prefs.setBoolPref("browser.search.log", true);
|
||||
Services.prefs.setBoolPref(
|
||||
|
@ -282,3 +304,70 @@ add_task(async function test_categorization_tie() {
|
|||
"Should report the correct counts for the various domain types."
|
||||
);
|
||||
});
|
||||
|
||||
// Installs the RANK_PENALIZATION_1 fixture, categorizes ten domains
// (test51.com through test60.com) and expects category "1" with all ten
// domains counted and none reported as inconclusive or unknown.
// NOTE(review): every fixture entry carries the same score, so the expected
// winner presumably follows from the rank penalty favouring the earliest
// domain - confirm that test51.com maps to the entry for category 1.
add_task(async function test_rank_penalization_equal_scores() {
|
||||
SearchSERPDomainToCategoriesMap.overrideMapForTests(
|
||||
TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_1
|
||||
);
|
||||
|
||||
let domains = new Set([
|
||||
"test51.com",
|
||||
"test52.com",
|
||||
"test53.com",
|
||||
"test54.com",
|
||||
"test55.com",
|
||||
"test56.com",
|
||||
"test57.com",
|
||||
"test58.com",
|
||||
"test59.com",
|
||||
"test60.com",
|
||||
]);
|
||||
|
||||
let resultsToReport =
|
||||
SearchSERPCategorization.applyCategorizationLogic(domains);
|
||||
|
||||
Assert.deepEqual(
|
||||
resultsToReport,
|
||||
{ category: "1", num_domains: 10, num_inconclusive: 0, num_unknown: 0 },
|
||||
"Should report the correct values for categorizing the SERP."
|
||||
);
|
||||
});
|
||||
|
||||
// Installs the RANK_PENALIZATION_2 fixture, categorizes test61.com and
// test62.com and expects category "2" with both domains counted and none
// reported as inconclusive or unknown.
// NOTE(review): presumably test62.com maps to the entry holding the 94 score,
// so the test shows a sufficiently high score still winning after the rank
// penalty - confirm the domain-to-key mapping.
add_task(async function test_rank_penalization_highest_score_lower_on_page() {
|
||||
SearchSERPDomainToCategoriesMap.overrideMapForTests(
|
||||
TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_2
|
||||
);
|
||||
|
||||
let domains = new Set(["test61.com", "test62.com"]);
|
||||
|
||||
let resultsToReport =
|
||||
SearchSERPCategorization.applyCategorizationLogic(domains);
|
||||
|
||||
Assert.deepEqual(
|
||||
resultsToReport,
|
||||
{ category: "2", num_domains: 2, num_inconclusive: 0, num_unknown: 0 },
|
||||
"Should report the correct values for categorizing the SERP."
|
||||
);
|
||||
});
|
||||
|
||||
// Installs the RANK_PENALIZATION_3 fixture, categorizes the single domain
// test63.com and expects the SERP to be reported as INCONCLUSIVE with that one
// domain counted as inconclusive and none as unknown.
// NOTE(review): presumably the fixture's (0, 52) pair is an INCONCLUSIVE
// candidate whose score meets HIGHEST_SCORE_THRESHOLD, which is what causes
// the domain's other category to be ignored - confirm the value of
// SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE.
add_task(async function test_high_inconclusive_causes_domain_to_be_ignored() {
|
||||
SearchSERPDomainToCategoriesMap.overrideMapForTests(
|
||||
TEST_DOMAIN_TO_CATEGORIES_MAP_RANK_PENALIZATION_3
|
||||
);
|
||||
|
||||
let domains = new Set(["test63.com"]);
|
||||
|
||||
let resultsToReport =
|
||||
SearchSERPCategorization.applyCategorizationLogic(domains);
|
||||
|
||||
Assert.deepEqual(
|
||||
resultsToReport,
|
||||
{
|
||||
category: SearchSERPTelemetryUtils.CATEGORIZATION.INCONCLUSIVE,
|
||||
num_domains: 1,
|
||||
num_inconclusive: 1,
|
||||
num_unknown: 0,
|
||||
},
|
||||
"Should report the correct values for categorizing the SERP."
|
||||
);
|
||||
});
|
||||
|
|
Загрузка…
Ссылка в новой задаче