// activity-stream/lib/UserDomainAffinityProvider.jsm
//
// 331 lines
// 13 KiB
// JavaScript

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";
ChromeUtils.import("resource://gre/modules/Services.jsm");
ChromeUtils.defineModuleGetter(this, "PlacesUtils",
"resource://gre/modules/PlacesUtils.jsm");
// Default time segments used to bucket history visits. startTime and endTime
// are offsets in seconds back from "now"; queryVisits treats a startTime of 0
// as "no lower bound" and an endTime of 0 as "now". weightPosition is the
// input used when computing each segment's weight.
const DEFAULT_TIME_SEGMENTS = [
  {
    id: "hour",
    startTime: 60 * 60,
    endTime: 0,
    weightPosition: 1,
  },
  {
    id: "day",
    startTime: 24 * 60 * 60,
    endTime: 60 * 60,
    weightPosition: 0.75,
  },
  {
    id: "week",
    startTime: 7 * 24 * 60 * 60,
    endTime: 24 * 60 * 60,
    weightPosition: 0.5,
  },
  {
    id: "weekPlus",
    startTime: 0,
    endTime: 7 * 24 * 60 * 60,
    weightPosition: 0.25,
  },
  {
    id: "alltime",
    startTime: 0,
    endTime: 0,
    weightPosition: 0.25,
  },
];
// Default scoring parameter sets, keyed by parameter set name. Each set holds
// the factors consumed by the provider's score calculations.
const DEFAULT_PARAMETER_SETS = {
  "linear-frequency": {
    recencyFactor: 0.4,
    frequencyFactor: 0.5,
    combinedDomainFactor: 0.5,
    perfectFrequencyVisits: 10,
    perfectCombinedDomainScore: 2,
    multiDomainBoost: 0.1,
    itemScoreFactor: 0,
  },
};
// Default cap on the number of results requested per history query; the
// provider assigns it to the query options' maxResults in queryVisits.
const DEFAULT_MAX_HISTORY_QUERY_RESULTS = 1000;
/**
 * Thin wrapper around Object.assign: copies the own enumerable properties of
 * every later argument onto the first argument and returns that first argument.
 * Note that the first argument is mutated; pass {} first to get a fresh object.
 *
 * @param {...Object} args Target object followed by any number of sources
 * @return {Object} The (mutated) first argument
 */
function merge(...args) {
  const merged = Object.assign(...args);
  return merged;
}
/**
* Provides functionality to personalize content recommendations by calculating
* user domain affinity scores. These scores are used to calculate relevance
* scores for items/recs/stories that have domain affinities.
*
* The algorithm works as follows:
*
* - The recommendation endpoint returns a settings object containing
* timeSegments and parameterSets.
*
* - For every time segment we calculate the corresponding domain visit counts,
* yielding result objects of the following structure: {"mozilla.org": 12,
* "mozilla.com": 34} (see UserDomainAffinityProvider#queryVisits)
*
* - These visit counts are transformed to domain affinity scores for all
* provided parameter sets: {"mozilla.org": {"paramSet1": 0.8,
* "paramSet2": 0.9}, "mozilla.com": {"paramSet1": 1, "paramSet2": 0.9}}
* (see UserDomainAffinityProvider#calculateScoresForParameterSets)
*
* - The parameter sets provide factors for weighting which allows for
* flexible targeting. The functionality to calculate final scores can
* be seen in UserDomainAffinityProvider#calculateScores
*
* - The user domain affinity scores are summed up across all time segments
* (see UserDomainAffinityProvider#calculateAllUserDomainAffinityScores)
*
* - An item's domain affinities are matched to the user's domain affinity
* scores by calculating an item relevance score
* (see UserDomainAffinityProvider#calculateItemRelevanceScore)
*
* - The item relevance scores are used to sort items (see TopStoriesFeed for
* more details)
*
* - The data structure was chosen to allow for fast cache lookups during
* relevance score calculation. While user domain affinities are calculated
* infrequently (i.e. only once a day), the item relevance score (potentially)
* needs to be calculated every time the feed updates. Therefore allowing cache
* lookups of scores[domain][parameterSet] is beneficial
*/
this.UserDomainAffinityProvider = class UserDomainAffinityProvider {
  /**
   * Creates a provider either from previously computed scores or, when no
   * scores are given, by computing them from the browsing history.
   *
   * @param {Array} timeSegments Time segments of the form
   *                {id, startTime, endTime, weightPosition}
   * @param {Object} parameterSets Parameter sets keyed by name
   * @param {number} maxHistoryQueryResults Max number of results per history query
   * @param version Value stored on the instance and returned by getAffinities
   * @param {Object} scores Previously computed scores. When provided, they are
   *                 stored as-is together with parameterSets (which is then NOT
   *                 passed through prepareParameterSets) and no history query runs.
   */
  constructor(
    timeSegments = DEFAULT_TIME_SEGMENTS,
    parameterSets = DEFAULT_PARAMETER_SETS,
    maxHistoryQueryResults = DEFAULT_MAX_HISTORY_QUERY_RESULTS,
    version,
    scores) {
    this.timeSegments = timeSegments;
    this.maxHistoryQueryResults = maxHistoryQueryResults;
    this.version = version;
    if (scores) {
      this.parameterSets = parameterSets;
      this.scores = scores;
    } else {
      this.parameterSets = this.prepareParameterSets(parameterSets);
      this.scores = this.calculateAllUserDomainAffinityScores();
    }
  }

  /**
   * Adds dynamic parameters to the given parameter sets that need to be
   * computed based on time segments.
   *
   * The input is not modified: every parameter set is shallow-copied before
   * the computed field is added, so shared objects (such as the module-level
   * defaults) are never mutated. An empty input yields an empty object.
   *
   * @param ps The parameter sets
   * @return New parameter sets with additional fields (i.e. timeSegmentWeights),
   *         e.g. timeSegmentWeights: {"hour": 1, "day": 0.8915, ...}
   */
  prepareParameterSets(ps) {
    return Object.keys(ps).reduce((prepared, name) => {
      const timeSegmentWeights = this.calculateTimeSegmentWeights(ps[name].recencyFactor);
      prepared[name] = Object.assign({}, ps[name], {timeSegmentWeights});
      return prepared;
    }, {});
  }

  /**
   * Calculates a time segment weight based on the provided recencyFactor.
   *
   * @param recencyFactor The recency factor indicating how to weigh recency
   * @return An object containing time segment weights: {"hour": 0.987, "day": 1}
   */
  calculateTimeSegmentWeights(recencyFactor) {
    return this.timeSegments.reduce((weights, segment) => {
      weights[segment.id] = this.calculateScore(segment.weightPosition, 1, recencyFactor);
      return weights;
    }, {});
  }

  /**
   * Calculates user domain affinity scores based on browsing history and the
   * available time segments and parameter sets.
   *
   * @return The combined scores across all time segments:
   *         {"a.com": {"ps1": 2, "ps2": 2}, "b.com": {"ps1": 3, "ps2": 3}}.
   *         An empty object if there are no time segments.
   */
  calculateAllUserDomainAffinityScores() {
    return this.timeSegments
      // Calculate parameter set specific domain scores for each time segment
      // => [{"a.com": {"ps1": 12, "ps2": 34}, "b.com": {"ps1": 56, "ps2": 78}}, ...]
      .map(ts => this.calculateUserDomainAffinityScores(ts))
      // Keep format, but reduce to single object, with combined scores across all time segments.
      // The explicit initial value keeps this from throwing when there are no time segments.
      .reduce((acc, cur) => this._combineScores(acc, cur), {});
  }

  /**
   * Calculates the user domain affinity scores for the given time segment.
   *
   * @param ts The time segment
   * @return The parameter specific scores for all domains with visits in
   *         this time segment: {"a.com": {"ps1": 12, "ps2": 34}, "b.com" ...}
   */
  calculateUserDomainAffinityScores(ts) {
    // Domains and visit counts for this time segment: {"a.com": 1, "b.com": 2}
    const visits = this.queryVisits(ts);
    return Object.keys(visits).reduce((acc, domain) => {
      acc[domain] = this.calculateScoresForParameterSets(ts, visits[domain]);
      return acc;
    }, {});
  }

  /**
   * Calculates the scores for all parameter sets for the given time segment
   * and domain visit count.
   *
   * @param ts The time segment
   * @param vc The domain visit count in the given time segment
   * @return The parameter specific scores for the visit count in
   *         this time segment: {"ps1": 12, "ps2": 34}
   */
  calculateScoresForParameterSets(ts, vc) {
    return Object.keys(this.parameterSets).reduce((acc, name) => {
      acc[name] = this.calculateScoreForParameterSet(ts, vc, this.parameterSets[name]);
      return acc;
    }, {});
  }

  /**
   * Calculates the final affinity score in the given time segment for the given parameter set
   *
   * @param timeSegment The time segment
   * @param visitCount The domain visit count in the given time segment
   * @param parameterSet The parameter set to use for scoring (must carry timeSegmentWeights)
   * @return The final score
   */
  calculateScoreForParameterSet(timeSegment, visitCount, parameterSet) {
    return this.calculateScore(
      visitCount * parameterSet.timeSegmentWeights[timeSegment.id],
      parameterSet.perfectFrequencyVisits,
      parameterSet.frequencyFactor);
  }

  /**
   * Keeps the same format, but reduces the two objects to a single object, with
   * combined scores across all time segments => {"a.com": {"ps1": 2, "ps2": 2},
   * "b.com": {"ps1": 3, "ps2": 3}}
   *
   * @param a First score object
   * @param b Second score object
   * @return A new object holding the combined scores of all domains in a or b
   */
  _combineScores(a, b) {
    // Merge both score objects so we get a combined object holding all domains.
    // This is so we can combine them without missing domains that are in a and not in b and vice versa.
    const c = merge({}, a, b);
    return Object.keys(c).reduce((acc, d) => merge(acc, this._combine(a, b, c, d)), {});
  }

  /**
   * Sums the parameter set specific scores of a single domain, capping each
   * sum at 1.
   *
   * @param a First score object
   * @param b Second score object
   * @param c Merged object of a and b; c[d] determines which parameter sets are combined
   * @param d The domain to combine
   * @return {[d]: {"ps1": ..., "ps2": ...}}; the inner object is empty if c[d]
   *         has no parameter sets
   */
  _combine(a, b, c, d) {
    const combined = Object.keys(c[d]).reduce((acc, ps) => {
      // A domain (or parameter set) missing on one side contributes 0
      const fromA = (a[d] && a[d][ps]) || 0;
      const fromB = (b[d] && b[d][ps]) || 0;
      acc[ps] = Math.min(1, fromA + fromB);
      return acc;
    }, {});
    return {[d]: combined};
  }

  /**
   * Calculates a value on the curve described by the provided parameters. The curve we're using is
   * (a^(b*x) - 1) / (a^b - 1): https://www.desmos.com/calculator/maqhpttupp
   *
   * @param {number} score A value between 0 and maxScore, representing x.
   * @param {number} maxScore Highest possible score.
   * @param {number} factor The slope describing the curve to get to maxScore. A low slope value
   * [0, 0.5] results in a log-shaped curve, a high slope [0.5, 1] results in a exp-shaped curve,
   * a slope of exactly 0.5 is linear.
   * @param {number} ease Adjusts how much bend is in the curve i.e. how dramatic the maximum
   * effect of the slope can be. This represents b in the formula above.
   * @return {number} the final score; 1 whenever score / maxScore is at least 1
   */
  calculateScore(score, maxScore, factor, ease = 2) {
    let a = 0;
    const x = Math.max(0, score / maxScore);
    if (x >= 1) {
      return 1;
    }
    if (factor === 0.5) {
      return x;
    }
    if (factor < 0.5) {
      // We want a log-shaped curve: for a factor in [0, 0.5) this puts "a" in [0, 0.49)
      a = (factor / 0.5) * 0.49;
    } else if (factor > 0.5) {
      // We want an exp-shaped curve: for a factor in (0.5, 1] this puts "a" in (1, 10]
      a = 1 + (factor - 0.5) / 0.5 * 9;
    }
    return (Math.pow(a, ease * x) - 1) / (Math.pow(a, ease) - 1);
  }

  /**
   * Queries the visit counts in the given time segment.
   *
   * Hosts are normalized by stripping a leading "www." and the access counts
   * of all result nodes sharing a host are summed.
   *
   * @param ts the time segment; startTime/endTime are seconds relative to now.
   *           A missing or 0 startTime means "no lower bound", a missing or 0
   *           endTime means "now".
   * @return the visit count object: {"a.com": 1, "b.com": 2}
   */
  queryVisits(ts) {
    const visitCounts = {};
    const query = PlacesUtils.history.getNewQuery();
    const wwwRegEx = /^www\./;

    // Segment times are given in seconds; the query is fed seconds * 1000 * 1000
    // (and Date.now() * 1000), negated because both bounds are relative to now.
    query.beginTimeReference = query.TIME_RELATIVE_NOW;
    query.beginTime = ts.startTime ? -(ts.startTime * 1000 * 1000) : -(Date.now() * 1000);
    query.endTimeReference = query.TIME_RELATIVE_NOW;
    query.endTime = ts.endTime ? -(ts.endTime * 1000 * 1000) : 0;

    const options = PlacesUtils.history.getNewQueryOptions();
    options.sortingMode = options.SORT_BY_VISITCOUNT_DESCENDING;
    options.maxResults = this.maxHistoryQueryResults;

    const {root} = PlacesUtils.history.executeQuery(query, options);
    root.containerOpen = true;
    try {
      for (let i = 0; i < root.childCount; i++) {
        const node = root.getChild(i);
        const host = Services.io.newURI(node.uri).host.replace(wwwRegEx, "");
        visitCounts[host] = (visitCounts[host] || 0) + node.accessCount;
      }
    } finally {
      // Always close the result container, even if reading a node throws
      // (e.g. when a URI cannot be parsed or exposes no host).
      root.containerOpen = false;
    }
    return visitCounts;
  }

  /**
   * Calculates an item's relevance score.
   *
   * @param item the item (story), must contain domain affinities and reference
   *             a known parameter set, otherwise the item's own item_score is
   *             returned unchanged.
   * @return the calculated item's score, or item.item_score if the item has no
   *         domain_affinities or references an unknown parameter set.
   */
  calculateItemRelevanceScore(item) {
    const params = this.parameterSets[item.parameter_set];
    if (!item.domain_affinities || !params) {
      return item.item_score;
    }

    const scores = Object
      .keys(item.domain_affinities)
      .reduce((acc, d) => {
        const userDomainAffinityScore = this.scores[d] ? this.scores[d][item.parameter_set] : false;
        // Domains the user has no (or a zero) affinity score for do not count as a match
        if (userDomainAffinityScore) {
          acc.combinedDomainScore += userDomainAffinityScore * item.domain_affinities[d];
          acc.matchingDomainsCount++;
        }
        return acc;
      }, {combinedDomainScore: 0, matchingDomainsCount: 0});

    // Boost the score as configured in the provided parameter set
    const boostedCombinedDomainScore = scores.combinedDomainScore *
      Math.pow(params.multiDomainBoost + 1, scores.matchingDomainsCount);

    // Calculate what the score would be if the item score is ignored
    const normalizedCombinedDomainScore = this.calculateScore(boostedCombinedDomainScore,
      params.perfectCombinedDomainScore,
      params.combinedDomainFactor);

    // Calculate the final relevance score using the itemScoreFactor. The itemScoreFactor
    // allows weighting the item score in relation to the normalizedCombinedDomainScore:
    // An itemScoreFactor of 1 results in the item score and ignores the combined domain score
    // An itemScoreFactor of 0.5 results in the average of item score and combined domain score
    // An itemScoreFactor of 0 results in the combined domain score and ignores the item score
    return params.itemScoreFactor * (item.item_score - normalizedCombinedDomainScore) + normalizedCombinedDomainScore;
  }

  /**
   * Returns an object holding the settings and affinity scores of this provider instance.
   *
   * @return {timeSegments, parameterSets, maxHistoryQueryResults, version, scores}
   */
  getAffinities() {
    return {
      timeSegments: this.timeSegments,
      parameterSets: this.parameterSets,
      maxHistoryQueryResults: this.maxHistoryQueryResults,
      version: this.version,
      scores: this.scores,
    };
  }
};
// Names this module exposes to importers (JSM export list).
const EXPORTED_SYMBOLS = ["UserDomainAffinityProvider"];