331 строка
13 KiB
JavaScript
331 строка
13 KiB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
"use strict";
|
|
|
|
ChromeUtils.import("resource://gre/modules/Services.jsm");
|
|
|
|
ChromeUtils.defineModuleGetter(this, "PlacesUtils",
|
|
"resource://gre/modules/PlacesUtils.jsm");
|
|
|
|
// Default history time segments. startTime/endTime are offsets in seconds
// relative to now (see queryVisits): a startTime of 0 makes the query begin at
// the epoch, an endTime of 0 makes it end at the current time. weightPosition
// is the input handed to calculateScore when deriving a segment's weight.
const DEFAULT_TIME_SEGMENTS = [
  {
    id: "hour",
    startTime: 3600,
    endTime: 0,
    weightPosition: 1,
  },
  {
    id: "day",
    startTime: 86400,
    endTime: 3600,
    weightPosition: 0.75,
  },
  {
    id: "week",
    startTime: 604800,
    endTime: 86400,
    weightPosition: 0.5,
  },
  {
    id: "weekPlus",
    startTime: 0,
    endTime: 604800,
    weightPosition: 0.25,
  },
  {
    id: "alltime",
    startTime: 0,
    endTime: 0,
    weightPosition: 0.25,
  },
];
|
|
|
|
// Default scoring parameter sets, keyed by parameter set name. The individual
// factors are consumed by calculateTimeSegmentWeights (recencyFactor),
// calculateScoreForParameterSet (frequencyFactor, perfectFrequencyVisits) and
// calculateItemRelevanceScore (the remaining fields).
const DEFAULT_PARAMETER_SETS = {
  "linear-frequency": {
    recencyFactor: 0.4,
    frequencyFactor: 0.5,
    combinedDomainFactor: 0.5,
    perfectFrequencyVisits: 10,
    perfectCombinedDomainScore: 2,
    multiDomainBoost: 0.1,
    itemScoreFactor: 0,
  },
};
|
|
|
|
// Default upper bound for the number of results of a single history query
// (assigned to the query options' maxResults in queryVisits).
const DEFAULT_MAX_HISTORY_QUERY_RESULTS = 1000;
|
|
|
|
/**
 * Shallow-merges objects with Object.assign semantics.
 *
 * The first argument acts as the merge target: it is mutated in place and
 * returned. Pass a fresh object ({}) as the first argument to avoid modifying
 * an existing one.
 *
 * @param {...Object} args The target followed by the source objects
 * @return {Object} The (mutated) first argument
 */
function merge(...args) {
  return Object.assign(...args);
}
|
|
|
|
/**
 * Provides functionality to personalize content recommendations by calculating
 * user domain affinity scores. These scores are used to calculate relevance
 * scores for items/recs/stories that have domain affinities.
 *
 * The algorithm works as follows:
 *
 * - The recommendation endpoint returns a settings object containing
 *   timeSegments and parametersets.
 *
 * - For every time segment we calculate the corresponding domain visit counts,
 *   yielding result objects of the following structure: {"mozilla.org": 12,
 *   "mozilla.com": 34} (see UserDomainAffinityProvider#queryVisits)
 *
 * - These visit counts are transformed to domain affinity scores for all
 *   provided parameter sets: {"mozilla.org": {"paramSet1": 0.8,
 *   "paramSet2": 0.9}, "mozilla.com": {"paramSet1": 1, "paramSet2": 0.9}}
 *   (see UserDomainAffinityProvider#calculateScoresForParameterSets)
 *
 * - The parameter sets provide factors for weighting which allows for
 *   flexible targeting. The functionality to calculate final scores can
 *   be seen in UserDomainAffinityProvider#calculateScore
 *
 * - The user domain affinity scores are summed up across all time segments
 *   (capped at 1), see
 *   UserDomainAffinityProvider#calculateAllUserDomainAffinityScores
 *
 * - An item's domain affinities are matched to the user's domain affinity
 *   scores by calculating an item relevance score
 *   (see UserDomainAffinityProvider#calculateItemRelevanceScore)
 *
 * - The item relevance scores are used to sort items (see TopStoriesFeed for
 *   more details)
 *
 * - The data structure was chosen to allow for fast cache lookups during
 *   relevance score calculation. While user domain affinities are calculated
 *   infrequently (i.e. only once a day), the item relevance score (potentially)
 *   needs to be calculated every time the feed updates. Therefore allowing cache
 *   lookups of scores[domain][parameterSet] is beneficial
 */
this.UserDomainAffinityProvider = class UserDomainAffinityProvider {
  /**
   * @param timeSegments The time segments to query history for
   * @param parameterSets The parameter sets used for scoring
   * @param maxHistoryQueryResults Maximum number of results per history query
   * @param version Opaque version value, stored and returned by getAffinities
   * @param scores Optional previously calculated scores. When provided,
   *               parameterSets and scores are stored as given and no history
   *               query is performed; otherwise the parameter sets are prepared
   *               and the scores are calculated from browsing history.
   */
  constructor(
    timeSegments = DEFAULT_TIME_SEGMENTS,
    parameterSets = DEFAULT_PARAMETER_SETS,
    maxHistoryQueryResults = DEFAULT_MAX_HISTORY_QUERY_RESULTS,
    version,
    scores) {
    this.timeSegments = timeSegments;
    this.maxHistoryQueryResults = maxHistoryQueryResults;
    this.version = version;
    if (scores) {
      this.parameterSets = parameterSets;
      this.scores = scores;
    } else {
      this.parameterSets = this.prepareParameterSets(parameterSets);
      this.scores = this.calculateAllUserDomainAffinityScores();
    }
  }

  /**
   * Adds dynamic parameters to the given parameter sets that need to be
   * computed based on time segments.
   *
   * The given parameter sets are not modified; each set is copied before the
   * computed fields are added (so shared defaults stay untouched).
   *
   * @param ps The parameter sets
   * @return Updated parameter sets with additional fields (i.e. timeSegmentWeights)
   */
  prepareParameterSets(ps) {
    return Object
      .keys(ps)
      // Add timeSegmentWeights to a copy of each param set,
      // e.g. timeSegmentWeights: {"hour": 1, "day": 0.8915, ...}
      .reduce((acc, k) => merge(acc, {
        [k]: merge({}, ps[k], {
          timeSegmentWeights: this.calculateTimeSegmentWeights(ps[k].recencyFactor),
        }),
      }), {});
  }

  /**
   * Calculates a time segment weight based on the provided recencyFactor.
   *
   * @param recencyFactor The recency factor indicating how to weigh recency
   * @return An object containing time segment weights: {"hour": 0.987, "day": 1}
   */
  calculateTimeSegmentWeights(recencyFactor) {
    return this.timeSegments
      .reduce((acc, cur) => merge(acc, ({[cur.id]: this.calculateScore(cur.weightPosition, 1, recencyFactor)})), {});
  }

  /**
   * Calculates user domain affinity scores based on browsing history and the
   * available times segments and parameter sets.
   *
   * @return The combined scores across all time segments, or an empty object
   *         if there are no time segments.
   */
  calculateAllUserDomainAffinityScores() {
    return this.timeSegments
      // Calculate parameter set specific domain scores for each time segment
      // => [{"a.com": {"ps1": 12, "ps2": 34}, "b.com": {"ps1": 56, "ps2": 78}}, ...]
      .map(ts => this.calculateUserDomainAffinityScores(ts))
      // Keep format, but reduce to single object, with combined scores across all time segments
      // => {"a.com": {"ps1": 2, "ps2": 2}, "b.com": {"ps1": 3, "ps2": 3}}
      // The initial value keeps this from throwing when there are no time segments.
      .reduce((acc, cur) => this._combineScores(acc, cur), {});
  }

  /**
   * Calculates the user domain affinity scores for the given time segment.
   *
   * @param ts The time segment
   * @return The parameter specific scores for all domains with visits in
   *         this time segment: {"a.com": {"ps1": 12, "ps2": 34}, "b.com" ...}
   */
  calculateUserDomainAffinityScores(ts) {
    // Returns domains and visit counts for this time segment: {"a.com": 1, "b.com": 2}
    let visits = this.queryVisits(ts);

    return Object
      .keys(visits)
      .reduce((acc, d) => merge(acc, {[d]: this.calculateScoresForParameterSets(ts, visits[d])}), {});
  }

  /**
   * Calculates the scores for all parameter sets for the given time segment
   * and domain visit count.
   *
   * @param ts The time segment
   * @param vc The domain visit count in the given time segment
   * @return The parameter specific scores for the visit count in
   *         this time segment: {"ps1": 12, "ps2": 34}
   */
  calculateScoresForParameterSets(ts, vc) {
    return Object
      .keys(this.parameterSets)
      .reduce((acc, ps) => merge(acc, {[ps]: this.calculateScoreForParameterSet(ts, vc, this.parameterSets[ps])}), {});
  }

  /**
   * Calculates the final affinity score in the given time segment for the given parameter set
   *
   * @param timeSegment The time segment
   * @param visitCount The domain visit count in the given time segment
   * @param parameterSet The parameter set to use for scoring
   * @return The final score
   */
  calculateScoreForParameterSet(timeSegment, visitCount, parameterSet) {
    return this.calculateScore(
      visitCount * parameterSet.timeSegmentWeights[timeSegment.id],
      parameterSet.perfectFrequencyVisits,
      parameterSet.frequencyFactor);
  }

  /**
   * Keeps the same format, but reduces the two objects to a single object, with
   * combined scores across all time segments => {"a.com": {"ps1": 2, "ps2": 2},
   * "b.com": {"ps1": 3, "ps2": 3}}
   */
  _combineScores(a, b) {
    // Merge both score objects so we get a combined object holding all domains.
    // This is so we can combine them without missing domains that are in a and not in b and vice versa.
    const c = merge({}, a, b);
    return Object.keys(c).reduce((acc, d) => merge(acc, this._combine(a, b, c, d)), {});
  }

  /**
   * Sums the parameter set specific scores of domain d from a and b, capping
   * every sum at 1.
   *
   * @param a First score object
   * @param b Second score object
   * @param c Merged score object, used to enumerate the parameter sets of d
   * @param d The domain
   * @return A single-domain object: {"a.com": {"ps1": 0.7, "ps2": 1}}
   */
  _combine(a, b, c, d) {
    return Object
      .keys(c[d])
      // Seeding with an empty per-domain object keeps this from throwing when
      // the domain has no parameter set scores.
      .reduce((acc, ps) => {
        const scoreA = (a[d] && a[d][ps]) || 0;
        const scoreB = (b[d] && b[d][ps]) || 0;
        acc[d][ps] = Math.min(1, scoreA + scoreB);
        return acc;
      }, {[d]: {}});
  }

  /**
   * Calculates a value on the curve described by the provided parameters. The curve we're using is
   * (a^(b*x) - 1) / (a^b - 1): https://www.desmos.com/calculator/maqhpttupp
   *
   * @param {number} score A value between 0 and maxScore, representing x.
   * @param {number} maxScore Highest possible score.
   * @param {number} factor The slope describing the curve to get to maxScore. A low slope value
   * [0, 0.5] results in a log-shaped curve, a high slope [0.5, 1] results in a exp-shaped curve,
   * a slope of exactly 0.5 is linear.
   * @param {number} ease Adjusts how much bend is in the curve i.e. how dramatic the maximum
   * effect of the slope can be. This represents b in the formula above.
   * @return {number} the final score
   */
  calculateScore(score, maxScore, factor, ease = 2) {
    let a = 0;
    let x = Math.max(0, score / maxScore);

    if (x >= 1) {
      return 1;
    }

    if (factor === 0.5) {
      return x;
    }

    if (factor < 0.5) {
      // We want a log-shaped curve so we scale "a" between 0 and .49
      a = (factor / 0.5) * 0.49;
    } else if (factor > 0.5) {
      // We want an exp-shaped curve so we scale "a" between 1 and 10
      a = 1 + (factor - 0.5) / 0.5 * 9;
    }

    return (Math.pow(a, ease * x) - 1) / (Math.pow(a, ease) - 1);
  }

  /**
   * Queries the visit counts in the given time segment.
   *
   * @param ts the time segment
   * @return the visit count object: {"a.com": 1, "b.com": 2}
   */
  queryVisits(ts) {
    const visitCounts = {};
    const query = PlacesUtils.history.getNewQuery();
    const wwwRegEx = /^www\./;

    // Both bounds are expressed relative to now. A missing/zero startTime
    // starts the query at the epoch, a missing/zero endTime ends it at now.
    query.beginTimeReference = query.TIME_RELATIVE_NOW;
    query.beginTime = ts.startTime ? -(ts.startTime * 1000 * 1000) : -(Date.now() * 1000);

    query.endTimeReference = query.TIME_RELATIVE_NOW;
    query.endTime = ts.endTime ? -(ts.endTime * 1000 * 1000) : 0;

    const options = PlacesUtils.history.getNewQueryOptions();
    options.sortingMode = options.SORT_BY_VISITCOUNT_DESCENDING;
    options.maxResults = this.maxHistoryQueryResults;

    const {root} = PlacesUtils.history.executeQuery(query, options);
    root.containerOpen = true;
    try {
      for (let i = 0; i < root.childCount; i++) {
        const node = root.getChild(i);
        // Aggregate by host, treating "www.example.com" and "example.com" alike
        const host = Services.io.newURI(node.uri).host.replace(wwwRegEx, "");
        visitCounts[host] = (visitCounts[host] || 0) + node.accessCount;
      }
    } finally {
      // Always close the result container, even if reading a node throws
      root.containerOpen = false;
    }
    return visitCounts;
  }

  /**
   * Calculates an item's relevance score.
   *
   * @param item the item (story), must contain domain affinities, otherwise
   *             the item's own item_score is returned.
   * @return the calculated item's score, or item.item_score if the item has no
   *         domain_affinities or references an unknown parameter set.
   */
  calculateItemRelevanceScore(item) {
    const params = this.parameterSets[item.parameter_set];
    if (!item.domain_affinities || !params) {
      return item.item_score;
    }

    const scores = Object
      .keys(item.domain_affinities)
      .reduce((acc, d) => {
        let userDomainAffinityScore = this.scores[d] ? this.scores[d][item.parameter_set] : false;
        // Only domains the user has a non-zero affinity score for count as matches
        if (userDomainAffinityScore) {
          acc.combinedDomainScore += userDomainAffinityScore * item.domain_affinities[d];
          acc.matchingDomainsCount++;
        }
        return acc;
      }, {combinedDomainScore: 0, matchingDomainsCount: 0});

    // Boost the score as configured in the provided parameter set
    const boostedCombinedDomainScore = scores.combinedDomainScore *
      Math.pow(params.multiDomainBoost + 1, scores.matchingDomainsCount);

    // Calculate what the score would be if the item score is ignored
    const normalizedCombinedDomainScore = this.calculateScore(boostedCombinedDomainScore,
      params.perfectCombinedDomainScore,
      params.combinedDomainFactor);

    // Calculate the final relevance score using the itemScoreFactor. The itemScoreFactor
    // allows weighting the item score in relation to the normalizedCombinedDomainScore:
    // An itemScoreFactor of 1 results in the item score and ignores the combined domain score
    // An itemScoreFactor of 0.5 results in the average of item score and combined domain score
    // An itemScoreFactor of 0 results in the combined domain score and ignores the item score
    return params.itemScoreFactor * (item.item_score - normalizedCombinedDomainScore) + normalizedCombinedDomainScore;
  }

  /**
   * Returns an object holding the settings and affinity scores of this provider instance.
   */
  getAffinities() {
    return {
      timeSegments: this.timeSegments,
      parameterSets: this.parameterSets,
      maxHistoryQueryResults: this.maxHistoryQueryResults,
      version: this.version,
      scores: this.scores,
    };
  }
};
|
|
|
|
// Names this module makes available to importers (JSM export list).
const EXPORTED_SYMBOLS = ["UserDomainAffinityProvider"];
|