// gecko-dev/browser/components/newtab/lib/PersonalityProvider.jsm
//
// 464 lines
// 14 KiB
// JavaScript

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";
const { RemoteSettings } = ChromeUtils.import(
"resource://services-settings/remote-settings.js"
);
const { actionCreators: ac } = ChromeUtils.import(
"resource://activity-stream/common/Actions.jsm"
);
ChromeUtils.defineModuleGetter(
this,
"perfService",
"resource://activity-stream/common/PerfService.jsm"
);
const { NaiveBayesTextTagger } = ChromeUtils.import(
"resource://activity-stream/lib/NaiveBayesTextTagger.jsm"
);
const { NmfTextTagger } = ChromeUtils.import(
"resource://activity-stream/lib/NmfTextTagger.jsm"
);
const { RecipeExecutor } = ChromeUtils.import(
"resource://activity-stream/lib/RecipeExecutor.jsm"
);
ChromeUtils.defineModuleGetter(
this,
"NewTabUtils",
"resource://gre/modules/NewTabUtils.jsm"
);
const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
const { XPCOMUtils } = ChromeUtils.import(
"resource://gre/modules/XPCOMUtils.jsm"
);
ChromeUtils.defineModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
XPCOMUtils.defineLazyGlobalGetters(this, ["fetch"]);
XPCOMUtils.defineLazyGetter(this, "gTextDecoder", () => new TextDecoder());
// Lazy getter whose value is a promise for the base URL of Remote Settings
// attachments. The URL is read from the `capabilities.attachments.base_url`
// field of the JSON document served at the root of the settings server, so
// consumers have to `await baseAttachmentsURL`.
XPCOMUtils.defineLazyGetter(this, "baseAttachmentsURL", async () => {
  const server = Services.prefs.getCharPref("services.settings.server");
  const response = await fetch(`${server}/`, { credentials: "omit" });
  const serverInfo = await response.json();
  return serverInfo.capabilities.attachments.base_url;
});
// Directory, inside the local profile directory, where downloaded attachments
// are stored on disk.
const PERSONALITY_PROVIDER_DIR = OS.Path.join(
OS.Constants.Path.localProfileDir,
"personality-provider"
);
// Names of the Remote Settings collections that hold the recipe and the models.
const RECIPE_NAME = "personality-provider-recipe";
const MODELS_NAME = "personality-provider-models";
/**
 * Computes the SHA-256 digest of a string using nsICryptoHash.
 *
 * @param {string} aStr the data to hash; it is fed to the hasher through an
 *                      nsIStringInputStream
 * @returns {string} the digest as lowercase hexadecimal, two digits per byte
 */
function getHash(aStr) {
  const hasher = Cc["@mozilla.org/security/hash;1"].createInstance(
    Ci.nsICryptoHash
  );
  hasher.init(Ci.nsICryptoHash.SHA256);

  const input = Cc["@mozilla.org/io/string-input-stream;1"].createInstance(
    Ci.nsIStringInputStream
  );
  input.data = aStr;
  hasher.updateFromStream(input, -1);

  // finish(false) hands back the raw digest as a binary string: one
  // character per byte, which is turned into its two-digit hex code below.
  const digest = hasher.finish(false);
  let hex = "";
  for (let i = 0; i < digest.length; i++) {
    // toString(16) already yields lowercase digits.
    hex += digest
      .charCodeAt(i)
      .toString(16)
      .padStart(2, "0");
  }
  return hex;
}
/**
* V2 provider builds and ranks an interest profile (also called an “interest vector”) off the browse history.
* This allows Firefox to classify pages into topics, by examining the text found on the page.
* It does this by looking at the history text content, title, and description.
*/
this.PersonalityProvider = class PersonalityProvider {
constructor(
timeSegments,
parameterSets,
maxHistoryQueryResults,
version,
scores,
v2Params
) {
this.v2Params = v2Params || {};
this.dispatch = this.v2Params.dispatch || (() => {});
this.modelKeys = this.v2Params.modelKeys;
this.timeSegments = timeSegments;
this.parameterSets = parameterSets;
this.maxHistoryQueryResults = maxHistoryQueryResults;
this.version = version;
this.scores = scores || {};
this.interestConfig = this.scores.interestConfig;
this.interestVector = this.scores.interestVector;
this.onSync = this.onSync.bind(this);
this.setupSyncAttachment(RECIPE_NAME);
this.setupSyncAttachment(MODELS_NAME);
}
async onSync(event) {
const {
data: { created, updated, deleted },
} = event;
// Remove every removed attachment.
const toRemove = deleted.concat(updated.map(u => u.old));
await Promise.all(toRemove.map(record => this.deleteAttachment(record)));
// Download every new/updated attachment.
const toDownload = created.concat(updated.map(u => u.new));
await Promise.all(
toDownload.map(record => this.maybeDownloadAttachment(record))
);
}
// Registers this provider's onSync handler for the "sync" event of the given
// Remote Settings collection.
setupSyncAttachment(collection) {
RemoteSettings(collection).on("sync", this.onSync);
}
/**
* Downloads the attachment to disk assuming the dir already exists
* and any existing files matching the filename are clobbered.
*/
async _downloadAttachment(record) {
const {
attachment: { location, filename },
} = record;
const remoteFilePath = (await baseAttachmentsURL) + location;
const localFilePath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);
const headers = new Headers();
headers.set("Accept-Encoding", "gzip");
const resp = await fetch(remoteFilePath, { headers, credentials: "omit" });
if (!resp.ok) {
Cu.reportError(`Failed to fetch ${remoteFilePath}: ${resp.status}`);
return;
}
const buffer = await resp.arrayBuffer();
const bytes = new Uint8Array(buffer);
await OS.File.writeAtomic(localFilePath, bytes, {
tmpPath: `${localFilePath}.tmp`,
});
}
/**
* Attempts to download the attachment, but only if it doesn't already exist.
*/
async maybeDownloadAttachment(record, retries = 3) {
const {
attachment: { filename, hash, size },
} = record;
await OS.File.makeDir(PERSONALITY_PROVIDER_DIR);
const localFilePath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);
let retry = 0;
while (
retry++ < retries &&
(!(await OS.File.exists(localFilePath)) ||
(await OS.File.stat(localFilePath)).size !== size ||
getHash(await this._getFileStr(localFilePath)) !== hash)
) {
await this._downloadAttachment(record);
}
}
async deleteAttachment(record) {
const {
attachment: { filename },
} = record;
await OS.File.makeDir(PERSONALITY_PROVIDER_DIR);
const path = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);
await OS.File.remove(path, { ignoreAbsent: true });
return OS.File.removeEmptyDir(PERSONALITY_PROVIDER_DIR, {
ignoreAbsent: true,
});
}
/**
* Gets contents of the attachment if it already exists on file,
* and if not attempts to download it.
*/
async getAttachment(record) {
const {
attachment: { filename },
} = record;
const filepath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);
try {
await this.maybeDownloadAttachment(record);
return JSON.parse(await this._getFileStr(filepath));
} catch (error) {
Cu.reportError(`Failed to load ${filepath}: ${error.message}`);
}
return {};
}
// A helper function to read and decode a file, it isn't a stand alone function.
// If you use this, ensure you check the file exists and you have a try catch.
async _getFileStr(filepath) {
const binaryData = await OS.File.read(filepath);
return gTextDecoder.decode(binaryData);
}
async init(callback) {
const perfStart = perfService.absNow();
this.interestConfig = this.interestConfig || (await this.getRecipe());
if (!this.interestConfig) {
this.dispatch(
ac.PerfEvent({ event: "PERSONALIZATION_V2_GET_RECIPE_ERROR" })
);
return;
}
this.recipeExecutor = await this.generateRecipeExecutor();
if (!this.recipeExecutor) {
this.dispatch(
ac.PerfEvent({
event: "PERSONALIZATION_V2_GENERATE_RECIPE_EXECUTOR_ERROR",
})
);
return;
}
this.interestVector =
this.interestVector || (await this.createInterestVector());
if (!this.interestVector) {
this.dispatch(
ac.PerfEvent({
event: "PERSONALIZATION_V2_CREATE_INTEREST_VECTOR_ERROR",
})
);
return;
}
this.dispatch(
ac.PerfEvent({
event: "PERSONALIZATION_V2_TOTAL_DURATION",
value: Math.round(perfService.absNow() - perfStart),
})
);
this.initialized = true;
if (callback) {
callback();
}
}
async getFromRemoteSettings(name) {
const result = await RemoteSettings(name).get();
return Promise.all(
result.map(async record => ({
...(await this.getAttachment(record)),
recordKey: record.key,
}))
);
}
/**
* Returns a Recipe from remote settings to be consumed by a RecipeExecutor.
* A Recipe is a set of instructions on how to processes a RecipeExecutor.
*/
async getRecipe() {
if (!this.recipes || !this.recipes.length) {
const start = perfService.absNow();
this.recipes = await this.getFromRemoteSettings(RECIPE_NAME);
this.dispatch(
ac.PerfEvent({
event: "PERSONALIZATION_V2_GET_RECIPE_DURATION",
value: Math.round(perfService.absNow() - start),
})
);
}
return this.recipes[0];
}
/**
* Returns a Recipe Executor.
* A Recipe Executor is a set of actions that can be consumed by a Recipe.
* The Recipe determines the order and specifics of which the actions are called.
*/
async generateRecipeExecutor() {
if (!this.taggers) {
const startTaggers = perfService.absNow();
let nbTaggers = [];
let nmfTaggers = {};
const models = await this.getFromRemoteSettings(MODELS_NAME);
if (models.length === 0) {
return null;
}
for (let model of models) {
if (!this.modelKeys.includes(model.recordKey)) {
continue;
}
if (model.model_type === "nb") {
nbTaggers.push(new NaiveBayesTextTagger(model));
} else if (model.model_type === "nmf") {
nmfTaggers[model.parent_tag] = new NmfTextTagger(model);
}
}
this.dispatch(
ac.PerfEvent({
event: "PERSONALIZATION_V2_TAGGERS_DURATION",
value: Math.round(perfService.absNow() - startTaggers),
})
);
this.taggers = { nbTaggers, nmfTaggers };
}
const startRecipeExecutor = perfService.absNow();
const recipeExecutor = new RecipeExecutor(
this.taggers.nbTaggers,
this.taggers.nmfTaggers
);
this.dispatch(
ac.PerfEvent({
event: "PERSONALIZATION_V2_RECIPE_EXECUTOR_DURATION",
value: Math.round(perfService.absNow() - startRecipeExecutor),
})
);
return recipeExecutor;
}
/**
* Grabs a slice of browse history for building a interest vector
*/
async fetchHistory(columns, beginTimeSecs, endTimeSecs) {
let sql = `SELECT url, title, visit_count, frecency, last_visit_date, description
FROM moz_places
WHERE last_visit_date >= ${beginTimeSecs * 1000000}
AND last_visit_date < ${endTimeSecs * 1000000}`;
columns.forEach(requiredColumn => {
sql += ` AND IFNULL(${requiredColumn}, "") <> ""`;
});
sql += " LIMIT 30000";
const { activityStreamProvider } = NewTabUtils;
const history = await activityStreamProvider.executePlacesQuery(sql, {
columns,
params: {},
});
return history;
}
/**
* Examines the user's browse history and returns an interest vector that
* describes the topics the user frequently browses.
*/
async createInterestVector() {
let interestVector = {};
let endTimeSecs = new Date().getTime() / 1000;
let beginTimeSecs = endTimeSecs - this.interestConfig.history_limit_secs;
let history = await this.fetchHistory(
this.interestConfig.history_required_fields,
beginTimeSecs,
endTimeSecs
);
this.dispatch(
ac.PerfEvent({
event: "PERSONALIZATION_V2_HISTORY_SIZE",
value: history.length,
})
);
const start = perfService.absNow();
for (let historyRec of history) {
let ivItem = this.recipeExecutor.executeRecipe(
historyRec,
this.interestConfig.history_item_builder
);
if (ivItem === null) {
continue;
}
interestVector = this.recipeExecutor.executeCombinerRecipe(
interestVector,
ivItem,
this.interestConfig.interest_combiner
);
if (interestVector === null) {
return null;
}
}
const finalResult = this.recipeExecutor.executeRecipe(
interestVector,
this.interestConfig.interest_finalizer
);
this.dispatch(
ac.PerfEvent({
event: "PERSONALIZATION_V2_CREATE_INTEREST_VECTOR_DURATION",
value: Math.round(perfService.absNow() - start),
})
);
return finalResult;
}
/**
* Calculates a score of a Pocket item when compared to the user's interest
* vector. Returns the score. Higher scores are better. Assumes this.interestVector
* is populated.
*/
calculateItemRelevanceScore(pocketItem) {
if (!this.initialized) {
return pocketItem.item_score || 1;
}
let scorableItem = this.recipeExecutor.executeRecipe(
pocketItem,
this.interestConfig.item_to_rank_builder
);
if (scorableItem === null) {
return -1;
}
let rankingVector = JSON.parse(JSON.stringify(this.interestVector));
Object.keys(scorableItem).forEach(key => {
rankingVector[key] = scorableItem[key];
});
rankingVector = this.recipeExecutor.executeRecipe(
rankingVector,
this.interestConfig.item_ranker
);
if (rankingVector === null) {
return -1;
}
return rankingVector.score;
}
/**
* Returns an object holding the settings and affinity scores of this provider instance.
*/
getAffinities() {
return {
timeSegments: this.timeSegments,
parameterSets: this.parameterSets,
maxHistoryQueryResults: this.maxHistoryQueryResults,
version: this.version,
scores: {
interestConfig: this.interestConfig,
interestVector: this.interestVector,
taggers: this.taggers,
},
};
}
};
// Names this module exposes to importers (presumably what ChromeUtils.import
// callers can destructure from it).
const EXPORTED_SYMBOLS = ["PersonalityProvider"];