Initial attempt with attachments for personality provider in activity… (#4523)

Fixes bug 1507973 - v2 with attachments for personality provider in activity stream.
This commit is contained in:
ScottDowne 2018-11-19 13:19:42 -05:00 коммит произвёл GitHub
Родитель 23ec0a4906
Коммит fd1b30ad8f
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 377 добавлений и 26 удалений

Просмотреть файл

@ -15,6 +15,38 @@ const {RecipeExecutor} = ChromeUtils.import("resource://activity-stream/lib/Reci
ChromeUtils.defineModuleGetter(this, "NewTabUtils",
"resource://gre/modules/NewTabUtils.jsm");
ChromeUtils.import("resource://gre/modules/Services.jsm");
ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
ChromeUtils.defineModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
XPCOMUtils.defineLazyGlobalGetters(this, ["fetch"]);
XPCOMUtils.defineLazyGetter(this, "gTextDecoder", () => new TextDecoder());
XPCOMUtils.defineLazyGetter(this, "baseAttachmentsURL", async () => {
const server = Services.prefs.getCharPref("services.settings.server");
const serverInfo = await (await fetch(`${server}/`)).json();
const {capabilities: {attachments: {base_url}}} = serverInfo;
return base_url;
});
const PERSONALITY_PROVIDER_DIR = OS.Path.join(OS.Constants.Path.localProfileDir, "personality-provider");
const RECIPE_NAME = "personality-provider-recipe";
const MODELS_NAME = "personality-provider-models";
function getHash(aStr) {
// return the two-digit hexadecimal code for a byte
let toHexString = charCode => (`0${charCode.toString(16)}`).slice(-2);
let hasher = Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash);
hasher.init(Ci.nsICryptoHash.SHA256);
let stringStream = Cc["@mozilla.org/io/string-input-stream;1"].createInstance(Ci.nsIStringInputStream);
stringStream.data = aStr;
hasher.updateFromStream(stringStream, -1);
// convert the binary hash data to a hex string.
let binary = hasher.finish(false);
return Array.from(binary, (c, i) => toHexString(binary.charCodeAt(i))).join("").toLowerCase();
}
/**
* V2 provider builds and ranks an interest profile (also called an interest vector) off the browse history.
* This allows Firefox to classify pages into topics, by examining the text found on the page.
@ -38,6 +70,97 @@ this.PersonalityProvider = class PersonalityProvider {
this.scores = scores || {};
this.interestConfig = this.scores.interestConfig;
this.interestVector = this.scores.interestVector;
this.onSync = this.onSync.bind(this);
this.setupSyncAttachment(RECIPE_NAME);
this.setupSyncAttachment(MODELS_NAME);
}
async onSync(event) {
const {
data: {created, updated, deleted},
} = event;
// Remove every removed attachment.
const toRemove = deleted.concat(updated.map(u => u.old));
await Promise.all(toRemove.map(record => this.deleteAttachment(record)));
// Download every new/updated attachment.
const toDownload = created.concat(updated.map(u => u.new));
await Promise.all(toDownload.map(record => this.maybeDownloadAttachment(record)));
}
setupSyncAttachment(collection) {
RemoteSettings(collection).on("sync", this.onSync);
}
/**
* Downloads the attachment to disk assuming the dir already exists
* and any existing files matching the filename are clobbered.
*/
async _downloadAttachment(record) {
const {attachment: {location, filename}} = record;
const remoteFilePath = (await baseAttachmentsURL) + location;
const localFilePath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);
const headers = new Headers();
headers.set("Accept-Encoding", "gzip");
const resp = await fetch(remoteFilePath, {headers});
if (!resp.ok) {
Cu.reportError(`Failed to fetch ${remoteFilePath}: ${resp.status}`);
return;
}
const buffer = await resp.arrayBuffer();
const bytes = new Uint8Array(buffer);
await OS.File.writeAtomic(localFilePath, bytes, {tmpPath: `${localFilePath}.tmp`});
}
/**
* Attempts to download the attachment, but only if it doesn't already exist.
*/
async maybeDownloadAttachment(record, retries = 3) {
const {attachment: {filename, hash, size}} = record;
await OS.File.makeDir(PERSONALITY_PROVIDER_DIR);
const localFilePath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);
let retry = 0;
while ((retry++ < retries) &&
(!await OS.File.exists(localFilePath) ||
(await OS.File.stat(localFilePath)).size !== size ||
getHash(await this._getFileStr(localFilePath)) !== hash)) {
await this._downloadAttachment(record);
}
}
async deleteAttachment(record) {
const {attachment: {filename}} = record;
await OS.File.makeDir(PERSONALITY_PROVIDER_DIR);
const path = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);
await OS.File.remove(path, {ignoreAbsent: true});
return OS.File.removeEmptyDir(PERSONALITY_PROVIDER_DIR, {ignoreAbsent: true});
}
/**
* Gets contents of the attachment if it already exists on file,
* and if not attempts to download it.
*/
async getAttachment(record) {
const {attachment: {filename}} = record;
const filepath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);
try {
await this.maybeDownloadAttachment(record);
return JSON.parse(await this._getFileStr(filepath));
} catch (error) {
Cu.reportError(`Failed to load ${filepath}: ${error.message}`);
}
return {};
}
// A helper function to read and decode a file, it isn't a stand alone function.
// If you use this, ensure you check the file exists and you have a try catch.
async _getFileStr(filepath) {
const binaryData = await OS.File.read(filepath);
return gTextDecoder.decode(binaryData);
}
async init(callback) {
@ -71,7 +194,7 @@ this.PersonalityProvider = class PersonalityProvider {
async getFromRemoteSettings(name) {
const result = await RemoteSettings(name).get();
return result;
return Promise.all(result.map(async record => ({...await this.getAttachment(record), recordKey: record.key})));
}
/**
@ -79,15 +202,15 @@ this.PersonalityProvider = class PersonalityProvider {
* A Recipe is a set of instructions on how to processes a RecipeExecutor.
*/
async getRecipe() {
if (!this.recipe || !this.recipe.length) {
if (!this.recipes || !this.recipes.length) {
const start = perfService.absNow();
this.recipe = await this.getFromRemoteSettings("personality-provider-recipe");
this.recipes = await this.getFromRemoteSettings(RECIPE_NAME);
this.dispatch(ac.PerfEvent({
event: "PERSONALIZATION_V2_GET_RECIPE_DURATION",
value: Math.round(perfService.absNow() - start),
}));
}
return this.recipe[0];
return this.recipes[0];
}
/**
@ -100,20 +223,20 @@ this.PersonalityProvider = class PersonalityProvider {
const startTaggers = perfService.absNow();
let nbTaggers = [];
let nmfTaggers = {};
const models = await this.getFromRemoteSettings("personality-provider-models");
const models = await this.getFromRemoteSettings(MODELS_NAME);
if (models.length === 0) {
return null;
}
for (let model of models) {
if (!model || !this.modelKeys.includes(model.key)) {
if (!this.modelKeys.includes(model.recordKey)) {
continue;
}
if (model.data.model_type === "nb") {
nbTaggers.push(new NaiveBayesTextTagger(model.data));
} else if (model.data.model_type === "nmf") {
nmfTaggers[model.data.parent_tag] = new NmfTextTagger(model.data);
if (model.model_type === "nb") {
nbTaggers.push(new NaiveBayesTextTagger(model));
} else if (model.model_type === "nmf") {
nmfTaggers[model.parent_tag] = new NmfTextTagger(model);
}
}
this.dispatch(ac.PerfEvent({

Просмотреть файл

@ -1,20 +1,29 @@
import {filterAdult} from "lib/FilterAdult.jsm";
import {GlobalOverrider} from "test/unit/utils";
describe("filterAdult", () => {
let hashStub;
let hashValue;
let globals;
beforeEach(() => {
globals = new GlobalOverrider();
hashStub = {
finish: sinon.stub().callsFake(() => hashValue),
init: sinon.stub(),
update: sinon.stub(),
};
global.Cc["@mozilla.org/security/hash;1"] = {
createInstance() {
return hashStub;
globals.set("Cc", {
"@mozilla.org/security/hash;1": {
createInstance() {
return hashStub;
},
},
};
});
});
afterEach(() => {
globals.restore();
});
it("should default to include on unexpected urls", () => {

Просмотреть файл

@ -25,6 +25,9 @@ describe("PersistentCache", () => {
cache = new PersistentCache(filename);
});
afterEach(() => {
globals.restore();
});
describe("#get", () => {
it("tries to read the file on the first get", async () => {

Просмотреть файл

@ -51,6 +51,19 @@ describe("Personality Provider", () => {
NmfTextTaggerStub = globals.sandbox.stub();
RecipeExecutorStub = globals.sandbox.stub();
globals.set("baseAttachmentsURL", new Promise(resolve => resolve("/")));
global.fetch = async server => ({
ok: true,
json: async () => {
if (server === "services.settings.server/") {
return {capabilities: {attachments: {base_url: ""}}};
}
return {};
},
});
globals.sandbox.stub(global.Services.prefs, "getCharPref").callsFake(pref => pref);
({PersonalityProvider} = injector({
"lib/NaiveBayesTextTagger.jsm": {NaiveBayesTextTagger: NaiveBayesTextTaggerStub},
"lib/NmfTextTagger.jsm": {NmfTextTagger: NmfTextTaggerStub},
@ -108,6 +121,7 @@ describe("Personality Provider", () => {
},
};
});
afterEach(() => {
globals.restore();
});
@ -177,9 +191,9 @@ describe("Personality Provider", () => {
});
describe("#remote-settings", () => {
it("should return a remote setting for getFromRemoteSettings", async () => {
const settings = await instance.getFromRemoteSettings("test");
const settings = await instance.getFromRemoteSettings("attachment");
assert.equal(typeof settings, "object");
assert.equal(settings.length, 0);
assert.equal(settings.length, 1);
});
});
describe("#executor", () => {
@ -202,8 +216,8 @@ describe("Personality Provider", () => {
instance.modelKeys = ["nb_model_sports", "nmf_model_sports"];
instance.getFromRemoteSettings = async name => [
{key: "nb_model_sports", data: {model_type: "nb"}},
{key: "nmf_model_sports", data: {model_type: "nmf", parent_tag: "nmf_sports_parent_tag"}},
{recordKey: "nb_model_sports", model_type: "nb"},
{recordKey: "nmf_model_sports", model_type: "nmf", parent_tag: "nmf_sports_parent_tag"},
];
await instance.generateRecipeExecutor();
@ -219,8 +233,8 @@ describe("Personality Provider", () => {
instance.modelKeys = ["nb_model_sports"];
instance.getFromRemoteSettings = async name => [
{key: "nb_model_sports", data: {model_type: "nb"}},
{key: "nmf_model_sports", data: {model_type: "nmf", parent_tag: "nmf_sports_parent_tag"}},
{recordKey: "nb_model_sports", model_type: "nb"},
{recordKey: "nmf_model_sports", model_type: "nmf", parent_tag: "nmf_sports_parent_tag"},
];
await instance.generateRecipeExecutor();
@ -236,7 +250,7 @@ describe("Personality Provider", () => {
instance.modelKeys = ["nb_model_sports", "nmf_model_sports"];
instance.getFromRemoteSettings = async name => [
{key: "nb_model_sports", data: {model_type: "nb"}},
{recordKey: "nb_model_sports", model_type: "nb"},
];
await instance.generateRecipeExecutor();
assert.calledOnce(RecipeExecutorStub);
@ -256,8 +270,8 @@ describe("Personality Provider", () => {
assert.calledWith(instance.getFromRemoteSettings, "personality-provider-recipe");
});
it("should not fetch a recipe on getRecipe if cached", async () => {
sinon.stub(instance, "getFromRemoteSettings").returns(Promise.resolve());
instance.recipe = ["blah"];
sinon.stub(instance, "getFromRemoteSettings").returns(Promise.resolve([]));
instance.recipes = ["blah"];
await instance.getRecipe();
assert.notCalled(instance.getFromRemoteSettings);
});
@ -341,4 +355,153 @@ describe("Personality Provider", () => {
assert.equal(Object.keys(history.options.params).length, 0);
});
});
describe("#attachments", () => {
it("should sync remote settings collection from onSync", async () => {
sinon.stub(instance, "deleteAttachment").returns(Promise.resolve({}));
sinon.stub(instance, "maybeDownloadAttachment").returns(Promise.resolve({}));
await instance.onSync({
data: {
created: ["create-1", "create-2"],
updated: [
{old: "update-old-1", new: "update-new-1"},
{old: "update-old-2", new: "update-new-2"},
],
deleted: ["delete-2", "delete-1"],
},
});
assert(instance.maybeDownloadAttachment.withArgs("create-1").calledOnce);
assert(instance.maybeDownloadAttachment.withArgs("create-2").calledOnce);
assert(instance.maybeDownloadAttachment.withArgs("update-new-1").calledOnce);
assert(instance.maybeDownloadAttachment.withArgs("update-new-2").calledOnce);
assert(instance.deleteAttachment.withArgs("delete-1").calledOnce);
assert(instance.deleteAttachment.withArgs("delete-2").calledOnce);
assert(instance.deleteAttachment.withArgs("update-old-1").calledOnce);
assert(instance.deleteAttachment.withArgs("update-old-2").calledOnce);
});
it("should write a file from _downloadAttachment", async () => {
const fetchStub = globals.sandbox.stub(global, "fetch").resolves({
ok: true,
arrayBuffer: async () => {},
});
const writeAtomicStub = globals.sandbox.stub(global.OS.File, "writeAtomic").resolves(Promise.resolve());
globals.sandbox.stub(global.OS.Path, "join").callsFake((first, second) => first + second);
globals.set("Uint8Array", class Uint8Array {});
await instance._downloadAttachment({attachment: {location: "location", filename: "filename"}});
const fetchArgs = fetchStub.firstCall.args;
assert.equal(fetchArgs[0], "/location");
const writeArgs = writeAtomicStub.firstCall.args;
assert.equal(writeArgs[0], "/filename");
assert.equal(writeArgs[2].tmpPath, "/filename.tmp");
});
it("should call reportError from _downloadAttachment if not valid response", async () => {
globals.sandbox.stub(global, "fetch").resolves({ok: false});
globals.sandbox.spy(global.Cu, "reportError");
await instance._downloadAttachment({attachment: {location: "location", filename: "filename"}});
assert.calledWith(Cu.reportError, "Failed to fetch /location: undefined");
});
it("should attempt _downloadAttachment three times for maybeDownloadAttachment", async () => {
let existsStub;
let statStub;
let attachmentStub;
sinon.stub(instance, "_downloadAttachment").returns(Promise.resolve());
sinon.stub(instance, "_getFileStr").returns(Promise.resolve("1"));
const makeDirStub = globals.sandbox.stub(global.OS.File, "makeDir").returns(Promise.resolve());
globals.sandbox.stub(global.OS.Path, "join").callsFake((first, second) => first + second);
existsStub = globals.sandbox.stub(global.OS.File, "exists").returns(Promise.resolve(true));
statStub = globals.sandbox.stub(global.OS.File, "stat").returns(Promise.resolve({size: "1"}));
attachmentStub = {
attachment: {
filename: "file",
hash: "30",
size: "1",
},
};
await instance.maybeDownloadAttachment(attachmentStub);
assert.calledWith(makeDirStub, "/");
assert.calledOnce(existsStub);
assert.calledOnce(statStub);
assert.calledOnce(instance._getFileStr);
assert.notCalled(instance._downloadAttachment);
instance._getFileStr.resetHistory();
existsStub.resetHistory();
statStub.resetHistory();
attachmentStub = {
attachment: {
filename: "file",
hash: "31",
size: "1",
},
};
await instance.maybeDownloadAttachment(attachmentStub);
assert.calledThrice(existsStub);
assert.calledThrice(statStub);
assert.calledThrice(instance._getFileStr);
assert.calledThrice(instance._downloadAttachment);
});
it("should remove attachments when calling deleteAttachment", async () => {
const makeDirStub = globals.sandbox.stub(global.OS.File, "makeDir").returns(Promise.resolve());
const removeStub = globals.sandbox.stub(global.OS.File, "remove").returns(Promise.resolve());
const removeEmptyDirStub = globals.sandbox.stub(global.OS.File, "removeEmptyDir").returns(Promise.resolve());
globals.sandbox.stub(global.OS.Path, "join").callsFake((first, second) => first + second);
await instance.deleteAttachment({attachment: {filename: "filename"}});
assert.calledOnce(makeDirStub);
assert.calledOnce(removeStub);
assert.calledOnce(removeEmptyDirStub);
assert.calledWith(removeStub, "/filename", {ignoreAbsent: true});
});
it("should return JSON when calling getAttachment", async () => {
sinon.stub(instance, "maybeDownloadAttachment").returns(Promise.resolve());
sinon.stub(instance, "_getFileStr").returns(Promise.resolve("{}"));
const reportErrorStub = globals.sandbox.stub(global.Cu, "reportError");
globals.sandbox.stub(global.OS.Path, "join").callsFake((first, second) => first + second);
const record = {attachment: {filename: "filename"}};
let returnValue = await instance.getAttachment(record);
assert.notCalled(reportErrorStub);
assert.calledOnce(instance._getFileStr);
assert.calledWith(instance._getFileStr, "/filename");
assert.calledOnce(instance.maybeDownloadAttachment);
assert.calledWith(instance.maybeDownloadAttachment, record);
assert.deepEqual(returnValue, {});
instance._getFileStr.restore();
sinon.stub(instance, "_getFileStr").returns(Promise.resolve({}));
returnValue = await instance.getAttachment(record);
assert.calledOnce(reportErrorStub);
assert.calledWith(reportErrorStub, "Failed to load /filename: JSON.parse: unexpected character at line 1 column 2 of the JSON data");
assert.deepEqual(returnValue, {});
});
it("should read and decode a file with _getFileStr", async () => {
global.OS.File.read = async path => {
if (path === "/filename") {
return "binaryData";
}
return "";
};
globals.set("gTextDecoder", {
decode: async binaryData => {
if (binaryData === "binaryData") {
return "binaryData";
}
return "";
},
});
const returnValue = await instance._getFileStr("/filename");
assert.equal(returnValue, "binaryData");
});
});
});

Просмотреть файл

@ -134,10 +134,15 @@ describe("Top Stories Feed", () => {
});
it("should report error for invalid configuration", () => {
globals.sandbox.spy(global.Cu, "reportError");
sectionsManagerStub.sections.set("topstories", {options: {api_key_pref: "invalid"}});
sectionsManagerStub.sections.set("topstories", {
options: {
api_key_pref: "invalid",
stories_endpoint: "https://invalid.com/?apiKey=$apiKey",
},
});
instance.init();
assert.called(Cu.reportError);
assert.calledWith(Cu.reportError, "Problem initializing top stories feed: An API key was specified but none configured: https://invalid.com/?apiKey=$apiKey");
});
it("should report error for missing api key", () => {
globals.sandbox.spy(global.Cu, "reportError");

Просмотреть файл

@ -69,9 +69,26 @@ const TEST_GLOBAL = {
markPageAsTyped() {},
removeObserver() {},
},
"@mozilla.org/io/string-input-stream;1": {
createInstance() {
return {};
},
},
"@mozilla.org/security/hash;1": {
createInstance() {
return {
init() {},
updateFromStream() {},
finish() {
return "0";
},
};
},
},
"@mozilla.org/updates/update-checker;1": {createInstance() {}},
},
Ci: {
nsICryptoHash: {},
nsIHttpChannel: {REFERRER_POLICY_UNSAFE_URL: 5},
nsITimer: {TYPE_ONE_SHOT: 1},
nsIWebProgressListener: {LOCATION_CHANGE_SAME_DOCUMENT: 1},
@ -91,6 +108,26 @@ const TEST_GLOBAL = {
executePlacesQuery: async (sql, options) => ({sql, options}),
},
},
OS: {
File: {
writeAtomic() {},
makeDir() {},
stat() {},
exists() {},
remove() {},
removeEmptyDir() {},
},
Path: {
join() {
return "/";
},
},
Constants: {
Path: {
localProfileDir: "/",
},
},
},
PlacesUtils: {
get bookmarks() {
return TEST_GLOBAL.Cc["@mozilla.org/browser/nav-bookmarks-service;1"];
@ -145,6 +182,7 @@ const TEST_GLOBAL = {
setStringPref() {},
getIntPref() {},
getBoolPref() {},
getCharPref() {},
setBoolPref() {},
setIntPref() {},
getBranch() {},
@ -213,7 +251,17 @@ const TEST_GLOBAL = {
EventEmitter,
ShellService: {isDefaultBrowser: () => true},
FilterExpressions: {eval() { return Promise.resolve(false); }},
RemoteSettings() { return {get() { return Promise.resolve([]); }}; },
RemoteSettings(name) {
return {
get() {
if (name === "attachment") {
return Promise.resolve([{attachment: {}}]);
}
return Promise.resolve([]);
},
on() {},
};
},
Localization: class {},
};
overrider.set(TEST_GLOBAL);