gecko-dev/browser/components/newtab/lib/PersonalityProvider.jsm

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";

const { RemoteSettings } = ChromeUtils.import(
  "resource://services-settings/remote-settings.js"
);

const { actionCreators: ac } = ChromeUtils.import(
  "resource://activity-stream/common/Actions.jsm"
);
ChromeUtils.defineModuleGetter(
  this,
  "perfService",
  "resource://activity-stream/common/PerfService.jsm"
);

const { NaiveBayesTextTagger } = ChromeUtils.import(
  "resource://activity-stream/lib/NaiveBayesTextTagger.jsm"
);
const { NmfTextTagger } = ChromeUtils.import(
  "resource://activity-stream/lib/NmfTextTagger.jsm"
);
const { RecipeExecutor } = ChromeUtils.import(
  "resource://activity-stream/lib/RecipeExecutor.jsm"
);

ChromeUtils.defineModuleGetter(
  this,
  "NewTabUtils",
  "resource://gre/modules/NewTabUtils.jsm"
);

const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
const { XPCOMUtils } = ChromeUtils.import(
  "resource://gre/modules/XPCOMUtils.jsm"
);
ChromeUtils.defineModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
XPCOMUtils.defineLazyGlobalGetters(this, ["fetch"]);

XPCOMUtils.defineLazyGetter(this, "gTextDecoder", () => new TextDecoder());

// Lazily-created Promise for the base URL that attachment `location` values
// are appended to. It is read from `capabilities.attachments.base_url` in the
// JSON served at the root of the server named by the
// "services.settings.server" pref. Because the getter body is async, the
// value of `baseAttachmentsURL` is a Promise and must be awaited by consumers.
XPCOMUtils.defineLazyGetter(this, "baseAttachmentsURL", async () => {
  const settingsServer = Services.prefs.getCharPref("services.settings.server");
  const response = await fetch(`${settingsServer}/`, {
    credentials: "omit",
  });
  const serverInfo = await response.json();
  return serverInfo.capabilities.attachments.base_url;
});

// Remote Settings collection the recipe is read from.
const RECIPE_NAME = "personality-provider-recipe";
// Remote Settings collection the tagger models are read from.
const MODELS_NAME = "personality-provider-models";
// Directory under the local profile dir where attachments are written to
// and read back from.
const PERSONALITY_PROVIDER_DIR = OS.Path.join(
  OS.Constants.Path.localProfileDir,
  "personality-provider"
);

/**
 * Computes the SHA-256 digest of a string.
 *
 * The string is encoded as UTF-8 and the resulting bytes are hashed. Feeding
 * the JS string through a byte-string stream instead would not hash non-ASCII
 * characters as their UTF-8 bytes, so the digest of decoded file contents
 * could never match a hash computed over the file's bytes.
 *
 * @param {string} aStr The text to hash.
 * @returns {string} The digest as 64 lowercase hexadecimal characters.
 */
function getHash(aStr) {
  const hasher = Cc["@mozilla.org/security/hash;1"].createInstance(
    Ci.nsICryptoHash
  );
  hasher.init(Ci.nsICryptoHash.SHA256);

  const bytes = new TextEncoder().encode(aStr);
  hasher.update(bytes, bytes.length);

  // finish(false) returns the raw digest as a binary string, one character
  // per byte; render each byte as a two-digit hexadecimal code.
  const binary = hasher.finish(false);
  return Array.from(binary, char =>
    char
      .charCodeAt(0)
      .toString(16)
      .padStart(2, "0")
  ).join("");
}

/**
 * V2 provider builds and ranks an interest profile (also called an “interest vector”) off the browse history.
 * This allows Firefox to classify pages into topics, by examining the text found on the page.
 * It does this by looking at the history text content, title, and description.
 *
 * The recipe and the tagger models are read from two Remote Settings
 * collections; their attachments are cached as files in
 * PERSONALITY_PROVIDER_DIR and kept up to date through "sync" events.
 */
this.PersonalityProvider = class PersonalityProvider {
  /**
   * Stores the configuration and starts listening for "sync" events on the
   * recipe and models collections. Call teardown() to stop listening.
   *
   * @param {*} timeSegments Stored and returned by getAffinities(); not
   *        otherwise read by this class.
   * @param {*} parameterSets Stored and returned by getAffinities(); not
   *        otherwise read by this class.
   * @param {*} maxHistoryQueryResults Stored and returned by getAffinities();
   *        not otherwise read by this class.
   * @param {*} version Stored and returned by getAffinities(); not otherwise
   *        read by this class.
   * @param {Object} [scores] Optional previously computed state. Its
   *        `interestConfig` and `interestVector`, when truthy, are reused by
   *        init() instead of being recomputed.
   * @param {Object} [v2Params] Optional settings: `dispatch` (function that
   *        receives the perf events, defaults to a no-op) and `modelKeys`
   *        (array of model record keys to build taggers for).
   */
  constructor(
    timeSegments,
    parameterSets,
    maxHistoryQueryResults,
    version,
    scores,
    v2Params
  ) {
    this.v2Params = v2Params || {};
    this.dispatch = this.v2Params.dispatch || (() => {});
    this.modelKeys = this.v2Params.modelKeys;
    this.timeSegments = timeSegments;
    this.parameterSets = parameterSets;
    this.maxHistoryQueryResults = maxHistoryQueryResults;
    this.version = version;
    this.scores = scores || {};
    this.interestConfig = this.scores.interestConfig;
    this.interestVector = this.scores.interestVector;
    // Bound once so the very same function can later be unregistered.
    this.onSync = this.onSync.bind(this);
    this.setupSyncAttachment(RECIPE_NAME);
    this.setupSyncAttachment(MODELS_NAME);
  }

  /**
   * "sync" event handler: deletes the attachments of deleted records and of
   * the old version of updated records, then downloads the attachments of
   * created records and of the new version of updated records.
   *
   * @param {Object} event Sync event carrying `data.created`, `data.updated`
   *        (entries with `old` and `new` records) and `data.deleted`.
   */
  async onSync(event) {
    const {
      data: { created, updated, deleted },
    } = event;

    // Remove every removed attachment.
    const toRemove = deleted.concat(updated.map(u => u.old));
    await Promise.all(toRemove.map(record => this.deleteAttachment(record)));

    // Download every new/updated attachment.
    const toDownload = created.concat(updated.map(u => u.new));
    await Promise.all(
      toDownload.map(record => this.maybeDownloadAttachment(record))
    );
  }

  /**
   * Registers onSync as a "sync" listener on the given collection.
   *
   * @param {string} collection Remote Settings collection name.
   */
  setupSyncAttachment(collection) {
    RemoteSettings(collection).on("sync", this.onSync);
  }

  /**
   * Unregisters the "sync" listeners added by the constructor, so that this
   * instance no longer reacts to syncs once it is no longer needed.
   */
  teardown() {
    this.teardownSyncAttachment(RECIPE_NAME);
    this.teardownSyncAttachment(MODELS_NAME);
  }

  /**
   * Unregisters onSync as a "sync" listener on the given collection.
   *
   * @param {string} collection Remote Settings collection name.
   */
  teardownSyncAttachment(collection) {
    RemoteSettings(collection).off("sync", this.onSync);
  }

  /**
   * Downloads the attachment to disk assuming the dir already exists
   * and any existing files matching the filename are clobbered.
   * A non-OK HTTP response is reported and nothing is written.
   *
   * @param {Object} record Record whose `attachment` has `location` and
   *        `filename`.
   */
  async _downloadAttachment(record) {
    const {
      attachment: { location, filename },
    } = record;
    const remoteFilePath = (await baseAttachmentsURL) + location;
    const localFilePath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);
    const headers = new Headers();
    headers.set("Accept-Encoding", "gzip");
    const resp = await fetch(remoteFilePath, { headers, credentials: "omit" });
    if (!resp.ok) {
      Cu.reportError(`Failed to fetch ${remoteFilePath}: ${resp.status}`);
      return;
    }
    const buffer = await resp.arrayBuffer();
    const bytes = new Uint8Array(buffer);
    await OS.File.writeAtomic(localFilePath, bytes, {
      tmpPath: `${localFilePath}.tmp`,
    });
  }

  /**
   * Attempts to download the attachment, but only if it doesn't already exist.
   * The local file counts as present only when it exists, has the recorded
   * size and its content hash equals the recorded hash; otherwise it is
   * downloaded again, at most `retries` times. The result of the last
   * attempt is not re-checked.
   *
   * @param {Object} record Record whose `attachment` has `filename`, `hash`
   *        and `size`.
   * @param {number} [retries=3] Maximum number of download attempts.
   */
  async maybeDownloadAttachment(record, retries = 3) {
    const {
      attachment: { filename, hash, size },
    } = record;
    await OS.File.makeDir(PERSONALITY_PROVIDER_DIR);
    const localFilePath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);

    let retry = 0;
    while (
      retry++ < retries &&
      (!(await OS.File.exists(localFilePath)) ||
        (await OS.File.stat(localFilePath)).size !== size ||
        getHash(await this._getFileStr(localFilePath)) !== hash)
    ) {
      await this._downloadAttachment(record);
    }
  }

  /**
   * Deletes the local file of the record's attachment, then removes the
   * attachments directory if that left it empty.
   *
   * @param {Object} record Record whose `attachment` has `filename`.
   */
  async deleteAttachment(record) {
    const {
      attachment: { filename },
    } = record;
    await OS.File.makeDir(PERSONALITY_PROVIDER_DIR);
    const path = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);

    await OS.File.remove(path, { ignoreAbsent: true });
    // Clean up the directory if it is now empty. Removal fails while other
    // attachments are still stored there; that is expected and must not
    // reject, otherwise onSync would abort before downloading the new
    // attachments.
    try {
      await OS.File.removeEmptyDir(PERSONALITY_PROVIDER_DIR, {
        ignoreAbsent: true,
      });
    } catch (error) {
      // Most likely the directory is not empty; nothing to surface.
    }
  }

  /**
   * Gets contents of the attachment if it already exists on file,
   * and if not attempts to download it.
   *
   * @param {Object} record Record whose `attachment` has `filename`.
   * @returns {Object} The parsed JSON content of the file, or an empty
   *          object when downloading, reading or parsing failed (the failure
   *          is reported).
   */
  async getAttachment(record) {
    const {
      attachment: { filename },
    } = record;
    const filepath = OS.Path.join(PERSONALITY_PROVIDER_DIR, filename);

    try {
      await this.maybeDownloadAttachment(record);
      return JSON.parse(await this._getFileStr(filepath));
    } catch (error) {
      Cu.reportError(`Failed to load ${filepath}: ${error.message}`);
    }
    return {};
  }

  // A helper function to read and decode a file, it isn't a stand alone function.
  // If you use this, ensure you check the file exists and you have a try catch.
  async _getFileStr(filepath) {
    const binaryData = await OS.File.read(filepath);
    return gTextDecoder.decode(binaryData);
  }

  /**
   * Loads the recipe, builds the recipe executor and computes the interest
   * vector, reusing the interestConfig / interestVector already set on the
   * instance when present. If any step yields a falsy result, a matching
   * *_ERROR perf event is dispatched and the provider stays uninitialized.
   * On success the total duration is dispatched and `initialized` is set.
   *
   * @param {Function} [callback] Called without arguments after a successful
   *        initialization only.
   */
  async init(callback) {
    const perfStart = perfService.absNow();
    this.interestConfig = this.interestConfig || (await this.getRecipe());
    if (!this.interestConfig) {
      this.dispatch(
        ac.PerfEvent({ event: "PERSONALIZATION_V2_GET_RECIPE_ERROR" })
      );
      return;
    }
    this.recipeExecutor = await this.generateRecipeExecutor();
    if (!this.recipeExecutor) {
      this.dispatch(
        ac.PerfEvent({
          event: "PERSONALIZATION_V2_GENERATE_RECIPE_EXECUTOR_ERROR",
        })
      );
      return;
    }
    this.interestVector =
      this.interestVector || (await this.createInterestVector());
    if (!this.interestVector) {
      this.dispatch(
        ac.PerfEvent({
          event: "PERSONALIZATION_V2_CREATE_INTEREST_VECTOR_ERROR",
        })
      );
      return;
    }

    this.dispatch(
      ac.PerfEvent({
        event: "PERSONALIZATION_V2_TOTAL_DURATION",
        value: Math.round(perfService.absNow() - perfStart),
      })
    );

    this.initialized = true;
    if (callback) {
      callback();
    }
  }

  /**
   * Reads every record of a Remote Settings collection and loads the content
   * of its attachment.
   *
   * @param {string} name Remote Settings collection name.
   * @returns {Object[]} One object per record: the attachment's parsed
   *          content plus `recordKey`, the record's `key`.
   */
  async getFromRemoteSettings(name) {
    const result = await RemoteSettings(name).get();
    return Promise.all(
      result.map(async record => ({
        ...(await this.getAttachment(record)),
        recordKey: record.key,
      }))
    );
  }

  /**
   * Returns a Recipe from remote settings to be consumed by a RecipeExecutor.
   * A Recipe is a set of instructions on how to processes a RecipeExecutor.
   * The fetched recipes are cached on the instance; the first one is returned
   * (undefined when the collection yielded none).
   */
  async getRecipe() {
    if (!this.recipes || !this.recipes.length) {
      const start = perfService.absNow();
      this.recipes = await this.getFromRemoteSettings(RECIPE_NAME);
      this.dispatch(
        ac.PerfEvent({
          event: "PERSONALIZATION_V2_GET_RECIPE_DURATION",
          value: Math.round(perfService.absNow() - start),
        })
      );
    }
    return this.recipes[0];
  }

  /**
   * Returns a Recipe Executor.
   * A Recipe Executor is a set of actions that can be consumed by a Recipe.
   * The Recipe determines the order and specifics of which the actions are called.
   *
   * Taggers are built once, from the models whose record key is listed in
   * `modelKeys`, and cached on the instance. Returns null when the models
   * collection yielded no models.
   */
  async generateRecipeExecutor() {
    if (!this.taggers) {
      const startTaggers = perfService.absNow();
      let nbTaggers = [];
      let nmfTaggers = {};
      const models = await this.getFromRemoteSettings(MODELS_NAME);

      if (models.length === 0) {
        return null;
      }

      for (let model of models) {
        if (!this.modelKeys.includes(model.recordKey)) {
          continue;
        }
        if (model.model_type === "nb") {
          nbTaggers.push(new NaiveBayesTextTagger(model));
        } else if (model.model_type === "nmf") {
          nmfTaggers[model.parent_tag] = new NmfTextTagger(model);
        }
      }
      this.dispatch(
        ac.PerfEvent({
          event: "PERSONALIZATION_V2_TAGGERS_DURATION",
          value: Math.round(perfService.absNow() - startTaggers),
        })
      );
      this.taggers = { nbTaggers, nmfTaggers };
    }
    const startRecipeExecutor = perfService.absNow();
    const recipeExecutor = new RecipeExecutor(
      this.taggers.nbTaggers,
      this.taggers.nmfTaggers
    );
    this.dispatch(
      ac.PerfEvent({
        event: "PERSONALIZATION_V2_RECIPE_EXECUTOR_DURATION",
        value: Math.round(perfService.absNow() - startRecipeExecutor),
      })
    );
    return recipeExecutor;
  }

  /**
   * Grabs a slice of browse history for building a interest vector
   *
   * @param {string[]} columns Columns that must be non-null and non-empty
   *        for a row to be selected. They are inserted into the SQL text
   *        as-is and also passed on as the columns to read from each row.
   * @param {number} beginTimeSecs Inclusive lower bound on the last visit,
   *        in seconds (multiplied by 1000000 for the comparison).
   * @param {number} endTimeSecs Exclusive upper bound on the last visit, in
   *        seconds.
   * @returns {Array} The result of the places query, at most 30000 rows.
   */
  async fetchHistory(columns, beginTimeSecs, endTimeSecs) {
    let sql = `SELECT url, title, visit_count, frecency, last_visit_date, description
    FROM moz_places
    WHERE last_visit_date >= ${beginTimeSecs * 1000000}
    AND last_visit_date < ${endTimeSecs * 1000000}`;
    columns.forEach(requiredColumn => {
      sql += ` AND IFNULL(${requiredColumn}, "") <> ""`;
    });
    sql += " LIMIT 30000";

    const { activityStreamProvider } = NewTabUtils;
    const history = await activityStreamProvider.executePlacesQuery(sql, {
      columns,
      params: {},
    });

    return history;
  }

  /**
   * Examines the user's browse history and returns an interest vector that
   * describes the topics the user frequently browses.
   *
   * Each history row is run through the recipe's `history_item_builder`
   * (rows yielding null are skipped) and folded into the vector with
   * `interest_combiner`; the result is then passed through
   * `interest_finalizer`. Returns null as soon as the combiner yields null.
   */
  async createInterestVector() {
    let interestVector = {};
    let endTimeSecs = Date.now() / 1000;
    let beginTimeSecs = endTimeSecs - this.interestConfig.history_limit_secs;
    let history = await this.fetchHistory(
      this.interestConfig.history_required_fields,
      beginTimeSecs,
      endTimeSecs
    );

    this.dispatch(
      ac.PerfEvent({
        event: "PERSONALIZATION_V2_HISTORY_SIZE",
        value: history.length,
      })
    );

    const start = perfService.absNow();
    for (let historyRec of history) {
      let ivItem = this.recipeExecutor.executeRecipe(
        historyRec,
        this.interestConfig.history_item_builder
      );
      if (ivItem === null) {
        continue;
      }
      interestVector = this.recipeExecutor.executeCombinerRecipe(
        interestVector,
        ivItem,
        this.interestConfig.interest_combiner
      );
      if (interestVector === null) {
        return null;
      }
    }

    const finalResult = this.recipeExecutor.executeRecipe(
      interestVector,
      this.interestConfig.interest_finalizer
    );

    this.dispatch(
      ac.PerfEvent({
        event: "PERSONALIZATION_V2_CREATE_INTEREST_VECTOR_DURATION",
        value: Math.round(perfService.absNow() - start),
      })
    );
    return finalResult;
  }

  /**
   * Calculates a score of a Pocket item when compared to the user's interest
   * vector. Returns the score. Higher scores are better. Assumes this.interestVector
   * is populated.
   *
   * Before initialization completes, the item's own `item_score` is returned
   * (or 1 when it is falsy). Returns -1 when either recipe step yields null.
   */
  calculateItemRelevanceScore(pocketItem) {
    if (!this.initialized) {
      return pocketItem.item_score || 1;
    }
    let scorableItem = this.recipeExecutor.executeRecipe(
      pocketItem,
      this.interestConfig.item_to_rank_builder
    );
    if (scorableItem === null) {
      return -1;
    }

    // Work on a deep copy so the stored interest vector is never modified.
    let rankingVector = JSON.parse(JSON.stringify(this.interestVector));

    Object.keys(scorableItem).forEach(key => {
      rankingVector[key] = scorableItem[key];
    });

    rankingVector = this.recipeExecutor.executeRecipe(
      rankingVector,
      this.interestConfig.item_ranker
    );

    if (rankingVector === null) {
      return -1;
    }
    return rankingVector.score;
  }

  /**
   * Returns an object holding the settings and affinity scores of this provider instance.
   */
  getAffinities() {
    return {
      timeSegments: this.timeSegments,
      parameterSets: this.parameterSets,
      maxHistoryQueryResults: this.maxHistoryQueryResults,
      version: this.version,
      scores: {
        interestConfig: this.interestConfig,
        interestVector: this.interestVector,
        taggers: this.taggers,
      },
    };
  }
};

// Symbols this module makes available to code that imports it.
const EXPORTED_SYMBOLS = ["PersonalityProvider"];