Bug 1317223 (part 2) - add 'doctor' concept and move bookmark validation logic to it. r=tcsc

This patch defines the concept of a "doctor" for collections. The doctor is responsible for running all validators and deciding whether or not to initiate a repair request based on the validation results. MozReview-Commit-ID: 6NLRE6L0OpA
2017-03-02 17:14:31 +11:00 · 2017-03-02 17:14:31 +11:00 · c1f06ce1f0
--- a/.eslintignore
+++ b/.eslintignore
@ -289,6 +289,7 @@ security/nss/**

 # Uses `#filter substitution`
 services/sync/modules/constants.js
+services/sync/services-sync.js

 # Third party services
 services/common/kinto-http-client.js
--- a/browser/app/profile/firefox.js
+++ b/browser/app/profile/firefox.js
@ -1581,12 +1581,6 @@ pref("browser.crashReports.unsubmittedCheck.enabled", false);
 pref("browser.crashReports.unsubmittedCheck.chancesUntilSuppress", 4);
 pref("browser.crashReports.unsubmittedCheck.autoSubmit", false);

-#ifdef NIGHTLY_BUILD
-// Enable the (fairly costly) client/server validation on nightly only. The other prefs
-// controlling validation are located in /services/sync/services-sync.js
-pref("services.sync.validation.enabled", true);
-#endif
-
 // Preferences for the form autofill system extension
 pref("browser.formautofill.experimental", false);
 pref("browser.formautofill.enabled", false);
--- a/services/sync/modules/bookmark_validator.js
+++ b/services/sync/modules/bookmark_validator.js
@ -807,6 +807,8 @@ class BookmarkValidator {
  }

  _getServerState(engine) {
+// XXXXX - todo - we need to capture last-modified of the server here and
+// ensure the repairer only applys with if-unmodified-since that date.
    let collection = engine.itemSource();
    let collectionKey = engine.service.collectionKeys.keyForCollection(engine.name);
    collection.full = true;
--- a/services/sync/modules/doctor.js
+++ b/services/sync/modules/doctor.js
@ -0,0 +1,250 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// A doctor for our collections. She can be asked to make a consultation, and
+// may just diagnose an issue without attempting to cure it, may diagnose and
+// attempt to cure, or may decide she is overworked and underpaid.
+// Or something - naming is hard :)
+
+"use strict";
+
+this.EXPORTED_SYMBOLS = ["Doctor"];
+
+const Cu = Components.utils;
+
+Cu.import("resource://gre/modules/Services.jsm");
+Cu.import("resource://gre/modules/Log.jsm");
+Cu.import("resource://services-common/async.js");
+Cu.import("resource://services-common/observers.js");
+Cu.import("resource://services-sync/service.js");
+Cu.import("resource://services-sync/resource.js");
+
+Cu.import("resource://services-sync/util.js");
+XPCOMUtils.defineLazyModuleGetter(this, "getRepairRequestor",
+  "resource://services-sync/collection_repair.js");
+XPCOMUtils.defineLazyModuleGetter(this, "getAllRepairRequestors",
+  "resource://services-sync/collection_repair.js");
+
+const log = Log.repository.getLogger("Sync.Doctor");
+
+this.REPAIR_ADVANCE_PERIOD = 86400; // 1 day
+
+this.Doctor = {
+  anyClientsRepairing(service, collection, ignoreFlowID = null) {
+    if (!service || !service.clientsEngine) {
+      log.info("Missing clients engine, assuming we're in test code");
+      return false;
+    }
+    return service.clientsEngine.remoteClients.some(client =>
+      client.commands && client.commands.some(command => {
+        if (command.command != "repairResponse" && command.command != "repairRequest") {
+          return false;
+        }
+        if (!command.args || command.args.length != 1) {
+          return false;
+        }
+        if (command.args[0].collection != collection) {
+          return false;
+        }
+        if (ignoreFlowID != null && command.args[0].flowID == ignoreFlowID) {
+          return false;
+        }
+        return true;
+      })
+    );
+  },
+
+  async consult(recentlySyncedEngines) {
+    if (!Services.telemetry.canRecordBase) {
+      log.info("Skipping consultation: telemetry reporting is disabled");
+      return;
+    }
+
+    let engineInfos = this._getEnginesToValidate(recentlySyncedEngines);
+
+    await this._runValidators(engineInfos);
+
+    // We are called at the end of a sync, which is a good time to periodically
+    // check each repairer to see if it can advance.
+    if (this._now() - this.lastRepairAdvance > REPAIR_ADVANCE_PERIOD) {
+      try {
+        for (let [collection, requestor] of Object.entries(this._getAllRepairRequestors())) {
+          try {
+            let advanced = requestor.continueRepairs();
+            log.info(`${collection} reparier ${advanced ? "advanced" : "did not advance"}.`);
+          } catch (ex) {
+            if (Async.isShutdownException(ex)) {
+              throw ex;
+            }
+            log.error(`${collection} repairer failed`, ex);
+          }
+        }
+      } finally {
+        this.lastRepairAdvance = this._now();
+      }
+    }
+  },
+
+  _getEnginesToValidate(recentlySyncedEngines) {
+    let result = {};
+    for (let e of recentlySyncedEngines) {
+      let prefPrefix = `engine.${e.name}.`;
+      if (!Svc.Prefs.get(prefPrefix + "validation.enabled", false)) {
+        log.info(`Skipping check of ${e.name} - disabled via preferences`);
+        continue;
+      }
+      // Check the last validation time for the engine.
+      let lastValidation = Svc.Prefs.get(prefPrefix + "validation.lastTime", 0);
+      let validationInterval = Svc.Prefs.get(prefPrefix + "validation.interval");
+      let nowSeconds = this._now();
+
+      if (nowSeconds - lastValidation < validationInterval) {
+        log.info(`Skipping validation of ${e.name}: too recent since last validation attempt`);
+        continue;
+      }
+      // Update the time now, even if we decline to actually perform a
+      // validation. We don't want to check the rest of these more frequently
+      // than once a day.
+      Svc.Prefs.set("validation.lastTime", Math.floor(nowSeconds));
+
+      // Validation only occurs a certain percentage of the time.
+      let validationProbability = Svc.Prefs.get(prefPrefix + "validation.percentageChance", 0) / 100.0;
+      if (validationProbability < Math.random()) {
+        log.info(`Skipping validation of ${e.name}: Probability threshold not met`);
+        continue;
+      }
+
+      let maxRecords = Svc.Prefs.get(prefPrefix + "validation.maxRecords");
+      if (!maxRecords) {
+        log.info(`Skipping validation of ${e.name}: No maxRecords specified`);
+        continue;
+      }
+      // OK, so this is a candidate - the final decision will be based on the
+      // number of records actually found.
+      result[e.name] = { engine: e, maxRecords };
+    }
+    return result;
+  },
+
+  async _runValidators(engineInfos) {
+    if (Object.keys(engineInfos).length == 0) {
+      log.info("Skipping validation: no engines qualify");
+      return;
+    }
+
+    if (Object.values(engineInfos).filter(i => i.maxRecords != -1).length != 0) {
+      // at least some of the engines have maxRecord restrictions which require
+      // us to ask the server for the counts.
+      let countInfo = this._fetchCollectionCounts();
+      for (let [engineName, recordCount] of Object.entries(countInfo)) {
+        if (engineName in engineInfos) {
+          engineInfos[engineName].recordCount = recordCount;
+        }
+      }
+    }
+
+    for (let [engineName, { engine, maxRecords, recordCount }] of Object.entries(engineInfos)) {
+      // maxRecords of -1 means "any number", so we can skip asking the server.
+      // Used for tests.
+      if (maxRecords >= 0 && recordCount > maxRecords) {
+        log.debug(`Skipping validation for ${engineName} because ` +
+                        `the number of records (${recordCount}) is greater ` +
+                        `than the maximum allowed (${maxRecords}).`);
+        continue;
+      }
+      let validator = engine.getValidator();
+      if (!validator) {
+        continue;
+      }
+
+      // Let's do it!
+      Services.console.logStringMessage(
+        `Sync is about to run a consistency check of ${engine.name}. This may be slow, and ` +
+        `can be controlled using the pref "services.sync.${engine.name}.validation.enabled".\n` +
+        `If you encounter any problems because of this, please file a bug.`);
+
+      // Make a new flowID just incase we end up starting a repair.
+      let flowID = Utils.makeGUID();
+      try {
+        // XXX - must get the flow ID to either the validator, or directly to
+        // telemetry. I guess it's probably OK to always record a flowID even
+        // if we don't end up repairing?
+        log.info(`Running validator for ${engine.name}`);
+        let result = await validator.validate(engine);
+        Observers.notify("weave:engine:validate:finish", result, engine.name);
+        // And see if we should repair.
+        await this._maybeCure(engine, result, flowID);
+      } catch (ex) {
+        if (Async.isShutdownException(ex)) {
+          throw ex;
+        }
+        log.error(`Failed to run validation on ${engine.name}!`, ex);
+        Observers.notify("weave:engine:validate:error", ex, engine.name)
+        // Keep validating -- there's no reason to think that a failure for one
+        // validator would mean the others will fail.
+      }
+    }
+  },
+
+  _maybeCure(engine, validationResults, flowID) {
+    if (!this._shouldRepair(engine)) {
+      log.info(`Skipping repair of ${engine.name} - disabled via preferences`);
+      return;
+    }
+
+    let requestor = this._getRepairRequestor(engine.name);
+    let didStart = false;
+    if (requestor) {
+      didStart = requestor.startRepairs(validationResults, flowID);
+    }
+    log.info(`${didStart ? "did" : "didn't"} start a repair of ${engine.name} with flowID ${flowID}`);
+  },
+
+  _shouldRepair(engine) {
+    return Svc.Prefs.get(`engine.${engine.name}.repair.enabled`, false);
+  },
+
+  // mainly for mocking.
+  _fetchCollectionCounts() {
+    let collectionCountsURL = Service.userBaseURL + "info/collection_counts";
+    try {
+      let infoResp = Service._fetchInfo(collectionCountsURL);
+      if (!infoResp.success) {
+        log.error("Can't fetch collection counts: request to info/collection_counts responded with "
+                        + infoResp.status);
+        return {};
+      }
+      return infoResp.obj; // might throw because obj is a getter which parses json.
+    } catch (ex) {
+      if (Async.isShutdownException(ex)) {
+        throw ex;
+      }
+      // Not running validation is totally fine, so we just write an error log and return.
+      log.error("Caught error when fetching counts", ex);
+      return {};
+    }
+  },
+
+  get lastRepairAdvance() {
+    return Svc.Prefs.get("doctor.lastRepairAdvance", 0);
+  },
+
+  set lastRepairAdvance(value) {
+    Svc.Prefs.set("doctor.lastRepairAdvance", value);
+  },
+
+  // functions used so tests can mock them
+  _now() {
+    // We use the server time, which is SECONDS
+    return AsyncResource.serverTime;
+  },
+
+  _getRepairRequestor(name) {
+    return getRepairRequestor(name);
+  },
+
+  _getAllRepairRequestors() {
+    return getAllRepairRequestors();
+  }
+}
--- a/services/sync/modules/stages/enginesync.js
+++ b/services/sync/modules/stages/enginesync.js
@ -18,6 +18,8 @@ Cu.import("resource://services-sync/util.js");
 Cu.import("resource://services-common/observers.js");
 Cu.import("resource://services-common/async.js");
 Cu.import("resource://gre/modules/Task.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "Doctor",
+                                  "resource://services-sync/doctor.js");

 /**
 * Perform synchronization of engines.
@ -194,7 +196,7 @@ EngineSynchronizer.prototype = {
        }
      }

-      Async.promiseSpinningly(this._tryValidateEngines(enginesToValidate));
+      Async.promiseSpinningly(Doctor.consult(enginesToValidate));

      // If there were no sync engine failures
      if (this.service.status.service != SYNC_FAILED_PARTIAL) {
@ -213,106 +215,6 @@ EngineSynchronizer.prototype = {
    this.onComplete(null);
  },

-  _tryValidateEngines: Task.async(function* (recentlySyncedEngines) {
-    if (!Services.telemetry.canRecordBase || !Svc.Prefs.get("validation.enabled", false)) {
-      this._log.info("Skipping validation: validation or telemetry reporting is disabled");
-      return;
-    }
-
-    let lastValidation = Svc.Prefs.get("validation.lastTime", 0);
-    let validationInterval = Svc.Prefs.get("validation.interval");
-    let nowSeconds = Math.floor(Date.now() / 1000);
-
-    if (nowSeconds - lastValidation < validationInterval) {
-      this._log.info("Skipping validation: too recent since last validation attempt");
-      return;
-    }
-    // Update the time now, even if we may return false still. We don't want to
-    // check the rest of these more frequently than once a day.
-    Svc.Prefs.set("validation.lastTime", nowSeconds);
-
-    // Validation only occurs a certain percentage of the time.
-    let validationProbability = Svc.Prefs.get("validation.percentageChance", 0) / 100.0;
-    if (validationProbability < Math.random()) {
-      this._log.info("Skipping validation: Probability threshold not met");
-      return;
-    }
-    let maxRecords = Svc.Prefs.get("validation.maxRecords");
-    if (!maxRecords) {
-      // Don't bother asking the server for the counts if we know validation
-      // won't happen anyway.
-      return;
-    }
-
-    // maxRecords of -1 means "any number", so we can skip asking the server.
-    // Used for tests.
-    let info;
-    if (maxRecords < 0) {
-      info = {};
-      for (let e of recentlySyncedEngines) {
-        info[e.name] = 1; // needs to be < maxRecords
-      }
-      maxRecords = 2;
-    } else {
-
-      let collectionCountsURL = this.service.userBaseURL + "info/collection_counts";
-      try {
-        let infoResp = this.service._fetchInfo(collectionCountsURL);
-        if (!infoResp.success) {
-          this._log.error("Can't run validation: request to info/collection_counts responded with "
-                          + infoResp.status);
-          return;
-        }
-        info = infoResp.obj; // might throw because obj is a getter which parses json.
-      } catch (e) {
-        // Not running validation is totally fine, so we just write an error log and return.
-        this._log.error("Can't run validation: Caught error when fetching counts", e);
-        return;
-      }
-    }
-
-    if (!info) {
-      return;
-    }
-
-    let engineLookup = new Map(recentlySyncedEngines.map(e => [e.name, e]));
-    let toRun = [];
-    for (let [engineName, recordCount] of Object.entries(info)) {
-      let engine = engineLookup.get(engineName);
-      if (recordCount > maxRecords || !engine) {
-        this._log.debug(`Skipping validation for ${engineName} because it's not an engine or ` +
-                        `the number of records (${recordCount}) is greater than the maximum allowed (${maxRecords}).`);
-        continue;
-      }
-      let validator = engine.getValidator();
-      if (!validator) {
-        continue;
-      }
-      // Put this in an array so that we know how many we're going to do, so we
-      // don't tell users we're going to run some validators when we aren't.
-      toRun.push({ engine, validator });
-    }
-
-    if (!toRun.length) {
-      return;
-    }
-    Services.console.logStringMessage(
-      "Sync is about to run a consistency check. This may be slow, and " +
-      "can be controlled using the pref \"services.sync.validation.enabled\".\n" +
-      "If you encounter any problems because of this, please file a bug.");
-    for (let { validator, engine } of toRun) {
-      try {
-        let result = yield validator.validate(engine);
-        Observers.notify("weave:engine:validate:finish", result, engine.name);
-      } catch (e) {
-        this._log.error(`Failed to run validation on ${engine.name}!`, e);
-        Observers.notify("weave:engine:validate:error", e, engine.name)
-        // Keep validating -- there's no reason to think that a failure for one
-        // validator would mean the others will fail.
-      }
-    }
-  }),
-
  // Returns true if sync should proceed.
  // false / no return value means sync should be aborted.
  _syncEngine: function _syncEngine(engine) {
--- a/services/sync/moz.build
+++ b/services/sync/moz.build
@ -23,6 +23,7 @@ EXTRA_JS_MODULES['services-sync'] += [
    'modules/browserid_identity.js',
    'modules/collection_repair.js',
    'modules/collection_validator.js',
+    'modules/doctor.js',
    'modules/engines.js',
    'modules/keys.js',
    'modules/main.js',
@ -69,6 +70,6 @@ TESTING_JS_MODULES.services.sync += [
    'modules-testing/utils.js',
 ]

-JS_PREFERENCE_FILES += [
+JS_PREFERENCE_PP_FILES += [
    'services-sync.js',
 ]
--- a/services/sync/services-sync.js
+++ b/services/sync/services-sync.js
@ -65,16 +65,21 @@ pref("services.sync.fxa.privacyURL", "https://accounts.firefox.com/legal/privacy
 pref("services.sync.telemetry.submissionInterval", 43200); // 12 hours in seconds
 pref("services.sync.telemetry.maxPayloadCount", 500);

-// Note that services.sync.validation.enabled is located in browser/app/profile/firefox.js
+#ifndef RELEASE_OR_BETA
+// Enable the (fairly costly) client/server validation on nightly/aurora only.
+pref("services.sync.engine.bookmarks.validation.enabled", true);
+// Enable repair of bookmarks - requires validation also be enabled.
+pref("services.sync.engine.bookmarks.repair.enabled", true);
+#endif

 // We consider validation this frequently. After considering validation, even
 // if we don't end up validating, we won't try again unless this much time has passed.
-pref("services.sync.validation.interval", 86400); // 24 hours in seconds
+pref("services.sync.engine.bookmarks.validation.interval", 86400); // 24 hours in seconds

 // We only run validation `services.sync.validation.percentageChance` percent of
 // the time, even if it's been the right amount of time since the last validation,
 // and you meet the maxRecord checks.
-pref("services.sync.validation.percentageChance", 10);
+pref("services.sync.engine.bookmarks.validation.percentageChance", 10);

 // We won't validate an engine if it has more than this many records on the server.
-pref("services.sync.validation.maxRecords", 1000);
+pref("services.sync.engine.bookmarks.validation.maxRecords", 1000);
--- a/services/sync/tests/unit/head_appinfo.js
+++ b/services/sync/tests/unit/head_appinfo.js
@ -19,11 +19,11 @@ fhs.observe(null, "profile-after-change", null);
 // An app is going to have some prefs set which xpcshell tests don't.
 Services.prefs.setCharPref("identity.sync.tokenserver.uri", "http://token-server");

-// Set the validation prefs to attempt validation every time to avoid non-determinism.
-Services.prefs.setIntPref("services.sync.validation.interval", 0);
-Services.prefs.setIntPref("services.sync.validation.percentageChance", 100);
-Services.prefs.setIntPref("services.sync.validation.maxRecords", -1);
-Services.prefs.setBoolPref("services.sync.validation.enabled", true);
+// Set the validation prefs to attempt bookmark validation every time to avoid non-determinism.
+Services.prefs.setIntPref("services.sync.engine.bookmarks.validation.interval", 0);
+Services.prefs.setIntPref("services.sync.engine.bookmarks.validation.percentageChance", 100);
+Services.prefs.setIntPref("services.sync.engine.bookmarks.validation.maxRecords", -1);
+Services.prefs.setBoolPref("services.sync.engine.bookmarks.validation.enabled", true);

 // Make sure to provide the right OS so crypto loads the right binaries
 function getOS() {
--- a/services/sync/tests/unit/test_doctor.js
+++ b/services/sync/tests/unit/test_doctor.js
@ -0,0 +1,94 @@
+/* Any copyright is dedicated to the Public Domain.
+   http://creativecommons.org/publicdomain/zero/1.0/ */
+
+const { Doctor, REPAIR_ADVANCE_PERIOD } = Cu.import("resource://services-sync/doctor.js", {});
+
+initTestLogging("Trace");
+
+function mockDoctor(mocks) {
+  // Clone the object and put mocks in that.
+  return Object.assign({}, Doctor, mocks);
+}
+
+add_task(async function test_repairs_start() {
+  let repairStarted = false;
+  let problems = {
+    missingChildren: ["a", "b", "c"],
+  }
+  let validator = {
+    validate(engine) {
+      return problems;
+    }
+  }
+  let engine = {
+    name: "test-engine",
+    getValidator() {
+      return validator;
+    }
+  }
+  let requestor = {
+    startRepairs(validationInfo, flowID) {
+      ok(flowID, "got a flow ID");
+      equal(validationInfo, problems);
+      repairStarted = true;
+      return true;
+    }
+  }
+  let doctor = mockDoctor({
+    _getEnginesToValidate(recentlySyncedEngines) {
+      deepEqual(recentlySyncedEngines, [engine]);
+      return {
+        "test-engine": { engine, maxRecords: -1 }
+      };
+    },
+    _getRepairRequestor(engineName) {
+      equal(engineName, engine.name);
+      return requestor;
+    },
+    _shouldRepair(e) {
+      return true;
+    },
+  });
+  let promiseValidationDone = promiseOneObserver("weave:engine:validate:finish");
+  await doctor.consult([engine]);
+  await promiseValidationDone;
+  ok(repairStarted);
+});
+
+add_task(async function test_repairs_advanced_daily() {
+  let repairCalls = 0;
+  let requestor = {
+    continueRepairs() {
+      repairCalls++;
+    }
+  }
+  // start now at just after REPAIR_ADVANCE_PERIOD so we do a a first one.
+  let now = REPAIR_ADVANCE_PERIOD + 1;
+  let doctor = mockDoctor({
+    _getEnginesToValidate() {
+      return {}; // no validations.
+    },
+    _runValidators() {
+      // do nothing.
+    },
+    _getAllRepairRequestors() {
+      return {
+        foo: requestor,
+      }
+    },
+    _now() {
+      return now;
+    },
+  });
+  await doctor.consult();
+  equal(repairCalls, 1);
+  now += 10; // 10 seconds later...
+  await doctor.consult();
+  // should not have done another repair yet - it's too soon.
+  equal(repairCalls, 1);
+  // advance our pretend clock by the advance period (eg, day)
+  now += REPAIR_ADVANCE_PERIOD;
+  await doctor.consult();
+  // should have done another repair
+  equal(repairCalls, 2);
+});
--- a/services/sync/tests/unit/xpcshell.ini
+++ b/services/sync/tests/unit/xpcshell.ini
@ -146,6 +146,7 @@ requesttimeoutfactor = 4
 [test_bookmark_validator.js]
 [test_clients_engine.js]
 [test_clients_escape.js]
+[test_doctor.js]
 [test_extension_storage_engine.js]
 [test_extension_storage_tracker.js]
 [test_forms_store.js]
--- a/tools/lint/eslint/modules.json
+++ b/tools/lint/eslint/modules.json
@ -57,6 +57,7 @@
  "dispatcher.js": ["Dispatcher"],
  "distribution.js": ["DistributionCustomizer"],
  "DNSTypes.jsm": ["DNS_QUERY_RESPONSE_CODES", "DNS_AUTHORITATIVE_ANSWER_CODES", "DNS_CLASS_CODES", "DNS_RECORD_TYPES"],
+  "doctor.js": ["Doctor"],
  "dom.js": ["getAttributes"],
  "DOMRequestHelper.jsm": ["DOMRequestIpcHelper"],
  "DownloadCore.jsm": ["Download", "DownloadSource", "DownloadTarget", "DownloadError", "DownloadSaver", "DownloadCopySaver", "DownloadLegacySaver", "DownloadPDFSaver"],