Bug 1523313 - Monitor Remote Settings synchronization duration r=glasserc

Monitor Remote Settings synchronization duration

Differential Revision: https://phabricator.services.mozilla.com/D25402

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Mathieu Leplatre 2019-04-01 20:04:25 +00:00
Родитель 3602f6f6ad
Коммит 9fc31364f2
6 изменённых файлов: 140 добавлений и 27 удалений

Просмотреть файл

@ -285,6 +285,7 @@ class RemoteSettingsClient extends EventEmitter {
async maybeSync(expectedTimestamp, options = {}) {
const { loadDump = true, trigger = "manual" } = options;
const startedAt = new Date();
let reportStatus = null;
try {
// Synchronize remote data into a local DB using Kinto.
@ -393,12 +394,17 @@ class RemoteSettingsClient extends EventEmitter {
}
throw e;
} finally {
const durationMilliseconds = new Date() - startedAt;
// No error was reported, this is a success!
if (reportStatus === null) {
reportStatus = UptakeTelemetry.STATUS.SUCCESS;
}
// Report success/error status to Telemetry.
await UptakeTelemetry.report(TELEMETRY_COMPONENT, reportStatus, { source: this.identifier, trigger });
await UptakeTelemetry.report(TELEMETRY_COMPONENT, reportStatus, {
source: this.identifier,
trigger,
duration: durationMilliseconds,
});
}
}

Просмотреть файл

@ -42,7 +42,8 @@ const PREF_SETTINGS_LOAD_DUMP = "load_dump";
// Telemetry identifiers.
const TELEMETRY_COMPONENT = "remotesettings";
const TELEMETRY_SOURCE = "settings-changes-monitoring";
const TELEMETRY_SOURCE_POLL = "settings-changes-monitoring";
const TELEMETRY_SOURCE_SYNC = "settings-sync";
// Push broadcast id.
const BROADCAST_ID = "remote-settings/monitor_changes";
@ -157,8 +158,9 @@ function remoteSettingsFunction() {
* @returns {Promise} or throws error if something goes wrong.
*/
remoteSettings.pollChanges = async ({ expectedTimestamp, trigger = "manual" } = {}) => {
let telemetryArgs = {
source: TELEMETRY_SOURCE,
const startedAt = new Date();
let pollTelemetryArgs = {
source: TELEMETRY_SOURCE_POLL,
trigger,
};
@ -168,7 +170,7 @@ function remoteSettingsFunction() {
const remainingMilliseconds = parseInt(backoffReleaseTime, 10) - Date.now();
if (remainingMilliseconds > 0) {
// Backoff time has not elapsed yet.
await UptakeTelemetry.report(TELEMETRY_COMPONENT, UptakeTelemetry.STATUS.BACKOFF, telemetryArgs);
await UptakeTelemetry.report(TELEMETRY_COMPONENT, UptakeTelemetry.STATUS.BACKOFF, pollTelemetryArgs);
throw new Error(`Server is asking clients to back off; retry in ${Math.ceil(remainingMilliseconds / 1000)}s.`);
} else {
gPrefs.clearUserPref(PREF_SETTINGS_SERVER_BACKOFF);
@ -198,7 +200,7 @@ function remoteSettingsFunction() {
} else {
reportStatus = UptakeTelemetry.STATUS.UNKNOWN_ERROR;
}
await UptakeTelemetry.report(TELEMETRY_COMPONENT, reportStatus, telemetryArgs);
await UptakeTelemetry.report(TELEMETRY_COMPONENT, reportStatus, pollTelemetryArgs);
// No need to go further.
throw new Error(`Polling for changes failed: ${e.message}.`);
}
@ -206,12 +208,12 @@ function remoteSettingsFunction() {
const { serverTimeMillis, changes, currentEtag, backoffSeconds, ageSeconds } = pollResult;
// Report age of server data in Telemetry.
telemetryArgs = { age: ageSeconds, ...telemetryArgs };
pollTelemetryArgs = { age: ageSeconds, ...pollTelemetryArgs };
// Report polling success to Uptake Telemetry.
const reportStatus = changes.length === 0 ? UptakeTelemetry.STATUS.UP_TO_DATE
: UptakeTelemetry.STATUS.SUCCESS;
await UptakeTelemetry.report(TELEMETRY_COMPONENT, reportStatus, telemetryArgs);
await UptakeTelemetry.report(TELEMETRY_COMPONENT, reportStatus, pollTelemetryArgs);
// Check if the server asked the clients to back off (for next poll).
if (backoffSeconds) {
@ -231,13 +233,14 @@ function remoteSettingsFunction() {
const loadDump = gPrefs.getBoolPref(PREF_SETTINGS_LOAD_DUMP, true);
// Iterate through the collections version info and initiate a synchronization
// on the related remote settings client.
// on the related remote settings clients.
let firstError;
for (const change of changes) {
const { bucket, collection, last_modified } = change;
const client = await _client(bucket, collection);
if (!client) {
// This collection has no associated client (eg. preview, other platform...)
continue;
}
// Start synchronization! It will be a no-op if the specified `lastModified` equals
@ -254,8 +257,14 @@ function remoteSettingsFunction() {
}
}
}
// Report total synchronization duration to Telemetry.
const durationMilliseconds = new Date() - startedAt;
const syncTelemetryArgs = { source: TELEMETRY_SOURCE_SYNC, duration: durationMilliseconds, trigger };
if (firstError) {
// cause the promise to reject by throwing the first observed error
// Report the global synchronization failure. Individual uptake reports will also have been sent for each collection.
await UptakeTelemetry.report(TELEMETRY_COMPONENT, UptakeTelemetry.STATUS.SYNC_ERROR, syncTelemetryArgs);
// Rethrow the first observed error
throw firstError;
}
@ -264,6 +273,9 @@ function remoteSettingsFunction() {
gPrefs.setCharPref(PREF_SETTINGS_LAST_ETAG, currentEtag);
}
// Report the global synchronization success.
await UptakeTelemetry.report(TELEMETRY_COMPONENT, UptakeTelemetry.STATUS.SUCCESS, syncTelemetryArgs);
Services.obs.notifyObservers(null, "remote-settings:changes-poll-end");
};

Просмотреть файл

@ -307,6 +307,20 @@ add_task(async function test_telemetry_if_sync_succeeds() {
});
add_task(clear_state);
add_task(async function test_synchronization_duration_is_reported_in_uptake_status() {
const backup = UptakeTelemetry.report;
let reportedDuration = -1;
UptakeTelemetry.report = (component, status, { duration }) => {
reportedDuration = duration;
};
await client.maybeSync(2000);
UptakeTelemetry.report = backup;
Assert.ok(reportedDuration > 0);
});
add_task(clear_state);
add_task(async function test_telemetry_reports_if_application_fails() {
const startHistogram = getUptakeTelemetrySnapshot(client.identifier);
client.on("sync", () => { throw new Error("boom"); });

Просмотреть файл

@ -6,6 +6,7 @@ const {Services} = ChromeUtils.import("resource://gre/modules/Services.jsm");
const { UptakeTelemetry } = ChromeUtils.import("resource://services-common/uptake-telemetry.js");
const { RemoteSettings } = ChromeUtils.import("resource://services-settings/remote-settings.js");
const { Kinto } = ChromeUtils.import("resource://services-common/kinto-offline-client.js");
const { setTimeout } = ChromeUtils.import("resource://gre/modules/Timer.jsm");
const IS_ANDROID = AppConstants.platform == "android";
@ -17,7 +18,8 @@ const PREF_CLOCK_SKEW_SECONDS = "services.settings.clock_skew_seconds";
const DB_NAME = "remote-settings";
// Telemetry report result.
const TELEMETRY_HISTOGRAM_KEY = "settings-changes-monitoring";
const TELEMETRY_HISTOGRAM_POLL_KEY = "settings-changes-monitoring";
const TELEMETRY_HISTOGRAM_SYNC_KEY = "settings-sync";
const CHANGES_PATH = "/v1/buckets/monitor/collections/changes/records";
var server;
@ -87,7 +89,8 @@ add_task(clear_state);
add_task(async function test_check_success() {
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const startPollHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
const startSyncHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_SYNC_KEY);
const serverTime = 8000;
server.registerPathHandler(CHANGES_PATH, serveChangesEntries(serverTime, [{
@ -134,11 +137,13 @@ add_task(async function test_check_success() {
// check the last_update is updated
Assert.equal(Services.prefs.getIntPref(PREF_LAST_UPDATE), serverTime / 1000);
// ensure that we've accumulated the correct telemetry
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const endPollHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
const expectedIncrements = {
[UptakeTelemetry.STATUS.SUCCESS]: 1,
};
checkUptakeTelemetry(startHistogram, endHistogram, expectedIncrements);
checkUptakeTelemetry(startPollHistogram, endPollHistogram, expectedIncrements);
const endSyncHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_SYNC_KEY);
checkUptakeTelemetry(startSyncHistogram, endSyncHistogram, expectedIncrements);
});
add_task(clear_state);
@ -177,7 +182,7 @@ add_task(clear_state);
add_task(async function test_check_up_to_date() {
// Simulate a poll with up-to-date collection.
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
const serverTime = 4000;
function server304(request, response) {
@ -215,7 +220,7 @@ add_task(async function test_check_up_to_date() {
Assert.equal(Services.prefs.getIntPref(PREF_LAST_UPDATE), serverTime / 1000);
// ensure that we've accumulated the correct telemetry
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
const expectedIncrements = {
[UptakeTelemetry.STATUS.UP_TO_DATE]: 1,
};
@ -298,7 +303,9 @@ add_task(async function test_age_of_data_is_reported_in_uptake_status() {
const backup = UptakeTelemetry.report;
let reportedAge;
UptakeTelemetry.report = (component, status, { age }) => {
reportedAge = age;
if (age) {
reportedAge = age;
}
};
await RemoteSettings.pollChanges();
@ -308,6 +315,34 @@ add_task(async function test_age_of_data_is_reported_in_uptake_status() {
});
add_task(clear_state);
add_task(async function test_synchronization_duration_is_reported_in_uptake_status() {
server.registerPathHandler(CHANGES_PATH, serveChangesEntries(10000, [{
id: "b6ba7fab-a40a-4d03-a4af-6b627f3c5b36",
last_modified: 42,
host: "localhost",
bucket: "main",
collection: "some-entry",
}]));
const c = RemoteSettings("some-entry");
// Simulate a synchronization that lasts 1 sec.
// eslint-disable-next-line mozilla/no-arbitrary-setTimeout
c.maybeSync = () => new Promise(resolve => setTimeout(resolve, 1000));
const backup = UptakeTelemetry.report;
let reportedStatus;
let reportedDuration = -1;
UptakeTelemetry.report = (component, status, { duration }) => {
reportedStatus = status;
reportedDuration = duration;
};
await RemoteSettings.pollChanges();
UptakeTelemetry.report = backup;
Assert.ok(reportedDuration >= 1000);
Assert.equal(reportedStatus, "success");
});
add_task(clear_state);
add_task(async function test_success_with_partial_list() {
function partialList(request, response) {
@ -356,7 +391,7 @@ add_task(clear_state);
add_task(async function test_server_bad_json() {
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
function simulateBadJSON(request, response) {
response.setHeader("Content-Type", "application/json; charset=UTF-8");
@ -373,7 +408,7 @@ add_task(async function test_server_bad_json() {
}
Assert.ok(/JSON.parse: unexpected character/.test(error.message));
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
const expectedIncrements = {
[UptakeTelemetry.STATUS.PARSE_ERROR]: 1,
};
@ -383,7 +418,7 @@ add_task(clear_state);
add_task(async function test_server_bad_content_type() {
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
function simulateBadContentType(request, response) {
response.setHeader("Content-Type", "text/html");
@ -400,7 +435,7 @@ add_task(async function test_server_bad_content_type() {
}
Assert.ok(/Unexpected content-type/.test(error.message));
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
const expectedIncrements = {
[UptakeTelemetry.STATUS.CONTENT_ERROR]: 1,
};
@ -423,7 +458,7 @@ add_task(clear_state);
add_task(async function test_server_error() {
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
// Simulate a server error.
function simulateErrorResponse(request, response) {
@ -461,7 +496,7 @@ add_task(async function test_server_error() {
// When an error occurs, last update was not overwritten.
Assert.equal(Services.prefs.getIntPref(PREF_LAST_UPDATE), 42);
// ensure that we've accumulated the correct telemetry
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
const expectedIncrements = {
[UptakeTelemetry.STATUS.SERVER_ERROR]: 1,
};
@ -469,6 +504,49 @@ add_task(async function test_server_error() {
});
add_task(clear_state);
add_task(async function test_client_error() {
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_SYNC_KEY);
server.registerPathHandler(CHANGES_PATH, serveChangesEntries(10000, [{
id: "b6ba7fab-a40a-4d03-a4af-6b627f3c5b36",
last_modified: 42,
host: "localhost",
bucket: "main",
collection: "some-entry",
}]));
const c = RemoteSettings("some-entry");
c.maybeSync = () => { throw new Error("boom"); };
let notificationObserved = false;
const observer = {
observe(aSubject, aTopic, aData) {
Services.obs.removeObserver(this, "remote-settings:changes-poll-end");
notificationObserved = true;
},
};
Services.obs.addObserver(observer, "remote-settings:changes-poll-end");
Services.prefs.setIntPref(PREF_LAST_ETAG, 42);
// pollChanges() fails with adequate error and no notification.
let error;
try {
await RemoteSettings.pollChanges();
} catch (e) {
error = e;
}
Assert.ok(!notificationObserved, "a notification should not have been observed");
Assert.ok(/boom/.test(error.message), "original client error is thrown");
// When an error occurs, last etag was not overwritten.
Assert.equal(Services.prefs.getIntPref(PREF_LAST_ETAG), 42);
// ensure that we've accumulated the correct telemetry
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_SYNC_KEY);
const expectedIncrements = {
[UptakeTelemetry.STATUS.SYNC_ERROR]: 1,
};
checkUptakeTelemetry(startHistogram, endHistogram, expectedIncrements);
});
add_task(clear_state);
add_task(async function test_check_clockskew_is_updated() {
const serverTime = 2000;
@ -529,7 +607,7 @@ add_task(clear_state);
add_task(async function test_backoff() {
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
function simulateBackoffResponse(request, response) {
response.setHeader("Content-Type", "application/json; charset=UTF-8");
@ -566,7 +644,7 @@ add_task(async function test_backoff() {
Assert.ok(!Services.prefs.prefHasUserValue(PREF_SETTINGS_SERVER_BACKOFF));
// Ensure that we've accumulated the correct telemetry
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
const expectedIncrements = {
[UptakeTelemetry.STATUS.SUCCESS]: 1,
[UptakeTelemetry.STATUS.UP_TO_DATE]: 1,
@ -578,7 +656,7 @@ add_task(clear_state);
add_task(async function test_network_error() {
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
// Simulate a network error (to check telemetry report).
Services.prefs.setCharPref(PREF_SETTINGS_SERVER, "http://localhost:42/v1");
@ -587,7 +665,7 @@ add_task(async function test_network_error() {
} catch (e) {}
// ensure that we've accumulated the correct telemetry
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY);
const endHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_POLL_KEY);
const expectedIncrements = {
[UptakeTelemetry.STATUS.NETWORK_ERROR]: 1,
};

Просмотреть файл

@ -1014,6 +1014,8 @@ uptake.remotecontent.result:
"timer", "forced", "manual")
age: >
The age of pulled data in seconds (ie. difference between publication time and fetch time).
duration: >
The duration of the synchronization process in milliseconds.
bug_numbers:
- 1517469
record_in_processes: ["main"]

Просмотреть файл

@ -87,6 +87,7 @@ Events sent using the telemetry events API can contain additional information. U
- ``trigger``: A label to distinguish what triggered the polling/fetching of remote content (eg. ``"broadcast"``, ``"timer"``, ``"forced"``, ``"manual"``)
- ``age``: The age of pulled data in seconds (ie. difference between publication time and fetch time).
- ``duration``: The duration of the synchronization process in milliseconds.
.. code-block:: js