Bug 1736040 - collect CPU time through Glean, r=chutten,gerald,gcp

Differential Revision: https://phabricator.services.mozilla.com/D128609
This commit is contained in:
Florian Quèze 2021-10-25 18:27:24 +00:00
Родитель 4d0b49fcd3
Коммит bcdfb763fc
11 изменённых файлов: 243 добавлений и 3 удалений

Просмотреть файл

@ -740,6 +740,9 @@ UniquePtr<SandboxBroker::Policy> SandboxBrokerPolicyFactory::GetContentPolicy(
// Bug 1198550: the profiler's replacement for dl_iterate_phdr
policy->AddPath(rdonly, nsPrintfCString("/proc/%d/maps", aPid).get());
// Bug 1736040: CPU use telemetry
policy->AddPath(rdonly, nsPrintfCString("/proc/%d/stat", aPid).get());
// Bug 1198552: memory reporting.
AddMemoryReporting(policy.get(), aPid);

Просмотреть файл

@ -10,6 +10,7 @@
#include "mozilla/dom/ContentChild.h"
#include "mozilla/dom/ContentParent.h"
#include "mozilla/MozPromise.h"
#include "mozilla/ProcInfo.h"
#include "nsTArray.h"
#include "nsThreadUtils.h"
@ -20,13 +21,36 @@ using FlushFOGDataPromise = mozilla::dom::ContentParent::FlushFOGDataPromise;
namespace mozilla {
namespace glean {
static void RecordCpuTime() {
static uint64_t previousCpuTime = 0;
uint64_t cpuTime;
if (NS_FAILED(GetCpuTimeSinceProcessStartInMs(&cpuTime))) {
return;
}
uint64_t newCpuTime = cpuTime - previousCpuTime;
previousCpuTime += newCpuTime;
if (newCpuTime) {
// The counters are reset at least once a day. Assuming all cores are used
// continuously, an int32 can hold the data for 24.85 cores.
// This should be fine for now, but may overflow in the future.
power::total_cpu_time_ms.Add(int32_t(newCpuTime));
}
}
/**
* The parent process is asking you to flush your data ASAP.
* Flush your data ASAP, either because the parent process is asking you to
* or because the process is about to shutdown.
*
* @param aResolver - The function you need to call with the bincoded,
* serialized payload that the Rust impl hands you.
*/
void FlushFOGData(std::function<void(ipc::ByteBuf&&)>&& aResolver) {
// Record the CPU time right before data is sent to the parent.
RecordCpuTime();
ByteBuf buf;
uint32_t ipcBufferSize = impl::fog_serialize_ipc_buf();
bool ok = buf.Allocate(ipcBufferSize);
@ -106,6 +130,9 @@ void SendFOGData(ipc::ByteBuf&& buf) {
* sending it all down into Rust to be used.
*/
RefPtr<GenericPromise> FlushAndUseFOGData() {
// Record CPU time on the parent before sending requests to child processes.
RecordCpuTime();
RefPtr<GenericPromise::Private> ret = new GenericPromise::Private(__func__);
std::function<void(nsTArray<ByteBuf> &&)> resolver =
[ret](nsTArray<ByteBuf>&& bufs) {

Просмотреть файл

@ -12,6 +12,7 @@ metrics_yamls = [
"toolkit/mozapps/update/metrics.yaml",
"browser/base/content/metrics.yaml",
"gfx/metrics.yaml",
"toolkit/components/processtools/metrics.yaml",
]
# The list of all Glean pings.yaml files, relative to the top src dir.

Просмотреть файл

@ -20,6 +20,13 @@ namespace ipc {
class GeckoChildProcessHost;
}
/**
* Return the number of milliseconds of CPU time used since process start.
*
* @return NS_OK on success.
*/
nsresult GetCpuTimeSinceProcessStartInMs(uint64_t* aResult);
// Process types. When updating this enum, please make sure to update
// WebIDLProcType, ChromeUtils::RequestProcInfo and ProcTypeToWebIDL to
// mirror the changes.

Просмотреть файл

@ -21,6 +21,16 @@
namespace mozilla {
nsresult GetCpuTimeSinceProcessStartInMs(uint64_t* aResult) {
struct proc_taskinfo pti;
if ((unsigned long)proc_pidinfo(getpid(), PROC_PIDTASKINFO, 0, &pti, PROC_PIDTASKINFO_SIZE) <
PROC_PIDTASKINFO_SIZE) {
return NS_ERROR_FAILURE;
}
*aResult = (pti.pti_total_user + pti.pti_total_system) / PR_NSEC_PER_MSEC;
return NS_OK;
}
RefPtr<ProcInfoPromise> GetProcInfo(nsTArray<ProcInfoRequest>&& aRequests) {
auto holder = MakeUnique<MozPromiseHolder<ProcInfoPromise>>();
RefPtr<ProcInfoPromise> promise = holder->Ensure(__func__);

Просмотреть файл

@ -126,7 +126,11 @@ class StatReader {
// Reads the stat file and puts its content in a nsString.
nsresult ReadFile(nsAutoString& aFileContent) {
if (mFilepath.IsEmpty()) {
mFilepath.AppendPrintf("/proc/%u/stat", mPid);
if (mPid == 0) {
mFilepath.AssignLiteral("/proc/self/stat");
} else {
mFilepath.AppendPrintf("/proc/%u/stat", mPid);
}
}
FILE* fstat = fopen(mFilepath.get(), "r");
if (!fstat) {
@ -181,6 +185,18 @@ class ThreadInfoReader final : public StatReader {
base::ProcessId mTid;
};
nsresult GetCpuTimeSinceProcessStartInMs(uint64_t* aResult) {
StatReader reader(0);
ProcInfo info;
nsresult rv = reader.ParseProc(info);
if (NS_FAILED(rv)) {
return rv;
}
*aResult = (info.cpuKernel + info.cpuUser) / PR_NSEC_PER_MSEC;
return NS_OK;
}
RefPtr<ProcInfoPromise> GetProcInfo(nsTArray<ProcInfoRequest>&& aRequests) {
auto holder = MakeUnique<MozPromiseHolder<ProcInfoPromise>>();
RefPtr<ProcInfoPromise> promise = holder->Ensure(__func__);

Просмотреть файл

@ -24,6 +24,17 @@ uint64_t ToNanoSeconds(const FILETIME& aFileTime) {
return usec.QuadPart * 100;
}
nsresult GetCpuTimeSinceProcessStartInMs(uint64_t* aResult) {
FILETIME createTime, exitTime, kernelTime, userTime;
if (!GetProcessTimes(::GetCurrentProcess(), &createTime, &exitTime,
&kernelTime, &userTime)) {
return NS_ERROR_FAILURE;
}
*aResult =
(ToNanoSeconds(kernelTime) + ToNanoSeconds(userTime)) / PR_NSEC_PER_MSEC;
return NS_OK;
}
RefPtr<ProcInfoPromise> GetProcInfo(nsTArray<ProcInfoRequest>&& aRequests) {
auto holder = MakeUnique<MozPromiseHolder<ProcInfoPromise>>();
RefPtr<ProcInfoPromise> promise = holder->Ensure(__func__);

Просмотреть файл

@ -0,0 +1,25 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# This file defines the metrics that are recorded by the Glean SDK. They are
# automatically converted to platform-specific code at build time using the
# `glean_parser` PyPI package.
---
$schema: moz://mozilla.org/schemas/glean/metrics/2-0-0
power:
total_cpu_time_ms:
type: counter
description: >
Total CPU time used by all processes in ms.
bugs:
- https://bugzilla.mozilla.org/show_bug.cgi?id=1736040
data_reviews:
- https://bugzilla.mozilla.org/show_bug.cgi?id=1736040
data_sensitivity:
- technical
notification_emails:
- florian@mozilla.com
expires: never

Просмотреть файл

@ -0,0 +1,128 @@
"use strict";
Cu.importGlobalProperties(["Glean"]);
const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
const FOG = Cc["@mozilla.org/toolkit/glean;1"].createInstance(Ci.nsIFOG);
var cpuThreadCount;
add_task(async function setup() {
// FOG needs a profile directory to put its data in.
do_get_profile();
FOG.initializeFOG();
cpuThreadCount = (await Services.sysinfo.processInfo).count;
});
async function getCpuTimeFromProcInfo() {
const NS_PER_MS = 1000000;
let cpuTimeForProcess = p => (p.cpuUser + p.cpuKernel) / NS_PER_MS;
let procInfo = await ChromeUtils.requestProcInfo();
return (
cpuTimeForProcess(procInfo) +
procInfo.children.map(cpuTimeForProcess).reduce((a, b) => a + b, 0)
);
}
add_task(async function test_totalCpuTime_in_parent() {
let startTime = Date.now();
let initialProcInfoCpuTime = Math.floor(await getCpuTimeFromProcInfo());
await FOG.testFlushAllChildren();
let initialCpuTime = Glean.power.totalCpuTimeMs.testGetValue();
Assert.greater(
initialCpuTime,
0,
"The CPU time used by starting the test harness should be more than 0"
);
let initialProcInfoCpuTime2 = Math.ceil(await getCpuTimeFromProcInfo());
Assert.greaterOrEqual(
initialCpuTime,
initialProcInfoCpuTime,
"The CPU time reported through Glean should be at least as much as the CPU time reported by ProcInfo before asking Glean for the data"
);
Assert.lessOrEqual(
initialCpuTime,
initialProcInfoCpuTime2,
"The CPU time reported through Glean should be no more than the CPU time reported by ProcInfo after asking Glean for the data"
);
// 50 is an arbitrary value, but the resolution is 16ms on Windows,
// so this needs to be significantly more than 16.
const kBusyWaitForMs = 50;
while (Date.now() - startTime < kBusyWaitForMs) {
// Burn CPU time...
}
let additionalProcInfoCpuTime =
Math.floor(await getCpuTimeFromProcInfo()) - initialProcInfoCpuTime2;
await FOG.testFlushAllChildren();
let additionalCpuTime =
Glean.power.totalCpuTimeMs.testGetValue() - initialCpuTime;
info(
`additional CPU time according to ProcInfo: ${additionalProcInfoCpuTime}ms and Glean ${additionalCpuTime}ms`
);
// On a machine where the CPU is very busy, our busy wait loop might burn less
// CPU than expected if other processes are being scheduled instead of us.
let expectedAdditionalCpuTime = Math.min(
additionalProcInfoCpuTime,
kBusyWaitForMs
);
Assert.greaterOrEqual(
additionalCpuTime,
expectedAdditionalCpuTime,
`The total CPU time should have increased by at least ${expectedAdditionalCpuTime}ms`
);
let wallClockTime = Date.now() - startTime;
Assert.lessOrEqual(
additionalCpuTime,
wallClockTime * cpuThreadCount,
`The total CPU time should have increased by at most the wall clock time (${wallClockTime}ms) * ${cpuThreadCount} CPU threads`
);
});
add_task(async function test_totalCpuTime_in_child() {
const MESSAGE_CHILD_TEST_DONE = "ChildTest:Done";
let startTime = Date.now();
await FOG.testFlushAllChildren();
let initialCpuTime = Glean.power.totalCpuTimeMs.testGetValue();
let initialProcInfoCpuTime = await getCpuTimeFromProcInfo();
run_test_in_child("test_total_cpu_time_child.js");
let burntCpuTime = await do_await_remote_message(MESSAGE_CHILD_TEST_DONE);
let additionalProcInfoCpuTime =
(await getCpuTimeFromProcInfo()) - initialProcInfoCpuTime;
await FOG.testFlushAllChildren();
let additionalCpuTime =
Glean.power.totalCpuTimeMs.testGetValue() - initialCpuTime;
info(
`additional CPU time according to ProcInfo: ${additionalProcInfoCpuTime}ms and Glean ${additionalCpuTime}ms`
);
// On a machine where the CPU is very busy, our busy wait loop might burn less
// CPU than expected if other processes are being scheduled instead of us.
let expectedAdditionalCpuTime = Math.min(
Math.floor(additionalProcInfoCpuTime),
burntCpuTime
);
Assert.greaterOrEqual(
additionalCpuTime,
expectedAdditionalCpuTime,
`The total CPU time should have increased by at least ${expectedAdditionalCpuTime}ms`
);
let wallClockTime = Date.now() - startTime;
Assert.lessOrEqual(
additionalCpuTime,
wallClockTime * cpuThreadCount,
`The total CPU time should have increased by at most the wall clock time (${wallClockTime}ms) * ${cpuThreadCount} CPU threads`
);
});

Просмотреть файл

@ -0,0 +1,10 @@
const MESSAGE_CHILD_TEST_DONE = "ChildTest:Done";
function run_test() {
const kBusyWaitForMs = 50;
let startTime = Date.now();
while (Date.now() - startTime < kBusyWaitForMs) {
// Burn CPU time...
}
do_send_remote_message(MESSAGE_CHILD_TEST_DONE, kBusyWaitForMs);
}

Просмотреть файл

@ -1,6 +1,8 @@
[DEFAULT]
firefox-appdir = browser
skip-if = os == 'android'
subprocess = true
[test_process_kill.js]
skip-if = os == 'android'
[test_total_cpu_time.js]
support-files = test_total_cpu_time_child.js