зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1736040 - collect CPU time through Glean, r=chutten,gerald,gcp
Differential Revision: https://phabricator.services.mozilla.com/D128609
This commit is contained in:
Родитель
4d0b49fcd3
Коммит
bcdfb763fc
|
@ -740,6 +740,9 @@ UniquePtr<SandboxBroker::Policy> SandboxBrokerPolicyFactory::GetContentPolicy(
|
|||
// Bug 1198550: the profiler's replacement for dl_iterate_phdr
|
||||
policy->AddPath(rdonly, nsPrintfCString("/proc/%d/maps", aPid).get());
|
||||
|
||||
// Bug 1736040: CPU use telemetry
|
||||
policy->AddPath(rdonly, nsPrintfCString("/proc/%d/stat", aPid).get());
|
||||
|
||||
// Bug 1198552: memory reporting.
|
||||
AddMemoryReporting(policy.get(), aPid);
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "mozilla/dom/ContentChild.h"
|
||||
#include "mozilla/dom/ContentParent.h"
|
||||
#include "mozilla/MozPromise.h"
|
||||
#include "mozilla/ProcInfo.h"
|
||||
#include "nsTArray.h"
|
||||
#include "nsThreadUtils.h"
|
||||
|
||||
|
@ -20,13 +21,36 @@ using FlushFOGDataPromise = mozilla::dom::ContentParent::FlushFOGDataPromise;
|
|||
namespace mozilla {
|
||||
namespace glean {
|
||||
|
||||
static void RecordCpuTime() {
|
||||
static uint64_t previousCpuTime = 0;
|
||||
|
||||
uint64_t cpuTime;
|
||||
if (NS_FAILED(GetCpuTimeSinceProcessStartInMs(&cpuTime))) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t newCpuTime = cpuTime - previousCpuTime;
|
||||
previousCpuTime += newCpuTime;
|
||||
|
||||
if (newCpuTime) {
|
||||
// The counters are reset at least once a day. Assuming all cores are used
|
||||
// continuously, an int32 can hold the data for 24.85 cores.
|
||||
// This should be fine for now, but may overflow in the future.
|
||||
power::total_cpu_time_ms.Add(int32_t(newCpuTime));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The parent process is asking you to flush your data ASAP.
|
||||
* Flush your data ASAP, either because the parent process is asking you to
|
||||
* or because the process is about to shutdown.
|
||||
*
|
||||
* @param aResolver - The function you need to call with the bincoded,
|
||||
* serialized payload that the Rust impl hands you.
|
||||
*/
|
||||
void FlushFOGData(std::function<void(ipc::ByteBuf&&)>&& aResolver) {
|
||||
// Record the CPU time right before data is sent to the parent.
|
||||
RecordCpuTime();
|
||||
|
||||
ByteBuf buf;
|
||||
uint32_t ipcBufferSize = impl::fog_serialize_ipc_buf();
|
||||
bool ok = buf.Allocate(ipcBufferSize);
|
||||
|
@ -106,6 +130,9 @@ void SendFOGData(ipc::ByteBuf&& buf) {
|
|||
* sending it all down into Rust to be used.
|
||||
*/
|
||||
RefPtr<GenericPromise> FlushAndUseFOGData() {
|
||||
// Record CPU time on the parent before sending requests to child processes.
|
||||
RecordCpuTime();
|
||||
|
||||
RefPtr<GenericPromise::Private> ret = new GenericPromise::Private(__func__);
|
||||
std::function<void(nsTArray<ByteBuf> &&)> resolver =
|
||||
[ret](nsTArray<ByteBuf>&& bufs) {
|
||||
|
|
|
@ -12,6 +12,7 @@ metrics_yamls = [
|
|||
"toolkit/mozapps/update/metrics.yaml",
|
||||
"browser/base/content/metrics.yaml",
|
||||
"gfx/metrics.yaml",
|
||||
"toolkit/components/processtools/metrics.yaml",
|
||||
]
|
||||
|
||||
# The list of all Glean pings.yaml files, relative to the top src dir.
|
||||
|
|
|
@ -20,6 +20,13 @@ namespace ipc {
|
|||
class GeckoChildProcessHost;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of milliseconds of CPU time used since process start.
|
||||
*
|
||||
* @return NS_OK on success.
|
||||
*/
|
||||
nsresult GetCpuTimeSinceProcessStartInMs(uint64_t* aResult);
|
||||
|
||||
// Process types. When updating this enum, please make sure to update
|
||||
// WebIDLProcType, ChromeUtils::RequestProcInfo and ProcTypeToWebIDL to
|
||||
// mirror the changes.
|
||||
|
|
|
@ -21,6 +21,16 @@
|
|||
|
||||
namespace mozilla {
|
||||
|
||||
nsresult GetCpuTimeSinceProcessStartInMs(uint64_t* aResult) {
|
||||
struct proc_taskinfo pti;
|
||||
if ((unsigned long)proc_pidinfo(getpid(), PROC_PIDTASKINFO, 0, &pti, PROC_PIDTASKINFO_SIZE) <
|
||||
PROC_PIDTASKINFO_SIZE) {
|
||||
return NS_ERROR_FAILURE;
|
||||
}
|
||||
*aResult = (pti.pti_total_user + pti.pti_total_system) / PR_NSEC_PER_MSEC;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
RefPtr<ProcInfoPromise> GetProcInfo(nsTArray<ProcInfoRequest>&& aRequests) {
|
||||
auto holder = MakeUnique<MozPromiseHolder<ProcInfoPromise>>();
|
||||
RefPtr<ProcInfoPromise> promise = holder->Ensure(__func__);
|
||||
|
|
|
@ -126,7 +126,11 @@ class StatReader {
|
|||
// Reads the stat file and puts its content in a nsString.
|
||||
nsresult ReadFile(nsAutoString& aFileContent) {
|
||||
if (mFilepath.IsEmpty()) {
|
||||
mFilepath.AppendPrintf("/proc/%u/stat", mPid);
|
||||
if (mPid == 0) {
|
||||
mFilepath.AssignLiteral("/proc/self/stat");
|
||||
} else {
|
||||
mFilepath.AppendPrintf("/proc/%u/stat", mPid);
|
||||
}
|
||||
}
|
||||
FILE* fstat = fopen(mFilepath.get(), "r");
|
||||
if (!fstat) {
|
||||
|
@ -181,6 +185,18 @@ class ThreadInfoReader final : public StatReader {
|
|||
base::ProcessId mTid;
|
||||
};
|
||||
|
||||
nsresult GetCpuTimeSinceProcessStartInMs(uint64_t* aResult) {
|
||||
StatReader reader(0);
|
||||
ProcInfo info;
|
||||
nsresult rv = reader.ParseProc(info);
|
||||
if (NS_FAILED(rv)) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
*aResult = (info.cpuKernel + info.cpuUser) / PR_NSEC_PER_MSEC;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
RefPtr<ProcInfoPromise> GetProcInfo(nsTArray<ProcInfoRequest>&& aRequests) {
|
||||
auto holder = MakeUnique<MozPromiseHolder<ProcInfoPromise>>();
|
||||
RefPtr<ProcInfoPromise> promise = holder->Ensure(__func__);
|
||||
|
|
|
@ -24,6 +24,17 @@ uint64_t ToNanoSeconds(const FILETIME& aFileTime) {
|
|||
return usec.QuadPart * 100;
|
||||
}
|
||||
|
||||
nsresult GetCpuTimeSinceProcessStartInMs(uint64_t* aResult) {
|
||||
FILETIME createTime, exitTime, kernelTime, userTime;
|
||||
if (!GetProcessTimes(::GetCurrentProcess(), &createTime, &exitTime,
|
||||
&kernelTime, &userTime)) {
|
||||
return NS_ERROR_FAILURE;
|
||||
}
|
||||
*aResult =
|
||||
(ToNanoSeconds(kernelTime) + ToNanoSeconds(userTime)) / PR_NSEC_PER_MSEC;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
RefPtr<ProcInfoPromise> GetProcInfo(nsTArray<ProcInfoRequest>&& aRequests) {
|
||||
auto holder = MakeUnique<MozPromiseHolder<ProcInfoPromise>>();
|
||||
RefPtr<ProcInfoPromise> promise = holder->Ensure(__func__);
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
# This file defines the metrics that are recorded by the Glean SDK. They are
|
||||
# automatically converted to platform-specific code at build time using the
|
||||
# `glean_parser` PyPI package.
|
||||
|
||||
---
|
||||
$schema: moz://mozilla.org/schemas/glean/metrics/2-0-0
|
||||
|
||||
power:
|
||||
total_cpu_time_ms:
|
||||
type: counter
|
||||
description: >
|
||||
Total CPU time used by all processes in ms.
|
||||
bugs:
|
||||
- https://bugzilla.mozilla.org/show_bug.cgi?id=1736040
|
||||
data_reviews:
|
||||
- https://bugzilla.mozilla.org/show_bug.cgi?id=1736040
|
||||
data_sensitivity:
|
||||
- technical
|
||||
notification_emails:
|
||||
- florian@mozilla.com
|
||||
expires: never
|
|
@ -0,0 +1,128 @@
|
|||
"use strict";
|
||||
|
||||
Cu.importGlobalProperties(["Glean"]);
|
||||
|
||||
const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
|
||||
|
||||
const FOG = Cc["@mozilla.org/toolkit/glean;1"].createInstance(Ci.nsIFOG);
|
||||
|
||||
var cpuThreadCount;
|
||||
|
||||
add_task(async function setup() {
|
||||
// FOG needs a profile directory to put its data in.
|
||||
do_get_profile();
|
||||
|
||||
FOG.initializeFOG();
|
||||
|
||||
cpuThreadCount = (await Services.sysinfo.processInfo).count;
|
||||
});
|
||||
|
||||
async function getCpuTimeFromProcInfo() {
|
||||
const NS_PER_MS = 1000000;
|
||||
let cpuTimeForProcess = p => (p.cpuUser + p.cpuKernel) / NS_PER_MS;
|
||||
let procInfo = await ChromeUtils.requestProcInfo();
|
||||
return (
|
||||
cpuTimeForProcess(procInfo) +
|
||||
procInfo.children.map(cpuTimeForProcess).reduce((a, b) => a + b, 0)
|
||||
);
|
||||
}
|
||||
|
||||
add_task(async function test_totalCpuTime_in_parent() {
|
||||
let startTime = Date.now();
|
||||
|
||||
let initialProcInfoCpuTime = Math.floor(await getCpuTimeFromProcInfo());
|
||||
await FOG.testFlushAllChildren();
|
||||
|
||||
let initialCpuTime = Glean.power.totalCpuTimeMs.testGetValue();
|
||||
Assert.greater(
|
||||
initialCpuTime,
|
||||
0,
|
||||
"The CPU time used by starting the test harness should be more than 0"
|
||||
);
|
||||
let initialProcInfoCpuTime2 = Math.ceil(await getCpuTimeFromProcInfo());
|
||||
|
||||
Assert.greaterOrEqual(
|
||||
initialCpuTime,
|
||||
initialProcInfoCpuTime,
|
||||
"The CPU time reported through Glean should be at least as much as the CPU time reported by ProcInfo before asking Glean for the data"
|
||||
);
|
||||
Assert.lessOrEqual(
|
||||
initialCpuTime,
|
||||
initialProcInfoCpuTime2,
|
||||
"The CPU time reported through Glean should be no more than the CPU time reported by ProcInfo after asking Glean for the data"
|
||||
);
|
||||
|
||||
// 50 is an arbitrary value, but the resolution is 16ms on Windows,
|
||||
// so this needs to be significantly more than 16.
|
||||
const kBusyWaitForMs = 50;
|
||||
while (Date.now() - startTime < kBusyWaitForMs) {
|
||||
// Burn CPU time...
|
||||
}
|
||||
|
||||
let additionalProcInfoCpuTime =
|
||||
Math.floor(await getCpuTimeFromProcInfo()) - initialProcInfoCpuTime2;
|
||||
await FOG.testFlushAllChildren();
|
||||
|
||||
let additionalCpuTime =
|
||||
Glean.power.totalCpuTimeMs.testGetValue() - initialCpuTime;
|
||||
info(
|
||||
`additional CPU time according to ProcInfo: ${additionalProcInfoCpuTime}ms and Glean ${additionalCpuTime}ms`
|
||||
);
|
||||
|
||||
// On a machine where the CPU is very busy, our busy wait loop might burn less
|
||||
// CPU than expected if other processes are being scheduled instead of us.
|
||||
let expectedAdditionalCpuTime = Math.min(
|
||||
additionalProcInfoCpuTime,
|
||||
kBusyWaitForMs
|
||||
);
|
||||
Assert.greaterOrEqual(
|
||||
additionalCpuTime,
|
||||
expectedAdditionalCpuTime,
|
||||
`The total CPU time should have increased by at least ${expectedAdditionalCpuTime}ms`
|
||||
);
|
||||
let wallClockTime = Date.now() - startTime;
|
||||
Assert.lessOrEqual(
|
||||
additionalCpuTime,
|
||||
wallClockTime * cpuThreadCount,
|
||||
`The total CPU time should have increased by at most the wall clock time (${wallClockTime}ms) * ${cpuThreadCount} CPU threads`
|
||||
);
|
||||
});
|
||||
|
||||
add_task(async function test_totalCpuTime_in_child() {
|
||||
const MESSAGE_CHILD_TEST_DONE = "ChildTest:Done";
|
||||
|
||||
let startTime = Date.now();
|
||||
await FOG.testFlushAllChildren();
|
||||
let initialCpuTime = Glean.power.totalCpuTimeMs.testGetValue();
|
||||
|
||||
let initialProcInfoCpuTime = await getCpuTimeFromProcInfo();
|
||||
run_test_in_child("test_total_cpu_time_child.js");
|
||||
let burntCpuTime = await do_await_remote_message(MESSAGE_CHILD_TEST_DONE);
|
||||
let additionalProcInfoCpuTime =
|
||||
(await getCpuTimeFromProcInfo()) - initialProcInfoCpuTime;
|
||||
|
||||
await FOG.testFlushAllChildren();
|
||||
let additionalCpuTime =
|
||||
Glean.power.totalCpuTimeMs.testGetValue() - initialCpuTime;
|
||||
info(
|
||||
`additional CPU time according to ProcInfo: ${additionalProcInfoCpuTime}ms and Glean ${additionalCpuTime}ms`
|
||||
);
|
||||
|
||||
// On a machine where the CPU is very busy, our busy wait loop might burn less
|
||||
// CPU than expected if other processes are being scheduled instead of us.
|
||||
let expectedAdditionalCpuTime = Math.min(
|
||||
Math.floor(additionalProcInfoCpuTime),
|
||||
burntCpuTime
|
||||
);
|
||||
Assert.greaterOrEqual(
|
||||
additionalCpuTime,
|
||||
expectedAdditionalCpuTime,
|
||||
`The total CPU time should have increased by at least ${expectedAdditionalCpuTime}ms`
|
||||
);
|
||||
let wallClockTime = Date.now() - startTime;
|
||||
Assert.lessOrEqual(
|
||||
additionalCpuTime,
|
||||
wallClockTime * cpuThreadCount,
|
||||
`The total CPU time should have increased by at most the wall clock time (${wallClockTime}ms) * ${cpuThreadCount} CPU threads`
|
||||
);
|
||||
});
|
|
@ -0,0 +1,10 @@
|
|||
const MESSAGE_CHILD_TEST_DONE = "ChildTest:Done";
|
||||
|
||||
function run_test() {
|
||||
const kBusyWaitForMs = 50;
|
||||
let startTime = Date.now();
|
||||
while (Date.now() - startTime < kBusyWaitForMs) {
|
||||
// Burn CPU time...
|
||||
}
|
||||
do_send_remote_message(MESSAGE_CHILD_TEST_DONE, kBusyWaitForMs);
|
||||
}
|
|
@ -1,6 +1,8 @@
|
|||
[DEFAULT]
|
||||
firefox-appdir = browser
|
||||
skip-if = os == 'android'
|
||||
subprocess = true
|
||||
|
||||
[test_process_kill.js]
|
||||
skip-if = os == 'android'
|
||||
[test_total_cpu_time.js]
|
||||
support-files = test_total_cpu_time_child.js
|
||||
|
|
Загрузка…
Ссылка в новой задаче