diff --git a/tools/profiler/core/platform-linux-android.cpp b/tools/profiler/core/platform-linux-android.cpp
index 24b5a8c305d2..f5877c7bb0f8 100644
--- a/tools/profiler/core/platform-linux-android.cpp
+++ b/tools/profiler/core/platform-linux-android.cpp
@@ -297,6 +297,28 @@ uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) {
   return aRawValue;
 }
 
+// Measures the CPU time used by the whole process so far, and returns the
+// change since `aPreviousRunningTimesToBeUpdated`, which is then overwritten
+// with this new measurement so the next call diffs against it.
+static RunningTimes GetProcessRunningTimesDiff(
+    PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) {
+  AUTO_PROFILER_STATS(GetProcessRunningTimes);
+
+  RunningTimes newRunningTimes;
+  {
+    AUTO_PROFILER_STATS(GetProcessRunningTimes_clock_gettime);
+    if (timespec ts; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) == 0) {
+      newRunningTimes.SetThreadCPUDelta(uint64_t(ts.tv_sec) * 1'000'000'000u +
+                                        uint64_t(ts.tv_nsec));
+    }
+    newRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now());
+  }
+
+  const RunningTimes diff = newRunningTimes - aPreviousRunningTimesToBeUpdated;
+  aPreviousRunningTimesToBeUpdated = newRunningTimes;
+  return diff;
+}
+
 static RunningTimes GetThreadRunningTimesDiff(
     PSLockRef aLock,
     ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
diff --git a/tools/profiler/core/platform-macos.cpp b/tools/profiler/core/platform-macos.cpp
index 5bd272aef187..ae76a247fcfa 100644
--- a/tools/profiler/core/platform-macos.cpp
+++ b/tools/profiler/core/platform-macos.cpp
@@ -60,6 +60,33 @@ uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) {
   return aRawValue;
 }
 
+// Measures the user + system CPU time of the whole process so far, and returns
+// the change since `aPreviousRunningTimesToBeUpdated`, which is then
+// overwritten with this new measurement so the next call diffs against it.
+static RunningTimes GetProcessRunningTimesDiff(
+    PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) {
+  AUTO_PROFILER_STATS(GetProcessRunningTimes);
+
+  RunningTimes newRunningTimes;
+  {
+    AUTO_PROFILER_STATS(GetProcessRunningTimes_task_info);
+
+    static const auto pid = getpid();
+    struct proc_taskinfo pti;
+    if (static_cast<unsigned long>(proc_pidinfo(
+            pid, PROC_PIDTASKINFO, 0, &pti, PROC_PIDTASKINFO_SIZE)) >=
+        PROC_PIDTASKINFO_SIZE) {
+      newRunningTimes.SetThreadCPUDelta(pti.pti_total_user +
+                                        pti.pti_total_system);
+    }
+    newRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now());
+  }
+
+  const RunningTimes diff = newRunningTimes - aPreviousRunningTimesToBeUpdated;
+  aPreviousRunningTimesToBeUpdated = newRunningTimes;
+  return diff;
+}
+
 static RunningTimes GetThreadRunningTimesDiff(
     PSLockRef aLock,
     ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
diff --git a/tools/profiler/core/platform-win32.cpp b/tools/profiler/core/platform-win32.cpp
index 63107e77212e..b73020e018e7 100644
--- a/tools/profiler/core/platform-win32.cpp
+++ b/tools/profiler/core/platform-win32.cpp
@@ -121,10 +121,33 @@ uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) {
   return (aRawValue * GHZ_PER_MHZ + (GHZ_PER_MHZ / 2u)) / cycleTimeFrequencyMHz;
 }
 
+// Measures the CPU cycles used by the whole process so far, and returns the
+// change since `aPreviousRunningTimesToBeUpdated`, which is then overwritten
+// with this new measurement so the next call diffs against it.
+static RunningTimes GetProcessRunningTimesDiff(
+    PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) {
+  AUTO_PROFILER_STATS(GetProcessRunningTimes);
+
+  static const HANDLE processHandle = GetCurrentProcess();
+
+  RunningTimes newRunningTimes;
+  {
+    AUTO_PROFILER_STATS(GetProcessRunningTimes_QueryProcessCycleTime);
+    if (ULONG64 cycles; QueryProcessCycleTime(processHandle, &cycles) != 0) {
+      newRunningTimes.SetThreadCPUDelta(cycles);
+    }
+    newRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now());
+  }
+
+  const RunningTimes diff = newRunningTimes - aPreviousRunningTimesToBeUpdated;
+  aPreviousRunningTimesToBeUpdated = newRunningTimes;
+  return diff;
+}
+
 static RunningTimes GetThreadRunningTimesDiff(
     PSLockRef aLock,
     ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
-  AUTO_PROFILER_STATS(GetRunningTimes);
+  AUTO_PROFILER_STATS(GetThreadRunningTimes);
 
   const mozilla::profiler::PlatformData& platformData =
       aThreadData.PlatformDataCRef();
@@ -132,7 +155,7 @@ static RunningTimes GetThreadRunningTimesDiff(
 
   const RunningTimes newRunningTimes = GetRunningTimesWithTightTimestamp(
       [profiledThread](RunningTimes& aRunningTimes) {
-        AUTO_PROFILER_STATS(GetRunningTimes_QueryThreadCycleTime);
+        AUTO_PROFILER_STATS(GetThreadRunningTimes_QueryThreadCycleTime);
         if (ULONG64 cycles;
             QueryThreadCycleTime(profiledThread, &cycles) != 0) {
           aRunningTimes.ResetThreadCPUDelta(cycles);
diff --git a/tools/profiler/core/platform.cpp b/tools/profiler/core/platform.cpp
index b9961ca089ff..b7e374e32523 100644
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -569,6 +569,18 @@ ProfileChunkedBuffer& profiler_get_core_buffer() {
   return CorePS::CoreBuffer();
 }
 
+void locked_profiler_add_sampled_counter(PSLockRef aLock,
+                                         BaseProfilerCount* aCounter) {
+  CorePS::AppendCounter(aLock, aCounter);
+}
+
+void locked_profiler_remove_sampled_counter(PSLockRef aLock,
+                                            BaseProfilerCount* aCounter) {
+  // Note: we don't enforce a final sample, though we could do so if the
+  // profiler was active
+  CorePS::RemoveCounter(aLock, aCounter);
+}
+
 class SamplerThread;
 
 static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
@@ -678,6 +690,9 @@ class ActivePS {
           CorePS::CoreBuffer().SetChunkManager(mProfileBufferChunkManager);
           return CorePS::CoreBuffer();
         }()),
+        mMaybeProcessCPUCounter(ProfilerFeature::HasProcessCPU(aFeatures)
+                                    ? new ProcessCPUCounter(aLock)
+                                    : nullptr),
         // The new sampler thread doesn't start sampling immediately because the
         // main loop within Run() is blocked until this function's caller
         // unlocks gPSMutex.
@@ -727,6 +742,9 @@ class ActivePS {
   }
 
   ~ActivePS() {
+    MOZ_ASSERT(
+        !mMaybeProcessCPUCounter,
+        "mMaybeProcessCPUCounter should have been deleted before ~ActivePS()");
 #if !defined(RELEASE_OR_BETA)
     if (mInterposeObserver) {
       // We need to unregister the observer on the main thread, because that's
@@ -789,6 +807,12 @@ class ActivePS {
 
   [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) {
     MOZ_ASSERT(sInstance);
+    if (sInstance->mMaybeProcessCPUCounter) {
+      locked_profiler_remove_sampled_counter(
+          aLock, sInstance->mMaybeProcessCPUCounter);
+      delete sInstance->mMaybeProcessCPUCounter;
+      sInstance->mMaybeProcessCPUCounter = nullptr;
+    }
     auto samplerThread = sInstance->mSamplerThread;
     delete sInstance;
     sInstance = nullptr;
@@ -1070,6 +1094,26 @@ class ActivePS {
     }
   }
 
+  // This is a counter to collect process CPU utilization during profiling.
+  // It cannot be a raw `ProfilerCounter` because we need to manually add/remove
+  // it while the profiler lock is already held.
+  class ProcessCPUCounter final : public BaseProfilerCount {
+   public:
+    explicit ProcessCPUCounter(PSLockRef aLock)
+        : BaseProfilerCount("processCPU", &mCounter, nullptr, "CPU",
+                            "Process CPU utilization") {
+      // Adding on construction, so it's ready before the sampler starts.
+      locked_profiler_add_sampled_counter(aLock, this);
+      // Note: Removed from ActivePS::Destroy, because a lock is needed.
+    }
+
+    void Add(int64_t aNumber) { mCounter += aNumber; }
+
+   private:
+    ProfilerAtomicSigned mCounter;
+  };
+  PS_GET(ProcessCPUCounter*, MaybeProcessCPUCounter);
+
   PS_GET_AND_SET(bool, IsPaused)
 
   // True if sampling is paused (though generic `SetIsPaused()` or specific
@@ -1297,6 +1341,9 @@ class ActivePS {
   // We are removing them when we ensure that we won't need them anymore.
   Vector<RefPtr<PageInformation>> mDeadProfiledPages;
 
+  // Used to collect process CPU utilization values, if the feature is on.
+  ProcessCPUCounter* mMaybeProcessCPUCounter;
+
   // The current sampler thread. This class is not responsible for destroying
   // the SamplerThread object; the Destroy() method returns it so the caller
   // can destroy it.
@@ -3323,6 +3370,10 @@ static void DiscardSuspendedThreadRunningTimes(
     PSLockRef aLock,
     ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData);
 
+// Platform-specific function that retrieves process CPU measurements.
+static RunningTimes GetProcessRunningTimesDiff(
+    PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated);
+
 // Template function to be used by `GetThreadRunningTimesDiff()` (unless some
 // platform has a better way to achieve this).
 // It help perform CPU measurements and tie them to a timestamp, such that the
@@ -3617,6 +3668,10 @@ void SamplerThread::Run() {
   // Will be kept between collections, to know what each collection does.
   auto previousState = localBuffer.GetState();
 
+  // This will be filled at every loop, to be used by the next loop to compute
+  // the CPU utilization between samples.
+  RunningTimes processRunningTimes;
+
   // This will be set inside the loop, from inside the lock scope, to capture
   // all callbacks added before that, but none after the lock is released.
   UniquePtr<PostSamplingCallbackListItem> postSamplingCallbacks;
@@ -3674,6 +3729,18 @@ void SamplerThread::Run() {
             (sampleStart - CorePS::ProcessStartTime()).ToMilliseconds();
         ProfileBuffer& buffer = ActivePS::Buffer(lock);
 
+        // Before sampling counters, update the process CPU counter if active.
+        if (ActivePS::ProcessCPUCounter* processCPUCounter =
+                ActivePS::MaybeProcessCPUCounter(lock);
+            processCPUCounter) {
+          RunningTimes processRunningTimesDiff =
+              GetProcessRunningTimesDiff(lock, processRunningTimes);
+          Maybe<uint64_t> cpu = processRunningTimesDiff.GetJsonThreadCPUDelta();
+          if (cpu) {
+            processCPUCounter->Add(static_cast<int64_t>(*cpu));
+          }
+        }
+
         // handle per-process generic counters
         const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock);
         for (auto& counter : counters) {
@@ -5824,15 +5891,13 @@ void profiler_write_active_configuration(JSONWriter& aWriter) {
 void profiler_add_sampled_counter(BaseProfilerCount* aCounter) {
   DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel);
   PSAutoLock lock;
-  CorePS::AppendCounter(lock, aCounter);
+  locked_profiler_add_sampled_counter(lock, aCounter);
 }
 
 void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) {
   DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel);
   PSAutoLock lock;
-  // Note: we don't enforce a final sample, though we could do so if the
-  // profiler was active
-  CorePS::RemoveCounter(lock, aCounter);
+  locked_profiler_remove_sampled_counter(lock, aCounter);
 }
 
 ProfilingStack* profiler_register_thread(const char* aName,