From 7e4143050234e1116dc4cd0ca5ec005c726e1f46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcelo=20Lynch=20=F0=9F=A7=89?=
Date: Mon, 3 Jun 2024 17:43:20 +0000
Subject: [PATCH] Merged PR 786872: Log internal warning on open file descriptor spike

Log internal warning on open file descriptor spike

Related work items: #2180387
---
 Public/Src/Engine/Scheduler/Scheduler.cs       | 16 ++++++++++++++++
 Public/Src/Engine/Scheduler/Tracing/Log.cs     |  9 +++++++++
 .../Src/Engine/Scheduler/Tracing/LogEventId.cs |  1 +
 3 files changed, 26 insertions(+)

diff --git a/Public/Src/Engine/Scheduler/Scheduler.cs b/Public/Src/Engine/Scheduler/Scheduler.cs
index 0137ccea0..b6f0445c7 100644
--- a/Public/Src/Engine/Scheduler/Scheduler.cs
+++ b/Public/Src/Engine/Scheduler/Scheduler.cs
@@ -1147,6 +1147,11 @@ namespace BuildXL.Scheduler
         /// Whether a low commit memory perf smell was reached
         /// </summary>
         private volatile bool m_hitLowCommitMemoryPerfSmell;
+
+        /// <summary>
+        /// Whether a high file descriptor usage perf smell was reached
+        /// </summary>
+        private volatile bool m_hitHighFileDescriptorUsagePerfSmell;
 
         private int m_historicPerfDataMisses;
         private int m_historicPerfDataZeroMemoryHits;
@@ -3167,6 +3172,17 @@ namespace BuildXL.Scheduler
                     resourceManager.TryManageResources(1, ManageMemoryMode.CancellationRam);
                 }
             }
+
+            // Log an internal warning if the number of open file descriptors (by the BuildXL process) is greater than 10k
+            // The threshold is arbitrary but:
+            //  - conservative: based on telemetry, BuildXL having more than 1000 file descriptors open is an anomaly
+            //  - low enough that we have a chance to measure and log this warning (if we're going overboard with file descriptors, all sorts of operations start failing)
+            const int FileDescriptorCountThreshold = 10_000;
+            if (!m_hitHighFileDescriptorUsagePerfSmell && perfInfo.MachineOpenFileDescriptors > FileDescriptorCountThreshold)
+            {
+                m_hitHighFileDescriptorUsagePerfSmell = true;
+                Logger.Log.HighFileDescriptorCount(m_executePhaseLoggingContext, perfInfo.MachineOpenFileDescriptors, FileDescriptorCountThreshold);
+            }
         }
 
         private void ToggleResourceAvailability(PerformanceCollector.MachinePerfInfo perfInfo, MemoryResource memoryResource, bool cpuResourceAvailable)
diff --git a/Public/Src/Engine/Scheduler/Tracing/Log.cs b/Public/Src/Engine/Scheduler/Tracing/Log.cs
index 30de15037..888d3e11a 100644
--- a/Public/Src/Engine/Scheduler/Tracing/Log.cs
+++ b/Public/Src/Engine/Scheduler/Tracing/Log.cs
@@ -3597,6 +3597,15 @@ namespace BuildXL.Scheduler.Tracing
             Keywords = (int)Keywords.UserMessage)]
         public abstract void HitLowMemorySmell(LoggingContext context);
 
+        [GeneratedEvent(
+            (ushort)LogEventId.HighFileDescriptorCount,
+            EventGenerators = EventGenerators.LocalOnly,
+            EventLevel = Level.Informational,
+            EventTask = (ushort)Tasks.HostApplication,
+            Message = "BuildXL has opened a high amount of file descriptors, exceeding the warning threshold ({fileDescriptorCount} > {threshold}). The build can fail if the file descriptors limit for the system is reached.",
+            Keywords = (int)(Keywords.UserMessage | Keywords.InfrastructureIssue))]
+        public abstract void HighFileDescriptorCount(LoggingContext context, int fileDescriptorCount, int threshold);
+
         [GeneratedEvent(
             (ushort)SharedLogEventId.CacheMissAnalysis,
             EventGenerators = EventGenerators.LocalAndTelemetry,
diff --git a/Public/Src/Engine/Scheduler/Tracing/LogEventId.cs b/Public/Src/Engine/Scheduler/Tracing/LogEventId.cs
index 075b1a13f..126145b52 100644
--- a/Public/Src/Engine/Scheduler/Tracing/LogEventId.cs
+++ b/Public/Src/Engine/Scheduler/Tracing/LogEventId.cs
@@ -458,6 +458,7 @@ namespace BuildXL.Scheduler.Tracing
         LowRamMemory = 14007,
         LowCommitMemory = 14014,
         HitLowMemorySmell = 14015,
+        HighFileDescriptorCount = 14016,
 
         DirtyBuildExplicitlyRequestedModules = 14200,
         DirtyBuildProcessNotSkippedDueToMissingOutput = 14201,