Merged PR 786194: Update the threshold for LimitProblematicWorkerCount

Because many builds are now failing due to a high count of problematic workers, this change raises the termination threshold from 0.5 to 0.9 of the remote workers.
This commit is contained in:
Semih Okur 2024-05-22 18:27:35 +00:00
Родитель a66b7252e3
Коммит 250eb23e6f
2 изменённых файлов: 10 добавлений и 1 удалений

Просмотреть файл

@ -8137,8 +8137,9 @@ namespace BuildXL.Scheduler
// When the problematic-worker limit is enabled and there are at least 4 remote workers,
// log the high problematic-worker count and call TerminateForInternalError once the
// number of problematic workers reaches the configured fraction of the remote workers.
if (EngineEnvironmentSettings.LimitProblematicWorkerCount &&
m_remoteWorkers.Length >= 4 &&
numProblematicWorkers > m_remoteWorkers.Length / 2)
numProblematicWorkers >= (m_remoteWorkers.Length * EngineEnvironmentSettings.LimitProblematicWorkerThreshold))
{
// With the default LimitProblematicWorkerThreshold of 0.9, a build with fewer than 10 remote workers
// is failed only when every remote worker is problematic; from 10 workers on, 90% is sufficient.
Logger.Log.HighCountProblematicWorkers(m_loggingContext, numProblematicWorkers, m_remoteWorkers.Length);
TerminateForInternalError();
}

Просмотреть файл

@ -287,6 +287,14 @@ namespace BuildXL.Utilities.Configuration
/// </remarks>
public static readonly Setting<bool> LimitProblematicWorkerCount = CreateSetting("BuildXLLimitProblematicWorkerCount", value => string.IsNullOrWhiteSpace(value) || value == "1");
/// <summary>
/// Defines the fraction of remote workers that must be problematic before the build is considered a failure.
/// </summary>
/// <remarks>
/// For example, a threshold of 0.9 means that if 90% or more of the workers are problematic, the build will be terminated due to excessive errors.
/// The value is registered under its own setting name, "BuildXLLimitProblematicWorkerThreshold", so it can be
/// configured independently of the boolean <see cref="LimitProblematicWorkerCount"/> switch.
/// Falls back to 0.9 when ParseDouble returns null for the configured value.
/// </remarks>
public static readonly Setting<double> LimitProblematicWorkerThreshold = CreateSetting("BuildXLLimitProblematicWorkerThreshold", value => ParseDouble(value) ?? 0.9);
#endregion
#region Grpc related settings