Merged PR 532412: Convert to user-error: CTMIS due to 'access denied' errors

- Some process interacts with motif.pdb and that access is whitelisted. So BuildXL isn't tracking for dependency order or concurrent writes
- Some other process concurrently needs this file materialized from the cache. That materialization cannot happen because of the concurrent access
- We bucket this build as a failure with internal error bucket: "PipMaterializeDependenciesFromCacheFailure". It is true that the engine failed to materialize failures from the cache, but the reason was not the cache. Instead it was that the operation to delete the file prior to replaying it from cache failed

So we are differentiating the cache issues from this error category which are caused by user defined race conditions so we can triage them away in our analysis.

Related work items: #1657589
This commit is contained in:
Rijul Luman 2020-02-10 21:15:46 +00:00
Родитель b6ccb9e5ed
Коммит 8d8f3474f7
11 изменённых файлов: 145 добавлений и 38 удалений

Просмотреть файл

@ -268,7 +268,7 @@ namespace BuildXL.Engine.Cache.Plugin.CacheCore
if (!mayBeDelete.Succeeded)
{
return mayBeDelete.Failure;
return mayBeDelete.Failure.Annotate(LocalDiskContentStore.ExistingFileDeletionFailure);
}
}

Просмотреть файл

@ -34,19 +34,25 @@ namespace BuildXL.Engine.Cache.Artifacts
/// </summary>
public readonly string SourceCache;
/// <summary>
/// Type of failure when ContentAvailabilityResult is unavailable
/// </summary>
public readonly string FailureType;
/// <nodoc />
public ContentAvailabilityResult(ContentHash hash, bool isAvailable, long bytesTransferred, string sourceCache)
public ContentAvailabilityResult(ContentHash hash, bool isAvailable, long bytesTransferred, string sourceCache, string failureType = null)
{
Hash = hash;
IsAvailable = isAvailable;
BytesTransferred = bytesTransferred;
SourceCache = sourceCache;
FailureType = failureType;
}
/// <inherit />
public override string ToString()
{
return $"({Hash.ToString()}: is{(IsAvailable ? "" : " not")} available, {BytesTransferred}B transferred from {SourceCache})";
return $"({Hash.ToString()}: is{(IsAvailable ? "" : " not")} available, {BytesTransferred}B transferred from {SourceCache}){(FailureType != null ? " due to Failure: " + FailureType : "")}";
}
}
}

Просмотреть файл

@ -233,7 +233,7 @@ namespace BuildXL.Engine.Cache.Artifacts
if (!mayBeDelete.Succeeded)
{
return mayBeDelete.Failure;
return mayBeDelete.Failure.Annotate(LocalDiskContentStore.ExistingFileDeletionFailure);
}
try

Просмотреть файл

@ -62,6 +62,11 @@ namespace BuildXL.Engine.Cache.Artifacts
private readonly LoggingContext m_loggingContext;
private readonly string m_vfsCasRoot;
/// <summary>
/// String used to identify existing file deletion failures
/// </summary>
public const string ExistingFileDeletionFailure = "Try materialize file from cache failed due to file deletion failure";
/// <summary>
/// Creates a store which tracks files and content with the provided <paramref name="fileContentTable"/> and <paramref name="fileChangeTracker"/>.
/// The change tracker is allowed to fail each tracking operation, so long as it can record that tracking is incomplete (<see cref="FileChangeTracker.CreateDisabledTracker"/> is sufficient).

Просмотреть файл

@ -38,5 +38,10 @@ namespace BuildXL.Scheduler.Artifacts
/// Placing the file from cache failed (cache target miss in setup).
/// </summary>
PlaceFileFailed = 5,
/// <summary>
/// Placing the file from cache failed due to exisiting file deletion failure.
/// </summary>
PlaceFileFailedDueToDeletionFailure = 6,
}
}

Просмотреть файл

@ -243,6 +243,8 @@ namespace BuildXL.Scheduler.Artifacts
/// </summary>
public SourceChangeAffectedInputs SourceChangeAffectedInputs { get; }
private enum PlaceFile { Success, UserError, InternalError }
#endregion
/// <summary>
@ -454,6 +456,15 @@ namespace BuildXL.Scheduler.Artifacts
/// Ensures pip inputs are materialized
/// </summary>
public async Task<bool> TryMaterializeDependenciesAsync(Pip pip, OperationContext operationContext)
{
var result = await TryMaterializeDependenciesInternalAsync(pip, operationContext);
return result == ArtifactMaterializationResult.Succeeded;
}
/// <summary>
/// Ensures pip inputs are materialized and returns ArtifactMaterializationResult
/// </summary>
internal async Task<ArtifactMaterializationResult> TryMaterializeDependenciesInternalAsync(Pip pip, OperationContext operationContext)
{
using (PipArtifactsState state = GetPipArtifactsState())
{
@ -467,26 +478,27 @@ namespace BuildXL.Scheduler.Artifacts
switch (result)
{
case ArtifactMaterializationResult.Succeeded:
return true;
break;
case ArtifactMaterializationResult.PlaceFileFailed:
Logger.Log.PipMaterializeDependenciesFromCacheFailure(
operationContext,
pip.GetDescription(Context),
state.GetFailure().DescribeIncludingInnerFailures());
ExecutionLog?.CacheMaterializationError(new CacheMaterializationErrorEventData()
{
PipId = pip.PipId,
FailedFiles = state.FailedFiles.ToReadOnlyArray()
});
return false;
cacheMaterializationErrorLog(state);
break;
case ArtifactMaterializationResult.PlaceFileFailedDueToDeletionFailure:
Logger.Log.PipMaterializeDependenciesFromCacheFailureDueToFileDeletionFailure(
operationContext,
pip.GetDescription(Context),
state.GetFailure().DescribeIncludingInnerFailures());
cacheMaterializationErrorLog(state);
break;
case ArtifactMaterializationResult.VerifySourceFilesFailed:
Logger.Log.PipMaterializeDependenciesFailureDueToVerifySourceFilesFailed(
operationContext,
pip.GetDescription(Context),
state.GetFailure().DescribeIncludingInnerFailures());
return false;
break;
default:
// Catch-all error for non-cache dependency materialization failures
Logger.Log.PipMaterializeDependenciesFailureUnrelatedToCache(
@ -494,8 +506,19 @@ namespace BuildXL.Scheduler.Artifacts
pip.GetDescription(Context),
result.ToString(),
state.GetFailure().DescribeIncludingInnerFailures());
return false;
break;
}
return result;
}
void cacheMaterializationErrorLog(PipArtifactsState state)
{
ExecutionLog?.CacheMaterializationError(new CacheMaterializationErrorEventData()
{
PipId = pip.PipId,
FailedFiles = state.FailedFiles.ToReadOnlyArray()
});
}
}
@ -551,7 +574,7 @@ namespace BuildXL.Scheduler.Artifacts
OperationContext operationContext,
IReadOnlyList<(FileArtifact fileArtifact, ContentHash contentHash)> filesAndContentHashes,
Action onFailure = null,
Action<int, string> onContentUnavailable = null,
Action<int, string, string> onContentUnavailable = null,
bool materialize = false)
{
Logger.Log.ScheduleTryBringContentToLocalCache(operationContext, pipInfo.Description);
@ -567,7 +590,7 @@ namespace BuildXL.Scheduler.Artifacts
onlyLogUnavailableContent: true,
filesAndContentHashes: filesAndContentHashes.SelectList((tuple, index) => (tuple.fileArtifact, tuple.contentHash, index)),
onFailure: failure => { onFailure?.Invoke(); },
onContentUnavailable: onContentUnavailable ?? ((index, hashLogStr) => { /* Do nothing. Callee already logs the failure */ }));
onContentUnavailable: onContentUnavailable ?? ((index, hashLogStr, failureType) => { /* Do nothing. Callee already logs the failure */ }));
return result;
}
@ -590,7 +613,8 @@ namespace BuildXL.Scheduler.Artifacts
symlinkTarget: AbsolutePath.Invalid);
}
return await PlaceFilesAsync(operationContext, pipInfo, state, materialize: true);
var placeResult = await PlaceFilesAsync(operationContext, pipInfo, state, materialize: true);
return placeResult == PlaceFile.Success;
}
}
}
@ -1613,9 +1637,12 @@ namespace BuildXL.Scheduler.Artifacts
}
// Place Files:
if (!await PlaceFilesAsync(operationContext, pipInfo, state))
var possiblyPlaced = await PlaceFilesAsync(operationContext, pipInfo, state);
if (possiblyPlaced != PlaceFile.Success)
{
return ArtifactMaterializationResult.PlaceFileFailed;
return possiblyPlaced == PlaceFile.UserError
? ArtifactMaterializationResult.PlaceFileFailedDueToDeletionFailure
: ArtifactMaterializationResult.PlaceFileFailed;
}
// Mark directories as materialized so that the full set of files in the directory will
@ -2010,13 +2037,14 @@ namespace BuildXL.Scheduler.Artifacts
/// <remarks>
/// Logs warnings when a file placement fails; does not log errors.
/// </remarks>
private async Task<bool> PlaceFilesAsync(
private async Task<PlaceFile> PlaceFilesAsync(
OperationContext operationContext,
PipInfo pipInfo,
PipArtifactsState state,
bool materialize = true)
{
bool success = true;
bool userError = false;
if (state.MaterializationFiles.Count != 0)
{
@ -2034,7 +2062,14 @@ namespace BuildXL.Scheduler.Artifacts
if (!materialize)
{
return success;
return success ? PlaceFile.Success : PlaceFile.InternalError;
}
if (!success &&
state.InnerFailure != null &&
state.InnerFailure.DescribeIncludingInnerFailures().Contains(LocalDiskContentStore.ExistingFileDeletionFailure))
{
userError = true;
}
// Remove the failures
@ -2097,6 +2132,11 @@ namespace BuildXL.Scheduler.Artifacts
state.SetMaterializationFailure(fileIndex: materializationFileIndex);
if (possiblyPlaced.Failure.DescribeIncludingInnerFailures().Contains(LocalDiskContentStore.ExistingFileDeletionFailure))
{
userError = true;
}
// Latch overall success (across all placements) to false.
success = false;
}
@ -2135,7 +2175,7 @@ namespace BuildXL.Scheduler.Artifacts
}
}
return success;
return success ? PlaceFile.Success : (userError ? PlaceFile.UserError : PlaceFile.InternalError);
}
private async Task<Possible<ContentMaterializationResult>> PlaceSingleFileAsync(
@ -2264,7 +2304,7 @@ namespace BuildXL.Scheduler.Artifacts
state.InnerFailure = failure;
},
onContentUnavailable: (index, hashLogStr) =>
onContentUnavailable: (index, hashLogStr, failureType) =>
{
state.SetMaterializationFailure(index);
@ -2276,6 +2316,12 @@ namespace BuildXL.Scheduler.Artifacts
pipInfo.Description,
hashLogStr,
state.MaterializationFiles[index].Artifact.Path.ToString(Context.PathTable));
if (failureType == LocalDiskContentStore.ExistingFileDeletionFailure)
{
state.InnerFailure = new Failure<string>(LocalDiskContentStore.ExistingFileDeletionFailure);
}
}
},
state: state);
@ -2293,7 +2339,7 @@ namespace BuildXL.Scheduler.Artifacts
bool materializingOutputs,
IReadOnlyList<(FileArtifact fileArtifact, ContentHash contentHash, int fileIndex)> filesAndContentHashes,
Action<Failure> onFailure,
Action<int, string> onContentUnavailable,
Action<int, string, string> onContentUnavailable,
bool onlyLogUnavailableContent = false,
PipArtifactsState state = null)
{
@ -2574,7 +2620,7 @@ namespace BuildXL.Scheduler.Artifacts
if (!isAvailable)
{
success = false;
onContentUnavailable(currentFileIndex, hashLogStr);
onContentUnavailable(currentFileIndex, hashLogStr, result.FailureType);
}
}
}
@ -2611,7 +2657,10 @@ namespace BuildXL.Scheduler.Artifacts
else
{
allContentAvailable = false;
results[resultIndex] = new ContentAvailabilityResult(fileAndIndex.materializationFile.MaterializationInfo.Hash, false, 0, "ContentMiss");
string failureType = result.Failure.DescribeIncludingInnerFailures().Contains(LocalDiskContentStore.ExistingFileDeletionFailure)
? LocalDiskContentStore.ExistingFileDeletionFailure
: null;
results[resultIndex] = new ContentAvailabilityResult(fileAndIndex.materializationFile.MaterializationInfo.Hash, false, 0, "ContentMiss", failureType);
}
};

Просмотреть файл

@ -3935,7 +3935,7 @@ namespace BuildXL.Scheduler
operationContext,
allHashes,
materialize: materializeToVerifyAvailability,
onContentUnavailable: (i, s) => {
onContentUnavailable: (i, s, f) => {
onContentUnavailable?.Invoke(allHashes[i].fileArtifact);
});

Просмотреть файл

@ -357,6 +357,15 @@ namespace BuildXL.Scheduler.Tracing
Message = "[{pipDescription}] Failed to materialize pip dependencies content from cache: {errorMessage}")]
internal abstract void PipMaterializeDependenciesFromCacheFailure(LoggingContext loggingContext, string pipDescription, string errorMessage);
[GeneratedEvent(
(ushort)LogEventId.PipMaterializeDependenciesFromCacheFailureDueToFileDeletionFailure,
EventGenerators = EventGenerators.LocalOnly,
EventLevel = Level.Error,
Keywords = (int)(Keywords.UserMessage | Keywords.UserError),
EventTask = (ushort)Tasks.Scheduler,
Message = "[{pipDescription}] Failed materialize pip dependencies from cache due to failure to delete file. Typically this happens when the file in question is whitelisted and another pip is concurrently accessing the file. Deletion failure: {errorMessage}")]
internal abstract void PipMaterializeDependenciesFromCacheFailureDueToFileDeletionFailure(LoggingContext loggingContext, string pipDescription, string errorMessage);
[GeneratedEvent(
(ushort)EventId.DetailedPipMaterializeDependenciesFromCacheFailure,
EventGenerators = EventGenerators.LocalOnly,

Просмотреть файл

@ -157,6 +157,8 @@ namespace BuildXL.Scheduler.Tracing
CacheOnlyStatistics = 5073,
PipMaterializeDependenciesFailureDueToVerifySourceFilesFailed = 5080,
SuspiciousPathsInAugmentedPathSet = 5081,
PipMaterializeDependenciesFromCacheFailureDueToFileDeletionFailure = 5082,
// was DependencyViolationGenericWithRelatedPip_AsError = 25000,
// was DependencyViolationGeneric_AsError = 25001,
@ -168,7 +170,5 @@ namespace BuildXL.Scheduler.Tracing
// was DependencyViolationUndeclaredReadCycle_AsError = 25007,
// was DependencyViolationUndeclaredOutput_AsError = 25008,
// was DependencyViolationReadUndeclaredOutput_AsError = 25009,
SuspiciousPathsInAugmentedPathSet = 5081,
}
}

Просмотреть файл

@ -15,6 +15,7 @@ using Xunit;
using Xunit.Abstractions;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System;
namespace Test.BuildXL.Engine.Cache.Plugin.CacheCore
{
@ -143,6 +144,23 @@ namespace Test.BuildXL.Engine.Cache.Plugin.CacheCore
XAssert.IsTrue(maybeMaterialized.Succeeded);
XAssert.AreEqual(TargetContent, File.ReadAllText(targetPath));
if (!OperatingSystemHelper.IsUnixOS)
{
// Test materialization failure due to open file handle
// Disabled on Mac because on unix file systems opening a file doesn't prevent it from being deleted by a different process
using (var fs = new FileStream(targetPath, FileMode.Open, FileAccess.ReadWrite, FileShare.None))
{
maybeMaterialized = await ContentCache.TryMaterializeAsync(
FileRealizationMode.Copy,
AbsolutePath.Create(Context.PathTable, targetPath).Expand(Context.PathTable),
availableHash);
XAssert.IsFalse(maybeMaterialized.Succeeded, "Expected materialization failure due to open file handle, but it succeed");
XAssert.Contains(maybeMaterialized.Failure.DescribeIncludingInnerFailures(), LocalDiskContentStore.ExistingFileDeletionFailure);
}
}
}
[Fact]

Просмотреть файл

@ -105,8 +105,10 @@ namespace Test.BuildXL.Scheduler
harness.VerifyContent(copyChainFinalOutput, copyFileOutputContents);
}
[Fact]
public async Task SourceFilesMaterialization()
[Theory]
[InlineData(true)]
[InlineData(false)]
public async Task SourceFilesMaterialization(bool testFileDeletionFailure)
{
var harness = CreateDefaultHarness();
@ -137,11 +139,24 @@ namespace Test.BuildXL.Scheduler
dependencies: new[] { sourceFile },
outputs: new[] { CreateOutputFile() });
// Call the file content manager to ensure that the source file is materialized
var fileMaterializationResult = await harness.FileContentManager.TryMaterializeDependenciesAsync(consumer, harness.UntrackedOpContext);
Assert.True(fileMaterializationResult);
if (!testFileDeletionFailure)
{
// Call the file content manager to ensure that the source file is materialized
var fileMaterializationResult = await harness.FileContentManager.TryMaterializeDependenciesAsync(consumer, harness.UntrackedOpContext);
Assert.True(fileMaterializationResult);
harness.VerifyContent(sourceFile, sourceFileContents);
harness.VerifyContent(sourceFile, sourceFileContents);
}
else if (!OperatingSystemHelper.IsUnixOS)
{
// Disabled on Mac because on unix file systems opening a file doesn't prevent it from being deleted by a different process
using (var fs = new FileStream(sourceFile.Path.ToString(harness.PipContext.PathTable), FileMode.Create, FileAccess.ReadWrite, FileShare.None))
{
// Call the file content manager to ensure that the source file materialization fails when local file cannot be deleted
var fileMaterializationResult = await harness.FileContentManager.TryMaterializeDependenciesInternalAsync(consumer, harness.UntrackedOpContext);
Assert.Equal(fileMaterializationResult, ArtifactMaterializationResult.PlaceFileFailedDueToDeletionFailure);
}
}
}
[Fact]
@ -338,7 +353,7 @@ namespace Test.BuildXL.Scheduler
filesAndContentHashes,
onFailure: () => XAssert.Fail("TryLoadAvailableOutputContentAsync failed"),
onContentUnavailable:
(i, s) =>
(i, s, f) =>
XAssert.Fail(
I(
$"Content of '{filesAndContentHashes[i].Item1.Path.ToString(harness.PipContext.PathTable)}' with content hash '{filesAndContentHashes[i].Item2.ToHex()}' is not available")));
@ -390,7 +405,7 @@ namespace Test.BuildXL.Scheduler
filesAndContentHashes,
onFailure: () => XAssert.Fail("TryLoadAvailableOutputContentAsync failed"),
onContentUnavailable:
(i, s) =>
(i, s, f) =>
{
if (checkFileOnDiskForUpToDate)
{