From a2caf12e5ec7412b4af681dc7f9244c47ea9c7d1 Mon Sep 17 00:00:00 2001 From: Sergey Tepliakov Date: Fri, 24 Mar 2023 17:07:01 +0000 Subject: [PATCH] Merged PR 706846: Add retries when opening a file fails with UnauthorizedAccessException Add retries when opening a file fails with UnauthorizedAccessException. The PR also changes the exception type thrown due to sharing violation on linux. Before the code was failing with `IOException` on Linux and `UnauthoziedAccessException` on Windows. Now the behavior is consistent. Related work items: #2039887 --- .../FileSystem/AbsFileSystemExtension.cs | 23 ++++++ .../Interfaces/FileSystem/IAbsFileSystem.cs | 5 ++ .../FileSystem/PassThroughFileSystem.cs | 36 ++++++---- .../Library/Stores/ContentStoreSettings.cs | 10 +++ .../Stores/FileSystemContentStoreInternal.cs | 26 ++++++- .../FileSystem/PassThroughFileSystemTests.cs | 70 ++++++++++++++++++- .../DistributedContentSettings.cs | 6 ++ .../DistributedContentStoreFactory.cs | 2 + 8 files changed, 163 insertions(+), 15 deletions(-) diff --git a/Public/Src/Cache/ContentStore/Interfaces/FileSystem/AbsFileSystemExtension.cs b/Public/Src/Cache/ContentStore/Interfaces/FileSystem/AbsFileSystemExtension.cs index ca88e0328..99dc3babc 100644 --- a/Public/Src/Cache/ContentStore/Interfaces/FileSystem/AbsFileSystemExtension.cs +++ b/Public/Src/Cache/ContentStore/Interfaces/FileSystem/AbsFileSystemExtension.cs @@ -6,6 +6,7 @@ using System.Diagnostics.ContractsLight; using System.IO; using System.Threading.Tasks; using BuildXL.Cache.ContentStore.Hashing; +using BuildXL.Cache.ContentStore.Interfaces.Tracing; using BuildXL.Cache.ContentStore.UtilitiesCore; namespace BuildXL.Cache.ContentStore.Interfaces.FileSystem @@ -109,6 +110,28 @@ namespace BuildXL.Cache.ContentStore.Interfaces.FileSystem return fileSystem.TryOpen(path, fileAccess, fileMode, share, FileOptions.None, DefaultFileStreamBufferSize); } + /// + /// Tries opening a given and retries if is happening. + /// + public static async Task TryOpenWithRetriesAsync(this IAbsFileSystem fileSystem, AbsolutePath path, FileAccess fileAccess, FileMode fileMode, FileShare share, int retryCount, TimeSpan retryDelay, Action onException) + { + Contract.Requires(retryCount > 0); + for (int i = 0; i < retryCount; i++) + { + try + { + return fileSystem.TryOpen(path, fileAccess, fileMode, share, FileOptions.None, DefaultFileStreamBufferSize); + } + catch (UnauthorizedAccessException e) when (i < retryCount - 1) + { + onException(e); + await Task.Delay(retryDelay); + } + } + + throw Contract.AssertFailure("Not reachable"); + } + /// /// Open the named file asynchronously for reading. /// diff --git a/Public/Src/Cache/ContentStore/Interfaces/FileSystem/IAbsFileSystem.cs b/Public/Src/Cache/ContentStore/Interfaces/FileSystem/IAbsFileSystem.cs index 1cb323ff1..2e772c5c1 100644 --- a/Public/Src/Cache/ContentStore/Interfaces/FileSystem/IAbsFileSystem.cs +++ b/Public/Src/Cache/ContentStore/Interfaces/FileSystem/IAbsFileSystem.cs @@ -46,6 +46,11 @@ namespace BuildXL.Cache.ContentStore.Interfaces.FileSystem /// Unlike System.IO.FileStream, this provides a way to atomically check for the existence of a file and open it. /// This method throws the same set of exceptions that constructor does. /// + /// An I/O error occurred. + /// + /// Unlike 's ctor, this method fails with in case of + /// sharing violation and not with . Plus it tries to find an active process that owns the handle and add such information into the error's text message. + /// StreamWithLength? TryOpen(AbsolutePath path, FileAccess fileAccess, FileMode fileMode, FileShare share, FileOptions options, int bufferSize); /// diff --git a/Public/Src/Cache/ContentStore/Library/FileSystem/PassThroughFileSystem.cs b/Public/Src/Cache/ContentStore/Library/FileSystem/PassThroughFileSystem.cs index 71d7d23d7..d56ef3c62 100644 --- a/Public/Src/Cache/ContentStore/Library/FileSystem/PassThroughFileSystem.cs +++ b/Public/Src/Cache/ContentStore/Library/FileSystem/PassThroughFileSystem.cs @@ -549,7 +549,15 @@ namespace BuildXL.Cache.ContentStore.FileSystem return null; } - return new FileStream(path.Path, mode, accessMode, share, bufferSize, options); + try + { + return new FileStream(path.Path, mode, accessMode, share, bufferSize, options); + } + catch (IOException e) when(e.Message.Contains("The process cannot access the file")) + { + // Finding open handles is not supported for Unix. + throw CreateUnauthorizedAccessException(e.Message, path.ToString(), tryFindOpenHandles: false); + } } /// @@ -1204,23 +1212,27 @@ namespace BuildXL.Cache.ContentStore.FileSystem throw new DirectoryNotFoundException(message); case ERROR_ACCESS_DENIED: case ERROR_SHARING_VIOLATION: - - string extraMessage = string.Empty; - - if (path != null) - { - extraMessage = " " + (FileUtilities.TryFindOpenHandlesToFile(path, out var info, printCurrentFilePath: false) - ? info - : "Attempt to find processes with open handles to the file failed."); - } - - throw new UnauthorizedAccessException($"{message}.{extraMessage}"); + throw CreateUnauthorizedAccessException(message, path, tryFindOpenHandles: true); default: throw new IOException(message, ExceptionUtilities.HResultFromWin32(lastError)); } } } + private static UnauthorizedAccessException CreateUnauthorizedAccessException(string message, string? path, bool tryFindOpenHandles) + { + string extraMessage = string.Empty; + + if (path != null && tryFindOpenHandles) + { + extraMessage = " " + (FileUtilities.TryFindOpenHandlesToFile(path, out var info, printCurrentFilePath: false) + ? info + : "Attempt to find processes with open handles to the file failed."); + } + + throw new UnauthorizedAccessException($"{message}.{extraMessage}"); + } + /// public DateTime GetDirectoryCreationTimeUtc(AbsolutePath path) { diff --git a/Public/Src/Cache/ContentStore/Library/Stores/ContentStoreSettings.cs b/Public/Src/Cache/ContentStore/Library/Stores/ContentStoreSettings.cs index 107e87e0b..d7e8f529d 100644 --- a/Public/Src/Cache/ContentStore/Library/Stores/ContentStoreSettings.cs +++ b/Public/Src/Cache/ContentStore/Library/Stores/ContentStoreSettings.cs @@ -74,6 +74,16 @@ namespace BuildXL.Cache.ContentStore.Stores public bool AssumeCallerCreatesDirectoryForPlace { get; set; } = false; public bool RemoveAuditRuleInheritance { get; set; } = false; + + /// + /// A number of retries used for opening a file for hashing. + /// + public int? RetryCountForFileHashing { get; set; } + + /// + /// A delay between retrying opening a file for hashing. + /// + public TimeSpan RetryDelayForFileHashing { get; set; } = TimeSpan.FromMilliseconds(100); } /// diff --git a/Public/Src/Cache/ContentStore/Library/Stores/FileSystemContentStoreInternal.cs b/Public/Src/Cache/ContentStore/Library/Stores/FileSystemContentStoreInternal.cs index 9e1c7f8b6..1935fc91f 100644 --- a/Public/Src/Cache/ContentStore/Library/Stores/FileSystemContentStoreInternal.cs +++ b/Public/Src/Cache/ContentStore/Library/Stores/FileSystemContentStoreInternal.cs @@ -270,7 +270,7 @@ namespace BuildXL.Cache.ContentStore.Stores public async Task TryHashFileAsync(Context context, AbsolutePath path, HashType hashType, Func? wrapStream = null) { // We only hash the file if a trusted hash is not supplied - using var stream = FileSystem.TryOpen(path, FileAccess.Read, FileMode.Open, FileShare.Read | FileShare.Delete); + using var stream = await tryOpenFileAsync(); if (stream == null) { return null; @@ -284,8 +284,30 @@ namespace BuildXL.Cache.ContentStore.Stores // Hash the file in place return await HashContentAsync(context, wrappedStream.AssertHasLength(), hashType); - } + Task tryOpenFileAsync() + { + if (_settings.RetryCountForFileHashing is { } retryCount) + { + return FileSystem.TryOpenWithRetriesAsync( + path, + FileAccess.Read, + FileMode.Open, + FileShare.Read | FileShare.Delete, + retryCount: retryCount, + retryDelay: _settings.RetryDelayForFileHashing, + onException: + ex => + { + Tracer.Warning(context, ex, $"Transient failure during opening file for hashing. Retrying in '{_settings.RetryDelayForFileHashing}'"); + } + ); + } + + return Task.FromResult(FileSystem.TryOpen(path, FileAccess.Read, FileMode.Open, FileShare.Read | FileShare.Delete)); + } + } + private void DeleteTempFolder() { if (FileSystem.DirectoryExists(_tempFolder)) diff --git a/Public/Src/Cache/ContentStore/Test/FileSystem/PassThroughFileSystemTests.cs b/Public/Src/Cache/ContentStore/Test/FileSystem/PassThroughFileSystemTests.cs index acd65d3a7..882d7e38d 100644 --- a/Public/Src/Cache/ContentStore/Test/FileSystem/PassThroughFileSystemTests.cs +++ b/Public/Src/Cache/ContentStore/Test/FileSystem/PassThroughFileSystemTests.cs @@ -18,7 +18,6 @@ using BuildXL.Cache.ContentStore.Hashing; using BuildXL.Cache.ContentStore.Interfaces.Tracing; using BuildXL.Cache.ContentStore.Tracing.Internal; using BuildXL.Utilities.ParallelAlgorithms; -using BuildXL.Utilities.Tracing; using BuildXL.Utilities.Core.Tracing; using Xunit.Abstractions; @@ -32,6 +31,75 @@ namespace ContentStoreTest.FileSystem { } + [Fact] + public void SharingViolationFailsWithUnauthorizedAccessException() + { + using (var testDirectory = new DisposableDirectory(FileSystem)) + { + var filePath = testDirectory.Path / "file"; + FileSystem.WriteAllBytes(filePath, new byte[] {1, 2, 3}); + using var file = FileSystem.OpenForWrite(filePath, expectingLength: 42, FileMode.Open, FileShare.None); + + Assert.Throws( + () => FileSystem.OpenForWrite(filePath, expectingLength: 42, FileMode.Open, FileShare.None)); + } + } + + [Fact] + public async Task TryOpenWithRetriesAsyncRetriesMultipleTimes() + { + using (var testDirectory = new DisposableDirectory(FileSystem)) + { + var filePath = testDirectory.Path / "file"; + FileSystem.WriteAllBytes(filePath, new byte[] { 1, 2, 3 }); + int callbackCount = 0; + const int retryCount = 4; + using var file = FileSystem.OpenForWrite(filePath, expectingLength: 42, FileMode.Open, FileShare.None); + try + { + using var file2 = await FileSystem.TryOpenWithRetriesAsync( + filePath, + FileAccess.Read, + FileMode.Open, + FileShare.None, + retryCount: retryCount, + retryDelay: TimeSpan.FromMilliseconds(10), + onException: _ => + { + callbackCount++; + }); + Assert.True(false, "TryOpenWithRetriesAsync should fail."); + } + catch (UnauthorizedAccessException) + { } + + Assert.Equal(retryCount - 1, callbackCount); + } + } + + [Fact] + public async Task TryOpenWithRetriesAsyncRecovers() + { + using (var testDirectory = new DisposableDirectory(FileSystem)) + { + var filePath = testDirectory.Path / "file"; + FileSystem.WriteAllBytes(filePath, new byte[] { 1, 2, 3 }); + var file = FileSystem.OpenForWrite(filePath, expectingLength: 42, FileMode.Open, FileShare.Write); + using var file2 = await FileSystem.TryOpenWithRetriesAsync( + filePath, + FileAccess.Read, + FileMode.Open, + FileShare.None, + retryCount: 4, + retryDelay: TimeSpan.FromMilliseconds(10), + onException: _ => + { + // Disposing the file should allow us to open it. + file.Dispose(); + }); + } + } + [Fact] public void OpenFileFromAbsentDirectoryShouldThrowDirectoryNotFoundException() { diff --git a/Public/Src/Cache/DistributedCache.Host/Configuration/DistributedContentSettings.cs b/Public/Src/Cache/DistributedCache.Host/Configuration/DistributedContentSettings.cs index 56e59c7f7..a43f22738 100644 --- a/Public/Src/Cache/DistributedCache.Host/Configuration/DistributedContentSettings.cs +++ b/Public/Src/Cache/DistributedCache.Host/Configuration/DistributedContentSettings.cs @@ -796,6 +796,12 @@ namespace BuildXL.Cache.Host.Configuration [DataMember] public bool TraceFileSystemContentStoreDiagnosticMessages { get; set; } = false; + [DataMember] + public int? RetryCountForFileHashing { get; set; } + + [DataMember] + public TimeSpan? RetryDelayForFileHashing { get; set; } + [DataMember] [Validation.Range(1, int.MaxValue)] public int? SilentOperationDurationThreshold { get; set; } diff --git a/Public/Src/Cache/DistributedCache.Host/Service/Internal/DistributedContentStoreFactory.cs b/Public/Src/Cache/DistributedCache.Host/Service/Internal/DistributedContentStoreFactory.cs index a9e7d4653..bf1a97c07 100644 --- a/Public/Src/Cache/DistributedCache.Host/Service/Internal/DistributedContentStoreFactory.cs +++ b/Public/Src/Cache/DistributedCache.Host/Service/Internal/DistributedContentStoreFactory.cs @@ -448,6 +448,8 @@ namespace BuildXL.Cache.Host.Service.Internal ApplyIfNotNull(settings.ReserveSpaceTimeoutInMinutes, v => result.ReserveTimeout = TimeSpan.FromMinutes(v)); ApplyIfNotNull(settings.UseHierarchicalTraceIds, v => Context.UseHierarchicalIds = v); + ApplyIfNotNull(settings.RetryCountForFileHashing, v => result.RetryCountForFileHashing = v); + ApplyIfNotNull(settings.RetryDelayForFileHashing, v => result.RetryDelayForFileHashing = v); return result; }