зеркало из https://github.com/microsoft/BuildXL.git
Merged PR 788685: Modify last access time tracking so it's always updated'
This change modifies how we touch files when doing uploads of files that already exist or pins. Currently, we'll update the last access time by doing a 1-byte download. In the new version, we instead update the Content-Language to a random value. The reason this works is that updating Content-Language updates the Last Modification Time, the ETag, and the Last Access Time. Because this only works when we have write permissions, we do this only for non-read-only sessions. This also changes GC so that when it reads the last access time from a blob, it also fetches the ETag and the last modification time. The reason this change is being made is we've had already 6 WDG build failures where: 1. A content is added long ago 2. A build runs, it inserts the content, the content is already there, so we touch it instead 3. In parallel, GC is running and sees the last access time from before the touch, so it proceeds to delete it. Because we had no way to prevent the deletion from happening when there's a race, this would cause a CTMIS Reverts !785193 Reverts !788669 This re-introduces the change along with a fix to prevent build failures due to the ETag changing while a parallel download is happening in parallel and already downloaded one of the chunks.
This commit is contained in:
Родитель
f2a0d7b1e2
Коммит
8290ecb8c0
|
@ -5,6 +5,7 @@ using System;
|
|||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Azure;
|
||||
|
@ -17,14 +18,20 @@ using BuildXL.Cache.ContentStore.Interfaces.FileSystem;
|
|||
using BuildXL.Cache.ContentStore.Interfaces.Results;
|
||||
using BuildXL.Cache.ContentStore.Interfaces.Sessions;
|
||||
using BuildXL.Cache.ContentStore.Interfaces.Stores;
|
||||
using BuildXL.Cache.ContentStore.Interfaces.Time;
|
||||
using BuildXL.Cache.ContentStore.Interfaces.Tracing;
|
||||
using BuildXL.Cache.ContentStore.Sessions;
|
||||
using BuildXL.Cache.ContentStore.Sessions.Internal;
|
||||
using BuildXL.Cache.ContentStore.Tracing;
|
||||
using BuildXL.Cache.ContentStore.UtilitiesCore;
|
||||
using BuildXL.Cache.ContentStore.Utils;
|
||||
using BuildXL.Cache.Host.Configuration;
|
||||
using BuildXL.Utilities.Core;
|
||||
using BuildXL.Utilities.Core.Tracing;
|
||||
using Polly;
|
||||
using Polly.Retry;
|
||||
using AbsolutePath = BuildXL.Cache.ContentStore.Interfaces.FileSystem.AbsolutePath;
|
||||
using Context = BuildXL.Cache.ContentStore.Interfaces.Tracing.Context;
|
||||
using OperationContext = BuildXL.Cache.ContentStore.Tracing.Internal.OperationContext;
|
||||
|
||||
#nullable enable
|
||||
|
@ -56,12 +63,15 @@ public sealed class AzureBlobStorageContentSession : ContentSessionBase, IConten
|
|||
|
||||
private readonly List<Func<Context, ContentHash, Task>> _contentNotFoundListeners = new();
|
||||
|
||||
private readonly IClock _clock;
|
||||
|
||||
/// <nodoc />
|
||||
public AzureBlobStorageContentSession(Configuration sessionConfiguration, AzureBlobStorageContentStore store)
|
||||
public AzureBlobStorageContentSession(Configuration sessionConfiguration, AzureBlobStorageContentStore store, IClock? clock = null)
|
||||
: base(sessionConfiguration.Name)
|
||||
{
|
||||
_sessionConfiguration = sessionConfiguration;
|
||||
_configuration = _sessionConfiguration.StoreConfiguration;
|
||||
_clock = clock ?? SystemClock.Instance;
|
||||
|
||||
_store = store;
|
||||
_clientAdapter = new BlobStorageClientAdapter(
|
||||
|
@ -88,7 +98,7 @@ public sealed class AzureBlobStorageContentSession : ContentSessionBase, IConten
|
|||
{
|
||||
(var client, blobPath) = await GetBlobClientAsync(context, contentHash);
|
||||
// Let's make sure that we bump the last access time
|
||||
var touchResult = await _clientAdapter.TouchAsync(context, client);
|
||||
var touchResult = await TouchAsync(context, client);
|
||||
|
||||
if (!touchResult.Succeeded)
|
||||
{
|
||||
|
@ -144,7 +154,7 @@ public sealed class AzureBlobStorageContentSession : ContentSessionBase, IConten
|
|||
// 3. People often want to seek in the stream, so we can't just return a network-backed stream to a file
|
||||
// being downloaded at the current time.
|
||||
var temporaryPath = _fileSystem.GetTempPath() / Guid.NewGuid().ToString();
|
||||
var result = await PlaceRemoteFileAsync(context, contentHash, temporaryPath, FileAccessMode.ReadOnly, FileReplacementMode.FailIfExists).ThrowIfFailureAsync();
|
||||
var result = await DownloadToFileAsync(context, contentHash, temporaryPath, FileAccessMode.ReadOnly, FileReplacementMode.FailIfExists).ThrowIfFailureAsync();
|
||||
switch (result.ResultCode)
|
||||
{
|
||||
case PlaceFileResult.ResultCode.PlacedWithHardLink:
|
||||
|
@ -176,7 +186,7 @@ public sealed class AzureBlobStorageContentSession : ContentSessionBase, IConten
|
|||
UrgencyHint urgencyHint,
|
||||
Counter retryCounter)
|
||||
{
|
||||
var remoteDownloadResult = await PlaceRemoteFileAsync(context, contentHash, path, accessMode, replacementMode).ThrowIfFailureAsync();
|
||||
var remoteDownloadResult = await DownloadToFileAsync(context, contentHash, path, accessMode, replacementMode).ThrowIfFailureAsync();
|
||||
var result = new PlaceFileResult(
|
||||
code: remoteDownloadResult.ResultCode,
|
||||
fileSize: remoteDownloadResult.FileSize ?? 0,
|
||||
|
@ -229,117 +239,43 @@ public sealed class AzureBlobStorageContentSession : ContentSessionBase, IConten
|
|||
return (FileStream)stream;
|
||||
}
|
||||
|
||||
private async Task<Result<RemoteDownloadResult>> PlaceRemoteFileAsync(
|
||||
private readonly AsyncRetryPolicy _conflictRetryPolicy = Policy
|
||||
// This exception will be thrown when:
|
||||
// 1. An upload happens in parallel with a download
|
||||
// 2. The file already exists, so the upload decides to touch it
|
||||
// 3. The download downloads the first block of the file and then the upload touches it.
|
||||
// 4. Because the download will fetch subsequent blocks using an If-Match condition with the ETag of the
|
||||
// first block, the download will fail with a PreconditionFailed error.
|
||||
// In these cases, we'll just immediately retry the operation, as the touch has already happened.
|
||||
.Handle<RequestFailedException>(e => e.Status == (int)HttpStatusCode.PreconditionFailed)
|
||||
.WaitAndRetryAsync(
|
||||
retryCount: 5,
|
||||
sleepDurationProvider: attempt => TimeSpan.FromMilliseconds(Math.Pow(2, attempt)));
|
||||
|
||||
private async Task<Result<RemoteDownloadResult>> DownloadToFileAsync(
|
||||
OperationContext context,
|
||||
ContentHash contentHash,
|
||||
AbsolutePath path,
|
||||
FileAccessMode accessMode,
|
||||
FileReplacementMode replacementMode)
|
||||
{
|
||||
AbsoluteBlobPath? blobPath = null;
|
||||
(var client, var blobPath) = await GetBlobClientAsync(context, contentHash);
|
||||
|
||||
var result = await context.PerformOperationWithTimeoutAsync(
|
||||
Tracer,
|
||||
async context =>
|
||||
{
|
||||
var statistics = new DownloadStatistics();
|
||||
long fileSize;
|
||||
ContentHash observedContentHash;
|
||||
|
||||
var stopwatch = StopwatchSlim.Start();
|
||||
BlobClient client;
|
||||
try
|
||||
return await _conflictRetryPolicy.ExecuteAsync((pollyContext) =>
|
||||
{
|
||||
(client, blobPath) = await GetBlobClientAsync(context, contentHash);
|
||||
stopwatch.ElapsedAndReset();
|
||||
|
||||
using var fileStream = OpenFileStream(path);
|
||||
statistics.OpenFileStreamDuration = stopwatch.ElapsedAndReset();
|
||||
|
||||
await using (var hashingStream = HashInfoLookup
|
||||
.GetContentHasher(contentHash.HashType)
|
||||
.CreateWriteHashingStream(
|
||||
fileStream,
|
||||
parallelHashingFileSizeBoundary: _configuration.ParallelHashingFileSizeBoundary))
|
||||
{
|
||||
var blobDownloadToOptions = new BlobDownloadToOptions()
|
||||
{
|
||||
TransferOptions = new StorageTransferOptions()
|
||||
{
|
||||
InitialTransferSize = _configuration.InitialTransferSize,
|
||||
MaximumTransferSize = _configuration.MaximumTransferSize,
|
||||
MaximumConcurrency = _configuration.MaximumConcurrency,
|
||||
},
|
||||
};
|
||||
|
||||
// The following download is done in parallel onto the hashing stream. The hashing stream will
|
||||
// itself run the hashing of the input in parallel as well.
|
||||
await client.DownloadToAsync(
|
||||
hashingStream,
|
||||
blobDownloadToOptions,
|
||||
cancellationToken: context.Token);
|
||||
statistics.DownloadDuration = stopwatch.ElapsedAndReset();
|
||||
|
||||
observedContentHash = await hashingStream.GetContentHashAsync();
|
||||
statistics.HashingDuration = hashingStream.TimeSpentHashing;
|
||||
}
|
||||
|
||||
statistics.WriteDuration = fileStream.GetWriteDurationIfAvailable() ?? TimeSpan.MinValue;
|
||||
fileSize = fileStream.Length;
|
||||
}
|
||||
// This exception will be thrown whenever storage tries to do the first operation on the blob, which
|
||||
// should be in the DownloadToAsync.
|
||||
catch (RequestFailedException e) when (e.ErrorCode == BlobErrorCode.BlobNotFound)
|
||||
{
|
||||
statistics.DownloadDuration ??= stopwatch.ElapsedAndReset();
|
||||
|
||||
await TryNotify(context, new DeleteEvent(blobPath!.Value, contentHash));
|
||||
return Result.Success(new RemoteDownloadResult
|
||||
{
|
||||
ResultCode = PlaceFileResult.ResultCode.NotPlacedContentNotFound,
|
||||
FileSize = null,
|
||||
DownloadResult = statistics
|
||||
});
|
||||
}
|
||||
|
||||
if (observedContentHash != contentHash)
|
||||
{
|
||||
// TODO: there should be some way to either notify or delete the file in storage
|
||||
Tracer.Error(context, $"Expected to download file with hash {contentHash} into file {path}, but found {observedContentHash} instead");
|
||||
|
||||
// The file we downloaded on to disk has the wrong file. Delete it so it can't be used incorrectly.
|
||||
try
|
||||
{
|
||||
_fileSystem.DeleteFile(path);
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
return new Result<RemoteDownloadResult>(exception, $"Failed to delete {path} containing partial download results for content {contentHash}");
|
||||
}
|
||||
|
||||
return Result.Success(
|
||||
new RemoteDownloadResult()
|
||||
{
|
||||
ResultCode = PlaceFileResult.ResultCode.NotPlacedContentNotFound,
|
||||
FileSize = fileSize,
|
||||
DownloadResult = statistics,
|
||||
});
|
||||
}
|
||||
|
||||
await TryNotify(context, new AddEvent(blobPath!.Value, contentHash, fileSize));
|
||||
return Result.Success(
|
||||
new RemoteDownloadResult
|
||||
{
|
||||
ResultCode = PlaceFileResult.ResultCode.PlacedWithCopy,
|
||||
FileSize = fileSize,
|
||||
DownloadResult = statistics,
|
||||
});
|
||||
return TryDownloadToFileAsync(context, contentHash, path, blobPath, client);
|
||||
}, context.Token);
|
||||
},
|
||||
traceOperationStarted: false,
|
||||
timeout: _configuration.StorageInteractionTimeout,
|
||||
extraEndMessage: r =>
|
||||
{
|
||||
var baseline =
|
||||
$"ContentHash=[{contentHash.ToShortString()}] Path=[{path}] AccessMode=[{accessMode}] ReplacementMode=[{replacementMode}] BlobPath=[{blobPath.ToString() ?? "UNKNOWN"}]";
|
||||
$"ContentHash=[{contentHash.ToShortString()}] Path=[{path}] AccessMode=[{accessMode}] ReplacementMode=[{replacementMode}] BlobPath=[{blobPath}]";
|
||||
if (!r.Succeeded)
|
||||
{
|
||||
return baseline;
|
||||
|
@ -367,6 +303,100 @@ public sealed class AzureBlobStorageContentSession : ContentSessionBase, IConten
|
|||
return result;
|
||||
}
|
||||
|
||||
private async Task<Result<RemoteDownloadResult>> TryDownloadToFileAsync(OperationContext context, ContentHash contentHash, AbsolutePath path, AbsoluteBlobPath blobPath, BlobClient client)
|
||||
{
|
||||
var statistics = new DownloadStatistics();
|
||||
long fileSize;
|
||||
ContentHash observedContentHash;
|
||||
|
||||
var stopwatch = StopwatchSlim.Start();
|
||||
try
|
||||
{
|
||||
stopwatch.ElapsedAndReset();
|
||||
|
||||
using var fileStream = OpenFileStream(path);
|
||||
statistics.OpenFileStreamDuration = stopwatch.ElapsedAndReset();
|
||||
|
||||
await using (var hashingStream = HashInfoLookup
|
||||
.GetContentHasher(contentHash.HashType)
|
||||
.CreateWriteHashingStream(
|
||||
fileStream,
|
||||
parallelHashingFileSizeBoundary: _configuration.ParallelHashingFileSizeBoundary))
|
||||
{
|
||||
var blobDownloadToOptions = new BlobDownloadToOptions()
|
||||
{
|
||||
TransferOptions = new StorageTransferOptions()
|
||||
{
|
||||
InitialTransferSize = _configuration.InitialTransferSize,
|
||||
MaximumTransferSize = _configuration.MaximumTransferSize,
|
||||
MaximumConcurrency = _configuration.MaximumConcurrency,
|
||||
},
|
||||
};
|
||||
|
||||
// The following download is done in parallel onto the hashing stream. The hashing stream will
|
||||
// itself run the hashing of the input in parallel as well.
|
||||
await client.DownloadToAsync(
|
||||
hashingStream,
|
||||
blobDownloadToOptions,
|
||||
cancellationToken: context.Token);
|
||||
statistics.DownloadDuration = stopwatch.ElapsedAndReset();
|
||||
|
||||
observedContentHash = await hashingStream.GetContentHashAsync();
|
||||
statistics.HashingDuration = hashingStream.TimeSpentHashing;
|
||||
}
|
||||
|
||||
statistics.WriteDuration = fileStream.GetWriteDurationIfAvailable() ?? TimeSpan.MinValue;
|
||||
fileSize = fileStream.Length;
|
||||
}
|
||||
// This exception will be thrown whenever storage tries to do the first operation on the blob, which
|
||||
// should be in the DownloadToAsync.
|
||||
catch (RequestFailedException e) when (e.ErrorCode == BlobErrorCode.BlobNotFound)
|
||||
{
|
||||
statistics.DownloadDuration ??= stopwatch.ElapsedAndReset();
|
||||
|
||||
await TryNotify(context, new DeleteEvent(blobPath, contentHash));
|
||||
return Result.Success(new RemoteDownloadResult
|
||||
{
|
||||
ResultCode = PlaceFileResult.ResultCode.NotPlacedContentNotFound,
|
||||
FileSize = null,
|
||||
DownloadResult = statistics
|
||||
});
|
||||
}
|
||||
|
||||
if (observedContentHash != contentHash)
|
||||
{
|
||||
// TODO: there should be some way to either notify or delete the file in storage
|
||||
Tracer.Error(context, $"Expected to download file with hash {contentHash} into file {path}, but found {observedContentHash} instead");
|
||||
|
||||
// The file we downloaded on to disk has the wrong file. Delete it so it can't be used incorrectly.
|
||||
try
|
||||
{
|
||||
_fileSystem.DeleteFile(path);
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
return new Result<RemoteDownloadResult>(exception, $"Failed to delete {path} containing partial download results for content {contentHash}");
|
||||
}
|
||||
|
||||
return Result.Success(
|
||||
new RemoteDownloadResult()
|
||||
{
|
||||
ResultCode = PlaceFileResult.ResultCode.NotPlacedContentNotFound,
|
||||
FileSize = fileSize,
|
||||
DownloadResult = statistics,
|
||||
});
|
||||
}
|
||||
|
||||
await TryNotify(context, new AddEvent(blobPath, contentHash, fileSize));
|
||||
return Result.Success(
|
||||
new RemoteDownloadResult
|
||||
{
|
||||
ResultCode = PlaceFileResult.ResultCode.PlacedWithCopy,
|
||||
FileSize = fileSize,
|
||||
DownloadResult = statistics,
|
||||
});
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
protected override async Task<PutResult> PutFileCoreAsync(
|
||||
OperationContext context,
|
||||
|
@ -543,7 +573,7 @@ public sealed class AzureBlobStorageContentSession : ContentSessionBase, IConten
|
|||
// it knows that the new CHL exists. By touching the content, the last access time for this blob will
|
||||
// now be above the deletion threshold (GC will not delete blobs touched more recently than 24 hours ago)
|
||||
// and the race condition is eliminated.
|
||||
var touchResult = await _clientAdapter.TouchAsync(context, client);
|
||||
var touchResult = await TouchAsync(context, client);
|
||||
|
||||
if (!touchResult.Succeeded)
|
||||
{
|
||||
|
@ -573,6 +603,53 @@ public sealed class AzureBlobStorageContentSession : ContentSessionBase, IConten
|
|||
return new PutResult(contentHash, contentSize, contentAlreadyExistsInCache);
|
||||
}
|
||||
|
||||
private async Task<Result<(DateTimeOffset? dateTimeOffset, long? contentLength)>> TouchAsync(OperationContext context, BlobClient client)
|
||||
{
|
||||
// There's two kinds of touch: write and read-only. The write touch forces an update of the blob's
|
||||
// metadata, which changes the ETag. The read-only touch does a 1 byte download. The write touch is a
|
||||
// "forceful" method, which works but interacts with chunked downloads of files in a not-so-nice way.
|
||||
//
|
||||
// We really want to avoid the write touch because it'll fail any downloads that may be happening in parallel,
|
||||
// which is a problem for large files. if we can, so we try to do a read-only touch first. If that tells
|
||||
// us that the last access time was too long ago, we'll also perform a write-only touch.
|
||||
var result = await _clientAdapter.TouchAsync(context, client, hard: false);
|
||||
if (_configuration.IsReadOnly || !result.Succeeded)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
var now = _clock.UtcNow;
|
||||
var lastAccessTime = result.Value.dateTimeOffset ?? now;
|
||||
if (lastAccessTime > now)
|
||||
{
|
||||
lastAccessTime = now;
|
||||
}
|
||||
|
||||
var timeSinceLastAccess = now - lastAccessTime;
|
||||
|
||||
// When we're below the minimum threshold, we explicitly disallow write-touches. This prevents spurious updates
|
||||
// from happening.
|
||||
if (timeSinceLastAccess < _configuration.AllowHardTouchThreshold)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
// When we're over the force threshold, we'll force a write-touch. When we're below the threshold, we'll do
|
||||
// this probabilistically to prevent too many write-touches from happening. The intention is that the number of
|
||||
// these touches will be low, but that they will happen when the blob is accessed for the first time after a
|
||||
// while.
|
||||
//
|
||||
// Because a single touch is enough to force a reset, this should mostly be good enough because it's quite
|
||||
// unlikely that a lot of these happen.
|
||||
double lastAccessFraction = Math.Min((timeSinceLastAccess - _configuration.AllowHardTouchThreshold).TotalHours / (_configuration.ForceHardTouchThreshold - _configuration.AllowHardTouchThreshold).TotalHours, 1.0);
|
||||
if (timeSinceLastAccess < _configuration.ForceHardTouchThreshold && ThreadSafeRandom.ContinuousUniform(0.0, 1.0) > Math.Pow(lastAccessFraction, 2))
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
return await _clientAdapter.TouchAsync(context, client, hard: true);
|
||||
}
|
||||
|
||||
private Task<(BlobClient Client, AbsoluteBlobPath Path)> GetBlobClientAsync(OperationContext context, ContentHash contentHash)
|
||||
{
|
||||
return _store.GetBlobClientAsync(context, contentHash);
|
||||
|
|
|
@ -58,6 +58,34 @@ public sealed record AzureBlobStorageContentStoreConfiguration
|
|||
// This is a catch-all for all cases that go over the sizes above.
|
||||
new UploadOperationTimeout(long.MaxValue, TimeSpan.FromHours(24)),
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// Whether this session is supposed to be read-only or read-write.
|
||||
/// </summary>
|
||||
public bool IsReadOnly { get; internal set; }
|
||||
|
||||
/// <summary>
|
||||
/// The amount of time since the last touch before we allow issuing a write-touch which will cause the content's
|
||||
/// ETag to change.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// This number was picked to ensure we don't do any hard touches on content that was recently used, as a way to
|
||||
/// prevent having too many ETag changes, which put a bit of extra stress on downloaders.
|
||||
/// </remarks>
|
||||
public TimeSpan AllowHardTouchThreshold { get; set; } = TimeSpan.FromHours(12);
|
||||
|
||||
/// <summary>
|
||||
/// The amount of time since the last touch before we force issue a write-touch which will cause the content's
|
||||
/// ETag to change.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// This number was picked to ensure we keep the content with access times below 24h, which is when content might
|
||||
/// become eligible for deletion.
|
||||
///
|
||||
/// Please note that between 12h and 22h, we'll touch with a probability that increases quadratically with the time
|
||||
/// that passed since 12h.
|
||||
/// </remarks>
|
||||
public TimeSpan ForceHardTouchThreshold { get; set; } = TimeSpan.FromHours(22);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
|
|
@ -398,35 +398,77 @@ public class BlobStorageClientAdapter
|
|||
caller: caller);
|
||||
}
|
||||
|
||||
public Task<Result<(DateTimeOffset? dateTimeOffset, long? contentLength)>> TouchAsync(OperationContext context, BlobClient blob)
|
||||
public Task<Result<(DateTimeOffset? dateTimeOffset, long? contentLength)>> TouchAsync(OperationContext context, BlobClient blob, bool hard = false)
|
||||
{
|
||||
return context.PerformOperationWithTimeoutAsync(
|
||||
Tracer,
|
||||
async context =>
|
||||
{
|
||||
// This updates the last access time in blob storage when last access time tracking is enabled. Please note,
|
||||
// we're not downloading anything here because we're doing a 0-length download.
|
||||
// This updates the last access time in blob storage when last access time tracking is enabled.
|
||||
// See: https://learn.microsoft.com/en-us/azure/storage/blobs/lifecycle-management-overview?tabs=azure-portal#move-data-based-on-last-accessed-time
|
||||
try
|
||||
if (hard)
|
||||
{
|
||||
var response = await blob.DownloadContentAsync(
|
||||
new BlobDownloadOptions()
|
||||
// Update the last access time by updating a header. This method is good because it forces an
|
||||
// update to the last modification time (which we use as last access time as well), and the update
|
||||
// is never binned (i.e., it's updated every time).
|
||||
// The method is bad because it requires write access to the blob, which some clients may not have.
|
||||
//
|
||||
// We update the HTTP headers to a random value because there's no direct way to force an update of
|
||||
// the last access time, so we update the last modification time. There's also no way to update the
|
||||
// last modification time in the blob storage API, so we have to update either the metadata or the
|
||||
// properties.
|
||||
//
|
||||
// Updating the Metadata is not a good idea here because the API replaces the previous metadata
|
||||
// with the new one, so you need to have the old metadata in order to perform a single-item update,
|
||||
// and we actually use the metadata to implement compression.
|
||||
//
|
||||
// The same thing happens with the properties, but since we don't actually use the properties it's
|
||||
// not a big deal.
|
||||
var response = await blob.SetHttpHeadersAsync(
|
||||
httpHeaders: new BlobHttpHeaders()
|
||||
{
|
||||
Range = new HttpRange(0, 1),
|
||||
Conditions = new BlobRequestConditions() { IfMatch = ETag.All, }
|
||||
// Setting this to a random value guarantees the last modification time gets updated.
|
||||
// The reason we set this particular field is we don't expect it to ever be useful for our
|
||||
// use-case. If it ever becomes useful, we can change it to a more meaningful value and
|
||||
// we'll have to pick another one.
|
||||
ContentLanguage = Guid.NewGuid().ToString(),
|
||||
},
|
||||
cancellationToken: context.Token);
|
||||
|
||||
return Result.Success<(DateTimeOffset?, long?)>((response.Value.Details.LastAccessed, response.Value.Details.ContentLength), isNullAllowed: true);
|
||||
// The last modified time in this case is also the last access time. Unfortunately, the blob size
|
||||
// is unavailable with this method.
|
||||
return Result.Success<(DateTimeOffset?, long?)>(
|
||||
(response.Value.LastModified, null),
|
||||
isNullAllowed: true);
|
||||
}
|
||||
catch (RequestFailedException ex) when (ex.ErrorCode == BlobErrorCode.InvalidRange)
|
||||
else
|
||||
{
|
||||
// We are dealing with a zero-size piece of content. Use unbounded download API to touch the file.
|
||||
var response = await blob.DownloadContentAsync(
|
||||
conditions: new BlobRequestConditions() { IfMatch = ETag.All, },
|
||||
cancellationToken: context.Token);
|
||||
// Update the last access time using a 1-byte download. This method is good because it can be used
|
||||
// when there's only read access, but bad because the update of the last access time is binned on
|
||||
// 1d increments (i.e., it's only updated every 24h).
|
||||
//
|
||||
// See: https://learn.microsoft.com/en-us/azure/storage/blobs/lifecycle-management-overview?tabs=azure-portal#move-data-based-on-last-accessed-time
|
||||
try
|
||||
{
|
||||
var response = await blob.DownloadContentAsync(
|
||||
new BlobDownloadOptions()
|
||||
{
|
||||
Range = new HttpRange(0, 1),
|
||||
Conditions = new BlobRequestConditions() { IfMatch = ETag.All, }
|
||||
},
|
||||
cancellationToken: context.Token);
|
||||
|
||||
return Result.Success<(DateTimeOffset?, long?)>((response.Value.Details.LastAccessed, response.Value.Details.ContentLength), isNullAllowed: true);
|
||||
return Result.Success<(DateTimeOffset?, long?)>((response.Value.Details.LastAccessed, response.Value.Details.ContentLength), isNullAllowed: true);
|
||||
}
|
||||
catch (RequestFailedException ex) when (ex.ErrorCode == BlobErrorCode.InvalidRange)
|
||||
{
|
||||
// We are dealing with a zero-size piece of content. Use unbounded download API to touch the file.
|
||||
var response = await blob.DownloadContentAsync(
|
||||
conditions: new BlobRequestConditions() { IfMatch = ETag.All, },
|
||||
cancellationToken: context.Token);
|
||||
|
||||
return Result.Success<(DateTimeOffset?, long?)>((response.Value.Details.LastAccessed, response.Value.Details.ContentLength), isNullAllowed: true);
|
||||
}
|
||||
}
|
||||
},
|
||||
traceOperationStarted: false,
|
||||
|
|
|
@ -539,7 +539,13 @@ public class EphemeralContentSession : ContentSessionBase
|
|||
return false;
|
||||
}
|
||||
|
||||
private readonly record struct ElisionResult(bool Allow, long Size, DateTime? LatestPersistentTouchTime);
|
||||
private readonly record struct ElisionResult(bool Allow, long Size, DateTime? LatestPersistentTouchTime)
|
||||
{
|
||||
public static ElisionResult Disallow()
|
||||
{
|
||||
return new ElisionResult(Allow: false, Size: -1, LatestPersistentTouchTime: null);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// This checks for file existence in the persistent cache. This method is used to decide whether we can avoid
|
||||
|
@ -565,7 +571,7 @@ public class EphemeralContentSession : ContentSessionBase
|
|||
|
||||
if (localOnly)
|
||||
{
|
||||
return Result.Success(new ElisionResult(Allow: false, Size: -1, LatestPersistentTouchTime: null));
|
||||
return ElisionResult.Disallow();
|
||||
}
|
||||
|
||||
var remote = await _ephemeralHost.ContentResolver.GetSingleLocationAsync(context, contentHash).ThrowIfFailureAsync();
|
||||
|
@ -574,7 +580,7 @@ public class EphemeralContentSession : ContentSessionBase
|
|||
return Result.Success(new ElisionResult(Allow: true, Size: remote.Size, LatestPersistentTouchTime: latestPersistentTouchTime));
|
||||
}
|
||||
|
||||
return Result.Success(new ElisionResult(Allow: false, Size: -1, LatestPersistentTouchTime: null));
|
||||
return ElisionResult.Disallow();
|
||||
},
|
||||
traceOperationStarted: false,
|
||||
traceErrorsOnly: true,
|
||||
|
@ -582,10 +588,10 @@ public class EphemeralContentSession : ContentSessionBase
|
|||
{
|
||||
if (result.Succeeded)
|
||||
{
|
||||
return $"ContentHash=[{contentHash}] LocalOnly=[{localOnly}] Allow=[{result.Value.Allow}] Size=[{result.Value.Size}]";
|
||||
return $"ContentHash=[{contentHash}] LocalOnly=[{localOnly}] Allow=[{result.Value.Allow}] Size=[{result.Value.Size}] LastPersistentTouchTime=[{result.Value.LatestPersistentTouchTime?.ToString() ?? "null"}]";
|
||||
}
|
||||
return $"ContentHash=[{contentHash}] LocalOnly=[{localOnly}] Allow=[false] Size=[-1]";
|
||||
})).GetValueOrDefault(defaultValue: new ElisionResult(Allow: false, Size: -1, LatestPersistentTouchTime: null));
|
||||
})).GetValueOrDefault(defaultValue: ElisionResult.Disallow());
|
||||
|
||||
bool shouldElide(ContentEntry contentEntry, DateTime nowUtc, out DateTime? latestPersistentTouchTime)
|
||||
{
|
||||
|
|
|
@ -189,10 +189,8 @@ namespace BuildXL.Cache.ContentStore.Utils
|
|||
/// </summary>
|
||||
public async Task<T> WithOperationContext<T>(Context context, CancellationToken token, Func<OperationContext, Task<T>> func)
|
||||
{
|
||||
using (var operationContext = TrackShutdown(context, token))
|
||||
{
|
||||
return await func(operationContext);
|
||||
}
|
||||
using var operationContext = TrackShutdown(context, token);
|
||||
return await func(operationContext);
|
||||
}
|
||||
|
||||
/// <nodoc />
|
||||
|
|
|
@ -4,7 +4,9 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.ContractsLight;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Azure;
|
||||
|
@ -20,6 +22,7 @@ using BuildXL.Cache.ContentStore.Interfaces.Time;
|
|||
using BuildXL.Cache.ContentStore.Timers;
|
||||
using BuildXL.Cache.ContentStore.Tracing;
|
||||
using BuildXL.Cache.ContentStore.Tracing.Internal;
|
||||
using BuildXL.Cache.ContentStore.Utils;
|
||||
using BuildXL.Cache.MemoizationStore.Stores;
|
||||
using BuildXL.Utilities.Core.Tasks;
|
||||
using BuildXL.Utilities.ParallelAlgorithms;
|
||||
|
@ -327,11 +330,11 @@ namespace BuildXL.Cache.BlobLifetimeManager.Library
|
|||
// It's possible that a fingerprint is currently being created that references this piece of content. This means there's a race condition that we need to account for.
|
||||
// Because of this, the current design is that clients will update the last access time of a blob when they get a content cache hit when ulpoading the contents of a new strong fingerprint.
|
||||
// On the GC side of things, what this means is that we have to check that the content has not been accessed recently.
|
||||
var lastAccessTime = await GetLastAccessTimeAsync(context, client);
|
||||
if (lastAccessTime > GetLastAccesstimeDeletionThreshold(startTime))
|
||||
var blobVersion = await GetBlobVersionAsync(context, client);
|
||||
if (blobVersion.Value.LastAccessTimeUtc > GetDeletionThreshold(startTime))
|
||||
{
|
||||
Tracer.Debug(context,
|
||||
$"Skipping deletion of {contentHash.ToShortString()} because it has been accessed too recently to be deleted. LastAccessTime=[{lastAccessTime}]");
|
||||
$"Skipping deletion of {contentHash.ToShortString()} because it has been accessed too recently to be deleted. LastAccessTime=[{blobVersion}]");
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -344,7 +347,7 @@ namespace BuildXL.Cache.BlobLifetimeManager.Library
|
|||
}
|
||||
else
|
||||
{
|
||||
var result = await DeleteBlobFromStorageAsync(context, client, contentSize, lastAccessTime);
|
||||
var result = await DeleteBlobFromStorageAsync(context, client, contentSize, blobVersion);
|
||||
if (!result.Succeeded)
|
||||
{
|
||||
return false;
|
||||
|
@ -367,32 +370,32 @@ namespace BuildXL.Cache.BlobLifetimeManager.Library
|
|||
// that API doesn't exist. This leaves a race condition open where we might access the strong fingerprint in between the
|
||||
// last access time check and the deletion. However the timing is very precise and this wouldn't break the cache; we would
|
||||
// only be prematurely evicting a fingerprint.
|
||||
var currentLastAccessTime = await GetLastAccessTimeAsync(context, client);
|
||||
if (currentLastAccessTime is null)
|
||||
var currentVersion = await GetBlobVersionAsync(context, client);
|
||||
if (currentVersion is null)
|
||||
{
|
||||
// A null value means the blob does not exist. This must mean that the blob has already been deleted so it's safe to proceed as if we had
|
||||
// deleted it.
|
||||
return true;
|
||||
}
|
||||
|
||||
if (currentLastAccessTime > contentHashList.LastAccessTime)
|
||||
if (currentVersion.Value.LastAccessTimeUtc > contentHashList.LastAccessTime)
|
||||
{
|
||||
Tracer.Debug(context,
|
||||
$"Current last access time for CHL '{contentHashList.BlobName}' is greater than the stored last access time. " +
|
||||
$"Updating database and skipping deletion. " +
|
||||
$"Current=[{currentLastAccessTime}], Stored=[{contentHashList.LastAccessTime}]");
|
||||
$"Current=[{currentVersion}], Stored=[{contentHashList.LastAccessTime}]");
|
||||
|
||||
var updatedHashList = contentHashList with { LastAccessTime = currentLastAccessTime.Value };
|
||||
var updatedHashList = contentHashList with { LastAccessTime = currentVersion.Value.LastAccessTimeUtc };
|
||||
|
||||
_database.UpdateContentHashListLastAccessTime(updatedHashList);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (currentLastAccessTime > GetLastAccesstimeDeletionThreshold(startTime))
|
||||
if (currentVersion.Value.LastAccessTimeUtc > GetDeletionThreshold(startTime))
|
||||
{
|
||||
Tracer.Debug(context,
|
||||
$"Skipping deletion of {contentHashList.BlobName} because it has been accessed too recently to be deleted. LastAccessTime=[{currentLastAccessTime}]");
|
||||
$"Skipping deletion of {contentHashList.BlobName} because it has been accessed too recently to be deleted. LastAccessTime=[{currentVersion}]");
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -405,14 +408,14 @@ namespace BuildXL.Cache.BlobLifetimeManager.Library
|
|||
}
|
||||
else
|
||||
{
|
||||
var result = await DeleteBlobFromStorageAsync(context, client, contentHashList.BlobSize, currentLastAccessTime);
|
||||
var result = await DeleteBlobFromStorageAsync(context, client, contentHashList.BlobSize, currentVersion);
|
||||
return result.Succeeded;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
internal static Task<Result<bool>> DeleteBlobFromStorageAsync(OperationContext context, BlobClient client, long size, DateTime? lastAccessTime)
|
||||
internal static Task<Result<bool>> DeleteBlobFromStorageAsync(OperationContext context, BlobClient client, long size, BlobVersion? blobVersion)
|
||||
{
|
||||
return context.PerformOperationAsync<Result<bool>>(
|
||||
Tracer,
|
||||
|
@ -420,12 +423,28 @@ namespace BuildXL.Cache.BlobLifetimeManager.Library
|
|||
{
|
||||
try
|
||||
{
|
||||
var response = await client.DeleteAsync();
|
||||
BlobRequestConditions? conditions = null;
|
||||
if (blobVersion is not null)
|
||||
{
|
||||
// We prefer ETag if we have it, but sometimes we don't. In that case, we use the last access time.
|
||||
if (blobVersion.Value.ETag is not null)
|
||||
{
|
||||
conditions ??= new BlobRequestConditions();
|
||||
conditions.IfMatch = blobVersion.Value.ETag;
|
||||
}
|
||||
else if (blobVersion.Value.LastAccessTimeUtc > DateTime.MinValue)
|
||||
{
|
||||
conditions ??= new BlobRequestConditions();
|
||||
conditions.IfUnmodifiedSince = blobVersion.Value.LastAccessTimeUtc;
|
||||
}
|
||||
}
|
||||
|
||||
var response = await client.DeleteAsync(conditions: conditions, cancellationToken: context.Token);
|
||||
return response.IsError
|
||||
? new Result<bool>($"Failed to delete blob {client.Name}. Error ({response.Status}): {response.ReasonPhrase}")
|
||||
: true;
|
||||
}
|
||||
catch (RequestFailedException e) when (e.ErrorCode == BlobErrorCode.BlobNotFound)
|
||||
catch (RequestFailedException e) when (e.Status == (int)HttpStatusCode.NotFound || e.Status == (int)HttpStatusCode.PreconditionFailed)
|
||||
{
|
||||
// Consider this a success, but trace for diagnostic purposes.
|
||||
return false;
|
||||
|
@ -433,15 +452,64 @@ namespace BuildXL.Cache.BlobLifetimeManager.Library
|
|||
},
|
||||
traceOperationStarted: false,
|
||||
extraEndMessage: result =>
|
||||
$"BlobName=[{client.Name}], Size=[{size}], Shard=[{client.AccountName}], LastAccessTime=[{lastAccessTime}], BlobExisted=[{(result.Succeeded ? result.Value : null)}]");
|
||||
$"BlobName=[{client.Name}], Size=[{size}], Shard=[{client.AccountName}], LastAccessTime=[{blobVersion}], BlobExisted=[{(result.Succeeded ? result.Value : null)}]");
|
||||
}
|
||||
|
||||
private static async Task<DateTime?> GetLastAccessTimeAsync(OperationContext context, BlobClient client)
|
||||
internal readonly record struct BlobVersion(DateTime LastAccessTimeUtc, ETag? ETag)
|
||||
{
|
||||
public static BlobVersion FromLastAccessTime(DateTime lastAccessTimeUtc)
|
||||
{
|
||||
return new(lastAccessTimeUtc, ETag: null);
|
||||
}
|
||||
|
||||
public static BlobVersion FromBlobResponse(Response response)
|
||||
{
|
||||
DateTime lastAccessTime = DateTime.MinValue;
|
||||
if (response.Headers.TryGetValue("Last-Modified", out var lastModifiedString) &&
|
||||
DateTime.TryParse(lastModifiedString, CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal, out var lastModified) &&
|
||||
lastModified > lastAccessTime)
|
||||
{
|
||||
lastAccessTime = lastModified;
|
||||
}
|
||||
|
||||
if (response.Headers.TryGetValue("x-ms-last-access-time", out var lastAccessTimeString) &&
|
||||
DateTime.TryParse(lastAccessTimeString, CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal, out var lastAccess) &&
|
||||
lastAccess > lastAccessTime)
|
||||
{
|
||||
lastAccessTime = lastAccess;
|
||||
}
|
||||
|
||||
if (response.Headers.TryGetValue("x-ms-creation-time", out var creationTimeString) &&
|
||||
DateTime.TryParse(creationTimeString, CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal, out var creationTime) &&
|
||||
creationTime > lastAccessTime)
|
||||
{
|
||||
lastAccessTime = creationTime;
|
||||
}
|
||||
|
||||
return new BlobVersion(LastAccessTimeUtc: lastAccessTime, ETag: response.Headers.ETag);
|
||||
}
|
||||
|
||||
public static BlobVersion FromBlobProperties(BlobProperties properties)
|
||||
{
|
||||
var creationTime = properties.CreatedOn.UtcDateTime;
|
||||
var lastAccessTime = properties.LastAccessed.UtcDateTime;
|
||||
var lastModificationTime = properties.LastModified.UtcDateTime;
|
||||
|
||||
return new BlobVersion(
|
||||
LastAccessTimeUtc:
|
||||
creationTime
|
||||
.Max(lastAccessTime)
|
||||
.Max(lastModificationTime),
|
||||
ETag: properties.ETag);
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task<BlobVersion?> GetBlobVersionAsync(OperationContext context, BlobClient client)
|
||||
{
|
||||
try
|
||||
{
|
||||
var response = await client.GetPropertiesAsync(cancellationToken: context.Token);
|
||||
return response.Value.LastAccessed.UtcDateTime;
|
||||
return BlobVersion.FromBlobProperties(response.Value!);
|
||||
}
|
||||
catch (RequestFailedException e) when (e.ErrorCode == BlobErrorCode.BlobNotFound)
|
||||
{
|
||||
|
@ -449,7 +517,7 @@ namespace BuildXL.Cache.BlobLifetimeManager.Library
|
|||
}
|
||||
}
|
||||
|
||||
private DateTime GetLastAccesstimeDeletionThreshold(DateTime startTime)
|
||||
private DateTime GetDeletionThreshold(DateTime startTime)
|
||||
{
|
||||
var configuredThreshold = _clock.UtcNow.Add(-_lastAccessTimeDeletionThreshold);
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ using BuildXL.Cache.ContentStore.Interfaces.Results;
|
|||
using BuildXL.Cache.ContentStore.Interfaces.Time;
|
||||
using BuildXL.Cache.ContentStore.Tracing;
|
||||
using BuildXL.Cache.ContentStore.Tracing.Internal;
|
||||
using BuildXL.Cache.ContentStore.Utils;
|
||||
using BuildXL.Cache.MemoizationStore.Interfaces.Sessions;
|
||||
using BuildXL.Cache.MemoizationStore.Stores;
|
||||
using BuildXL.Utilities;
|
||||
|
@ -285,6 +286,7 @@ namespace BuildXL.Cache.BlobLifetimeManager.Library
|
|||
// TODO: consider using pooled memory streams.
|
||||
using var stream = new MemoryStream();
|
||||
|
||||
BlobQuotaKeeper.BlobVersion? version = null;
|
||||
try
|
||||
{
|
||||
var response = await blobClient.DownloadToAsync(stream, context.Token);
|
||||
|
@ -293,6 +295,8 @@ namespace BuildXL.Cache.BlobLifetimeManager.Library
|
|||
{
|
||||
return new Result<ProcessContentHashListResult>($"Download of the content hash list failed with status code: {response.Status}");
|
||||
}
|
||||
|
||||
version = BlobQuotaKeeper.BlobVersion.FromBlobResponse(response);
|
||||
}
|
||||
catch (RequestFailedException e) when (e.ErrorCode == BlobErrorCode.BlobNotFound)
|
||||
{
|
||||
|
@ -308,6 +312,14 @@ namespace BuildXL.Cache.BlobLifetimeManager.Library
|
|||
// We use "now" as last access time because we just downloaded the blob, and thus we just affected the CHL's blob last access time.
|
||||
// Adding a minute to account for clocks not being in sync. In the grand scheme of things, an error of one minute shouldn't make a difference.
|
||||
var lastAccessTime = clock.UtcNow.AddMinutes(1);
|
||||
if (version is not null)
|
||||
{
|
||||
version = version.Value with { LastAccessTimeUtc = version.Value.LastAccessTimeUtc.Max(lastAccessTime) };
|
||||
}
|
||||
else
|
||||
{
|
||||
version = BlobQuotaKeeper.BlobVersion.FromLastAccessTime(lastAccessTime);
|
||||
}
|
||||
|
||||
var processResult = await ProcessContentHashListAsync(context, blobName, blobLength, contentHashList, lastAccessTime, database, topology);
|
||||
|
||||
|
@ -319,7 +331,7 @@ namespace BuildXL.Cache.BlobLifetimeManager.Library
|
|||
if (!processResult.Value)
|
||||
{
|
||||
// This signals that one of the referenced blobs does not exist. The CHL is invalid and should be deleted.
|
||||
var deleteResult = await BlobQuotaKeeper.DeleteBlobFromStorageAsync(context, blobClient, blobLength, lastAccessTime);
|
||||
var deleteResult = await BlobQuotaKeeper.DeleteBlobFromStorageAsync(context, blobClient, blobLength, version);
|
||||
if (!deleteResult.Succeeded)
|
||||
{
|
||||
return new Result<ProcessContentHashListResult>(deleteResult, "Failed to delete invalid content hash list.");
|
||||
|
@ -403,14 +415,5 @@ namespace BuildXL.Cache.BlobLifetimeManager.Library
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static DateTime GetLastAccessTime(
|
||||
BlobItem blob)
|
||||
{
|
||||
var offset = blob.Properties.LastAccessedOn ?? blob.Properties.CreatedOn;
|
||||
Contract.Assert(offset is not null);
|
||||
|
||||
return offset.Value.UtcDateTime;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -174,6 +174,7 @@ public static class AzureBlobStorageCacheFactory
|
|||
Topology = topology,
|
||||
StorageInteractionTimeout = configuration.StorageInteractionTimeout,
|
||||
Announcer = announcer,
|
||||
IsReadOnly = configuration.IsReadOnly,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -248,7 +248,7 @@ namespace BuildXL.Cache.MemoizationStore.Stores
|
|||
async strongFingerprintTask =>
|
||||
{
|
||||
var strongFingerprint = await strongFingerprintTask;
|
||||
return await _storageClientAdapter.TouchAsync(context, await GetStrongFingerprintClientAsync(context, strongFingerprint));
|
||||
return await _storageClientAdapter.TouchAsync(context, await GetStrongFingerprintClientAsync(context, strongFingerprint), hard: false);
|
||||
});
|
||||
|
||||
return (await TaskUtilities.SafeWhenAll(tasks)).And();
|
||||
|
|
Загрузка…
Ссылка в новой задаче