Merged PR 764740: Avoid unnecessary rehashing in ephemeral layer

We always need to hash files when uploading to ensure the `ContentHash` matches the content we're supposed to be uploading. However, in most code paths we already hash the files when inserting them into the local cache, so we've already checked that the file hash matches. In such cases, we can avoid hashing the file twice.
This commit is contained in:
Julian Bayardo 2024-02-01 21:54:39 +00:00
Родитель b542074324
Коммит 69a854a55e
3 изменённых файлов: 59 добавлений и 21 удалений

Просмотреть файл

@ -30,8 +30,8 @@ public class EphemeralContentSession : ContentSessionBase
{
protected override Tracer Tracer { get; } = new(nameof(EphemeralContentSession));
private readonly IContentSession _local;
private readonly IContentSession _persistent;
private readonly ITrustedContentSession _local;
private readonly ITrustedContentSession _persistent;
private readonly EphemeralHost _ephemeralHost;
private readonly IDistributedContentCopierHost2 _contentCopierAdapter;
@ -55,11 +55,7 @@ public class EphemeralContentSession : ContentSessionBase
}
}
// TODO: when we confirm existence (or lack of) of content in the persistent session, it'd be ideal to add that
// fact to the ephemeral cache as a "permanent fact". This would allow us to avoid the existence check in the
// future.
public EphemeralContentSession(string name, IContentSession local, IContentSession persistent, EphemeralHost ephemeralHost)
public EphemeralContentSession(string name, ITrustedContentSession local, ITrustedContentSession persistent, EphemeralHost ephemeralHost)
: base(name, counterTracker: null)
{
_local = local;
@ -112,9 +108,6 @@ public class EphemeralContentSession : ContentSessionBase
}
}
var session = _local as ITrustedContentSession;
Contract.AssertNotNull(session, "The local content session was expected to be a trusted session, but failed to cast.");
var tempLocation = AbsolutePath.CreateRandomFileName(_ephemeralHost.Configuration.Workspace);
var persistent = await _persistent.PlaceFileAsync(
context,
@ -126,7 +119,7 @@ public class EphemeralContentSession : ContentSessionBase
context.Token,
urgencyHint).ThrowIfFailureAsync();
await session.PutTrustedFileAsync(
await _local.PutTrustedFileAsync(
context,
new ContentHashWithSize(contentHash, persistent.FileSize),
tempLocation,
@ -224,7 +217,8 @@ public class EphemeralContentSession : ContentSessionBase
FileRealizationMode.CopyNoVerify => FileRealizationMode.Copy,
_ => throw new ArgumentOutOfRangeException(nameof(realizationMode), realizationMode, null)
};
await _local.PutFileAsync(context, contentHash, path, putRealizationMode, context.Token, urgencyHint).IgnoreFailure();
await _local.PutTrustedFileAsync(context, new ContentHashWithSize(contentHash, persistent.FileSize), path, putRealizationMode, context.Token, urgencyHint).IgnoreFailure();
}
return persistent.WithMaterializationSource(PlaceFileResult.Source.BackingStore);
@ -300,9 +294,7 @@ public class EphemeralContentSession : ContentSessionBase
copyInfo =>
{
var (copyResult, tempLocation, attemptCount) = copyInfo;
var local = _local as ITrustedContentSession;
Contract.AssertNotNull(local, "The local content session was expected to be a trusted session, but failed to cast.");
return local.PutTrustedFileAsync(context, new ContentHashWithSize(contentHash, contentEntry.Size), tempLocation, FileRealizationMode.Move, context.Token, urgencyHint);
return _local.PutTrustedFileAsync(context, new ContentHashWithSize(contentHash, contentEntry.Size), tempLocation, FileRealizationMode.Move, context.Token, urgencyHint);
},
CopyCompression.None,
null,
@ -372,7 +364,17 @@ public class EphemeralContentSession : ContentSessionBase
return new PutResult(local.ContentHash, elision.Size, contentAlreadyExistsInCache: true);
}
return await _persistent.PutFileAsync(context, hashType, path, realizationMode, context.Token, urgencyHint);
if (local.Succeeded)
{
// If the insertion into the local cache succeeded, we know the true hash of the file matches what the
// caller provided. We can use this to avoid a rehashing in the persistent cache.
return await _persistent.PutTrustedFileAsync(context, new ContentHashWithSize(local.ContentHash, local.ContentSize), path, realizationMode, context.Token, urgencyHint);
}
else
{
return await _persistent.PutFileAsync(context, local.ContentHash, path, realizationMode, context.Token, urgencyHint);
}
}
protected override async Task<PutResult> PutFileCoreAsync(
@ -416,7 +418,16 @@ public class EphemeralContentSession : ContentSessionBase
return new PutResult(local.ContentHash, elision.Size, contentAlreadyExistsInCache: true);
}
return await _persistent.PutFileAsync(context, local.ContentHash, path, realizationMode, context.Token, urgencyHint);
if (local.Succeeded)
{
// If the insertion into the local cache succeeded, we know the true hash of the file matches what the
// caller provided. We can use this to avoid a rehashing in the persistent cache.
return await _persistent.PutTrustedFileAsync(context, new ContentHashWithSize(contentHash, local.ContentSize), path, realizationMode, context.Token, urgencyHint);
}
else
{
return await _persistent.PutFileAsync(context, local.ContentHash, path, realizationMode, context.Token, urgencyHint);
}
}
protected override async Task<PutResult> PutStreamCoreAsync(OperationContext context, HashType hashType, Stream stream, UrgencyHint urgencyHint, Counter retryCounter)

Просмотреть файл

@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.ContractsLight;
using System.Threading.Tasks;
using BuildXL.Cache.ContentStore.Distributed.NuCache;
using BuildXL.Cache.ContentStore.Distributed.Stores;
@ -14,6 +15,7 @@ using BuildXL.Cache.ContentStore.Interfaces.Stores;
using BuildXL.Cache.ContentStore.Interfaces.Time;
using BuildXL.Cache.ContentStore.Interfaces.Tracing;
using BuildXL.Cache.ContentStore.Service.Grpc;
using BuildXL.Cache.ContentStore.Sessions.Internal;
using BuildXL.Cache.ContentStore.Synchronization;
using BuildXL.Cache.ContentStore.Tracing;
using BuildXL.Cache.ContentStore.Tracing.Internal;
@ -194,9 +196,11 @@ public class EphemeralContentStore : StartupShutdownComponentBase, IContentStore
{
// We never enable implicit pinning for the local cache, because the local cache is expected to be ephemeral
// and very small.
var localResult = _local.CreateSession(context, $"EphemeralCache({name}/Local)", ImplicitPin.None).ThrowIfFailure();
var remoteResult = _persistent.CreateSession(context, $"EphemeralCache({name}/Persistent)", implicitPin).ThrowIfFailure();
return new CreateSessionResult<IContentSession>(new EphemeralContentSession($"EphemeralCache({name}/Datacenter)", localResult.Session!, remoteResult.Session!, _ephemeralHost));
var localResult = _local.CreateSession(context, $"EphemeralCache({name}/Local)", ImplicitPin.None).ThrowIfFailure().Session as ITrustedContentSession;
Contract.AssertNotNull(localResult, $"{nameof(localResult)} is supposed to implement {nameof(ITrustedContentSession)}");
var remoteResult = _persistent.CreateSession(context, $"EphemeralCache({name}/Persistent)", implicitPin).ThrowIfFailure().Session as ITrustedContentSession;
Contract.AssertNotNull(remoteResult, $"{nameof(remoteResult)} is supposed to implement {nameof(ITrustedContentSession)}");
return new CreateSessionResult<IContentSession>(new EphemeralContentSession($"EphemeralCache({name}/Datacenter)", localResult, remoteResult, _ephemeralHost));
}
public async Task<GetStatsResult> GetStatsAsync(Context context)

Просмотреть файл

@ -17,6 +17,7 @@ using BuildXL.Cache.ContentStore.Interfaces.Sessions;
using BuildXL.Cache.ContentStore.Interfaces.Stores;
using BuildXL.Cache.ContentStore.Interfaces.Tracing;
using BuildXL.Cache.ContentStore.Interfaces.Utils;
using BuildXL.Cache.ContentStore.Sessions.Internal;
using BuildXL.Cache.MemoizationStore.Interfaces.Results;
using BuildXL.Cache.MemoizationStore.Sessions;
@ -25,7 +26,7 @@ using BuildXL.Cache.MemoizationStore.Sessions;
namespace BuildXL.Cache.MemoizationStore.Interfaces.Sessions
{
/// <nodoc />
public class OneLevelCacheSession : ICacheSessionWithLevelSelectors, IHibernateCacheSession
public class OneLevelCacheSession : ICacheSessionWithLevelSelectors, IHibernateCacheSession, ITrustedContentSession
{
/// <summary>
/// Auto-pinning behavior configuration.
@ -485,5 +486,27 @@ namespace BuildXL.Cache.MemoizationStore.Interfaces.Sessions
{
return ContentSession.PutStreamAsync(context, contentHash, stream, cts, urgencyHint);
}
/// <inheritdoc />
/// <remarks>
/// Delegates to <see cref="ContentSession"/> when it implements <see cref="ITrustedContentSession"/>,
/// passing every argument through unchanged.
/// </remarks>
/// <exception cref="NotImplementedException">
/// <see cref="ContentSession"/> does not implement <see cref="ITrustedContentSession"/>. This method is not
/// <c>async</c>, so the exception is thrown directly to the caller rather than surfaced through the returned task.
/// </exception>
public Task<PutResult> PutTrustedFileAsync(Context context, ContentHashWithSize contentHashWithSize, AbsolutePath path, FileRealizationMode realizationMode, CancellationToken cts, UrgencyHint urgencyHint)
{
    return ContentSession is ITrustedContentSession trusted
        ? trusted.PutTrustedFileAsync(context, contentHashWithSize, path, realizationMode, cts, urgencyHint)
        : throw new NotImplementedException();
}
/// <inheritdoc />
/// <remarks>
/// Delegates to <see cref="ContentSession"/> when it implements <see cref="ITrustedContentSession"/>,
/// forwarding <paramref name="pathHint"/> as-is and returning whatever the wrapped session returns.
/// </remarks>
/// <exception cref="NotImplementedException">
/// <see cref="ContentSession"/> does not implement <see cref="ITrustedContentSession"/>.
/// </exception>
public AbsolutePath? TryGetWorkingDirectory(AbsolutePath? pathHint)
{
    var trusted = ContentSession as ITrustedContentSession;
    if (trusted is null)
    {
        // The wrapped session offers no trusted operations, so there is nothing to delegate to.
        throw new NotImplementedException();
    }

    return trusted.TryGetWorkingDirectory(pathHint);
}
}
}