Merged PR 806537: Exclude SealDirectory pips in ID filter results

SealDirectory pips take part in filtering. This makes sense when using filters that target paths. But it falls apart when using filters that target other properties like a pipid.

The related bug demonstrates this well. Consider a graph with 2 process pips that each produce a SealDirectory:
P0 -> SD0
P1 -> SD1
A pip id filter of `id='p0'` will target p0 as expected. But the behavior prior to this change would be a negated id filter of `~(id='p0')` would match on p1, sd0, and sd1. But since p0 is a dependency of sd0, p0 would actually end up getting scheduled as well.

This change scopes the id filter to Process, CopyFile, and WriteFile pips. Due to the way negation is implemented with being pushed down in to the filter itself, this change makes the behavior of `~(id='p0')` only match on p1, as expected.

It isn't perfect though. Fundamentally, since SealDirectories are included in filtering, other filter combinations may end up including them back again.

Related work items: #2213617
This commit is contained in:
Michael Pysson 2024-09-26 00:41:25 +00:00
Родитель ced9bedb2b
Коммит 94104cd2ef
4 изменённых файлов: 79 добавлений и 4 удалений

Просмотреть файл

@ -153,7 +153,7 @@ binaryOperator = "and" | "or" ;
filterFunction = "~" | (*negation*)
"dpt" | (*all transitive dependents*)
"dpc" ; (*all transitive dependencies*)
filterType = "id" | (*Unique identifier given to a pip*)
filterType = "id" | (*Unique identifier given to a Process, WriteFile, or CopyFile pip*)
"input" | (*Any input file of the pip*)
"output" | (*Any outputut file of the pip*)
"spec" | (*The spec file producing the pip*)

Просмотреть файл

@ -358,6 +358,64 @@ namespace Test.BuildXL.Scheduler
AssertSetEquals(expectedDependentsOutputs, actualDependentsOutputs);
}
[Fact]
public void TestOpaqueProducersAndNegatedIdFilter()
{
// p0 -> opaque0
// p1 -> opaque1
// p1 -> opaque2
var opaque0 = CreateUniqueObjPath("o0");
var opaque1 = CreateUniqueObjPath("o1");
var opaque2 = CreateUniqueObjPath("o2");
Process p0 = CreateAndScheduleProcess(
dependencies: [CreateSourceFile()],
outputDirectoryPaths: [opaque0],
outputs: [CreateOutputFileArtifact()],
provenance: CreateProvenance());
Process p1 = CreateAndScheduleProcess(
dependencies: [CreateSourceFile()],
outputDirectoryPaths: [opaque1],
outputs: [CreateOutputFileArtifact()],
provenance: CreateProvenance());
Process p2 = CreateAndScheduleProcess(
dependencies: [CreateSourceFile()],
outputDirectoryPaths: [opaque2],
outputs: [CreateOutputFileArtifact()],
provenance: CreateProvenance());
var graph = PipGraphBuilder.Build();
// Negation filter for !p0. This should match opaque1 and opaque2
AssertFilteredOutputs(graph, $"~(id='{p0.FormattedSemiStableHash}')", expectedMatching: [opaque1, opaque2], expectedNotMatching: [opaque0]);
// These filters should be equivalent and only match on opaque2
AssertFilteredOutputs(graph, $"~(id='{p0.FormattedSemiStableHash}' or id='{p1.FormattedSemiStableHash}')", expectedMatching: [opaque2], expectedNotMatching: [opaque0, opaque1]);
AssertFilteredOutputs(graph, $"~(id='{p0.FormattedSemiStableHash}') and ~(id='{p1.FormattedSemiStableHash}')", expectedMatching: [opaque2], expectedNotMatching: [opaque0, opaque1]);
}
private void AssertFilteredOutputs(PipGraph graph, string filter, AbsolutePath[] expectedMatching, AbsolutePath[] expectedNotMatching)
{
FilterParser parser = new FilterParser(Context, getPathByMountName, filter);
Assert.True(parser.TryParse(out var rootFilter, out var pathFilter));
var outputs = graph.FilterOutputs(rootFilter);
foreach (var match in expectedMatching)
{
Assert.True(outputs.Contains(new DirectoryArtifact(match, 0)), $"Expected to find {match.ToString(Context.PathTable)}");
}
foreach (var noMatch in expectedNotMatching)
{
Assert.False(outputs.Contains(new DirectoryArtifact(noMatch, 0)), $"Did not expect to find {noMatch.ToString(Context.PathTable)}");
}
static bool getPathByMountName(string mountName, out AbsolutePath path)
{
path = AbsolutePath.Invalid;
return false;
}
}
[Fact]
public void TestDependenciesFilter()
{

Просмотреть файл

@ -169,6 +169,8 @@ namespace Test.BuildXL.Scheduler
protected static long GetNextPipId() { return Interlocked.Increment(ref s_pipIdCounter); }
private static long s_semistableHashCounter = 0;
/// <summary>
/// The pip graph builder.
/// </summary>
@ -317,6 +319,7 @@ namespace Test.BuildXL.Scheduler
{
BaseSetup(configuration);
m_pipConstructionHelper = null;
s_semistableHashCounter = 0;
}
protected PipTestBase(ITestOutputHelper output) : base(output)
@ -814,7 +817,7 @@ namespace Test.BuildXL.Scheduler
var moduleNameId = StringId.Create(context.StringTable, moduleName);
var provenance = new PipProvenance(
0,
Interlocked.Increment(ref s_semistableHashCounter),
ModuleId.Create(moduleNameId),
moduleNameId,
FullSymbol.Create(context.SymbolTable, valueName),

Просмотреть файл

@ -51,11 +51,25 @@ namespace BuildXL.Pips.Filter
return ParallelProcessAllOutputs<FileOrDirectoryArtifact>(
context,
(pipId, localOutputs) =>
{
// Only allow this filter to match on a few specific pip types. This prevents some unintended
// consequences like negated pip id filters matching all sealed directory pips.
//
// Note: this does not completely eliminate sealed directory pips from slipping into filtering since
// other filter groups may end up including them. This special case primarily guards against unintended
// pip inclusion via sealed directory pips in simple filters like ~(id='pip12345'), which could end
// up including pip12345 anyway since that filter would still match on an opaque directory produced
// by that pip.
var pipType = context.GetPipType(pipId);
if (pipType == Operations.PipType.Process ||
pipType == Operations.PipType.WriteFile ||
pipType == Operations.PipType.CopyFile)
{
if ((context.GetSemiStableHash(pipId) == m_semiStableHash) ^ negate)
{
AddOutputs(context, pipId, localOutputs);
}
}
},
constrainingPips);
}