Merged PR 706413: Fix directory enumeration with legacy Win32 pattern "*.*"

The pattern "*.*" in directory enumeration means include all members.

Our observed input processor turns it into a regex pattern that excludes any member whose name has no '.'. This means our fingerprint misses all file and directory members that don't have an extension.

You can run `dir *.*` in a command prompt to verify this behavior. If you run the same command in a PowerShell terminal, all names without an extension are filtered out. On Windows, however, we follow the semantics of `FindFirstFile` and `PathMatchSpec`.

Apparently some tools, like `python`, perform directory enumerations with this legacy pattern.
This commit is contained in:
Iman Narasamdya 2023-03-10 01:39:59 +00:00
Родитель f09d3ec7b3
Коммит cf6c7ec568
7 изменённых файлов: 98 добавлений и 17 удалений

Просмотреть файл

@ -1976,6 +1976,8 @@ namespace BuildXL.Scheduler.Fingerprints
case DirectoryEnumerationMode.FullGraph:
using (Counters.StartStopwatch(PipExecutorCounter.FullGraphDirectoryEnumerationsDuration))
{
Contract.Assert(rule == null, $"{DirectoryEnumerationMode.FullGraph} does not enumerate file system, so {nameof(rule)} must be null");
eventData.IsStatic = true;
result = m_env.State.DirectoryMembershipFingerprinter.TryComputeDirectoryFingerprint(
directoryPath,
@ -1991,8 +1993,9 @@ namespace BuildXL.Scheduler.Fingerprints
case DirectoryEnumerationMode.MinimalGraph:
using (Counters.StartStopwatch(PipExecutorCounter.MinimalGraphDirectoryEnumerationsDuration))
{
eventData.IsStatic = true;
Contract.Assert(rule == null, $"{DirectoryEnumerationMode.MinimalGraph} does not enumerate file system, so {nameof(rule)} must be null");
eventData.IsStatic = true;
result = m_env.State.DirectoryMembershipFingerprinter.TryComputeDirectoryFingerprint(
directoryPath,
process,
@ -2020,7 +2023,7 @@ namespace BuildXL.Scheduler.Fingerprints
break;
}
default:
Contract.Assume(enumerationMode == DirectoryEnumerationMode.RealFilesystem);
Contract.Assert(enumerationMode == DirectoryEnumerationMode.RealFilesystem);
var enumerateFunc = trackPathExistence ? (Func<EnumerationRequest, PathExistence?>) TryEnumerateAndTrackDirectoryWithFilesystem : TryEnumerateDirectoryWithFilesystem;
using (Counters.StartStopwatch(PipExecutorCounter.RealFilesystemDirectoryEnumerationsDuration))

Просмотреть файл

@ -14,13 +14,18 @@ namespace BuildXL.Scheduler
public class RegexDirectoryMembershipFilter : DirectoryMembershipFilter
{
/// <nodoc/>
public static readonly ObjectCache<string, RegexDirectoryMembershipFilter> RegexCache = new ObjectCache<string, RegexDirectoryMembershipFilter>(1000);
public static readonly ObjectCache<string, RegexDirectoryMembershipFilter> RegexCache = new(1000);
/// <summary>
/// Regex that allows all.
/// </summary>
public const string AllowAllRegex = "^.*$";
/// <summary>
/// Win32 legacy regex that allows all, i.e., '*.*';
/// </summary>
public const string Win32LegacyAllowAllRegex = @"^(.*\..*)$";
private readonly Regex m_regex;
/// <nodoc/>
@ -50,6 +55,12 @@ namespace BuildXL.Scheduler
return AllowAllRegex;
}
if (OperatingSystemHelper.IsWindowsOS && string.Equals(pattern, "*.*"))
{
// On Windows, the pattern "*.*" matches all (legacy behavior), so we simply return the regex that matches all.
return AllowAllRegex;
}
sb.Append(isAdded ? "|" : string.Empty);
var regexStr = Regex.Escape(pattern).Replace(@"\*", ".*").Replace(@"\?", ".");
sb.Append("(" + regexStr + ")");
@ -76,7 +87,8 @@ namespace BuildXL.Scheduler
/// </summary>
public static DirectoryMembershipFilter Create(string enumeratePatternRegex)
{
if (enumeratePatternRegex == AllowAllRegex)
if (enumeratePatternRegex == AllowAllRegex
|| (OperatingSystemHelper.IsWindowsOS && enumeratePatternRegex == Win32LegacyAllowAllRegex))
{
// If the regex allows all, then return an efficient AllowAllFilter.
return AllowAllFilter;

Просмотреть файл

@ -439,7 +439,7 @@ namespace Test.BuildXL.Scheduler
public TestObservation AddDirectoryEnumeration(string path, bool isSearchPath = false, string[] members = null, bool addDependency = true, string[] enumeratePatterns = null)
{
members = members ?? new[] { "test.txt" };
members ??= new[] { "test.txt" };
var root = Path(path);
var contents = members
.Select(m => root.Combine(Context.PathTable, RelativePath.Create(Context.StringTable, m)))
@ -458,7 +458,7 @@ namespace Test.BuildXL.Scheduler
}
var observation = TestObservation.ExpectDirectoryEnumeration(Path(path), CreateFakeContentHash(2));
observation.EnumeratePatternRegex = RegexDirectoryMembershipFilter.ConvertWildcardsToRegex(enumeratePatterns ?? new string[] { });
observation.EnumeratePatternRegex = RegexDirectoryMembershipFilter.ConvertWildcardsToRegex(enumeratePatterns ?? Array.Empty<string>());
observation.IsSearchPathEnumeration = isSearchPath;
AddEnumerableDirectory(observation.Path, new DirectoryFingerprint(observation.ExpectedHash));
m_observations.Add(observation);
@ -1160,6 +1160,42 @@ namespace Test.BuildXL.Scheduler
}
}
/// <summary>
/// Checks how the legacy Win32 pattern "*.*" is handled for an enumerated directory whose members are
/// "foo.hpp", "bar", and ".gitignore". On Windows the tracked filter is expected to be the allow-all filter
/// and to include every member; on other platforms it is expected to be a regex filter that includes
/// only the members whose names contain '.'.
/// </summary>
[Fact]
public void DirectoryEnumerationsWithLegacyAllowedAllFilters()
{
var harness = new Harness();
// Enumerations
var members = new[] { "foo.hpp", "bar", ".gitignore" };
var enumeration = harness.AddDirectoryEnumeration(A("X", "Dir1", ""), isSearchPath: false, members: members, enumeratePatterns: new[] { "*.*" });
harness.Process(ObservedInputProcessingStatus.Success, true);
// Fetch the filter that the harness tracked for the enumerated directory path.
var filter = harness.TrackedDirectoryFilters[enumeration.Path];
// On Windows, the pattern "*.*" matches all, so the filter would be the allow-all filter.
XAssert.AreEqual(OperatingSystemHelper.IsWindowsOS, filter == DirectoryMembershipFilter.AllowAllFilter);
// On non-Windows, the pattern "*.*" matches names that have '.', and so the pattern is turned into a regex filter.
if (!OperatingSystemHelper.IsWindowsOS)
{
Assert.IsType(typeof(RegexDirectoryMembershipFilter), filter);
}
var stringTable = harness.Context.StringTable;
// Run every member name through the filter and keep the ones it includes.
var includedMembers = members.Where(m => filter.Include(PathAtom.Create(stringTable, m), m));
if (OperatingSystemHelper.IsWindowsOS)
{
// On Windows, the pattern "*.*" matches all, so all members will be included.
XAssert.SetEqual(members, includedMembers, OperatingSystemHelper.PathComparer);
}
else
{
// On non-Windows, only members whose names have '.' will be included.
XAssert.SetEqual(new[] { "foo.hpp", ".gitignore" }, includedMembers, OperatingSystemHelper.PathComparer);
}
}
[Fact]
public void DirectoryEnumerationsWithWhitespaceFilename()
{

Просмотреть файл

@ -235,7 +235,7 @@ namespace BuildXL.FrontEnd.Sdk.FileSystem
// in test code, with the assumption being that no other wildcards are specified in the pattern.
private static Regex CreateRegexFromPattern(string pattern)
{
if (pattern == "*.*")
if (pattern == "*.*" && OperatingSystemHelper.IsWindowsOS)
{
pattern = "*";
}

Просмотреть файл

@ -97,7 +97,7 @@ namespace Test.BuildXL.FrontEnd.Nuget
var generatedContent = result.Result.GetParent(pathTable).ToString(pathTable);
var expectedGeneratedSpecCount = 2;
var expectedGeneratedMetataFileCount = 1;
XAssert.AreEqual(expectedGeneratedSpecCount + expectedGeneratedMetataFileCount, Directory.EnumerateFiles(generatedContent, "*.*", SearchOption.AllDirectories).Count());
XAssert.AreEqual(expectedGeneratedSpecCount + expectedGeneratedMetataFileCount, Directory.EnumerateFiles(generatedContent, "*", SearchOption.AllDirectories).Count());
// package file is tested already
var packageDsc = Path.Combine(generatedContent, "package.dsc");

Просмотреть файл

@ -9,6 +9,7 @@ using Test.BuildXL.TestUtilities.Xunit;
using Test.BuildXL.FrontEnd.Core;
using Xunit;
using Xunit.Abstractions;
using System.Collections.Generic;
namespace Test.DScript.Ast.Interpretation
{
@ -24,38 +25,68 @@ namespace Test.DScript.Ast.Interpretation
[InlineData(@"glob(d`.`, '*.txt')", "a.txt", "b.txt")]
[InlineData(@"glob(d`.`, 'a.*')", "a.cs", "a.txt")]
[InlineData(@"glob(d`.`, '*')", "a.cs", "a.txt", "b.cs", "b.txt", "other", "project.dsc")]
[InlineData(@"glob(d`.`, '*.*')", "a.cs", "a.txt", "b.cs", "b.txt", "other", "project.dsc")]
private void GlobCurrentFolder(string globFunction, params string[] expectedPaths)
{
TestGlob(globFunction, expectedPaths);
}
/// <summary>
/// Globs the current folder with the legacy pattern "*.*" and checks the platform-dependent result:
/// on Windows the extension-less entry "other" is expected as well, while on other platforms
/// only the names containing '.' are expected.
/// </summary>
[Fact]
private void GlobCurrentFolderWithWinLegacyPattern()
{
// On Windows, the legacy pattern "*.*" matches all.
TestGlob(
@"glob(d`.`, '*.*')",
OperatingSystemHelper.IsWindowsOS
? new []{ "a.cs", "a.txt", "b.cs", "b.txt", "other", "project.dsc" }
: new[] { "a.cs", "a.txt", "b.cs", "b.txt", "project.dsc" });
}
[Theory]
[InlineData(@"glob(d`f1`, 'a.txt')", @"f1\a.txt")]
[InlineData(@"glob(d`f1`, '*.txt')", @"f1\a.txt", @"f1\b.txt")]
[InlineData(@"glob(d`f1`, 'a.*')", @"f1\a.cs", @"f1\a.txt")]
[InlineData(@"glob(d`f1`, '*')", @"f1\a.cs", @"f1\a.txt", @"f1\b.cs", @"f1\b.txt", @"f1\other")]
[InlineData(@"glob(d`f1`, '*.*')", @"f1\a.cs", @"f1\a.txt", @"f1\b.cs", @"f1\b.txt", @"f1\other")]
private void GlobF1Folder(string globFunction, params string[] expectedPaths)
{
TestGlob(globFunction, expectedPaths);
}
/// <summary>
/// Globs the folder "f1" with the legacy pattern "*.*" and checks the platform-dependent result:
/// on Windows the extension-less entry "f1\other" is expected as well, while on other platforms
/// only the names containing '.' are expected.
/// </summary>
[Fact]
private void GlobF1FolderWithWinLegacyPattern()
{
// On Windows, the legacy pattern "*.*" matches all.
TestGlob(
@"glob(d`f1`, '*.*')",
OperatingSystemHelper.IsWindowsOS
? new[] { @"f1\a.cs", @"f1\a.txt", @"f1\b.cs", @"f1\b.txt", @"f1\other" }
: new[] { @"f1\a.cs", @"f1\a.txt", @"f1\b.cs", @"f1\b.txt" });
}
[Theory]
[InlineData(@"glob(d`.`, '*\\a.txt')", @"f1\a.txt", @"f2\a.txt")]
[InlineData(@"glob(d`.`, '*/*.txt')", @"f1\a.txt", @"f1\b.txt", @"f2\a.txt", @"f2\b.txt")]
[InlineData(@"glob(d`.`, '*\\a.*')", @"f1\a.cs", @"f1\a.txt", @"f2\a.cs", @"f2\a.txt")]
[InlineData(@"glob(d`.`, '*/other')", @"f1\other", @"f3\other", @"f4\other")]
[InlineData(@"glob(d`.`, '*\\*.*')",
@"f1\a.cs", @"f1\a.txt", @"f1\b.cs", @"f1\b.txt", @"f1\other",
@"f2\a.cs", @"f2\a.txt", @"f2\b.cs", @"f2\b.txt",
@"f3\other",
@"f4\other")]
private void GlobSkippingFolder(string globFunction, params string[] expectedPaths)
{
TestGlob(globFunction, expectedPaths);
}
/// <summary>
/// Globs one folder level down with the pattern "*\\*.*" and checks the platform-dependent result:
/// on Windows the extension-less "other" entries under f1, f3, and f4 are expected as well, while on
/// other platforms only the names containing '.' (under f1 and f2) are expected.
/// </summary>
[Fact]
private void GlobSkippingFolderWithWinLegacyPattern()
{
// On Windows, the legacy pattern "*.*" matches all.
TestGlob(
@"glob(d`.`, '*\\*.*')",
OperatingSystemHelper.IsWindowsOS
? new[] { @"f1\a.cs", @"f1\a.txt", @"f1\b.cs", @"f1\b.txt", @"f1\other",
@"f2\a.cs", @"f2\a.txt", @"f2\b.cs", @"f2\b.txt",
@"f3\other", @"f4\other" }
: new[] { @"f1\a.cs", @"f1\a.txt", @"f1\b.cs", @"f1\b.txt",
@"f2\a.cs", @"f2\a.txt", @"f2\b.cs", @"f2\b.txt" });
}
[Theory]
[InlineData(@"globFolders(d`.`, 'f*')", @"f1", @"f2", @"f3", @"f4")]
[InlineData(@"globFolders(d`.`, '*')", @"f1", @"f2", @"f3", @"f4", @"x.cs")]

Просмотреть файл

@ -67,8 +67,7 @@ namespace BuildXL.Native.IO.Unix
var matchEverythingRegex = TranslatePattern("*");
m_patternRegexes = new ConcurrentDictionary<string, Regex>
{
[ "*" ] = matchEverythingRegex,
[ "*.*" ] = matchEverythingRegex // legacy Win32 behavior
[ "*" ] = matchEverythingRegex
};
}