Pauldorsch/reconcile dependency graph logic (#1183)

* reconcile dependency graph logic

* handle null / empty conditional vars

* remove files

* current pip detector case insensitive metadata file match

* some cleanup

* fix tests

* test with reverted reqs

* Revert "test with reverted reqs"

This reverts commit 293a4b53cc.

* disable parallelism for all but pip report

* whitespace

* pr feedback, fix ignore packages, bump versions
This commit is contained in:
Paul Dorsch 2024-06-21 13:25:42 -07:00 коммит произвёл GitHub
Родитель 2284e06a29
Коммит c20c3b0f56
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
13 изменённых файлов: 185 добавлений и 22 удалений

Просмотреть файл

@ -1,4 +1,4 @@
namespace Microsoft.ComponentDetection.Contracts;
namespace Microsoft.ComponentDetection.Contracts;
using System;
using System.Collections.Generic;
@ -65,6 +65,8 @@ public abstract class FileComponentDetector : IComponentDetector
protected IObservable<IComponentStream> ComponentStreams { get; private set; }
protected virtual bool EnableParallelism { get; set; }
/// <inheritdoc />
public async virtual Task<IndividualDetectorScanResult> ExecuteDetectorAsync(ScanRequest request, CancellationToken cancellationToken = default)
{
@ -113,7 +115,7 @@ public abstract class FileComponentDetector : IComponentDetector
new ExecutionDataflowBlockOptions
{
// MaxDegreeOfParallelism is the lower of the processor count and the max threads arg that the customer passed in
MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, maxThreads),
MaxDegreeOfParallelism = this.EnableParallelism ? Math.Min(Environment.ProcessorCount, maxThreads) : 1,
});
var preprocessedObserbable = await this.OnPrepareDetectionAsync(processRequests, detectorArgs);

Просмотреть файл

@ -55,7 +55,8 @@ public class ScanRequest
public IComponentRecorder ComponentRecorder { get; private set; }
/// <summary>
/// Gets the maximum number of threads to use in parallel for executing the detection.
/// Gets the maximum number of threads to use in parallel for executing the detection, assuming parallelism is
/// enabled for the detector.
/// </summary>
public int MaxThreads { get; private set; }
}

Просмотреть файл

@ -11,4 +11,10 @@ public interface IPythonCommandService
Task<IList<(string PackageString, GitComponent Component)>> ParseFileAsync(string path, string pythonPath = null);
Task<string> GetPythonVersionAsync(string pythonPath = null);
/// <summary>
/// Gets the os type using: https://docs.python.org/3/library/sys.html#sys.platform .
/// </summary>
/// <returns>OS type where the python script runs.</returns>
Task<string> GetOsTypeAsync(string pythonPath = null);
}

Просмотреть файл

@ -1,5 +1,6 @@
namespace Microsoft.ComponentDetection.Detectors.Pip;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
@ -24,7 +25,7 @@ public class PipDependencySpecification
/// <summary>
/// These are packages that we don't want to evaluate in our graph as they are generally python builtins.
/// </summary>
private static readonly HashSet<string> PackagesToIgnore = new HashSet<string>
public static readonly HashSet<string> PackagesToIgnore = new HashSet<string>
{
"-markerlib",
"pip",
@ -154,7 +155,7 @@ public class PipDependencySpecification
var conditionalVar = conditionalMatch.Groups[2].Value;
var conditionalOperator = conditionalMatch.Groups[3].Value;
var conditionalValue = conditionalMatch.Groups[4].Value;
if (!pythonEnvironmentVariables.ContainsKey(conditionalVar))
if (!pythonEnvironmentVariables.ContainsKey(conditionalVar) || string.IsNullOrEmpty(pythonEnvironmentVariables[conditionalVar]))
{
continue; // If the variable isn't in the environment, we can't evaluate it.
}
@ -175,7 +176,7 @@ public class PipDependencySpecification
else if (string.Equals(conditionalVar, "sys_platform", System.StringComparison.OrdinalIgnoreCase))
{
// if the platform is not windows or linux (empty string in env var), allow the package to be added. Otherwise, ensure it matches the python condition
conditionMet = string.IsNullOrEmpty(pythonEnvironmentVariables[conditionalVar]) || string.Equals(pythonEnvironmentVariables[conditionalVar], conditionalValue, System.StringComparison.OrdinalIgnoreCase);
conditionMet = string.Equals(pythonEnvironmentVariables[conditionalVar], conditionalValue, System.StringComparison.OrdinalIgnoreCase);
}
else
{
@ -195,4 +196,17 @@ public class PipDependencySpecification
return conditionsMet;
}
/// <summary>
/// Common method that can be used to determine whether this package is a valid parent
/// package of another package. Note that this logic is not perfect, it does not
/// respect all of the environment identifiers, nor does it correctly handle extras (it ignores
/// them).
/// </summary>
/// <param name="pythonEnvironmentVariables">List of environment variables used to evaluate the environmant conditions, such as OS this is executing on.</param>
/// <returns>Whether or not this package is valid as a parent package.</returns>
public bool IsValidParentPackage(Dictionary<string, string> pythonEnvironmentVariables) =>
!this.PackageIsUnsafe()
&& this.PackageConditionsMet(pythonEnvironmentVariables)
&& !this.ConditionalDependencySpecifiers.Any(s => s.Contains("extra ==", StringComparison.OrdinalIgnoreCase));
}

Просмотреть файл

@ -99,7 +99,11 @@ public sealed class PyPiClient : IPyPiClient, IDisposable
var package = new ZipArchive(await response.Content.ReadAsStreamAsync());
var entry = package.GetEntry($"{name.Replace('-', '_')}-{version}.dist-info/METADATA");
var entryName = $"{name.Replace('-', '_')}-{version}.dist-info/METADATA";
// first try case insensitive dicitonary lookup O(1), then attempt case-insensitive match O(entries)
var entry = package.GetEntry(entryName)
?? package.Entries.FirstOrDefault(x => string.Equals(x.FullName, entryName, StringComparison.OrdinalIgnoreCase));
// If there is no metadata file, the package doesn't have any declared dependencies
if (entry == null)

Просмотреть файл

@ -4,7 +4,6 @@ using System;
using System.Collections.Generic;
using System.Linq;
using System.Reactive.Linq;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Contracts;
@ -39,7 +38,7 @@ public class PipComponentDetector : FileComponentDetector
public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };
public override int Version { get; } = 9;
public override int Version { get; } = 10;
protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
{
@ -55,11 +54,7 @@ public class PipComponentDetector : FileComponentDetector
var pythonVersion = await this.pythonCommandService.GetPythonVersionAsync(pythonExePath);
this.pythonResolver.SetPythonEnvironmentVariable("python_version", pythonVersion);
var pythonPlatformString = RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
? "win32"
: RuntimeInformation.IsOSPlatform(OSPlatform.Linux)
? "linux"
: string.Empty;
var pythonPlatformString = await this.pythonCommandService.GetOsTypeAsync(pythonExePath);
this.pythonResolver.SetPythonEnvironmentVariable("sys_platform", pythonPlatformString);
}

Просмотреть файл

@ -30,18 +30,24 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
private readonly IPipCommandService pipCommandService;
private readonly IEnvironmentVariableService envVarService;
private readonly IPythonCommandService pythonCommandService;
private readonly IPythonResolver pythonResolver;
public PipReportComponentDetector(
IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
IObservableDirectoryWalkerFactory walkerFactory,
IPipCommandService pipCommandService,
IEnvironmentVariableService envVarService,
IPythonCommandService pythonCommandService,
IPythonResolver pythonResolver,
ILogger<PipReportComponentDetector> logger)
{
this.ComponentStreamEnumerableFactory = componentStreamEnumerableFactory;
this.Scanner = walkerFactory;
this.pipCommandService = pipCommandService;
this.envVarService = envVarService;
this.pythonCommandService = pythonCommandService;
this.pythonResolver = pythonResolver;
this.Logger = logger;
}
@ -53,7 +59,9 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };
public override int Version { get; } = 2;
public override int Version { get; } = 3;
protected override bool EnableParallelism { get; set; } = true;
protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
{
@ -75,6 +83,22 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
return Enumerable.Empty<ProcessRequest>().ToObservable();
}
this.CurrentScanRequest.DetectorArgs.TryGetValue("Pip.PythonExePath", out var pythonExePath);
if (!await this.pythonCommandService.PythonExistsAsync(pythonExePath))
{
this.Logger.LogInformation($"No python found on system. Python detection will not run.");
return Enumerable.Empty<ProcessRequest>().ToObservable();
}
else
{
var pythonVersion = await this.pythonCommandService.GetPythonVersionAsync(pythonExePath);
this.pythonResolver.SetPythonEnvironmentVariable("python_version", pythonVersion);
var pythonPlatformString = await this.pythonCommandService.GetOsTypeAsync(pythonExePath);
this.pythonResolver.SetPythonEnvironmentVariable("sys_platform", pythonPlatformString);
}
return processRequests;
}
@ -169,11 +193,16 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
// graph ourselves using the requires_dist field.
var dependenciesByPkg = new Dictionary<string, List<PipDependencySpecification>>(StringComparer.OrdinalIgnoreCase);
var nodeReferences = new Dictionary<string, PipReportGraphNode>(StringComparer.OrdinalIgnoreCase);
var pythonEnvVars = this.pythonResolver.GetPythonEnvironmentVariables();
foreach (var package in report.InstallItems)
{
// Normalize the package name to ensure consistency between the package name and the graph nodes.
var normalizedPkgName = PipReportUtilities.NormalizePackageNameFormat(package.Metadata.Name);
if (PipDependencySpecification.PackagesToIgnore.Contains(normalizedPkgName))
{
continue;
}
var node = new PipReportGraphNode(
new PipComponent(
@ -200,7 +229,7 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
// futures; python_version <= \"2.7\"
// sphinx (!=1.8.0,!=3.1.0,!=3.1.1,>=1.6.5) ; extra == 'docs'
var dependencySpec = new PipDependencySpecification($"Requires-Dist: {dependency}", requiresDist: true);
if (dependencySpec.PackageIsUnsafe())
if (!dependencySpec.IsValidParentPackage(pythonEnvVars))
{
continue;
}

Просмотреть файл

@ -181,4 +181,11 @@ public class PythonCommandService : IPythonCommandService
var match = version.Match(versionResult.StdOut);
return match.Success ? match.Groups[1].Value : null;
}
public async Task<string> GetOsTypeAsync(string pythonPath)
{
var pythonCommand = await this.ResolvePythonAsync(pythonPath);
var versionResult = await this.commandLineInvocationService.ExecuteCommandAsync(pythonCommand, new List<string> { "python3", "python2" }, "-c", "\"import sys; print(sys.platform);\"");
return versionResult.ExitCode == 0 && string.IsNullOrEmpty(versionResult.StdErr) ? versionResult.StdOut.Trim() : null;
}
}

Просмотреть файл

@ -85,7 +85,7 @@ public class PythonResolver : PythonResolverBase, IPythonResolver
var (root, currentNode) = state.ProcessingQueue.Dequeue();
// gather all dependencies for the current node
var dependencies = (await this.FetchPackageDependenciesAsync(state, currentNode)).Where(x => !x.PackageIsUnsafe()).Where(x => x.PackageConditionsMet(this.pythonEnvironmentVariables)).ToList();
var dependencies = (await this.FetchPackageDependenciesAsync(state, currentNode)).Where(x => x.IsValidParentPackage(this.pythonEnvironmentVariables)).ToList();
foreach (var dependencyNode in dependencies)
{

Просмотреть файл

@ -139,4 +139,26 @@ public class PipDependencySpecifierTests
VerifyPipConditionalDependencyParsing(specs, pythonEnvironmentVariables, true);
}
[TestMethod]
public void TestPipDependencyRequireDistConditionalDependenciesMet_Empty()
{
var specs = new List<(string, bool, PipDependencySpecification)>
{
("Requires-Dist: TestPackage (>=2.0.1) ; python_version == \"3.8\" and sys_platform == \"linux\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=2.0.1" }, ConditionalDependencySpecifiers = new List<string> { "python_version == \"3.8\"", "and sys_platform == \"linux\"" } }),
("Requires-Dist: TestPackage (>=4.0.1) ; python_version == \"3.6\" and sys_platform == \"win32\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=4.0.1" }, ConditionalDependencySpecifiers = new List<string> { "python_version == \"3.6\"", "and sys_platform == \"win32\"" } }),
("Requires-Dist: TestPackage (>=5.0.1) ; sys_platform == \"linux\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=5.0.1" }, ConditionalDependencySpecifiers = new List<string> { "sys_platform == \"linux\"" } }),
("Requires-Dist: TestPackage (>=5.0.1) ; sys_platform == \"win32\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=5.0.1" }, ConditionalDependencySpecifiers = new List<string> { "sys_platform == \"win32\"" } }),
("Requires-Dist: TestPackage (>=5.0.1) ; sys_platform == \"asdf\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=5.0.1" }, ConditionalDependencySpecifiers = new List<string> { "sys_platform == \"asdf\"" } }),
};
// test null and empty cases should allow packages through
var pythonEnvironmentVariables = new Dictionary<string, string>
{
{ "python_version", null },
{ "sys_platform", string.Empty },
};
VerifyPipConditionalDependencyParsing(specs, pythonEnvironmentVariables, true);
}
}

Просмотреть файл

@ -21,6 +21,8 @@ using Newtonsoft.Json;
public class PipReportComponentDetectorTests : BaseDetectorTest<PipReportComponentDetector>
{
private readonly Mock<IPipCommandService> pipCommandService;
private readonly Mock<IPythonCommandService> pythonCommandService;
private readonly Mock<IPythonResolver> pythonResolver;
private readonly Mock<IEnvironmentVariableService> mockEnvVarService;
private readonly Mock<ILogger<PipReportComponentDetector>> mockLogger;
@ -35,6 +37,14 @@ public class PipReportComponentDetectorTests : BaseDetectorTest<PipReportCompone
this.pipCommandService = new Mock<IPipCommandService>();
this.DetectorTestUtility.AddServiceMock(this.pipCommandService);
this.pythonCommandService = new Mock<IPythonCommandService>();
this.pythonCommandService.Setup(x => x.PythonExistsAsync(It.IsAny<string>())).ReturnsAsync(true);
this.DetectorTestUtility.AddServiceMock(this.pythonCommandService);
this.pythonResolver = new Mock<IPythonResolver>();
this.pythonResolver.Setup(x => x.GetPythonEnvironmentVariables()).Returns(new Dictionary<string, string>());
this.DetectorTestUtility.AddServiceMock(this.pythonResolver);
this.mockLogger = new Mock<ILogger<PipReportComponentDetector>>();
this.DetectorTestUtility.AddServiceMock(this.mockLogger);
@ -434,11 +444,10 @@ public class PipReportComponentDetectorTests : BaseDetectorTest<PipReportCompone
var jupyterGraph = graphsByLocations[file1];
var jupyterLabDependencies = jupyterGraph.GetDependenciesForComponent(jupyterComponent.Id);
jupyterLabDependencies.Should().HaveCount(15);
jupyterLabDependencies.Should().HaveCount(12);
jupyterLabDependencies.Should().Contain("async-lru 2.0.4 - pip");
jupyterLabDependencies.Should().Contain("jupyter-server 2.14.0 - pip");
jupyterLabDependencies.Should().Contain("traitlets 5.14.3 - pip");
jupyterLabDependencies.Should().Contain("requests 2.32.2 - pip");
jupyterLabDependencies.Should().Contain("jupyter-lsp 2.2.5 - pip");
var bleachComponent = pipComponents.Single(x => ((PipComponent)x.Component).Name.Equals("bleach")).Component as PipComponent;
@ -447,10 +456,9 @@ public class PipReportComponentDetectorTests : BaseDetectorTest<PipReportCompone
bleachComponent.License.Should().Be("Apache Software License");
var bleachDependencies = jupyterGraph.GetDependenciesForComponent(bleachComponent.Id);
bleachDependencies.Should().HaveCount(3);
bleachDependencies.Should().HaveCount(2);
bleachDependencies.Should().Contain("six 1.16.0 - pip");
bleachDependencies.Should().Contain("webencodings 0.5.1 - pip");
bleachDependencies.Should().Contain("tinycss2 1.3.0 - pip");
ComponentRecorderTestUtilities.CheckChild<PipComponent>(
componentRecorder,

Просмотреть файл

@ -0,0 +1,75 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile --strip-extras
#
azure-core==1.30.0
# via
# azure-identity
# msrest
azure-devops==7.1.0b4
# via -r requirements.in
azure-identity==1.16.0b1
# via -r requirements.in
certifi==2024.2.2
# via
# msrest
# requests
cffi==1.16.0
# via cryptography
charset-normalizer==3.3.2
# via requests
cryptography==42.0.4
# via
# azure-identity
# msal
# pyjwt
gitdb==4.0.11
# via gitpython
gitpython==3.1.42
# via -r requirements.in
idna==3.6
# via requests
isodate==0.6.1
# via msrest
msal==1.27.0
# via
# azure-identity
# msal-extensions
msal-extensions==1.1.0
# via azure-identity
msrest==0.7.1
# via
# -r requirements.in
# azure-devops
oauthlib==3.2.2
# via requests-oauthlib
packaging==23.2
# via msal-extensions
portalocker==2.8.2
# via msal-extensions
pycparser==2.21
# via cffi
pyjwt==2.8.0
# via
# msal
# pyjwt
requests==2.31.0
# via
# azure-core
# msal
# msrest
# requests-oauthlib
requests-oauthlib==1.3.1
# via msrest
six==1.16.0
# via
# azure-core
# isodate
smmap==5.0.1
# via gitdb
typing-extensions==4.9.0
# via azure-core
urllib3==2.2.1
# via requests

Просмотреть файл

@ -1 +1 @@
requests [security] >= 2.32.3, == 2.32.* ; python_version > "2.7"
requests [socks] >= 2.32.3, == 2.32.* ; python_version > "2.7"