Pauldorsch/reconcile dependency graph logic (#1183)
* reconcile dependency graph logic
* handle null / empty conditional vars
* remove files
* current pip detector case insensitive metadata file match
* some cleanup
* fix tests
* test with reverted reqs
* Revert "test with reverted reqs"
This reverts commit 293a4b53cc
.
* disable parallelism for all but pip report
* whitespace
* pr feedback, fix ignore packages, bump versions
This commit is contained in:
Родитель
2284e06a29
Коммит
c20c3b0f56
|
@ -1,4 +1,4 @@
|
|||
namespace Microsoft.ComponentDetection.Contracts;
|
||||
namespace Microsoft.ComponentDetection.Contracts;
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
@ -65,6 +65,8 @@ public abstract class FileComponentDetector : IComponentDetector
|
|||
|
||||
protected IObservable<IComponentStream> ComponentStreams { get; private set; }
|
||||
|
||||
protected virtual bool EnableParallelism { get; set; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public async virtual Task<IndividualDetectorScanResult> ExecuteDetectorAsync(ScanRequest request, CancellationToken cancellationToken = default)
|
||||
{
|
||||
|
@ -113,7 +115,7 @@ public abstract class FileComponentDetector : IComponentDetector
|
|||
new ExecutionDataflowBlockOptions
|
||||
{
|
||||
// MaxDegreeOfParallelism is the lower of the processor count and the max threads arg that the customer passed in
|
||||
MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, maxThreads),
|
||||
MaxDegreeOfParallelism = this.EnableParallelism ? Math.Min(Environment.ProcessorCount, maxThreads) : 1,
|
||||
});
|
||||
|
||||
var preprocessedObserbable = await this.OnPrepareDetectionAsync(processRequests, detectorArgs);
|
||||
|
|
|
@ -55,7 +55,8 @@ public class ScanRequest
|
|||
public IComponentRecorder ComponentRecorder { get; private set; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets the maximum number of threads to use in parallel for executing the detection.
|
||||
/// Gets the maximum number of threads to use in parallel for executing the detection, assuming parallelism is
|
||||
/// enabled for the detector.
|
||||
/// </summary>
|
||||
public int MaxThreads { get; private set; }
|
||||
}
|
||||
|
|
|
@ -11,4 +11,10 @@ public interface IPythonCommandService
|
|||
Task<IList<(string PackageString, GitComponent Component)>> ParseFileAsync(string path, string pythonPath = null);
|
||||
|
||||
Task<string> GetPythonVersionAsync(string pythonPath = null);
|
||||
|
||||
/// <summary>
|
||||
/// Gets the OS type as reported by Python's sys.platform; see https://docs.python.org/3/library/sys.html#sys.platform .
|
||||
/// </summary>
|
||||
/// <returns>OS type where the python script runs.</returns>
|
||||
Task<string> GetOsTypeAsync(string pythonPath = null);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
namespace Microsoft.ComponentDetection.Detectors.Pip;
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
|
@ -24,7 +25,7 @@ public class PipDependencySpecification
|
|||
/// <summary>
|
||||
/// These are packages that we don't want to evaluate in our graph as they are generally python builtins.
|
||||
/// </summary>
|
||||
private static readonly HashSet<string> PackagesToIgnore = new HashSet<string>
|
||||
public static readonly HashSet<string> PackagesToIgnore = new HashSet<string>
|
||||
{
|
||||
"-markerlib",
|
||||
"pip",
|
||||
|
@ -154,7 +155,7 @@ public class PipDependencySpecification
|
|||
var conditionalVar = conditionalMatch.Groups[2].Value;
|
||||
var conditionalOperator = conditionalMatch.Groups[3].Value;
|
||||
var conditionalValue = conditionalMatch.Groups[4].Value;
|
||||
if (!pythonEnvironmentVariables.ContainsKey(conditionalVar))
|
||||
if (!pythonEnvironmentVariables.ContainsKey(conditionalVar) || string.IsNullOrEmpty(pythonEnvironmentVariables[conditionalVar]))
|
||||
{
|
||||
continue; // If the variable isn't in the environment, we can't evaluate it.
|
||||
}
|
||||
|
@ -175,7 +176,7 @@ public class PipDependencySpecification
|
|||
else if (string.Equals(conditionalVar, "sys_platform", System.StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
// if the platform is not windows or linux (empty string in env var), allow the package to be added. Otherwise, ensure it matches the python condition
|
||||
conditionMet = string.IsNullOrEmpty(pythonEnvironmentVariables[conditionalVar]) || string.Equals(pythonEnvironmentVariables[conditionalVar], conditionalValue, System.StringComparison.OrdinalIgnoreCase);
|
||||
conditionMet = string.Equals(pythonEnvironmentVariables[conditionalVar], conditionalValue, System.StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -195,4 +196,17 @@ public class PipDependencySpecification
|
|||
|
||||
return conditionsMet;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Common method that can be used to determine whether this package is a valid parent
|
||||
/// package of another package. Note that this logic is not perfect, it does not
|
||||
/// respect all of the environment identifiers, nor does it correctly handle extras (it ignores
|
||||
/// them).
|
||||
/// </summary>
|
||||
/// <param name="pythonEnvironmentVariables">Dictionary of environment variables used to evaluate the environment conditions, such as the OS this is executing on.</param>
|
||||
/// <returns>Whether or not this package is valid as a parent package.</returns>
|
||||
public bool IsValidParentPackage(Dictionary<string, string> pythonEnvironmentVariables) =>
|
||||
!this.PackageIsUnsafe()
|
||||
&& this.PackageConditionsMet(pythonEnvironmentVariables)
|
||||
&& !this.ConditionalDependencySpecifiers.Any(s => s.Contains("extra ==", StringComparison.OrdinalIgnoreCase));
|
||||
}
|
||||
|
|
|
@ -99,7 +99,11 @@ public sealed class PyPiClient : IPyPiClient, IDisposable
|
|||
|
||||
var package = new ZipArchive(await response.Content.ReadAsStreamAsync());
|
||||
|
||||
var entry = package.GetEntry($"{name.Replace('-', '_')}-{version}.dist-info/METADATA");
|
||||
var entryName = $"{name.Replace('-', '_')}-{version}.dist-info/METADATA";
|
||||
|
||||
// first try the exact (case-sensitive) dictionary lookup O(1), then fall back to a case-insensitive match O(entries)
|
||||
var entry = package.GetEntry(entryName)
|
||||
?? package.Entries.FirstOrDefault(x => string.Equals(x.FullName, entryName, StringComparison.OrdinalIgnoreCase));
|
||||
|
||||
// If there is no metadata file, the package doesn't have any declared dependencies
|
||||
if (entry == null)
|
||||
|
|
|
@ -4,7 +4,6 @@ using System;
|
|||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Reactive.Linq;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.ComponentDetection.Contracts;
|
||||
|
@ -39,7 +38,7 @@ public class PipComponentDetector : FileComponentDetector
|
|||
|
||||
public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };
|
||||
|
||||
public override int Version { get; } = 9;
|
||||
public override int Version { get; } = 10;
|
||||
|
||||
protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
|
||||
{
|
||||
|
@ -55,11 +54,7 @@ public class PipComponentDetector : FileComponentDetector
|
|||
var pythonVersion = await this.pythonCommandService.GetPythonVersionAsync(pythonExePath);
|
||||
this.pythonResolver.SetPythonEnvironmentVariable("python_version", pythonVersion);
|
||||
|
||||
var pythonPlatformString = RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
|
||||
? "win32"
|
||||
: RuntimeInformation.IsOSPlatform(OSPlatform.Linux)
|
||||
? "linux"
|
||||
: string.Empty;
|
||||
var pythonPlatformString = await this.pythonCommandService.GetOsTypeAsync(pythonExePath);
|
||||
this.pythonResolver.SetPythonEnvironmentVariable("sys_platform", pythonPlatformString);
|
||||
}
|
||||
|
||||
|
|
|
@ -30,18 +30,24 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
|
|||
|
||||
private readonly IPipCommandService pipCommandService;
|
||||
private readonly IEnvironmentVariableService envVarService;
|
||||
private readonly IPythonCommandService pythonCommandService;
|
||||
private readonly IPythonResolver pythonResolver;
|
||||
|
||||
public PipReportComponentDetector(
|
||||
IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
|
||||
IObservableDirectoryWalkerFactory walkerFactory,
|
||||
IPipCommandService pipCommandService,
|
||||
IEnvironmentVariableService envVarService,
|
||||
IPythonCommandService pythonCommandService,
|
||||
IPythonResolver pythonResolver,
|
||||
ILogger<PipReportComponentDetector> logger)
|
||||
{
|
||||
this.ComponentStreamEnumerableFactory = componentStreamEnumerableFactory;
|
||||
this.Scanner = walkerFactory;
|
||||
this.pipCommandService = pipCommandService;
|
||||
this.envVarService = envVarService;
|
||||
this.pythonCommandService = pythonCommandService;
|
||||
this.pythonResolver = pythonResolver;
|
||||
this.Logger = logger;
|
||||
}
|
||||
|
||||
|
@ -53,7 +59,9 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
|
|||
|
||||
public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };
|
||||
|
||||
public override int Version { get; } = 2;
|
||||
public override int Version { get; } = 3;
|
||||
|
||||
protected override bool EnableParallelism { get; set; } = true;
|
||||
|
||||
protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
|
||||
{
|
||||
|
@ -75,6 +83,22 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
|
|||
return Enumerable.Empty<ProcessRequest>().ToObservable();
|
||||
}
|
||||
|
||||
this.CurrentScanRequest.DetectorArgs.TryGetValue("Pip.PythonExePath", out var pythonExePath);
|
||||
if (!await this.pythonCommandService.PythonExistsAsync(pythonExePath))
|
||||
{
|
||||
this.Logger.LogInformation($"No python found on system. Python detection will not run.");
|
||||
|
||||
return Enumerable.Empty<ProcessRequest>().ToObservable();
|
||||
}
|
||||
else
|
||||
{
|
||||
var pythonVersion = await this.pythonCommandService.GetPythonVersionAsync(pythonExePath);
|
||||
this.pythonResolver.SetPythonEnvironmentVariable("python_version", pythonVersion);
|
||||
|
||||
var pythonPlatformString = await this.pythonCommandService.GetOsTypeAsync(pythonExePath);
|
||||
this.pythonResolver.SetPythonEnvironmentVariable("sys_platform", pythonPlatformString);
|
||||
}
|
||||
|
||||
return processRequests;
|
||||
}
|
||||
|
||||
|
@ -169,11 +193,16 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
|
|||
// graph ourselves using the requires_dist field.
|
||||
var dependenciesByPkg = new Dictionary<string, List<PipDependencySpecification>>(StringComparer.OrdinalIgnoreCase);
|
||||
var nodeReferences = new Dictionary<string, PipReportGraphNode>(StringComparer.OrdinalIgnoreCase);
|
||||
var pythonEnvVars = this.pythonResolver.GetPythonEnvironmentVariables();
|
||||
|
||||
foreach (var package in report.InstallItems)
|
||||
{
|
||||
// Normalize the package name to ensure consistency between the package name and the graph nodes.
|
||||
var normalizedPkgName = PipReportUtilities.NormalizePackageNameFormat(package.Metadata.Name);
|
||||
if (PipDependencySpecification.PackagesToIgnore.Contains(normalizedPkgName))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var node = new PipReportGraphNode(
|
||||
new PipComponent(
|
||||
|
@ -200,7 +229,7 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
|
|||
// futures; python_version <= \"2.7\"
|
||||
// sphinx (!=1.8.0,!=3.1.0,!=3.1.1,>=1.6.5) ; extra == 'docs'
|
||||
var dependencySpec = new PipDependencySpecification($"Requires-Dist: {dependency}", requiresDist: true);
|
||||
if (dependencySpec.PackageIsUnsafe())
|
||||
if (!dependencySpec.IsValidParentPackage(pythonEnvVars))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -181,4 +181,11 @@ public class PythonCommandService : IPythonCommandService
|
|||
var match = version.Match(versionResult.StdOut);
|
||||
return match.Success ? match.Groups[1].Value : null;
|
||||
}
|
||||
|
||||
public async Task<string> GetOsTypeAsync(string pythonPath)
|
||||
{
|
||||
var pythonCommand = await this.ResolvePythonAsync(pythonPath);
|
||||
var versionResult = await this.commandLineInvocationService.ExecuteCommandAsync(pythonCommand, new List<string> { "python3", "python2" }, "-c", "\"import sys; print(sys.platform);\"");
|
||||
return versionResult.ExitCode == 0 && string.IsNullOrEmpty(versionResult.StdErr) ? versionResult.StdOut.Trim() : null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -85,7 +85,7 @@ public class PythonResolver : PythonResolverBase, IPythonResolver
|
|||
var (root, currentNode) = state.ProcessingQueue.Dequeue();
|
||||
|
||||
// gather all dependencies for the current node
|
||||
var dependencies = (await this.FetchPackageDependenciesAsync(state, currentNode)).Where(x => !x.PackageIsUnsafe()).Where(x => x.PackageConditionsMet(this.pythonEnvironmentVariables)).ToList();
|
||||
var dependencies = (await this.FetchPackageDependenciesAsync(state, currentNode)).Where(x => x.IsValidParentPackage(this.pythonEnvironmentVariables)).ToList();
|
||||
|
||||
foreach (var dependencyNode in dependencies)
|
||||
{
|
||||
|
|
|
@ -139,4 +139,26 @@ public class PipDependencySpecifierTests
|
|||
|
||||
VerifyPipConditionalDependencyParsing(specs, pythonEnvironmentVariables, true);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void TestPipDependencyRequireDistConditionalDependenciesMet_Empty()
|
||||
{
|
||||
var specs = new List<(string, bool, PipDependencySpecification)>
|
||||
{
|
||||
("Requires-Dist: TestPackage (>=2.0.1) ; python_version == \"3.8\" and sys_platform == \"linux\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=2.0.1" }, ConditionalDependencySpecifiers = new List<string> { "python_version == \"3.8\"", "and sys_platform == \"linux\"" } }),
|
||||
("Requires-Dist: TestPackage (>=4.0.1) ; python_version == \"3.6\" and sys_platform == \"win32\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=4.0.1" }, ConditionalDependencySpecifiers = new List<string> { "python_version == \"3.6\"", "and sys_platform == \"win32\"" } }),
|
||||
("Requires-Dist: TestPackage (>=5.0.1) ; sys_platform == \"linux\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=5.0.1" }, ConditionalDependencySpecifiers = new List<string> { "sys_platform == \"linux\"" } }),
|
||||
("Requires-Dist: TestPackage (>=5.0.1) ; sys_platform == \"win32\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=5.0.1" }, ConditionalDependencySpecifiers = new List<string> { "sys_platform == \"win32\"" } }),
|
||||
("Requires-Dist: TestPackage (>=5.0.1) ; sys_platform == \"asdf\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=5.0.1" }, ConditionalDependencySpecifiers = new List<string> { "sys_platform == \"asdf\"" } }),
|
||||
};
|
||||
|
||||
// test null and empty cases should allow packages through
|
||||
var pythonEnvironmentVariables = new Dictionary<string, string>
|
||||
{
|
||||
{ "python_version", null },
|
||||
{ "sys_platform", string.Empty },
|
||||
};
|
||||
|
||||
VerifyPipConditionalDependencyParsing(specs, pythonEnvironmentVariables, true);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,8 @@ using Newtonsoft.Json;
|
|||
public class PipReportComponentDetectorTests : BaseDetectorTest<PipReportComponentDetector>
|
||||
{
|
||||
private readonly Mock<IPipCommandService> pipCommandService;
|
||||
private readonly Mock<IPythonCommandService> pythonCommandService;
|
||||
private readonly Mock<IPythonResolver> pythonResolver;
|
||||
private readonly Mock<IEnvironmentVariableService> mockEnvVarService;
|
||||
private readonly Mock<ILogger<PipReportComponentDetector>> mockLogger;
|
||||
|
||||
|
@ -35,6 +37,14 @@ public class PipReportComponentDetectorTests : BaseDetectorTest<PipReportCompone
|
|||
this.pipCommandService = new Mock<IPipCommandService>();
|
||||
this.DetectorTestUtility.AddServiceMock(this.pipCommandService);
|
||||
|
||||
this.pythonCommandService = new Mock<IPythonCommandService>();
|
||||
this.pythonCommandService.Setup(x => x.PythonExistsAsync(It.IsAny<string>())).ReturnsAsync(true);
|
||||
this.DetectorTestUtility.AddServiceMock(this.pythonCommandService);
|
||||
|
||||
this.pythonResolver = new Mock<IPythonResolver>();
|
||||
this.pythonResolver.Setup(x => x.GetPythonEnvironmentVariables()).Returns(new Dictionary<string, string>());
|
||||
this.DetectorTestUtility.AddServiceMock(this.pythonResolver);
|
||||
|
||||
this.mockLogger = new Mock<ILogger<PipReportComponentDetector>>();
|
||||
this.DetectorTestUtility.AddServiceMock(this.mockLogger);
|
||||
|
||||
|
@ -434,11 +444,10 @@ public class PipReportComponentDetectorTests : BaseDetectorTest<PipReportCompone
|
|||
var jupyterGraph = graphsByLocations[file1];
|
||||
|
||||
var jupyterLabDependencies = jupyterGraph.GetDependenciesForComponent(jupyterComponent.Id);
|
||||
jupyterLabDependencies.Should().HaveCount(15);
|
||||
jupyterLabDependencies.Should().HaveCount(12);
|
||||
jupyterLabDependencies.Should().Contain("async-lru 2.0.4 - pip");
|
||||
jupyterLabDependencies.Should().Contain("jupyter-server 2.14.0 - pip");
|
||||
jupyterLabDependencies.Should().Contain("traitlets 5.14.3 - pip");
|
||||
jupyterLabDependencies.Should().Contain("requests 2.32.2 - pip");
|
||||
jupyterLabDependencies.Should().Contain("jupyter-lsp 2.2.5 - pip");
|
||||
|
||||
var bleachComponent = pipComponents.Single(x => ((PipComponent)x.Component).Name.Equals("bleach")).Component as PipComponent;
|
||||
|
@ -447,10 +456,9 @@ public class PipReportComponentDetectorTests : BaseDetectorTest<PipReportCompone
|
|||
bleachComponent.License.Should().Be("Apache Software License");
|
||||
|
||||
var bleachDependencies = jupyterGraph.GetDependenciesForComponent(bleachComponent.Id);
|
||||
bleachDependencies.Should().HaveCount(3);
|
||||
bleachDependencies.Should().HaveCount(2);
|
||||
bleachDependencies.Should().Contain("six 1.16.0 - pip");
|
||||
bleachDependencies.Should().Contain("webencodings 0.5.1 - pip");
|
||||
bleachDependencies.Should().Contain("tinycss2 1.3.0 - pip");
|
||||
|
||||
ComponentRecorderTestUtilities.CheckChild<PipComponent>(
|
||||
componentRecorder,
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
#
|
||||
# This file is autogenerated by pip-compile with Python 3.12
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile --strip-extras
|
||||
#
|
||||
azure-core==1.30.0
|
||||
# via
|
||||
# azure-identity
|
||||
# msrest
|
||||
azure-devops==7.1.0b4
|
||||
# via -r requirements.in
|
||||
azure-identity==1.16.0b1
|
||||
# via -r requirements.in
|
||||
certifi==2024.2.2
|
||||
# via
|
||||
# msrest
|
||||
# requests
|
||||
cffi==1.16.0
|
||||
# via cryptography
|
||||
charset-normalizer==3.3.2
|
||||
# via requests
|
||||
cryptography==42.0.4
|
||||
# via
|
||||
# azure-identity
|
||||
# msal
|
||||
# pyjwt
|
||||
gitdb==4.0.11
|
||||
# via gitpython
|
||||
gitpython==3.1.42
|
||||
# via -r requirements.in
|
||||
idna==3.6
|
||||
# via requests
|
||||
isodate==0.6.1
|
||||
# via msrest
|
||||
msal==1.27.0
|
||||
# via
|
||||
# azure-identity
|
||||
# msal-extensions
|
||||
msal-extensions==1.1.0
|
||||
# via azure-identity
|
||||
msrest==0.7.1
|
||||
# via
|
||||
# -r requirements.in
|
||||
# azure-devops
|
||||
oauthlib==3.2.2
|
||||
# via requests-oauthlib
|
||||
packaging==23.2
|
||||
# via msal-extensions
|
||||
portalocker==2.8.2
|
||||
# via msal-extensions
|
||||
pycparser==2.21
|
||||
# via cffi
|
||||
pyjwt==2.8.0
|
||||
# via
|
||||
# msal
|
||||
# pyjwt
|
||||
requests==2.31.0
|
||||
# via
|
||||
# azure-core
|
||||
# msal
|
||||
# msrest
|
||||
# requests-oauthlib
|
||||
requests-oauthlib==1.3.1
|
||||
# via msrest
|
||||
six==1.16.0
|
||||
# via
|
||||
# azure-core
|
||||
# isodate
|
||||
smmap==5.0.1
|
||||
# via gitdb
|
||||
typing-extensions==4.9.0
|
||||
# via azure-core
|
||||
urllib3==2.2.1
|
||||
# via requests
|
|
@ -1 +1 @@
|
|||
requests [security] >= 2.32.3, == 2.32.* ; python_version > "2.7"
|
||||
requests [socks] >= 2.32.3, == 2.32.* ; python_version > "2.7"
|
||||
|
|
Загрузка…
Ссылка в новой задаче