Pauldorsch/bugfix invalid pipreport files (#1205)

* ignore pregenerated pipreports that don't cover the correct set of dependencies

* add validation to the pre-generated pipreport to prevent underdetection for overridden reports

* dispose of telemetry object

* move re-used code to a common utility method
This commit is contained in:
Paul Dorsch 2024-07-19 16:09:52 -04:00 коммит произвёл GitHub
Родитель 13744eeec1
Коммит f4d84a84e8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
12 изменённых файлов: 8700 добавлений и 131 удалений

Просмотреть файл

@ -0,0 +1,12 @@
namespace Microsoft.ComponentDetection.Common.Telemetry.Records;
public class PipReportTypeTelemetryRecord : BaseDetectionTelemetryRecord
{
public override string RecordName => "PipReportType";
public bool PreGenerated { get; set; }
public string FilePath { get; set; }
public int PackageCount { get; set; }
}

Просмотреть файл

@ -70,12 +70,9 @@ public class PipComponentDetector : FileComponentDetector
try
{
var initialPackages = await this.pythonCommandService.ParseFileAsync(file.Location, pythonExePath);
var listedPackage = initialPackages.Where(tuple => tuple.PackageString != null)
.Select(tuple => tuple.PackageString)
.Where(x => !string.IsNullOrWhiteSpace(x))
.Select(x => new PipDependencySpecification(x))
.Where(x => !x.PackageIsUnsafe())
.Where(x => x.PackageConditionsMet(this.pythonResolver.GetPythonEnvironmentVariables()))
var listedPackage = SharedPipUtilities.ParsedPackagesToPipDependencies(
initialPackages,
this.pythonResolver.GetPythonEnvironmentVariables())
.ToList();
var roots = await this.pythonResolver.ResolveRootsAsync(singleFileComponentRecorder, listedPackage);

Просмотреть файл

@ -2,6 +2,7 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.IO;
using System.Linq;
@ -158,6 +159,10 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
}
var stopwatch = Stopwatch.StartNew();
using var pipReportTypeRecord = new PipReportTypeTelemetryRecord
{
FilePath = file.Location,
};
// Search for a pre-generated pip report file in the same directory as the file being scanned.
var fileParentDirectory = Path.GetDirectoryName(file.Location);
@ -190,12 +195,27 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
this.Logger.LogInformation("PipReport: Using pre-generated pip report '{ReportFile}' for package file '{File}'.", existingReport.FullName, file.Location);
var reportOutput = await this.fileUtilityService.ReadAllTextAsync(existingReport);
var report = JsonConvert.DeserializeObject<PipInstallationReport>(reportOutput);
reports.Add(report);
if (await this.IsValidPreGeneratedReportAsync(report, pythonExePath, file.Location))
{
reports.Add(report);
}
else
{
this.Logger.LogInformation(
"PipReport: Pre-generated pip report '{ReportFile}' is invalid. Did not contain all requested components in package file '{File}'.",
existingReport.FullName,
file.Location);
}
}
}
else
var foundPreGeneratedReport = reports.Any();
pipReportTypeRecord.PreGenerated = foundPreGeneratedReport;
if (!foundPreGeneratedReport)
{
this.Logger.LogInformation("PipReport: Generating pip installation report for {File}", file.Location);
pipReportTypeRecord.PreGenerated = false;
// create linked cancellation token that will cancel if the file level timeout is reached, or if the parent token is cancelled.
// default to only using parent token if the env var is not set or is invalid
@ -240,12 +260,6 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
return;
}
this.Logger.LogInformation(
"PipReport: Pip installation report for {File} completed in {TotalSeconds} seconds with {PkgCount} detected packages.",
file.Location,
stopwatch.ElapsedMilliseconds / 1000.0,
report.InstallItems?.Length ?? 0);
// Now that all installed packages are known, we can build a graph of the dependencies.
if (report.InstallItems is not null)
{
@ -254,6 +268,14 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
}
}
var packageCount = singleFileComponentRecorder.GetDetectedComponents()?.Keys?.ToImmutableHashSet().Count ?? 0;
pipReportTypeRecord.PackageCount = packageCount;
this.Logger.LogInformation(
"PipReport: Pip installation report for {File} completed in {TotalSeconds} seconds with {PkgCount} detected packages.",
file.Location,
stopwatch.ElapsedMilliseconds / 1000.0,
packageCount);
stopwatch.Stop();
}
catch (Exception e)
@ -421,12 +443,9 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
return;
}
var listedPackage = initialPackages.Where(tuple => tuple.PackageString != null)
.Select(tuple => tuple.PackageString)
.Where(x => !string.IsNullOrWhiteSpace(x))
.Select(x => new PipDependencySpecification(x))
.Where(x => !x.PackageIsUnsafe())
.Where(x => x.PackageConditionsMet(this.pythonResolver.GetPythonEnvironmentVariables()))
var listedPackage = SharedPipUtilities.ParsedPackagesToPipDependencies(
initialPackages,
this.pythonResolver.GetPythonEnvironmentVariables())
.ToList();
listedPackage.Select(x => (x.Name, Version: x.GetHighestExplicitPackageVersion()))
@ -442,6 +461,35 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe
.ForEach(gitComponent => recorder.RegisterUsage(gitComponent, isExplicitReferencedDependency: true));
}
/// <summary>
/// Confirms that the detected report at least contains all of the packages directly requested
/// in the pip file. This prevents invalid reports from being used to create the dependency graph.
/// </summary>
private async Task<bool> IsValidPreGeneratedReportAsync(PipInstallationReport report, string pythonExePath, string filePath)
{
try
{
var initialPackages = await this.pythonCommandService.ParseFileAsync(filePath, pythonExePath);
var listedPackage = SharedPipUtilities.ParsedPackagesToPipDependencies(
initialPackages,
this.pythonResolver.GetPythonEnvironmentVariables())
.Select(x => x.Name)
.ToImmutableSortedSet();
var reportRequestedPackages = report.InstallItems
.Where(package => package.Requested)
.Select(package => package.Metadata.Name)
.ToImmutableSortedSet();
return listedPackage.IsSubsetOf(reportRequestedPackages);
}
catch (Exception e)
{
this.Logger.LogWarning(e, "PipReport: Failed to validate pre-generated report for {File}", filePath);
return false;
}
}
private PipReportOverrideBehavior GetPipReportOverrideBehavior()
{
if (!this.envVarService.DoesEnvironmentVariableExist(PipReportOverrideBehaviorEnvVar))

Просмотреть файл

@ -0,0 +1,25 @@
namespace Microsoft.ComponentDetection.Detectors.Pip;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ComponentDetection.Contracts.TypedComponent;
public static class SharedPipUtilities
{
/// <summary>
/// Converts a list of parsed packages to a list of PipDependencySpecifications. Also performs some validations,
/// such as filtering out unsafe packages and checking if the package conditions are met.
/// </summary>
/// <param name="parsedPackages">List of packages and git components.</param>
/// <param name="pythonEnvironmentVariables">Python environment specifiers.</param>
/// <returns>Enumerable containing the converted, sanitized Pip dependency specs.</returns>
public static IEnumerable<PipDependencySpecification> ParsedPackagesToPipDependencies(
IList<(string PackageString, GitComponent Component)> parsedPackages,
Dictionary<string, string> pythonEnvironmentVariables) =>
parsedPackages.Where(tuple => tuple.PackageString != null)
.Select(tuple => tuple.PackageString)
.Where(x => !string.IsNullOrWhiteSpace(x))
.Select(x => new PipDependencySpecification(x))
.Where(x => !x.PackageIsUnsafe())
.Where(x => x.PackageConditionsMet(pythonEnvironmentVariables));
}

Просмотреть файл

@ -46,6 +46,9 @@
<None Update="Mocks\pip_report_single_pkg_bad_version.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Mocks\Invalid\invalid.component-detection-pip-report.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Mocks\test.component-detection-pip-report.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -535,6 +535,10 @@ public class PipReportComponentDetectorTests : BaseDetectorTest<PipReportCompone
[TestMethod]
public async Task TestPipReportDetector_SimplePregeneratedFile_Async()
{
this.pythonCommandService
.Setup(x => x.ParseFileAsync(It.IsAny<string>(), It.IsAny<string>()))
.ReturnsAsync(new List<(string PackageString, GitComponent Component)> { ("requests", null) });
var file1 = Path.Join(Directory.GetCurrentDirectory(), "Mocks", "requirements.txt");
var pregeneratedFile = Path.Join(Directory.GetCurrentDirectory(), "Mocks", "test.component-detection-pip-report.json");
@ -561,6 +565,59 @@ public class PipReportComponentDetectorTests : BaseDetectorTest<PipReportCompone
idnaComponent.Version.Should().Be("3.7");
}
[TestMethod]
public async Task TestPipReportDetector_InvalidPregeneratedFile_Async()
{
this.pipCommandService.Setup(x => x.GenerateInstallationReportAsync(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<CancellationToken>()))
.ReturnsAsync((this.simpleExtrasReport, null));
this.pythonCommandService
.Setup(x => x.ParseFileAsync(It.IsAny<string>(), It.IsAny<string>()))
.ReturnsAsync(new List<(string PackageString, GitComponent Component)> { ("requests", null) });
var file1 = Path.Join(Directory.GetCurrentDirectory(), "Mocks", "Invalid", "requirements.txt");
// this pre-generated file does not contains the 'requests' package, and so the report
// validator should fail and the detector should continue as if no pre-generated file was found
var pregeneratedFile = Path.Join(Directory.GetCurrentDirectory(), "Mocks", "Invalid", "invalid.component-detection-pip-report.json");
var (result, componentRecorder) = await this.DetectorTestUtility
.WithFile("requirements.txt", string.Empty, fileLocation: file1)
.ExecuteDetectorAsync();
// found invalid pre-generated file
this.mockLogger.Verify(x => x.Log(
LogLevel.Information,
It.IsAny<EventId>(),
It.Is<It.IsAnyType>((o, t) => o.ToString().Contains("is invalid")),
It.IsAny<Exception>(),
(Func<It.IsAnyType, Exception, string>)It.IsAny<object>()));
// fell back to generating the report itself
this.mockLogger.Verify(x => x.Log(
LogLevel.Information,
It.IsAny<EventId>(),
It.Is<It.IsAnyType>((o, t) => o.ToString().StartsWith("PipReport: Generating pip installation report")),
It.IsAny<Exception>(),
(Func<It.IsAnyType, Exception, string>)It.IsAny<object>()));
this.pipCommandService.Verify(
x => x.GenerateInstallationReportAsync(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<CancellationToken>()),
Times.Once);
// verify results
result.ResultCode.Should().Be(ProcessingResultCode.Success);
var detectedComponents = componentRecorder.GetDetectedComponents();
var pipComponents = detectedComponents.Where(detectedComponent => detectedComponent.Component.Id.Contains("pip")).ToList();
var requestsComponent = pipComponents.Single(x => ((PipComponent)x.Component).Name.Equals("requests")).Component as PipComponent;
requestsComponent.Version.Should().Be("2.32.3");
var idnaComponent = pipComponents.Single(x => ((PipComponent)x.Component).Name.Equals("idna")).Component as PipComponent;
idnaComponent.Version.Should().Be("3.7");
}
private List<(string PackageString, GitComponent Component)> ToGitTuple(IList<string> components)
{
return components.Select<string, (string, GitComponent)>(dep => (dep, null)).ToList();

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -0,0 +1,5 @@
azure.cli
azureml-core
requests
packaging==20.9
setuptools>=65.5.1

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -2,7 +2,6 @@ artifacts-keyring==0.3.4
certifi==2024.6.2
charset-normalizer==3.3.2
Cython==3.0.10
idna==3.7
jaraco.classes==3.4.0
jaraco.context==5.3.0
jaraco.functools==4.0.1
@ -10,5 +9,4 @@ keyring==25.2.1
more-itertools==10.2.0
numpy==1.26.4
pywin32-ctypes==0.2.2
requests==2.32.3
urllib3==2.2.1
requests==2.32.3