From ee44b89aa2f7a09f64791a656b7f402e9357880e Mon Sep 17 00:00:00 2001 From: Coby Allred Date: Thu, 24 Mar 2022 15:35:29 -0700 Subject: [PATCH] Migrate IPyPiClient cache to LRU MemoryCache (#80) * Migrate IPyPiClient cache to LRU MemoryCache * Update test formatting * Update Caching.Memory to 3.1.23 * Address PR comments * StyleCop --- Directory.Packages.props | 1 + .../EnvironmentVariableService.cs | 11 +- .../Records/PyPiCacheTelemetryRecord.cs | 19 +++ .../IEnvironmentVariableService.cs | 2 + ...rosoft.ComponentDetection.Detectors.csproj | 1 + .../pip/IPyPiClient.cs | 95 ++++++++---- .../BaseDetectionTelemetryRecordTests.cs | 1 + .../PyPiClientTests.cs | 143 +++++++++++++++++- 8 files changed, 239 insertions(+), 34 deletions(-) create mode 100644 src/Microsoft.ComponentDetection.Common/Telemetry/Records/PyPiCacheTelemetryRecord.cs diff --git a/Directory.Packages.props b/Directory.Packages.props index 8c9899cf..db5101ff 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -14,6 +14,7 @@ + diff --git a/src/Microsoft.ComponentDetection.Common/EnvironmentVariableService.cs b/src/Microsoft.ComponentDetection.Common/EnvironmentVariableService.cs index b3aeb89d..b4dc7733 100644 --- a/src/Microsoft.ComponentDetection.Common/EnvironmentVariableService.cs +++ b/src/Microsoft.ComponentDetection.Common/EnvironmentVariableService.cs @@ -9,13 +9,20 @@ namespace Microsoft.ComponentDetection.Common public class EnvironmentVariableService : IEnvironmentVariableService { public bool DoesEnvironmentVariableExist(string name) + { + return GetEnvironmentVariable(name) != null; + } + + public string GetEnvironmentVariable(string name) { // Environment variables are case-insensitive on Windows, and case-sensitive on // Linux and MacOS. // https://docs.microsoft.com/en-us/dotnet/api/system.environment.getenvironmentvariable - return Environment.GetEnvironmentVariables().Keys + var caseInsensitiveName = Environment.GetEnvironmentVariables().Keys .OfType() - .FirstOrDefault(x => string.Compare(x, name, true) == 0) != null; + .FirstOrDefault(x => string.Compare(x, name, true) == 0); + + return caseInsensitiveName != null ? Environment.GetEnvironmentVariable(caseInsensitiveName) : null; } } } diff --git a/src/Microsoft.ComponentDetection.Common/Telemetry/Records/PyPiCacheTelemetryRecord.cs b/src/Microsoft.ComponentDetection.Common/Telemetry/Records/PyPiCacheTelemetryRecord.cs new file mode 100644 index 00000000..39ba28e8 --- /dev/null +++ b/src/Microsoft.ComponentDetection.Common/Telemetry/Records/PyPiCacheTelemetryRecord.cs @@ -0,0 +1,19 @@ +using System.Net; + +namespace Microsoft.ComponentDetection.Common.Telemetry.Records +{ + public class PypiCacheTelemetryRecord : BaseDetectionTelemetryRecord + { + public override string RecordName => "PyPiCache"; + + /// + /// Gets or sets total number of PyPi requests that hit the cache instead of PyPi APIs. + /// + public int NumCacheHits { get; set; } + + /// + /// Gets or sets the size of the PyPi cache at class destruction. + /// + public int FinalCacheSize { get; set; } + } +} diff --git a/src/Microsoft.ComponentDetection.Contracts/IEnvironmentVariableService.cs b/src/Microsoft.ComponentDetection.Contracts/IEnvironmentVariableService.cs index 43671312..0026aa6f 100644 --- a/src/Microsoft.ComponentDetection.Contracts/IEnvironmentVariableService.cs +++ b/src/Microsoft.ComponentDetection.Contracts/IEnvironmentVariableService.cs @@ -3,5 +3,7 @@ namespace Microsoft.ComponentDetection.Contracts public interface IEnvironmentVariableService { bool DoesEnvironmentVariableExist(string name); + + string GetEnvironmentVariable(string name); } } diff --git a/src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj b/src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj index 1a27aefc..c71619ee 100644 --- a/src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj +++ b/src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj @@ -9,6 +9,7 @@ + diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs b/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs index 86077e89..938ee8b5 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs @@ -1,5 +1,4 @@ using System; -using System.Collections.Concurrent; using System.Collections.Generic; using System.Composition; using System.IO; @@ -11,6 +10,7 @@ using System.Threading; using System.Threading.Tasks; using Microsoft.ComponentDetection.Common.Telemetry.Records; using Microsoft.ComponentDetection.Contracts; +using Microsoft.Extensions.Caching.Memory; using Newtonsoft.Json; using Polly; @@ -31,10 +31,18 @@ namespace Microsoft.ComponentDetection.Detectors.Pip [Import] public ILogger Logger { get; set; } + [Import] + public IEnvironmentVariableService EnvironmentVariableService { get; set; } + private static HttpClientHandler httpClientHandler = new HttpClientHandler() { CheckCertificateRevocationList = true }; internal static HttpClient HttpClient = new HttpClient(httpClientHandler); + // Values used for cache creation + private const long CACHEINTERVALSECONDS = 60; + private const long DEFAULTCACHEENTRIES = 128; + private bool checkedMaxEntriesVariable = false; + // time to wait before retrying a failed call to pypi.org private static readonly TimeSpan RETRYDELAY = TimeSpan.FromSeconds(1); @@ -45,42 +53,75 @@ namespace Microsoft.ComponentDetection.Detectors.Pip private long retries = 0; /// - /// This cache is used mostly for consistency, to create a unified view of Pypi response. + /// A thread safe cache implementation which contains a mapping of URI -> HttpResponseMessage + /// and has a limited number of entries which will expire after the cache fills or a specified interval. /// - private readonly ConcurrentDictionary> cachedResponses = new ConcurrentDictionary>(); + private MemoryCache cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = DEFAULTCACHEENTRIES }); - /// - /// Returns a cached response if it exists, otherwise returns the response from Pypi REST call. - /// The response from Pypi is not automatically added to the cache, to allow caller to make that decision. - /// - /// The REST Uri to call. - /// The cached response or a new result from Pypi. - private async Task GetPypiResponse(string uri) + // Keep telemetry on how the cache is being used for future refinements + private PypiCacheTelemetryRecord cacheTelemetry; + + public PyPiClient() { - if (cachedResponses.TryGetValue(uri, out var value)) + cacheTelemetry = new PypiCacheTelemetryRecord() { - return await value; - } + NumCacheHits = 0, + FinalCacheSize = 0, + }; + } - Logger.LogInfo("Getting Python data from " + uri); - return await HttpClient.GetAsync(uri); + ~PyPiClient() + { + cacheTelemetry.FinalCacheSize = cachedResponses.Count; + cacheTelemetry.Dispose(); } /// - /// Used to update the consistency cache, decision has to be made by the caller to allow for retries!. + /// Returns a cached response if it exists, otherwise returns the response from PyPi REST call. + /// The response from PyPi is automatically added to the cache. /// /// The REST Uri to call. - /// The proposed response by the caller to store for this Uri. - /// The `first-wins` response accepted into the cache. - /// This might be different from the input if another caller wins the race!. - private async Task CachePypiResponse(string uri, HttpResponseMessage message) + /// The cached response or a new result from PyPi. + private async Task GetAndCachePyPiResponse(string uri) { - if (!cachedResponses.TryAdd(uri, Task.FromResult(message))) + if (!checkedMaxEntriesVariable) { - return await cachedResponses[uri]; + InitializeNonDefaultMemoryCache(); } - return message; + if (cachedResponses.TryGetValue(uri, out HttpResponseMessage result)) + { + cacheTelemetry.NumCacheHits++; + Logger.LogVerbose("Retrieved cached Python data from " + uri); + return result; + } + + Logger.LogInfo("Getting Python data from " + uri); + var response = await HttpClient.GetAsync(uri); + + // The `first - wins` response accepted into the cache. This might be different from the input if another caller wins the race. + return await cachedResponses.GetOrCreateAsync(uri, cacheEntry => + { + cacheEntry.SlidingExpiration = TimeSpan.FromSeconds(CACHEINTERVALSECONDS); // This entry will expire after CACHEINTERVALSECONDS seconds from last use + cacheEntry.Size = 1; // Specify a size of 1 so a set number of entries can always be in the cache + return Task.FromResult(response); + }); + } + + /// + /// On the initial caching attempt, see if the user specified an override for + /// PyPiMaxCacheEntries and recreate the cache if needed. + /// + private void InitializeNonDefaultMemoryCache() + { + var maxEntriesVariable = EnvironmentVariableService.GetEnvironmentVariable("PyPiMaxCacheEntries"); + if (!string.IsNullOrEmpty(maxEntriesVariable) && long.TryParse(maxEntriesVariable, out var maxEntries)) + { + Logger.LogInfo($"Setting IPyPiClient max cache entries to {maxEntries}"); + cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = maxEntries }); + } + + checkedMaxEntriesVariable = true; } public async Task> FetchPackageDependencies(string name, string version, PythonProjectRelease release) @@ -88,9 +129,7 @@ namespace Microsoft.ComponentDetection.Detectors.Pip var dependencies = new List(); var uri = release.Url.ToString(); - var response = await GetPypiResponse(uri); - - response = await CachePypiResponse(uri, response); + var response = await GetAndCachePyPiResponse(uri); if (!response.IsSuccessStatusCode) { @@ -169,11 +208,9 @@ namespace Microsoft.ComponentDetection.Detectors.Pip return Task.FromResult(null); } - return GetPypiResponse(requestUri); + return GetAndCachePyPiResponse(requestUri); }); - request = await CachePypiResponse(requestUri, request); - if (request == null) { using var r = new PypiMaxRetriesReachedTelemetryRecord { Name = spec.Name, DependencySpecifiers = spec.DependencySpecifiers?.ToArray() }; diff --git a/test/Microsoft.ComponentDetection.Common.Tests/BaseDetectionTelemetryRecordTests.cs b/test/Microsoft.ComponentDetection.Common.Tests/BaseDetectionTelemetryRecordTests.cs index 203a25df..f3d0d3a2 100644 --- a/test/Microsoft.ComponentDetection.Common.Tests/BaseDetectionTelemetryRecordTests.cs +++ b/test/Microsoft.ComponentDetection.Common.Tests/BaseDetectionTelemetryRecordTests.cs @@ -59,6 +59,7 @@ namespace Microsoft.ComponentDetection.Common.Tests typeof(string), typeof(string[]), typeof(bool), + typeof(int), typeof(int?), typeof(TimeSpan?), typeof(HttpStatusCode), diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/PyPiClientTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/PyPiClientTests.cs index ae18825c..f64df081 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/PyPiClientTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/PyPiClientTests.cs @@ -1,10 +1,12 @@ using System; using System.Collections.Generic; +using System.IO; using System.Net; using System.Net.Http; using System.Threading; using System.Threading.Tasks; using FluentAssertions; +using Microsoft.ComponentDetection.Common; using Microsoft.ComponentDetection.Contracts; using Microsoft.ComponentDetection.Detectors.Pip; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -24,6 +26,7 @@ namespace Microsoft.ComponentDetection.Detectors.Tests { pypiClient = new PyPiClient() { + EnvironmentVariableService = new EnvironmentVariableService(), Logger = new Mock().Object, }; } @@ -41,14 +44,148 @@ namespace Microsoft.ComponentDetection.Detectors.Tests }, }; - PyPiClient.HttpClient = new HttpClient(MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject))); + var mockHandler = MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject)); + PyPiClient.HttpClient = new HttpClient(mockHandler.Object); Func action = async () => await pypiClient.GetReleases(pythonSpecs); await action.Should().NotThrowAsync(); } - private HttpMessageHandler MockHttpMessageHandler(string content) + [TestMethod] + public async Task GetReleases_DuplicateEntries_CallsGetAsync_Once() + { + var pythonSpecs = new PipDependencySpecification { DependencySpecifiers = new List { "==1.0.0" } }; + var pythonProject = new PythonProject + { + Releases = new Dictionary> + { + { "1.0.0", new List { new PythonProjectRelease() } }, + }, + }; + + var mockHandler = MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject)); + PyPiClient.HttpClient = new HttpClient(mockHandler.Object); + + Func action = async () => await pypiClient.GetReleases(pythonSpecs); + + await action.Should().NotThrowAsync(); + await action.Should().NotThrowAsync(); + + // Verify the API call was performed only once + mockHandler.Protected().Verify( + "SendAsync", + Times.Once(), + ItExpr.IsAny(), + ItExpr.IsAny()); + } + + [TestMethod] + public async Task GetReleases_DifferentEntries_CallsGetAsync_Once() + { + var pythonSpecs = new PipDependencySpecification { DependencySpecifiers = new List { "==1.0.0" } }; + var pythonProject = new PythonProject + { + Releases = new Dictionary> + { + { "1.0.0", new List { new PythonProjectRelease() } }, + }, + }; + + var mockHandler = MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject)); + PyPiClient.HttpClient = new HttpClient(mockHandler.Object); + + Func action = async () => + { + pythonSpecs.Name = Guid.NewGuid().ToString(); + await pypiClient.GetReleases(pythonSpecs); + }; + + await action.Should().NotThrowAsync(); + await action.Should().NotThrowAsync(); + + // Verify the API call was performed only once + mockHandler.Protected().Verify( + "SendAsync", + Times.Exactly(2), + ItExpr.IsAny(), + ItExpr.IsAny()); + } + + [TestMethod] + public async Task FetchPackageDependencies_DuplicateEntries_CallsGetAsync_Once() + { + var mockHandler = MockHttpMessageHandler("invalid ZIP"); + PyPiClient.HttpClient = new HttpClient(mockHandler.Object); + + Func action = async () => await pypiClient.FetchPackageDependencies("a", "1.0.0", new PythonProjectRelease { PackageType = "bdist_wheel", PythonVersion = "3.5.2", Size = 1000, Url = new Uri($"https://testurl") }); + + await action.Should().ThrowAsync(); + await action.Should().ThrowAsync(); + + // Verify the API call was performed only once + mockHandler.Protected().Verify( + "SendAsync", + Times.Once(), + ItExpr.IsAny(), + ItExpr.IsAny()); + } + + [TestMethod] + public async Task FetchPackageDependencies_DifferentEntries_CallsGetAsync_Once() + { + var mockHandler = MockHttpMessageHandler("invalid ZIP"); + PyPiClient.HttpClient = new HttpClient(mockHandler.Object); + + Func action = async () => await pypiClient.FetchPackageDependencies("a", "1.0.0", new PythonProjectRelease { PackageType = "bdist_wheel", PythonVersion = "3.5.2", Size = 1000, Url = new Uri($"https://{Guid.NewGuid()}") }); + + await action.Should().ThrowAsync(); + await action.Should().ThrowAsync(); + + // Verify the API call was performed only once + mockHandler.Protected().Verify( + "SendAsync", + Times.Exactly(2), + ItExpr.IsAny(), + ItExpr.IsAny()); + } + + [TestMethod] + public async Task GetReleases_MaxEntriesVariable_CreatesNewCache() + { + var pythonSpecs = new PipDependencySpecification { DependencySpecifiers = new List { "==1.0.0" } }; + var pythonProject = new PythonProject + { + Releases = new Dictionary> + { + { "1.0.0", new List { new PythonProjectRelease() } }, + }, + }; + + var mockHandler = MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject)); + PyPiClient.HttpClient = new HttpClient(mockHandler.Object); + + var mockLogger = new Mock(); + var mockEvs = new Mock(); + mockEvs.Setup(x => x.GetEnvironmentVariable(It.Is(s => s.Equals("PyPiMaxCacheEntries")))).Returns("32"); + + var mockedPyPi = new PyPiClient() + { + EnvironmentVariableService = mockEvs.Object, + Logger = mockLogger.Object, + }; + + Func action = async () => await mockedPyPi.GetReleases(pythonSpecs); + + await action.Should().NotThrowAsync(); + await action.Should().NotThrowAsync(); + + // Verify the cache setup call was performed only once + mockEvs.Verify(x => x.GetEnvironmentVariable(It.IsAny()), Times.Once()); + mockLogger.Verify(x => x.LogInfo(It.Is(s => s.Equals("Setting IPyPiClient max cache entries to 32"))), Times.Once()); + } + + private Mock MockHttpMessageHandler(string content) { var handlerMock = new Mock(); handlerMock.Protected() @@ -62,7 +199,7 @@ namespace Microsoft.ComponentDetection.Detectors.Tests Content = new StringContent(content), }); - return handlerMock.Object; + return handlerMock; } } }