Migrate IPyPiClient cache to LRU MemoryCache (#80)

* Migrate IPyPiClient cache to LRU MemoryCache
* Update test formatting
* Update Caching.Memory to 3.1.23
* Address PR comments
* StyleCop
This commit is contained in:
Coby Allred 2022-03-24 15:35:29 -07:00 коммит произвёл GitHub
Родитель 2d5a418320
Коммит ee44b89aa2
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 239 добавлений и 34 удалений

Просмотреть файл

@ -14,6 +14,7 @@
<PackageVersion Include="FluentAssertions" Version="6.1.0"/>
<PackageVersion Include="Microsoft.AspNet.WebApi.Client" Version="5.2.7"/>
<PackageVersion Include="Microsoft.CodeAnalysis.FxCopAnalyzers" Version="3.3.0"/>
<PackageVersion Include="Microsoft.Extensions.Caching.Memory" Version="3.1.23" />
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="16.9.4"/>
<PackageVersion Include="Microsoft.SourceLink.GitHub" Version="1.0.0"/>
<PackageVersion Include="DotNet.Glob" Version="2.1.1"/>

Просмотреть файл

@ -9,13 +9,20 @@ namespace Microsoft.ComponentDetection.Common
public class EnvironmentVariableService : IEnvironmentVariableService
{
public bool DoesEnvironmentVariableExist(string name)
{
return GetEnvironmentVariable(name) != null;
}
public string GetEnvironmentVariable(string name)
{
// Environment variables are case-insensitive on Windows, and case-sensitive on
// Linux and MacOS.
// https://docs.microsoft.com/en-us/dotnet/api/system.environment.getenvironmentvariable
return Environment.GetEnvironmentVariables().Keys
var caseInsensitiveName = Environment.GetEnvironmentVariables().Keys
.OfType<string>()
.FirstOrDefault(x => string.Compare(x, name, true) == 0) != null;
.FirstOrDefault(x => string.Compare(x, name, true) == 0);
return caseInsensitiveName != null ? Environment.GetEnvironmentVariable(caseInsensitiveName) : null;
}
}
}

Просмотреть файл

@ -0,0 +1,19 @@
using System.Net;
namespace Microsoft.ComponentDetection.Common.Telemetry.Records
{
/// <summary>
/// Telemetry record describing how the PyPi response cache was used.
/// The PyPi client increments <see cref="NumCacheHits"/> whenever a request is served from its cache
/// and stores the cache's entry count in <see cref="FinalCacheSize"/> from its finalizer.
/// </summary>
public class PypiCacheTelemetryRecord : BaseDetectionTelemetryRecord
{
/// <summary>
/// Gets the name under which this record is reported ("PyPiCache").
/// </summary>
public override string RecordName => "PyPiCache";
/// <summary>
/// Gets or sets total number of PyPi requests that hit the cache instead of PyPi APIs.
/// </summary>
public int NumCacheHits { get; set; }
/// <summary>
/// Gets or sets the size of the PyPi cache at class destruction.
/// </summary>
public int FinalCacheSize { get; set; }
}
}

Просмотреть файл

@ -3,5 +3,7 @@ namespace Microsoft.ComponentDetection.Contracts
/// <summary>
/// Abstraction over environment variable lookups so that callers can be tested with a mock.
/// </summary>
public interface IEnvironmentVariableService
{
/// <summary>
/// Reports whether an environment variable with the given name is defined.
/// </summary>
/// <param name="name">Name of the environment variable to look up.</param>
/// <returns>True when the variable is defined; otherwise false.</returns>
bool DoesEnvironmentVariableExist(string name);
/// <summary>
/// Gets the value of the environment variable with the given name.
/// </summary>
/// <param name="name">Name of the environment variable to look up.</param>
/// <returns>The variable's value, or null when no matching variable is defined.</returns>
string GetEnvironmentVariable(string name);
}
}

Просмотреть файл

@ -9,6 +9,7 @@
<PackageReference Include="Polly" />
<PackageReference Include="Semver" />
<PackageReference Include="yamldotnet" />
<PackageReference Include="Microsoft.Extensions.Caching.Memory" />
<PackageReference Include="Newtonsoft.Json" />
<PackageReference Include="System.Composition.AttributedModel" />
<PackageReference Include="System.Composition.Convention" />

Просмотреть файл

@ -1,5 +1,4 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Composition;
using System.IO;
@ -11,6 +10,7 @@ using System.Threading;
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Common.Telemetry.Records;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.Extensions.Caching.Memory;
using Newtonsoft.Json;
using Polly;
@ -31,10 +31,18 @@ namespace Microsoft.ComponentDetection.Detectors.Pip
[Import]
public ILogger Logger { get; set; }
[Import]
public IEnvironmentVariableService EnvironmentVariableService { get; set; }
private static HttpClientHandler httpClientHandler = new HttpClientHandler() { CheckCertificateRevocationList = true };
internal static HttpClient HttpClient = new HttpClient(httpClientHandler);
// Values used for cache creation
private const long CACHEINTERVALSECONDS = 60;
private const long DEFAULTCACHEENTRIES = 128;
private bool checkedMaxEntriesVariable = false;
// time to wait before retrying a failed call to pypi.org
private static readonly TimeSpan RETRYDELAY = TimeSpan.FromSeconds(1);
@ -45,42 +53,75 @@ namespace Microsoft.ComponentDetection.Detectors.Pip
private long retries = 0;
/// <summary>
/// This cache is used mostly for consistency, to create a unified view of Pypi response.
/// A thread safe cache implementation which contains a mapping of URI -> HttpResponseMessage
/// and has a limited number of entries which will expire after the cache fills or a specified interval.
/// </summary>
private readonly ConcurrentDictionary<string, Task<HttpResponseMessage>> cachedResponses = new ConcurrentDictionary<string, Task<HttpResponseMessage>>();
private MemoryCache cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = DEFAULTCACHEENTRIES });
/// <summary>
/// Returns a cached response if it exists, otherwise returns the response from Pypi REST call.
/// The response from Pypi is not automatically added to the cache, to allow caller to make that decision.
/// </summary>
/// <param name="uri">The REST Uri to call.</param>
/// <returns>The cached response or a new result from Pypi.</returns>
private async Task<HttpResponseMessage> GetPypiResponse(string uri)
// Keep telemetry on how the cache is being used for future refinements
private PypiCacheTelemetryRecord cacheTelemetry;
public PyPiClient()
{
if (cachedResponses.TryGetValue(uri, out var value))
cacheTelemetry = new PypiCacheTelemetryRecord()
{
return await value;
}
NumCacheHits = 0,
FinalCacheSize = 0,
};
}
Logger.LogInfo("Getting Python data from " + uri);
return await HttpClient.GetAsync(uri);
// Finalizer: copies the current number of entries in the response cache onto the telemetry
// record and then disposes the record (presumably Dispose is what emits it — confirm).
// NOTE(review): finalizers run at a nondeterministic time and .NET guidance is to avoid touching
// other managed objects (here cachedResponses and cacheTelemetry) or throwing from them, since
// those objects may already have been finalized. Consider implementing IDisposable instead.
~PyPiClient()
{
cacheTelemetry.FinalCacheSize = cachedResponses.Count;
cacheTelemetry.Dispose();
}
/// <summary>
/// Used to update the consistency cache, decision has to be made by the caller to allow for retries!.
/// Returns a cached response if it exists, otherwise returns the response from PyPi REST call.
/// The response from PyPi is automatically added to the cache.
/// </summary>
/// <param name="uri">The REST Uri to call.</param>
/// <param name="message">The proposed response by the caller to store for this Uri.</param>
/// <returns>The `first-wins` response accepted into the cache.
/// This might be different from the input if another caller wins the race!.</returns>
private async Task<HttpResponseMessage> CachePypiResponse(string uri, HttpResponseMessage message)
/// <returns>The cached response or a new result from PyPi.</returns>
private async Task<HttpResponseMessage> GetAndCachePyPiResponse(string uri)
{
if (!cachedResponses.TryAdd(uri, Task.FromResult(message)))
if (!checkedMaxEntriesVariable)
{
return await cachedResponses[uri];
InitializeNonDefaultMemoryCache();
}
return message;
if (cachedResponses.TryGetValue(uri, out HttpResponseMessage result))
{
cacheTelemetry.NumCacheHits++;
Logger.LogVerbose("Retrieved cached Python data from " + uri);
return result;
}
Logger.LogInfo("Getting Python data from " + uri);
var response = await HttpClient.GetAsync(uri);
// The `first - wins` response accepted into the cache. This might be different from the input if another caller wins the race.
return await cachedResponses.GetOrCreateAsync(uri, cacheEntry =>
{
cacheEntry.SlidingExpiration = TimeSpan.FromSeconds(CACHEINTERVALSECONDS); // This entry will expire after CACHEINTERVALSECONDS seconds from last use
cacheEntry.Size = 1; // Specify a size of 1 so a set number of entries can always be in the cache
return Task.FromResult(response);
});
}
/// <summary>
/// On the initial caching attempt, see if the user specified an override for
/// PyPiMaxCacheEntries and recreate the cache if needed.
/// The override is honoured only when it parses as a non-negative integer; any other
/// value (missing, empty, non-numeric or negative) leaves the default cache in place.
/// </summary>
private void InitializeNonDefaultMemoryCache()
{
    var maxEntriesVariable = EnvironmentVariableService.GetEnvironmentVariable("PyPiMaxCacheEntries");

    // MemoryCacheOptions.SizeLimit throws ArgumentOutOfRangeException for negative values.
    // Without the range check a negative override would throw here on every request, because
    // the "checked" flag below would never be reached.
    var hasValidOverride = !string.IsNullOrEmpty(maxEntriesVariable)
        && long.TryParse(maxEntriesVariable, out var parsedEntries)
        && parsedEntries >= 0;

    if (hasValidOverride)
    {
        var maxEntries = long.Parse(maxEntriesVariable);
        Logger.LogInfo($"Setting IPyPiClient max cache entries to {maxEntries}");
        cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = maxEntries });
    }

    // Remember that the override has been evaluated so this runs only once per client.
    checkedMaxEntriesVariable = true;
}
public async Task<IList<PipDependencySpecification>> FetchPackageDependencies(string name, string version, PythonProjectRelease release)
@ -88,9 +129,7 @@ namespace Microsoft.ComponentDetection.Detectors.Pip
var dependencies = new List<PipDependencySpecification>();
var uri = release.Url.ToString();
var response = await GetPypiResponse(uri);
response = await CachePypiResponse(uri, response);
var response = await GetAndCachePyPiResponse(uri);
if (!response.IsSuccessStatusCode)
{
@ -169,11 +208,9 @@ namespace Microsoft.ComponentDetection.Detectors.Pip
return Task.FromResult<HttpResponseMessage>(null);
}
return GetPypiResponse(requestUri);
return GetAndCachePyPiResponse(requestUri);
});
request = await CachePypiResponse(requestUri, request);
if (request == null)
{
using var r = new PypiMaxRetriesReachedTelemetryRecord { Name = spec.Name, DependencySpecifiers = spec.DependencySpecifiers?.ToArray() };

Просмотреть файл

@ -59,6 +59,7 @@ namespace Microsoft.ComponentDetection.Common.Tests
typeof(string),
typeof(string[]),
typeof(bool),
typeof(int),
typeof(int?),
typeof(TimeSpan?),
typeof(HttpStatusCode),

Просмотреть файл

@ -1,10 +1,12 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using FluentAssertions;
using Microsoft.ComponentDetection.Common;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Detectors.Pip;
using Microsoft.VisualStudio.TestTools.UnitTesting;
@ -24,6 +26,7 @@ namespace Microsoft.ComponentDetection.Detectors.Tests
{
pypiClient = new PyPiClient()
{
EnvironmentVariableService = new EnvironmentVariableService(),
Logger = new Mock<ILogger>().Object,
};
}
@ -41,14 +44,148 @@ namespace Microsoft.ComponentDetection.Detectors.Tests
},
};
PyPiClient.HttpClient = new HttpClient(MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject)));
var mockHandler = MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject));
PyPiClient.HttpClient = new HttpClient(mockHandler.Object);
Func<Task> action = async () => await pypiClient.GetReleases(pythonSpecs);
await action.Should().NotThrowAsync();
}
private HttpMessageHandler MockHttpMessageHandler(string content)
/// <summary>
/// Calls GetReleases twice with the same dependency specification and verifies that the mocked
/// HTTP handler's SendAsync was invoked only once, i.e. the second call was served without a
/// new HTTP request.
/// </summary>
[TestMethod]
public async Task GetReleases_DuplicateEntries_CallsGetAsync_Once()
{
var pythonSpecs = new PipDependencySpecification { DependencySpecifiers = new List<string> { "==1.0.0" } };
var pythonProject = new PythonProject
{
Releases = new Dictionary<string, IList<PythonProjectRelease>>
{
{ "1.0.0", new List<PythonProjectRelease> { new PythonProjectRelease() } },
},
};
var mockHandler = MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject));
// HttpClient is a static member of PyPiClient, so this replaces it for the client under test.
PyPiClient.HttpClient = new HttpClient(mockHandler.Object);
Func<Task> action = async () => await pypiClient.GetReleases(pythonSpecs);
// Invoke the same request twice.
await action.Should().NotThrowAsync();
await action.Should().NotThrowAsync();
// Verify the API call was performed only once
mockHandler.Protected().Verify(
"SendAsync",
Times.Once(),
ItExpr.IsAny<HttpRequestMessage>(),
ItExpr.IsAny<CancellationToken>());
}
/// <summary>
/// Calls GetReleases twice, assigning a fresh GUID as the package name before each call, and
/// verifies that the mocked HTTP handler's SendAsync was invoked exactly twice.
/// </summary>
/// <remarks>
/// NOTE(review): the method name ends in "_Once" but the assertion is Times.Exactly(2);
/// consider renaming the test to reflect that each distinct entry triggers its own call.
/// </remarks>
[TestMethod]
public async Task GetReleases_DifferentEntries_CallsGetAsync_Once()
{
var pythonSpecs = new PipDependencySpecification { DependencySpecifiers = new List<string> { "==1.0.0" } };
var pythonProject = new PythonProject
{
Releases = new Dictionary<string, IList<PythonProjectRelease>>
{
{ "1.0.0", new List<PythonProjectRelease> { new PythonProjectRelease() } },
},
};
var mockHandler = MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject));
PyPiClient.HttpClient = new HttpClient(mockHandler.Object);
Func<Task> action = async () =>
{
// A new name on every invocation makes the two requests distinct.
pythonSpecs.Name = Guid.NewGuid().ToString();
await pypiClient.GetReleases(pythonSpecs);
};
await action.Should().NotThrowAsync();
await action.Should().NotThrowAsync();
// Verify the API call was performed once per distinct package name, i.e. exactly twice
mockHandler.Protected().Verify(
"SendAsync",
Times.Exactly(2),
ItExpr.IsAny<HttpRequestMessage>(),
ItExpr.IsAny<CancellationToken>());
}
/// <summary>
/// Calls FetchPackageDependencies twice for a release with the same URL and verifies that the
/// mocked HTTP handler's SendAsync was invoked only once. The mocked body is not a valid ZIP,
/// so both calls are expected to throw InvalidDataException.
/// </summary>
[TestMethod]
public async Task FetchPackageDependencies_DuplicateEntries_CallsGetAsync_Once()
{
var mockHandler = MockHttpMessageHandler("invalid ZIP");
PyPiClient.HttpClient = new HttpClient(mockHandler.Object);
// The URL is constant, so both invocations request the same URI.
Func<Task> action = async () => await pypiClient.FetchPackageDependencies("a", "1.0.0", new PythonProjectRelease { PackageType = "bdist_wheel", PythonVersion = "3.5.2", Size = 1000, Url = new Uri($"https://testurl") });
await action.Should().ThrowAsync<InvalidDataException>();
await action.Should().ThrowAsync<InvalidDataException>();
// Verify the API call was performed only once
mockHandler.Protected().Verify(
"SendAsync",
Times.Once(),
ItExpr.IsAny<HttpRequestMessage>(),
ItExpr.IsAny<CancellationToken>());
}
/// <summary>
/// Calls FetchPackageDependencies twice, each time for a release whose URL contains a fresh GUID,
/// and verifies that the mocked HTTP handler's SendAsync was invoked exactly twice. The mocked
/// body is not a valid ZIP, so both calls are expected to throw InvalidDataException.
/// </summary>
/// <remarks>
/// NOTE(review): the method name ends in "_Once" but the assertion is Times.Exactly(2);
/// consider renaming the test to reflect that each distinct URL triggers its own call.
/// </remarks>
[TestMethod]
public async Task FetchPackageDependencies_DifferentEntries_CallsGetAsync_Once()
{
var mockHandler = MockHttpMessageHandler("invalid ZIP");
PyPiClient.HttpClient = new HttpClient(mockHandler.Object);
// The release is built inside the lambda, so every invocation gets a new GUID-based URL.
Func<Task> action = async () => await pypiClient.FetchPackageDependencies("a", "1.0.0", new PythonProjectRelease { PackageType = "bdist_wheel", PythonVersion = "3.5.2", Size = 1000, Url = new Uri($"https://{Guid.NewGuid()}") });
await action.Should().ThrowAsync<InvalidDataException>();
await action.Should().ThrowAsync<InvalidDataException>();
// Verify the API call was performed once per distinct URL, i.e. exactly twice
mockHandler.Protected().Verify(
"SendAsync",
Times.Exactly(2),
ItExpr.IsAny<HttpRequestMessage>(),
ItExpr.IsAny<CancellationToken>());
}
/// <summary>
/// Builds a PyPiClient whose environment variable service returns "32" for PyPiMaxCacheEntries,
/// calls GetReleases twice, and verifies that the variable was read once and that the
/// "Setting IPyPiClient max cache entries to 32" message was logged once.
/// </summary>
[TestMethod]
public async Task GetReleases_MaxEntriesVariable_CreatesNewCache()
{
var pythonSpecs = new PipDependencySpecification { DependencySpecifiers = new List<string> { "==1.0.0" } };
var pythonProject = new PythonProject
{
Releases = new Dictionary<string, IList<PythonProjectRelease>>
{
{ "1.0.0", new List<PythonProjectRelease> { new PythonProjectRelease() } },
},
};
var mockHandler = MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject));
PyPiClient.HttpClient = new HttpClient(mockHandler.Object);
var mockLogger = new Mock<ILogger>();
var mockEvs = new Mock<IEnvironmentVariableService>();
// Only the exact variable name "PyPiMaxCacheEntries" is configured to return a value.
mockEvs.Setup(x => x.GetEnvironmentVariable(It.Is<string>(s => s.Equals("PyPiMaxCacheEntries")))).Returns("32");
// A dedicated client is used here so the mocked services can be verified afterwards.
var mockedPyPi = new PyPiClient()
{
EnvironmentVariableService = mockEvs.Object,
Logger = mockLogger.Object,
};
Func<Task> action = async () => await mockedPyPi.GetReleases(pythonSpecs);
await action.Should().NotThrowAsync();
await action.Should().NotThrowAsync();
// Verify the cache setup call was performed only once
mockEvs.Verify(x => x.GetEnvironmentVariable(It.IsAny<string>()), Times.Once());
mockLogger.Verify(x => x.LogInfo(It.Is<string>(s => s.Equals("Setting IPyPiClient max cache entries to 32"))), Times.Once());
}
private Mock<HttpMessageHandler> MockHttpMessageHandler(string content)
{
var handlerMock = new Mock<HttpMessageHandler>();
handlerMock.Protected()
@ -62,7 +199,7 @@ namespace Microsoft.ComponentDetection.Detectors.Tests
Content = new StringContent(content),
});
return handlerMock.Object;
return handlerMock;
}
}
}