From 61a523510ebcf10aaeb658249a01ed766bc858da Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Thu, 24 Aug 2023 19:16:11 +0200 Subject: [PATCH] C#: Only use small files during file content reference analysis. --- .../DependencyManager.cs | 17 ++++++++++++++++- .../ProgressMonitor.cs | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/csharp/extractor/Semmle.Extraction.CSharp.DependencyFetching/DependencyManager.cs b/csharp/extractor/Semmle.Extraction.CSharp.DependencyFetching/DependencyManager.cs index ec960168d12..be7eb16253c 100644 --- a/csharp/extractor/Semmle.Extraction.CSharp.DependencyFetching/DependencyManager.cs +++ b/csharp/extractor/Semmle.Extraction.CSharp.DependencyFetching/DependencyManager.cs @@ -60,7 +60,8 @@ namespace Semmle.Extraction.CSharp.DependencyFetching packageDirectory = new TemporaryDirectory(ComputeTempDirectory(sourceDir.FullName)); var allFiles = GetFiles("*.*").ToList(); - this.fileContent = new FileContent(progressMonitor, GetFileNames(allFiles)); + var smallFiles = GetSmallFiles(allFiles); + this.fileContent = new FileContent(progressMonitor, GetFileNames(smallFiles)); this.allSources = GetFileNames(allFiles, ".cs").ToList(); var allProjects = GetFileNames(allFiles, ".csproj"); var solutions = options.SolutionFile is not null @@ -194,6 +195,20 @@ namespace Semmle.Extraction.CSharp.DependencyFetching private static IEnumerable<string> GetFileNames(IEnumerable<FileInfo> files, params string[] extensions) => files.Where(fi => !extensions.Any() || extensions.Contains(fi.Extension)).Select(fi => fi.FullName); + private IEnumerable<FileInfo> GetSmallFiles(IEnumerable<FileInfo> files) + { + const int oneMb = 1_048_576; + return files.Where(file => + { + if (file.Length > oneMb) + { + progressMonitor.LogDebug($"Skipping {file.FullName} because it is bigger than 1MB."); + return false; + } + return true; + }); + } + /// <summary> /// Computes a unique temp directory for the packages associated /// with this source tree. Use a SHA1 of the directory name.
diff --git a/csharp/extractor/Semmle.Extraction.CSharp.DependencyFetching/ProgressMonitor.cs b/csharp/extractor/Semmle.Extraction.CSharp.DependencyFetching/ProgressMonitor.cs index 0d940873c2c..248b1d7c495 100644 --- a/csharp/extractor/Semmle.Extraction.CSharp.DependencyFetching/ProgressMonitor.cs +++ b/csharp/extractor/Semmle.Extraction.CSharp.DependencyFetching/ProgressMonitor.cs @@ -18,7 +18,7 @@ namespace Semmle.Extraction.CSharp.DependencyFetching public void LogInfo(string message) => logger.Log(Severity.Info, message); - private void LogDebug(string message) => + public void LogDebug(string message) => logger.Log(Severity.Debug, message); private void LogError(string message) =>