C#: Early identification of duplicate extraction

This commit is contained in:
Tom Hvitved 2019-08-29 14:50:38 +02:00
Родитель 72db219c13
Коммит 8f3f9406e2
7 изменённых файлов: 128 добавлений и 79 удалений

Просмотреть файл

@ -1,14 +1,10 @@
using System;
using System.Collections.Immutable;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.Diagnostics;
using System.IO;
using System.Linq;
using Semmle.Extraction.CSharp.Populators;
using System.Runtime.InteropServices;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using System.Diagnostics;
using Semmle.Util.Logging;
@ -44,26 +40,36 @@ namespace Semmle.Extraction.CSharp
/// Initialize the analyser.
/// </summary>
/// <param name="commandLineArguments">Arguments passed to csc.</param>
/// <param name="compilationIn">The Roslyn compilation.</param>
/// <param name="options">Extractor options.</param>
/// <param name="roslynArgs">The arguments passed to Roslyn.</param>
public void Initialize(
/// <param name="finalizeInit">A continuation for finalizing initialization based on a Roslyn compilation.</param>
/// <returns>A Boolean indicating whether to proceed with extraction.</returns>
public bool Initialize(
CSharpCommandLineArguments commandLineArguments,
CSharpCompilation compilationIn,
Options options,
string[] roslynArgs)
string[] roslynArgs,
out Action<CSharpCompilation> finalizeInit)
{
compilation = compilationIn;
if (!LogRoslynArgs(roslynArgs, Extraction.Extractor.Version))
{
finalizeInit = null;
return false;
}
layout = new Layout();
this.options = options;
extractor = new Extraction.Extractor(false, GetOutputName(compilation, commandLineArguments), Logger);
finalizeInit = comp =>
{
compilation = comp;
extractor = new Extraction.Extractor(false, GetOutputName(comp, commandLineArguments), Logger);
LogDiagnostics();
LogDiagnostics(roslynArgs);
SetReferencePaths();
SetReferencePaths();
CompilationErrors += FilteredDiagnostics.Count();
CompilationErrors += FilteredDiagnostics.Count();
};
return true;
}
/// <summary>
@ -110,7 +116,7 @@ namespace Semmle.Extraction.CSharp
layout = new Layout();
extractor = new Extraction.Extractor(true, null, Logger);
this.options = options;
LogDiagnostics(null);
LogExtractorInfo(Extraction.Extractor.Version);
SetReferencePaths();
}
@ -205,11 +211,6 @@ namespace Semmle.Extraction.CSharp
File.GetLastWriteTime(dest) >= File.GetLastWriteTime(src);
}
/// <summary>
/// Determines whether extraction output for <paramref name="src"/> can be reused:
/// true only when the cache option is set and <paramref name="dest"/> is up to date
/// with respect to <paramref name="src"/>.
/// </summary>
bool FileIsCached(string src, string dest) => options.Cache && FileIsUpToDate(src, dest);
/// <summary>
/// Extracts compilation-wide entities, such as compilations and compiler diagnostics.
/// </summary>
@ -241,7 +242,7 @@ namespace Semmle.Extraction.CSharp
}
public void LogPerformance(Entities.PerformanceMetrics p) => compilationEntity.PopulatePerformance(p);
/// <summary>
/// Extract an assembly to a new trap file.
/// If the trap file exists, skip extraction to avoid duplicating
@ -259,7 +260,7 @@ namespace Semmle.Extraction.CSharp
var projectLayout = layout.LookupProjectOrDefault(assemblyPath);
using (var trapWriter = projectLayout.CreateTrapWriter(Logger, assemblyPath, true, options.TrapCompression))
{
var skipExtraction = FileIsCached(assemblyPath, trapWriter.TrapFile);
var skipExtraction = options.Cache && File.Exists(trapWriter.TrapFile);
if (!skipExtraction)
{
@ -429,30 +430,75 @@ namespace Semmle.Extraction.CSharp
/// </summary>
public int TotalErrors => CompilationErrors + ExtractorErrors;
/// <summary>
/// Writes the extractor executable, the supplied extractor version and the
/// current working directory to the log at <c>Info</c> severity.
/// </summary>
/// <param name="extractorVersion">The version string to report.</param>
public void LogExtractorInfo(string extractorVersion)
{
    // The first command line argument is reported as the extractor itself.
    var executable = Environment.GetCommandLineArgs().First();
    Logger.Log(Severity.Info, " Extractor: {0}", executable);

    Logger.Log(Severity.Info, " Extractor version: {0}", extractorVersion);

    var workingDirectory = Directory.GetCurrentDirectory();
    Logger.Log(Severity.Info, " Current working directory: {0}", workingDirectory);
}
/// <summary>
/// Logs information about the extractor and the arguments to Roslyn, and archives
/// the arguments in the C# log directory in a file named after the hash of its
/// contents. If a file with that name already exists, the freshly written
/// temporary file is removed and <c>false</c> is returned.
/// </summary>
/// <param name="roslynArgs">The arguments passed to Roslyn.</param>
/// <param name="extractorVersion">The extractor version to include in the log.</param>
/// <returns>A Boolean indicating whether to proceed with extraction.</returns>
public bool LogRoslynArgs(string[] roslynArgs, string extractorVersion)
{
    LogExtractorInfo(extractorVersion);

    var joinedArgs = string.Join(' ', roslynArgs);
    Logger.Log(Severity.Info, $" Arguments to Roslyn: {joinedArgs}");

    // Write to a randomly named file first; the final name depends on the content hash.
    var logDirectory = Extractor.GetCSharpLogDirectory();
    var tempFile = Path.Combine(logDirectory, $"csharp.{Path.GetRandomFileName()}.txt");

    bool argsWritten;
    using (var writer = new StreamWriter(new FileStream(tempFile, FileMode.Append, FileAccess.Write)))
    {
        writer.WriteLine($"Arguments to Roslyn: {joinedArgs}");
        argsWritten = roslynArgs.WriteCommandLine(writer);
    }

    var argsFile = Path.Combine(logDirectory, $"csharp.{FileUtils.ComputeFileHash(tempFile)}.txt");

    if (argsWritten)
        Logger.Log(Severity.Info, $" Arguments have been written to {argsFile}");

    // An existing file with the same hash means the same arguments were archived before.
    var alreadyArchived = File.Exists(argsFile);
    try
    {
        if (alreadyArchived)
            File.Delete(tempFile);
        else
            File.Move(tempFile, argsFile);
    }
    catch (IOException e)
    {
        // Failing to tidy up the log directory is only worth a warning.
        var failure = alreadyArchived
            ? $" Failed to remove {tempFile}: {e.Message}"
            : $" Failed to move {tempFile} to {argsFile}: {e.Message}";
        Logger.Log(Severity.Warning, failure);
    }

    return !alreadyArchived;
}
/// <summary>
/// Logs detailed information about this invocation,
/// in the event that errors were detected.
/// </summary>
/// <param name="roslynArgs">The arguments passed to Roslyn.</param>
public void LogDiagnostics(string[] roslynArgs)
/// <returns>A Boolean indicating whether to proceed with extraction.</returns>
public void LogDiagnostics()
{
Logger.Log(Severity.Info, " Extractor: {0}", Environment.GetCommandLineArgs().First());
if (extractor != null)
Logger.Log(Severity.Info, " Extractor version: {0}", extractor.Version);
Logger.Log(Severity.Info, " Current working directory: {0}", Directory.GetCurrentDirectory());
if (roslynArgs != null)
{
Logger.Log(Severity.Info, $" Arguments to Roslyn: {string.Join(' ', roslynArgs)}");
// Create a new file in the log folder.
var argsFile = Path.Combine(Extractor.GetCSharpLogDirectory(), $"csharp.{Path.GetRandomFileName()}.txt");
if (roslynArgs.ArchiveCommandLine(argsFile))
Logger.Log(Severity.Info, $" Arguments have been written to {argsFile}");
}
foreach (var error in FilteredDiagnostics)
{
Logger.Log(Severity.Error, " Compilation error: {0}", error);

Просмотреть файл

@ -109,6 +109,12 @@ namespace Semmle.Extraction.CSharp
return ExitCode.Failed;
}
if (!analyser.Initialize(compilerArguments, commandLineArguments, compilerVersion.ArgsWithResponse, out var finalizeInit))
{
logger.Log(Severity.Info, "Skipping extraction since files have already been extracted");
return ExitCode.Ok;
}
var referenceTasks = ResolveReferences(compilerArguments, analyser, canonicalPathCache, references);
var syntaxTrees = new List<SyntaxTree>();
@ -131,7 +137,6 @@ namespace Semmle.Extraction.CSharp
{
logger.Log(Severity.Error, " No source files");
++analyser.CompilationErrors;
analyser.LogDiagnostics(compilerVersion.ArgsWithResponse);
return ExitCode.Failed;
}
@ -149,7 +154,7 @@ namespace Semmle.Extraction.CSharp
// already.
);
analyser.Initialize(compilerArguments, compilation, commandLineArguments, compilerVersion.ArgsWithResponse);
finalizeInit(compilation);
analyser.AnalyseCompilation(cwd, args);
analyser.AnalyseReferences();
@ -175,7 +180,7 @@ namespace Semmle.Extraction.CSharp
{
Frontend = new Entities.Timings() { Elapsed = sw1.Elapsed, Cpu = cpuTime1, User = userTime1 },
Extractor = new Entities.Timings() { Elapsed = sw2.Elapsed, Cpu = cpuTime2 - cpuTime1, User = userTime2 - userTime1 },
Total = new Entities.Timings() { Elapsed = stopwatch.Elapsed, Cpu=cpuTime2, User = userTime2 },
Total = new Entities.Timings() { Elapsed = stopwatch.Elapsed, Cpu = cpuTime2, User = userTime2 },
PeakWorkingSet = currentProcess.PeakWorkingSet64
};

Просмотреть файл

@ -183,26 +183,25 @@ namespace Semmle.Extraction.Tests
public void Fast()
{
Environment.SetEnvironmentVariable("LGTM_INDEX_EXTRACTOR", "--fast");
options = CSharp.Options.CreateWithEnvironment(new string[] {});
options = CSharp.Options.CreateWithEnvironment(new string[] { });
Assert.True(options.Fast);
}
/// <summary>
/// Checks that the contents of a response file (an "@file" argument) are archived,
/// both when copied to a file and when written to a <see cref="TextWriter"/>.
/// </summary>
[Fact]
public void ArchiveArguments()
{
    var file1 = Path.GetTempFileName();
    var file2 = Path.GetTempFileName();
    var sw = new StringWriter();
    var file = Path.GetTempFileName();
    try
    {
        File.AppendAllText(file1, "Test");
        new string[] { "/noconfig", "@" + file1 }.ArchiveCommandLine(file2);
        Assert.Equal("Test", File.ReadAllText(file2));
        File.AppendAllText(file, "Test");
        new string[] { "/noconfig", "@" + file }.WriteCommandLine(sw);
        // WriteLine terminates each line with Environment.NewLine ("\r\n" on Windows),
        // so a hard-coded "\n" would make this test platform dependent.
        Assert.Equal("Test" + Environment.NewLine, sw.ToString());
    }
    finally
    {
        File.Delete(file1);
        File.Delete(file2);
        File.Delete(file);
    }
}
}

Просмотреть файл

@ -79,11 +79,6 @@ namespace Semmle.Extraction
/// </summary>
ILogger Logger { get; }
/// <summary>
/// The extractor SHA, obtained from the git log.
/// </summary>
string Version { get; }
/// <summary>
/// Creates a new context.
/// </summary>
@ -201,6 +196,6 @@ namespace Semmle.Extraction
public ILogger Logger { get; private set; }
public string Version => $"{ThisAssembly.Git.BaseTag} ({ThisAssembly.Git.Sha})";
public static string Version => $"{ThisAssembly.Git.BaseTag} ({ThisAssembly.Git.Sha})";
}
}

Просмотреть файл

@ -3,7 +3,6 @@ using Semmle.Util.Logging;
using System;
using System.IO;
using System.IO.Compression;
using System.Security.Cryptography;
using System.Text;
namespace Semmle.Extraction
@ -182,8 +181,8 @@ namespace Semmle.Extraction
return;
}
var existingHash = ComputeHash(TrapFile);
var hash = ComputeHash(tmpFile);
var existingHash = FileUtils.ComputeFileHash(TrapFile);
var hash = FileUtils.ComputeFileHash(tmpFile);
if (existingHash != hash)
{
var root = TrapFile.Substring(0, TrapFile.Length - 8); // Remove trailing ".trap.gz"
@ -205,22 +204,6 @@ namespace Semmle.Extraction
emitter.EmitTrap(Writer);
}
/// <summary>
/// Computes the SHA-256 hash of the contents of <paramref name="filePath"/>.
/// </summary>
/// <param name="filePath">The path of the file to hash.</param>
/// <returns>The hash as a lowercase hexadecimal string.</returns>
static string ComputeHash(string filePath)
{
    using (var input = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (var hasher = new SHA256Managed())
    {
        var digest = hasher.ComputeHash(input);

        // BitConverter yields upper-case, dash-separated hex pairs ("AB-CD-...");
        // strip the dashes and lower-case the result.
        return BitConverter.ToString(digest).Replace("-", "").ToLowerInvariant();
    }
}
/// <summary>
/// Attempts to archive the specified input file to the normal area of the source archive.
/// The file's path must be sufficiently short so as to render the path of its copy in the

Просмотреть файл

@ -11,13 +11,16 @@ namespace Semmle.Util
/// Subsequent "@" arguments are ignored.
/// </summary>
/// <param name="commandLineArguments">The raw command line arguments.</param>
/// <param name="filename">The full filename to write to.</param>
/// <param name="textWriter">The writer to archive to.</param>
/// <returns>True iff the file was written.</returns>
public static bool ArchiveCommandLine(this IEnumerable<string> commandLineArguments, string filename)
public static bool WriteCommandLine(this IEnumerable<string> commandLineArguments, TextWriter textWriter)
{
foreach (var arg in commandLineArguments.Where(arg => arg[0] == '@').Select(arg => arg.Substring(1)))
{
File.Copy(arg, filename, true);
string line;
using (StreamReader file = new StreamReader(arg))
while ((line = file.ReadLine()) != null)
textWriter.WriteLine(line);
return true;
}
return false;

Просмотреть файл

@ -1,6 +1,8 @@
using System;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
namespace Semmle.Util
{
@ -78,5 +80,21 @@ namespace Semmle.Util
var candidates = paths?.Where(path => exes.Any(exe0 => File.Exists(Path.Combine(path, exe0))));
return candidates?.FirstOrDefault();
}
/// <summary>
/// Computes the SHA-256 hash of the contents of the file at <paramref name="filePath"/>.
/// </summary>
/// <param name="filePath">The path of the file to hash; it is opened for shared reading.</param>
/// <returns>The hash as a 64-character lowercase hexadecimal string.</returns>
public static string ComputeFileHash(string filePath)
{
    // SHA256.Create() is used instead of the obsolete SHA256Managed type;
    // the resulting digest is the same.
    using (var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (var shaAlg = SHA256.Create())
    {
        var sha = shaAlg.ComputeHash(fileStream);
        var hex = new StringBuilder(sha.Length * 2);
        foreach (var b in sha)
            hex.Append(b.ToString("x2"));
        return hex.ToString();
    }
}
}
}