Merged PR 632405: Productization of packed execution logging framework

This change completes and productizes the optimized BuildXL log analysis work originally started a year ago.

[Original deck](https://microsoft-my.sharepoint.com/:p:/p/rjelling/EeAmA2JdNehEqQBCzMdc8lsBpfFbTzJtpzDHaG5RHDU4zQ?e=KMDaYx)
[Original PR](https://dev.azure.com/mseng/Domino/_git/BuildXL.Internal/pullrequest/575957)

[Current deck with updated status](https://microsoft-my.sharepoint.com/:p:/p/rjelling/EQQrdyMUe4tAktNAs4AkaKcBCSNPE4LR0Gbly5TmERPY-w?e=hRPx1C)
[Recorded presentation to Windows devs](https://microsoft-my.sharepoint.com/:v:/p/rjelling/EQ8g2kCU5HVGge3GpuHjcF8BA5dEcpo6qxlg8B6B1jy2EA?email=rjelling%40microsoft.com)

This code has been tested with a full (non-cached) Windows build:

|Data|XLG analysis|PackedExecution|
|-----|--------------|-----------------|
|Uncompressed file sizes|22.4GB (21.9GB XLG + 0.5GB tables)|1.6GB|
|Compressed file sizes|9.2GB (8.7GB zipped XLG + 0.5GB zipped tables)|0.35GB (357MB)|
|Time to load and analyze after the build|46 minutes (dev workstation, FileConsumptionAnalyzer only)|26 seconds (dev workstation, all file consumption & pip execution data)|
|Duration of uncached all layers build|10 hrs 13 mins|10 hrs 12 mins (within normal variability, i.e. no visible overhead)|

The FileConsumptionAnalyzer results for this build were verified to be exactly consistent with the data from the PackedExecutionExporter. Details on request.

A viewer for this data format has been built, with a demo available here:
```
\\winbuilds\buildfile\PCV\BuildXL_Demo\1007_090757_demo_2.zip
```
Copy locally, unzip, run runme.cmd from Windows Explorer. This is all the data for a full all layers build. Will take about 8GB of memory and loads in 26 seconds on a 12 core dev box. Ping me if you cannot access this file, or if you have questions about the viewer or the data.

(A subsequent PR will hopefully bring the viewer code into the BuildXL repository as well, but it requires .NET 5, so I have to sort that out with Sergey.)

There is nothing Windows-specific about this work; in principle this viewer and this data set can be useful to and used by all BuildXL customers. The viewer is Windows Forms-based but the Forms dependency is minimal (just tree views and menus, basically).

I hope to enable this for all BuildXL builds of Windows before the end of October.

Related work items: #1858158
This commit is contained in:
Rob Jellinghaus 2021-10-22 21:21:32 +00:00
Parent 52de2ff51a
Commit 09365989aa
42 changed files: 4280 additions and 771 deletions

View file

@ -1343,7 +1343,7 @@ namespace BuildXL.Scheduler
BuildManifestStoreTarget buildManifestStoreTarget = null;
var executionLogPath = configuration.Logging.ExecutionLog;
if (configuration.Logging.LogPackedExecution && executionLogPath.IsValid && IsDistributedOrchestrator)
if (configuration.Logging.LogPackedExecution && executionLogPath.IsValid)
{
var packedExecutionPath = Path.ChangeExtension(executionLogPath.ToString(Context.PathTable), "PXL"); // Packed eXecution Log
m_packedExecutionExporter = new PackedExecutionExporter(PipGraph, packedExecutionPath);

View file

@ -37,9 +37,9 @@ namespace BuildXL.Scheduler.Tracing
/// <inheritdoc />
public virtual IExecutionLogTarget CreateWorkerTarget(uint workerId)
{
return null;
}
// If it can handle worker events, it is its own worker target;
// CurrentEventWorkerId will carry the worker ID dynamically
=> CanHandleWorkerEvents ? this : null;
/// <inheritdoc/>
public virtual void PipExecutionDirectoryOutputs(PipExecutionDirectoryOutputs data)

View file

@ -1,24 +1,24 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#if NET_CORE
using BuildXL.Utilities.PackedExecution;
using BuildXL.Utilities.PackedTable;
#endif
using BuildXL.Pips;
using BuildXL.Pips.Graph;
using BuildXL.Pips.Operations;
using BuildXL.Scheduler.Fingerprints;
using BuildXL.Utilities;
using BuildXL.Utilities.ParallelAlgorithms;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics.ContractsLight;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using BuildXL.Cache.ContentStore.Hashing;
using BuildXL.Pips;
using BuildXL.Pips.Graph;
using BuildXL.Pips.Operations;
using BuildXL.Scheduler.Fingerprints;
using BuildXL.Utilities;
using BuildXL.Utilities.ParallelAlgorithms;
#if NET_CORE
using BuildXL.Utilities.PackedExecution;
using BuildXL.Utilities.PackedTable;
#endif
namespace BuildXL.Scheduler.Tracing
{
@ -35,6 +35,62 @@ namespace BuildXL.Scheduler.Tracing
using P_PipType = Utilities.PackedExecution.PipType;
#endif
/// <summary>
/// Statistics for a given export, for cross-checking against analyzer.
/// </summary>
/// <remarks>
/// The fields here are deliberately all public for ease of calling Interlocked methods with refs to any field;
/// not a great pattern in general, but adequate for this purpose.
/// Serialized to statistics.json alongside the exported tables.
/// </remarks>
[Newtonsoft.Json.JsonObject(MemberSerialization = Newtonsoft.Json.MemberSerialization.Fields | Newtonsoft.Json.MemberSerialization.OptOut)]
public class PackedExecutionExportStatistics
{
    /// <summary>Total FileArtifactContentDecided events received.</summary>
    public int FileArtifactContentDecidedEventCount;
    /// <summary>Subset of FileArtifactContentDecided events for output files with a known length.</summary>
    public int FileArtifactOutputWithKnownLengthCount;
    /// <summary>Total ProcessFingerprintComputed events received.</summary>
    public int ProcessFingerprintComputedEventCount;
    /// <summary>Fingerprint computations of kind Execution (presumably incremented by the pip processor — confirm).</summary>
    public int ProcessFingerprintComputedExecutionCount;
    /// <summary>Strong fingerprints seen across fingerprint computations (incremented elsewhere — confirm).</summary>
    public int ProcessFingerprintComputedStrongFingerprintCount;
    /// <summary>Consumed paths seen across fingerprint computations (incremented elsewhere — confirm).</summary>
    public int ProcessFingerprintComputedConsumedPathCount;
    /// <summary>Total PipExecutionDirectoryOutputs events received.</summary>
    public int PipExecutionDirectoryOutputsEventCount;
    /// <summary>Output directories seen across all PipExecutionDirectoryOutputs events.</summary>
    public int PipExecutionDirectoryOutputsOutputCount;
    /// <summary>Files seen within those output directories.</summary>
    public int PipExecutionDirectoryOutputsFileCount;
    /// <summary>Total ProcessExecutionMonitoringReported events received.</summary>
    public int ProcessExecutionMonitoringReportedEventCount;
    /// <summary>Subset of those events with a non-null ReportedProcesses collection.</summary>
    public int ProcessExecutionMonitoringReportedNonNullCount;
    /// <summary>Reported processes seen across all monitoring events.</summary>
    public int ProcessExecutionMonitoringReportedProcessCount;
    /// <summary>Number of pips traversed when building the pip dependency relation.</summary>
    public int PipListCount;
    /// <summary>Total pip-to-pip dependency edges added to the relation.</summary>
    public int PipDependencyCount;
    /// <summary>ProcessPipInfo entries produced by the concurrent pip processor.</summary>
    public int ProcessPipInfoCount;
    /// <summary>Declared input files summed over all process pips.</summary>
    public int DeclaredInputFileCount;
    /// <summary>Declared input directories summed over all process pips.</summary>
    public int DeclaredInputDirectoryCount;
    /// <summary>Consumed files summed over all process pips.</summary>
    public int ConsumedFileCount;
    /// <summary>Consumed files whose size in the file table was still zero (unknown) when consumed.</summary>
    public int ConsumedFileUnknownSizeCount;
    /// <summary>Files whose content was decided (materialized from cache or produced).</summary>
    public int DecidedFileCount;
    /// <summary>Decided files for which a valid producer pip was found.</summary>
    public int DecidedFileValidProducerCount;
    /// <summary>Decided files that already had a (different) producer recorded when updated.</summary>
    public int DecidedFileProducerConflictCount;
}
/// <summary>
/// Exports the build graph and execution data in PackedExecution format.
/// </summary>
@ -46,6 +102,9 @@ namespace BuildXL.Scheduler.Tracing
public class PackedExecutionExporter : ExecutionAnalyzerBase
{
#if NET_CORE
#region Fields
private readonly string m_outputDirectoryPath;
/// <summary>
@ -70,42 +129,51 @@ namespace BuildXL.Scheduler.Tracing
private readonly PackedExecution.Builder m_packedExecutionBuilder;
/// <summary>
/// Side dictionary tracking the BuildXL FileArtifact objects, for use when finding the producing pip.
/// List of decided files (either materialized from cache, or produced).
/// </summary>
private readonly Dictionary<AbsolutePath, (FileId fileId, FileArtifact fileArtifact)> m_pathsToFiles =
new Dictionary<AbsolutePath, (FileId fileId, FileArtifact fileArtifact)>();
/// <summary>
/// Side dictionary tracking the BuildXL DictionaryArtifact objects, for use when finding the producing pip.
/// </summary>
private readonly Dictionary<AbsolutePath, (DirectoryId directoryId, DirectoryArtifact directoryArtifact)> m_pathsToDirectories =
new Dictionary<AbsolutePath, (DirectoryId directoryId, DirectoryArtifact directoryArtifact)>();
/// <summary>
/// List of paths to decided files (either materialized from cache, or produced).
/// </summary>
private readonly HashSet<AbsolutePath> m_decidedFiles = new HashSet<AbsolutePath>();
private readonly HashSet<FileArtifact> m_decidedFiles = new HashSet<FileArtifact>();
/// <summary>
/// Upwards index from files to their containing director(ies).
/// </summary>
private readonly Dictionary<AbsolutePath, List<DirectoryArtifact>> m_parentOutputDirectory =
new Dictionary<AbsolutePath, List<DirectoryArtifact>>();
private readonly Dictionary<FileArtifact, List<DirectoryArtifact>> m_parentOutputDirectory = new Dictionary<FileArtifact, List<DirectoryArtifact>>();
/// <summary>
/// Count of processed pips.
/// Index to optimize AbsolutePath->FileId lookup.
/// </summary>
private int m_processedPipCount;
/// <remarks>
/// Note that this arguably should be keyed off FileArtifact rather than AbsolutePath, but this is made
/// difficult by the fact that the ObservedInput type (which represents pip-consumed files) has only
/// a Path attribute, and doesn't track the source FileArtifacts. So we have to have some way to map from
/// path to FileId for purposes of determining the per-pip ConsumedFiles.
/// </remarks>
private readonly Dictionary<AbsolutePath, FileId> m_fileIndex = new Dictionary<AbsolutePath, FileId>();
/// <summary>
/// Count of processed files.
/// Index to optimize AbsolutePath->DirectoryId lookup.
/// </summary>
private int m_processedFileCount;
private readonly Dictionary<DirectoryArtifact, DirectoryId> m_directoryIndex = new Dictionary<DirectoryArtifact, DirectoryId>();
/// <summary>
/// The list of WorkerAnalyzer instances which consume per-worker data.
/// The processor object which consumes pip execution data and analyzes it for file provenance.
/// </summary>
private readonly List<WorkerAnalyzer> m_workerAnalyzers = new List<WorkerAnalyzer>();
private readonly ConcurrentPipProcessor m_concurrentPipProcessor;
/// <summary>
/// Buffer for accessing the data of a hash as an array of ulongs.
/// </summary>
/// <remarks>
/// This is 256 bits as that is the most we ever save in this format.
/// </remarks>
private readonly ulong[] m_hashBuffer = new ulong[4];
/// <summary>
/// The statistics for this run; not thread-safe, only access from a serial context.
/// </summary>
private readonly PackedExecutionExportStatistics m_statistics = new PackedExecutionExportStatistics();
#endregion
#endif
/// <summary>
@ -127,13 +195,12 @@ namespace BuildXL.Scheduler.Tracing
m_packedExecution.ConstructRelationTables();
m_packedExecutionBuilder = new PackedExecution.Builder(m_packedExecution);
m_concurrentPipProcessor = new ConcurrentPipProcessor(this);
if (threadSafe)
{
m_lockObject = this;
}
Console.WriteLine($"PackedExecutionExporter: Constructed at {DateTime.Now}.");
#endif
}
@ -162,13 +229,26 @@ namespace BuildXL.Scheduler.Tracing
BuildFileProducers();
m_packedExecutionBuilder.Complete();
// and write it out
m_packedExecution.SaveToDirectory(m_outputDirectoryPath);
// and the stats
File.WriteAllText(
Path.Combine(m_outputDirectoryPath, "statistics.json"),
Newtonsoft.Json.JsonConvert.SerializeObject(m_statistics, Newtonsoft.Json.Formatting.Indented));
return 0;
}
#region Analyzer event handlers
#region Analyzer event handlers
/// <summary>
/// Get the ConcurrentPipProcessor which consumes and analyzes pip execution data.
/// </summary>
/// <returns>The processor instance; currently always <c>m_concurrentPipProcessor</c>.</returns>
private ConcurrentPipProcessor GetConcurrentPipProcessor() => m_concurrentPipProcessor;
/// <summary>
/// Call the action from within a lock if m_lockObject is set.
@ -177,60 +257,48 @@ namespace BuildXL.Scheduler.Tracing
/// The "this" object and the action's argument are passed separately, to allow the
/// action to be static.
/// </remarks>
private void CallSerialized<T>(Action<PackedExecutionExporter, T> action, T argument)
private void CallSerialized<T>(Action<T> action, T argument)
{
if (m_lockObject != null)
{
lock (m_lockObject)
{
action(this, argument);
action(argument);
}
}
else
{
action(this, argument);
action(argument);
}
}
private static readonly Action<PackedExecutionExporter, WorkerListEventData> s_workerListAction =
(exporter, data) => exporter.WorkerListInternal(data);
/// <summary>
/// Handle a list of workers.
/// </summary>
public override void WorkerList(WorkerListEventData data)
{
CallSerialized(s_workerListAction, data);
}
public override void WorkerList(WorkerListEventData data)
=> CallSerialized(WorkerListInternal, data);
private void WorkerListInternal(WorkerListEventData data)
{
foreach (string workerName in data.Workers)
{
WorkerId workerId = m_packedExecutionBuilder.WorkerTableBuilder.GetOrAdd(workerName);
m_workerAnalyzers.Add(new WorkerAnalyzer(this, workerName, workerId));
m_packedExecutionBuilder.WorkerTableBuilder.GetOrAdd(workerName);
}
}
private static readonly Action<PackedExecutionExporter, FileArtifactContentDecidedEventData> s_fileArtifactContentDecidedAction =
(exporter, data) => exporter.FileArtifactContentDecidedInternal(data);
/// <summary>
/// File artifact content (size, origin) is now known; create a FileTable entry for this file.
/// </summary>
public override void FileArtifactContentDecided(FileArtifactContentDecidedEventData data)
{
CallSerialized(s_fileArtifactContentDecidedAction, data);
}
=> CallSerialized(FileArtifactContentDecidedInternal, data);
private void FileArtifactContentDecidedInternal(FileArtifactContentDecidedEventData data)
{
m_statistics.FileArtifactContentDecidedEventCount++;
if (data.FileArtifact.IsOutputFile && data.FileContentInfo.HasKnownLength)
{
if (((++m_processedFileCount) % 1000000) == 0)
{
Console.WriteLine($"Processed {m_processedFileCount} files...");
}
m_statistics.FileArtifactOutputWithKnownLengthCount++;
ContentFlags contentFlags = default;
switch (data.OutputOrigin)
@ -246,119 +314,225 @@ namespace BuildXL.Scheduler.Tracing
// PipOutputOrigin.UpToDate - not relevant to this analyzer
}
// TODO: evaluate optimizing this with a direct hierarchical BXL-Path-to-PackedTable-Name mapping
string pathString = data.FileArtifact.Path.ToString(PathTable).ToCanonicalizedPath();
FileId fileId = m_packedExecutionBuilder.FileTableBuilder.UpdateOrAdd(
pathString,
data.FileContentInfo.Length,
default,
contentFlags);
UpdateOrAddFile(
data.FileArtifact,
data.FileContentInfo.Length,
contentFlags,
FromContentHash(data.FileContentInfo.Hash),
data.FileArtifact.RewriteCount);
// And save the FileId and the FileArtifact for later use when searching for producing pips.
m_pathsToFiles[data.FileArtifact.Path] = (fileId, data.FileArtifact);
m_decidedFiles.Add(data.FileArtifact.Path);
m_decidedFiles.Add(data.FileArtifact);
}
}
private static readonly Action<PackedExecutionExporter, ProcessFingerprintComputationEventData> s_processFingerprintComputedAction =
(exporter, data) => exporter.ProcessFingerprintComputedInternal(data);
/// <summary>
/// Get a FileHash from the first 256 bits of the content hash.
/// </summary>
/// <remarks>
/// NOTE(review): this reuses the shared m_hashBuffer scratch array, so concurrent calls
/// would race — confirm this is only ever invoked from a serialized context.
/// </remarks>
private FileHash FromContentHash(ContentHash contentHash)
{
    // Reset the scratch buffer; any slots beyond the hash length stay zero.
    Array.Clear(m_hashBuffer, 0, m_hashBuffer.Length);

    int byteCount = Math.Min(m_hashBuffer.Length * sizeof(ulong), contentHash.ByteLength);
    for (int index = 0; index < byteCount; index++)
    {
        // Pack each byte big-endian into its position within the containing ulong slot.
        m_hashBuffer[index / 8] |= (ulong)contentHash[index] << (56 - (index % 8) * 8);
    }

    return new FileHash(m_hashBuffer);
}
/// <summary>
/// Look up or create the DirectoryId for a directory artifact, given a BuildXL pip ID.
/// </summary>
private DirectoryId GetOrAddDirectory(DirectoryArtifact directoryArtifact, B_PipId producerPip)
{
    // This cast is safe because BuildPips() confirms that B_PipId and P_PipId Values are the same.
    P_PipId packedProducerPip = new P_PipId((int)producerPip.Value);
    return GetOrAddDirectory(directoryArtifact, packedProducerPip);
}
/// <summary>
/// Look up or create the DirectoryId for a directory artifact, caching the result in m_directoryIndex.
/// </summary>
private DirectoryId GetOrAddDirectory(DirectoryArtifact directoryArtifact, P_PipId producerPip)
{
    // Fast path: this artifact has already been indexed.
    if (m_directoryIndex.TryGetValue(directoryArtifact, out DirectoryId existingId))
    {
        return existingId;
    }

    string canonicalPath = directoryArtifact.Path.ToString(PathTable).ToCanonicalizedPath();
    DirectoryId addedId = m_packedExecutionBuilder.DirectoryTableBuilder.GetOrAdd(
        canonicalPath,
        producerPip: producerPip,
        contentFlags: default,
        isSharedOpaque: directoryArtifact.IsSharedOpaque,
        partialSealId: directoryArtifact.PartialSealId);

    m_directoryIndex[directoryArtifact] = addedId;
    return addedId;
}
/// <summary>
/// Look up or create the FileId for a path (with default metadata if newly added), caching the result in m_fileIndex.
/// </summary>
private FileId GetOrAddFile(AbsolutePath path)
{
    // Fast path: this path has already been indexed.
    if (m_fileIndex.TryGetValue(path, out FileId existingId))
    {
        return existingId;
    }

    string canonicalPath = path.ToString(PathTable).ToCanonicalizedPath();
    FileId addedId = m_packedExecutionBuilder.FileTableBuilder.GetOrAdd(
        canonicalPath,
        sizeInBytes: default,
        contentFlags: default,
        hash: default,
        rewriteCount: default);

    m_fileIndex[path] = addedId;
    return addedId;
}
/// <summary>
/// Add a FileTable entry (or update an existing one) with full metadata, and record it in m_fileIndex.
/// </summary>
private FileId UpdateOrAddFile(FileArtifact fileArtifact, long length, ContentFlags contentFlags, FileHash hash, int rewriteCount)
{
    string canonicalPath = fileArtifact.Path.ToString(PathTable).ToCanonicalizedPath();
    FileId fileId = m_packedExecutionBuilder.FileTableBuilder.UpdateOrAdd(
        canonicalPath,
        length,
        contentFlags,
        hash,
        rewriteCount);

    // NOTE(review): m_fileIndex is declared keyed by AbsolutePath, so indexing with the
    // FileArtifact here relies on an implicit conversion — confirm this is intended.
    m_fileIndex[fileArtifact] = fileId;
    return fileId;
}
/// <summary>
/// A process fingerprint was computed; use the execution data.
/// </summary>
public override void ProcessFingerprintComputed(ProcessFingerprintComputationEventData data)
{
CallSerialized(s_processFingerprintComputedAction, data);
}
=> CallSerialized(ProcessFingerprintComputedInternal, data);
private void ProcessFingerprintComputedInternal(ProcessFingerprintComputationEventData data)
{
if (data.Kind == FingerprintComputationKind.Execution)
{
if (((++m_processedPipCount) % 1000) == 0)
{
Console.WriteLine($"Processed {m_processedPipCount} pips...");
}
}
GetWorkerAnalyzer()?.ProcessFingerprintComputed(data);
m_statistics.ProcessFingerprintComputedEventCount++;
GetConcurrentPipProcessor()?.ProcessFingerprintComputed(data);
}
private static readonly Action<PackedExecutionExporter, PipExecutionDirectoryOutputs> s_pipExecutionDirectoryOutputsAction =
(exporter, data) => exporter.PipExecutionDirectoryOutputsInternal(data);
/// <summary>
/// The directory outputs of a pip are now known; index the directory contents
/// </summary>
/// <param name="data"></param>
public override void PipExecutionDirectoryOutputs(PipExecutionDirectoryOutputs data)
{
CallSerialized(s_pipExecutionDirectoryOutputsAction, data);
}
=> CallSerialized(PipExecutionDirectoryOutputsInternal, data);
private void PipExecutionDirectoryOutputsInternal(PipExecutionDirectoryOutputs data)
{
m_statistics.PipExecutionDirectoryOutputsEventCount++;
foreach (var kvp in data.DirectoryOutputs)
{
DirectoryId directoryId = m_packedExecutionBuilder.DirectoryTableBuilder.GetOrAdd(
kvp.directoryArtifact.Path.ToString(PathTable).ToCanonicalizedPath(),
default,
default);
m_statistics.PipExecutionDirectoryOutputsOutputCount++;
m_pathsToDirectories[kvp.directoryArtifact.Path] = (directoryId, kvp.directoryArtifact);
DirectoryId directoryId = GetOrAddDirectory(kvp.directoryArtifact, data.PipId);
foreach (FileArtifact fileArtifact in kvp.fileArtifactArray)
{
m_statistics.PipExecutionDirectoryOutputsFileCount++;
// The XLG file can wind up constructing a given file instance either here or in
// FileArtifactContentDecided. If it publishes it in both places, that place's entry
// should win, which is why this uses GetOrAdd and the other location uses UpdateOrAdd.
FileId fileId = m_packedExecutionBuilder.FileTableBuilder.GetOrAdd(
fileArtifact.Path.ToString(PathTable).ToCanonicalizedPath(),
default, default, default);
m_pathsToFiles[fileArtifact.Path] = (fileId, fileArtifact);
// should win, which is why this calls GetOrAdd rather than UpdateOrAdd.
FileId fileId = GetOrAddFile(fileArtifact);
m_packedExecutionBuilder.DirectoryContentsBuilder.Add(directoryId, fileId);
// make the index from files up to containing directories
// TODO: when can there be more than one entry in this list?
if (!m_parentOutputDirectory.TryGetValue(fileArtifact.Path, out List<DirectoryArtifact> parents))
if (!m_parentOutputDirectory.TryGetValue(fileArtifact, out List<DirectoryArtifact> parents))
{
parents = new List<DirectoryArtifact>();
m_parentOutputDirectory.Add(fileArtifact.Path, parents);
m_parentOutputDirectory.Add(fileArtifact, parents);
}
parents.Add(kvp.directoryArtifact);
}
}
}
#endregion
/// <summary>Collect pip performance data.</summary>
public override void PipExecutionPerformance(PipExecutionPerformanceEventData data)
{
    // Route through CallSerialized so the handler runs under the exporter's lock (if enabled).
    CallSerialized(PipExecutionPerformanceInternal, data);
}
/// <summary>
/// Get the WorkerAnalyzer instance for the current worker ID (available as a base field on the analyzer).
/// </summary>
/// <returns></returns>
private WorkerAnalyzer GetWorkerAnalyzer()
internal void PipExecutionPerformanceInternal(PipExecutionPerformanceEventData data)
{
// If GetWorkerAnalyzer() is called while m_workerAnalyzers is empty,
// we presume this is because this is a local BXL execution with no WorkerList.
// So we add one analyzer for the "local" worker.
if (m_workerAnalyzers.Count == 0)
if (data.ExecutionPerformance != null)
{
m_workerAnalyzers.Add(new WorkerAnalyzer(this, "", default(WorkerId)));
}
PipExecutionEntry pipEntry = new PipExecutionEntry(
(Utilities.PackedExecution.PipExecutionLevel)(int)data.ExecutionPerformance.ExecutionLevel,
data.ExecutionPerformance.ExecutionStart,
data.ExecutionPerformance.ExecutionStop,
new WorkerId((int)data.ExecutionPerformance.WorkerId + 1));
if (m_workerAnalyzers.Count == 1)
{
// no choice (local case)
return m_workerAnalyzers[0];
}
m_packedExecutionBuilder.PipExecutionTableBuilder.Add(new P_PipId((int)data.PipId.Value), pipEntry);
return m_workerAnalyzers[(int)CurrentEventWorkerId];
ProcessPipExecutionPerformance processPerformance = data.ExecutionPerformance as ProcessPipExecutionPerformance;
if (processPerformance != null)
{
ProcessPipExecutionEntry processPipEntry = new ProcessPipExecutionEntry(
new IOCounters(
processPerformance.IO.ReadCounters.OperationCount,
processPerformance.IO.ReadCounters.TransferCount,
processPerformance.IO.WriteCounters.OperationCount,
processPerformance.IO.WriteCounters.TransferCount,
processPerformance.IO.OtherCounters.OperationCount,
processPerformance.IO.OtherCounters.TransferCount),
processPerformance.KernelTime,
new MemoryCounters(
processPerformance.MemoryCounters.AverageCommitSizeMb,
processPerformance.MemoryCounters.AverageWorkingSetMb,
processPerformance.MemoryCounters.PeakCommitSizeMb,
processPerformance.MemoryCounters.PeakWorkingSetMb),
processPerformance.NumberOfProcesses,
processPerformance.ProcessExecutionTime,
processPerformance.ProcessorsInPercents,
processPerformance.SuspendedDurationMs,
processPerformance.UserTime);
m_packedExecutionBuilder.ProcessPipExecutionTableBuilder.Add(new P_PipId((int)data.PipId.Value), processPipEntry);
}
}
}
#region Pips
/// <summary>Collect process performance data.</summary>
public override void ProcessExecutionMonitoringReported(ProcessExecutionMonitoringReportedEventData data)
{
    // Route through CallSerialized so the handler runs under the exporter's lock (if enabled).
    CallSerialized(ProcessExecutionMonitoringReportedInternal, data);
}
/// <summary>
/// Record one ProcessExecutionEntry per reported process, tallying statistics along the way.
/// </summary>
private void ProcessExecutionMonitoringReportedInternal(ProcessExecutionMonitoringReportedEventData data)
{
    m_statistics.ProcessExecutionMonitoringReportedEventCount++;

    // Guard clause: nothing to record when no processes were reported.
    if (data.ReportedProcesses == null)
    {
        return;
    }

    m_statistics.ProcessExecutionMonitoringReportedNonNullCount++;
    foreach (var process in data.ReportedProcesses)
    {
        m_statistics.ProcessExecutionMonitoringReportedProcessCount++;

        IOCounters ioCounters = new IOCounters(
            process.IOCounters.ReadCounters.OperationCount,
            process.IOCounters.ReadCounters.TransferCount,
            process.IOCounters.WriteCounters.OperationCount,
            process.IOCounters.WriteCounters.TransferCount,
            process.IOCounters.OtherCounters.OperationCount,
            process.IOCounters.OtherCounters.TransferCount);

        // Intern the process path via the shared path table builder.
        var pathId = m_packedExecutionBuilder.FileTableBuilder.PathTableBuilder.GetOrAdd(process.Path);

        ProcessExecutionEntry processEntry = new ProcessExecutionEntry(
            process.CreationTime,
            process.ExitCode,
            process.ExitTime,
            ioCounters,
            process.KernelTime,
            process.ParentProcessId,
            pathId,
            process.ProcessId,
            process.UserTime);

        m_packedExecutionBuilder.ProcessExecutionTableBuilder.Add(new P_PipId((int)data.PipId.Value), processEntry);
    }
}
#endregion
#region Building
private List<PipReference> BuildPips()
{
@ -377,9 +551,6 @@ namespace BuildXL.Scheduler.Tracing
}
}
// ensure the PipExecutionTable is ready to be pipulated (heh)
m_packedExecution.PipExecutionTable.FillToBaseTableCount();
return pipList;
}
@ -401,7 +572,8 @@ namespace BuildXL.Scheduler.Tracing
}
P_PipType pipType = (P_PipType)(int)pip.PipType;
P_PipId g_pipId = pipBuilder.Add(pip.SemiStableHash, pipName, pipType);
long semiStableHash = PipTable.GetPipSemiStableHash(pip.PipId);
P_PipId g_pipId = pipBuilder.Add(semiStableHash, pipName, pipType);
return g_pipId;
}
@ -412,6 +584,9 @@ namespace BuildXL.Scheduler.Tracing
// Since we added all the pips in pipList order to PipTable, we can traverse them again in the same order
// to build the relation.
SpannableList<P_PipId> buffer = new SpannableList<P_PipId>(); // to accumulate the IDs we add to the relation
m_statistics.PipListCount = pipList.Count;
for (int i = 0; i < pipList.Count; i++)
{
IEnumerable<P_PipId> pipDependencies = PipGraph
@ -426,6 +601,8 @@ namespace BuildXL.Scheduler.Tracing
buffer.Clear();
buffer.AddRange(pipDependencies);
m_statistics.PipDependencyCount += buffer.Count;
// don't need to use a builder here, we're adding all dependencies in PipId order
m_packedExecution.PipDependencies.Add(buffer.AsSpan());
}
@ -433,61 +610,68 @@ namespace BuildXL.Scheduler.Tracing
private void BuildProcessPipRelations()
{
int totalProcessPips = 0;
int totalDeclaredInputFiles = 0, totalDeclaredInputDirectories = 0, totalConsumedFiles = 0;
m_concurrentPipProcessor.Complete();
foreach (var worker in m_workerAnalyzers)
foreach (ConcurrentPipProcessor.ProcessPipInfo processPipInfo in m_concurrentPipProcessor.ProcessPipInfoList)
{
Console.WriteLine($"Completing worker {worker.Name}");
worker.Complete();
m_statistics.ProcessPipInfoCount++;
foreach (WorkerAnalyzer.ProcessPipInfo processPipInfo in worker.ProcessPipInfoList)
foreach (FileArtifact declaredInputFile in processPipInfo.DeclaredInputFiles)
{
foreach (AbsolutePath declaredInputFilePath in processPipInfo.DeclaredInputFiles)
m_statistics.DeclaredInputFileCount++;
FileId fileId = GetOrAddFile(declaredInputFile);
m_packedExecutionBuilder.DeclaredInputFilesBuilder.Add(processPipInfo.PipId, fileId);
}
foreach (DirectoryArtifact directoryArtifact in processPipInfo.DeclaredInputDirectories)
{
m_statistics.DeclaredInputDirectoryCount++;
DirectoryId directoryId = GetOrAddDirectory(directoryArtifact, processPipInfo.PipId);
m_packedExecutionBuilder.DeclaredInputDirectoriesBuilder.Add(processPipInfo.PipId, directoryId);
}
foreach (AbsolutePath consumedInputPath in processPipInfo.ConsumedFiles)
{
m_statistics.ConsumedFileCount++;
FileId fileId = GetOrAddFile(consumedInputPath);
if (m_packedExecution.FileTable[fileId].SizeInBytes == 0)
{
m_packedExecutionBuilder.DeclaredInputFilesBuilder.Add(processPipInfo.PipId, m_pathsToFiles[declaredInputFilePath].fileId);
m_statistics.ConsumedFileUnknownSizeCount++;
}
totalDeclaredInputFiles += processPipInfo.DeclaredInputFiles.Count;
foreach (DirectoryArtifact directoryArtifact in processPipInfo.DeclaredInputDirectories)
{
m_packedExecutionBuilder.DeclaredInputDirectoriesBuilder.Add(processPipInfo.PipId, m_pathsToDirectories[directoryArtifact.Path].directoryId);
}
totalDeclaredInputDirectories += processPipInfo.DeclaredInputDirectories.Count;
foreach (AbsolutePath consumedInputPath in processPipInfo.ConsumedFiles)
{
m_packedExecutionBuilder.ConsumedFilesBuilder.Add(processPipInfo.PipId, m_pathsToFiles[consumedInputPath].fileId);
}
totalConsumedFiles += processPipInfo.ConsumedFiles.Count;
m_packedExecution.PipExecutionTable[processPipInfo.PipId] = new PipExecutionEntry(processPipInfo.Worker);
totalProcessPips++;
m_packedExecutionBuilder.ConsumedFilesBuilder.Add(processPipInfo.PipId, fileId);
}
}
m_packedExecutionBuilder.DeclaredInputFilesBuilder.Complete();
m_packedExecutionBuilder.DeclaredInputDirectoriesBuilder.Complete();
m_packedExecutionBuilder.ConsumedFilesBuilder.Complete();
}
/// <summary>
/// Build the FileProducer relation.
/// </summary>
private void BuildFileProducers()
{
// set file producer
m_packedExecution.FileProducer.FillToBaseTableCount();
// Object we lock when checking for duplicate producers
object localLock = new object();
m_statistics.DecidedFileCount = m_decidedFiles.Count;
// set file producers, concurrently.
Parallel.ForEach(
m_decidedFiles,
new ParallelOptions() { MaxDegreeOfParallelism = 1 }, // B4PR: Environment.ProcessorCount },
path =>
new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },
fileArtifact =>
{
var tuple = m_pathsToFiles[path];
// try to find a producer of this file
var producer = PipGraph.TryGetProducer(tuple.fileArtifact);
var producer = PipGraph.TryGetProducer(fileArtifact);
if (!producer.IsValid)
{
// it's not a statically declared file, so it must be produced as a part of an output directory
if (m_parentOutputDirectory.TryGetValue(path, out var containingDirectories))
if (m_parentOutputDirectory.TryGetValue(fileArtifact, out var containingDirectories))
{
foreach (var directory in containingDirectories)
{
@ -502,15 +686,24 @@ namespace BuildXL.Scheduler.Tracing
if (producer.IsValid)
{
// Theoretically this is safe if we never update the same file twice with different producers.
// BXL: should we have an interlock to guard against this?
// BXL: can a file have multiple producers legitimately? (PipGraph has some kind of method for getting all producers of a file)
FileEntry entry = m_packedExecution.FileTable[tuple.fileId];
// Now that we know who it was (the time-consuming part), take a lock before checking/modifying the producer table.
lock (localLock)
{
m_statistics.DecidedFileValidProducerCount++;
Contract.Assert(entry.ProducerPip.Equals(default(P_PipId)),
$"Should not have set producer pip {entry.ProducerPip} yet for file {m_packedExecution.PathTable.GetText(entry.Path)}");
FileId fileId = GetOrAddFile(fileArtifact);
m_packedExecution.FileTable[tuple.fileId] = entry.WithProducerPip(new P_PipId((int)producer.Value));
// Theoretically this is safe if we never update the same file twice with different producers.
// BXL: should we have an interlock to guard against this?
// BXL: can a file have multiple producers legitimately? (PipGraph has some kind of method for getting all producers of a file)
P_PipId currentProducer = m_packedExecution.FileProducer[fileId];
if (currentProducer != default)
{
m_statistics.DecidedFileProducerConflictCount++;
}
m_packedExecution.FileProducer[fileId] = new P_PipId((int)producer.Value);
}
}
});
}
@ -523,61 +716,44 @@ namespace BuildXL.Scheduler.Tracing
/// <remarks>
/// Finding the producer pip for a file is evidently expensive and shouldn't be done on the execution log event thread.
/// </remarks>
private class WorkerAnalyzer
private class ConcurrentPipProcessor
{
/// <summary>
/// The information about an executed process pip.
/// </summary>
/// <summary>The information about an executed process pip.</summary>
/// <remarks>
/// TODO: Consider making this less object-y and more columnar... store all the declared input files
/// in just one big list indexed by PipId, etc.
/// </remarks>
public struct ProcessPipInfo
{
/// <summary>
/// The pip ID.
/// </summary>
/// <summary>The pip ID.</summary>
public readonly P_PipId PipId;
/// <summary>
/// Input files declared for the pip.
/// </summary>
public readonly List<AbsolutePath> DeclaredInputFiles;
/// <summary>
/// Input directories declared for the pip.
/// </summary>
public readonly List<DirectoryArtifact> DeclaredInputDirectories;
/// <summary>
/// Consumed files declared for the pip.
/// </summary>
public readonly List<AbsolutePath> ConsumedFiles;
/// <summary>
/// The worker that executed the pip.
/// </summary>
public readonly WorkerId Worker;
/// <summary>The numeric semistable hash.</summary>
public readonly long SemiStableHash;
/// <summary>Input files declared for the pip.</summary>
public readonly ICollection<FileArtifact> DeclaredInputFiles;
/// <summary>Input directories declared for the pip.</summary>
public readonly ICollection<DirectoryArtifact> DeclaredInputDirectories;
/// <summary>Consumed files declared for the pip.</summary>
public readonly ICollection<AbsolutePath> ConsumedFiles;
/// <summary>
/// Construct a ProcessPipInfo.
/// </summary>
public ProcessPipInfo(
P_PipId pipId,
List<AbsolutePath> declaredInputFiles,
List<DirectoryArtifact> declaredInputDirs,
List<AbsolutePath> consumedFiles,
WorkerId worker)
long semiStableHash,
ICollection<FileArtifact> declaredInputFiles,
ICollection<DirectoryArtifact> declaredInputDirs,
ICollection<AbsolutePath> consumedFiles)
{
PipId = pipId;
SemiStableHash = semiStableHash;
DeclaredInputFiles = declaredInputFiles;
DeclaredInputDirectories = declaredInputDirs;
ConsumedFiles = consumedFiles;
Worker = worker;
}
}
/// <summary>
/// ID of this analyzer's worker.
/// </summary>
private readonly WorkerId m_workerId;
/// <summary>
/// The parent exporter.
/// </summary>
@ -601,13 +777,12 @@ namespace BuildXL.Scheduler.Tracing
/// <summary>
/// Construct a WorkerAnalyzer.
/// </summary>
public WorkerAnalyzer(PackedExecutionExporter exporter, string name, WorkerId workerId)
public ConcurrentPipProcessor(PackedExecutionExporter exporter)
{
m_exporter = exporter;
Name = name;
m_workerId = workerId;
m_processingBlock = new ActionBlockSlim<ProcessFingerprintComputationEventData>(1, ProcessFingerprintComputedCore);
m_processingBlock = new ActionBlockSlim<ProcessFingerprintComputationEventData>(
degreeOfParallelism: -1, // default
processItemAction: ProcessFingerprintComputedCore);
}
/// <summary>
@ -626,42 +801,60 @@ namespace BuildXL.Scheduler.Tracing
m_processingBlock.Post(data);
}
private static readonly ICollection<AbsolutePath> s_noPaths = new List<AbsolutePath>();
/// <summary>
/// Really handle the incoming fingerprint computation.
/// </summary>
public void ProcessFingerprintComputedCore(ProcessFingerprintComputationEventData data)
/// <remarks>
/// This is a concurrent method, not a serial method, so beware of shared mutable state.
/// </remarks>
internal void ProcessFingerprintComputedCore(ProcessFingerprintComputationEventData data)
{
var pip = m_exporter.GetPip(data.PipId) as Process;
Contract.Assert(pip != null);
// only interested in the events generated after a corresponding pip was executed
// however, we still need to save pip description so there would be no missing entries in pips.csv
P_PipId packedPipId = new P_PipId((int)data.PipId.Value);
if (data.Kind != FingerprintComputationKind.Execution)
{
return;
}
Interlocked.Increment(ref m_exporter.m_statistics.ProcessFingerprintComputedExecutionCount);
// part 1: collect requested inputs
// count only output files/directories
// TODO: use a builder here? 400K or so objects seems livable though....
var declaredInputFiles = pip.Dependencies.Where(f => f.IsOutputFile).Select(f => f.Path).ToList();
var declaredInputDirs = pip.DirectoryDependencies.Where(d => d.IsOutputDirectory()).ToList();
var packedExecution = m_exporter.m_packedExecution;
var fileTable = packedExecution.FileTable;
var pathsToFiles = m_exporter.m_pathsToFiles;
var declaredInputFiles = pip.Dependencies.ToList();
var declaredInputDirs = pip.DirectoryDependencies.ToList();
// part 2: collect actual inputs
var consumedPaths = data.StrongFingerprintComputations.Count == 0
? new List<AbsolutePath>()
: data.StrongFingerprintComputations[0].ObservedInputs
ICollection<AbsolutePath> consumedPaths = data.StrongFingerprintComputations.Count == 0
? s_noPaths
: data
.StrongFingerprintComputations[0]
.ObservedInputs
.Where(input => input.Type == ObservedInputType.FileContentRead || input.Type == ObservedInputType.ExistingFileProbe)
.Select(input => input.Path)
.Where(path => pathsToFiles.TryGetValue(path, out var tuple) && fileTable[tuple.fileId].SizeInBytes > 0)
.ToList();
P_PipId packedPipId = new P_PipId((int)data.PipId.Value);
ProcessPipInfoList.Add(new ProcessPipInfo(packedPipId, declaredInputFiles, declaredInputDirs, consumedPaths, m_workerId));
Interlocked.Add(
ref m_exporter.m_statistics.ProcessFingerprintComputedStrongFingerprintCount,
data.StrongFingerprintComputations.Count);
Interlocked.Add(
ref m_exporter.m_statistics.ProcessFingerprintComputedConsumedPathCount,
consumedPaths.Count);
lock (ProcessPipInfoList)
{
ProcessPipInfoList.Add(new ProcessPipInfo(
packedPipId,
pip.SemiStableHash,
declaredInputFiles,
declaredInputDirs,
consumedPaths));
}
}
}
#endif

Просмотреть файл

@ -61,6 +61,54 @@ namespace BuildXL.Execution.Analyzer
}
}
/// <summary>
/// Statistics for a given export, for cross-checking against the analyzer.
/// </summary>
/// <remarks>
/// The fields here are deliberately all public for ease of calling Interlocked methods with references to any field;
/// not a great pattern in general, but adequate for this purpose.
/// Serialized as statistics.json in the output directory when analysis completes, so the analyzer's
/// numbers can be diffed against the PackedExecutionExporter's equivalent statistics.
/// </remarks>
[Newtonsoft.Json.JsonObject(MemberSerialization = Newtonsoft.Json.MemberSerialization.Fields | Newtonsoft.Json.MemberSerialization.OptOut)]
internal class FileConsumptionAnalyzerStatistics
{
/// <summary>Total FileArtifactContentDecided events received.</summary>
public int FileArtifactContentDecidedEventCount;
/// <summary>Of those, events for output files with known content length (the ones whose sizes get recorded).</summary>
public int FileArtifactOutputWithKnownLengthCount;
/// <summary>Total ProcessFingerprintComputed events received.</summary>
public int ProcessFingerprintComputedEventCount;
/// <summary>Of those, events with Kind == Execution (i.e. pips that actually executed, not cache lookups).</summary>
public int ProcessFingerprintComputedExecutionCount;
/// <summary>Total strong fingerprint computations summed over all execution events.</summary>
public int ProcessFingerprintComputedStrongFingerprintCount;
/// <summary>Total consumed (read or existence-probed) paths summed over all execution events.</summary>
public int ProcessFingerprintComputedConsumedPathCount;
/// <summary>Total PipExecutionDirectoryOutputs events received.</summary>
public int PipExecutionDirectoryOutputsEventCount;
/// <summary>Total directory outputs summed over all PipExecutionDirectoryOutputs events.</summary>
public int PipExecutionDirectoryOutputsOutputCount;
/// <summary>Total files contained in all of those directory outputs.</summary>
public int PipExecutionDirectoryOutputsFileCount;
/// <summary>Pips that were cache hits (no worker executed them; consumed/declared sizes set to -1).</summary>
public int PipCachedCount;
/// <summary>Number of executed process pips analyzed (size of the executed-pip map).</summary>
public int ProcessPipInfoCount;
/// <summary>Total declared input files summed over all executed pips.</summary>
public int DeclaredInputFileCount;
/// <summary>Total declared input directories summed over all executed pips.</summary>
// NOTE(review): the analyzer currently appears to accumulate declared input *directory* counts
// into DeclaredInputFileCount rather than this field — confirm and fix the call site.
public int DeclaredInputDirectoryCount;
/// <summary>Total consumed files summed over all executed pips.</summary>
public int ConsumedFileCount;
/// <summary>Consumed files whose size was never recorded (no matching FileArtifactContentDecided data).</summary>
public int ConsumedFileUnknownSizeCount;
/// <summary>Number of produced files for which a producer decision was attempted.</summary>
public int DecidedFileCount;
/// <summary>Produced files for which a valid producer pip was found.</summary>
public int DecidedFileValidProducerCount;
/// <summary>Files whose producer was set more than once (producer conflict).</summary>
public int DecidedFileProducerConflictCount;
}
internal sealed class FileConsumptionAnalyzer : Analyzer
{
/// <summary>
@ -96,6 +144,8 @@ namespace BuildXL.Execution.Analyzer
/// </summary>
public string OutputDirectoryPath;
private readonly FileConsumptionAnalyzerStatistics m_statistics = new FileConsumptionAnalyzerStatistics();
public override bool CanHandleWorkerEvents => true;
public FileConsumptionAnalyzer(AnalysisInput input)
@ -131,8 +181,12 @@ namespace BuildXL.Execution.Analyzer
public override void FileArtifactContentDecided(FileArtifactContentDecidedEventData data)
{
m_statistics.FileArtifactContentDecidedEventCount++;
if (data.FileArtifact.IsOutputFile && data.FileContentInfo.HasKnownLength)
{
m_statistics.FileArtifactOutputWithKnownLengthCount++;
if (!m_fileSizes.ContainsKey(data.FileArtifact.Path))
{
m_fileSizes.Add(data.FileArtifact.Path, data.FileContentInfo.Length);
@ -154,9 +208,11 @@ namespace BuildXL.Execution.Analyzer
public override void ProcessFingerprintComputed(ProcessFingerprintComputationEventData data)
{
m_statistics.ProcessFingerprintComputedEventCount++;
if (data.Kind == FingerprintComputationKind.Execution)
{
if ((Interlocked.Increment(ref m_processedPips) % 1000) == 0)
if ((m_processedPips++ % 1000) == 0)
{
Console.WriteLine($"Processing {m_processedPips}");
}
@ -167,12 +223,18 @@ namespace BuildXL.Execution.Analyzer
public override void PipExecutionDirectoryOutputs(PipExecutionDirectoryOutputs data)
{
m_statistics.PipExecutionDirectoryOutputsEventCount++;
foreach (var kvp in data.DirectoryOutputs)
{
m_statistics.PipExecutionDirectoryOutputsOutputCount++;
var paths = kvp.fileArtifactArray.Select(fa => fa.Path).ToList();
m_dynamicDirectoryContent[kvp.directoryArtifact] = paths;
foreach (var path in paths)
{
m_statistics.PipExecutionDirectoryOutputsFileCount++;
m_parentOutputDirectory.Add(path, kvp.directoryArtifact);
}
}
@ -189,9 +251,11 @@ namespace BuildXL.Execution.Analyzer
int totalDeclaredInputFiles = 0, totalDeclaredInputDirectories = 0, totalConsumedFiles = 0, totalActualProcessPips = 0;
m_statistics.ProcessPipInfoCount = m_executedProcessPips.Count;
Parallel.ForEach(
m_executedProcessPips.Keys,
new ParallelOptions() { MaxDegreeOfParallelism = 1 },//Environment.ProcessorCount },
new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },
pipId =>
{
var pip = m_executedProcessPips[pipId];
@ -200,9 +264,12 @@ namespace BuildXL.Execution.Analyzer
if (pip.Worker == null)
{
Interlocked.Increment(ref m_statistics.PipCachedCount);
// this pip was not executed (i.e., cache hit)
pip.ConsumedInputSize = -1;
pip.DeclaredInputSize = -1;
return;
}
@ -214,6 +281,8 @@ namespace BuildXL.Execution.Analyzer
var inputFiles = pooledSetInputFiles.Instance;
var consumedFiles = pooledSetConsumedFiles.Instance;
Interlocked.Add(ref m_statistics.DeclaredInputFileCount, pip.DeclaredInputFiles.Count);
// BXL executed this pip, so we can safely assume that its inputs were materialized.
foreach (var path in pip.DeclaredInputFiles)
{
@ -226,6 +295,8 @@ namespace BuildXL.Execution.Analyzer
}
totalDeclaredInputFiles += pip.DeclaredInputFiles.Count;
Interlocked.Add(ref m_statistics.DeclaredInputFileCount, pip.DeclaredInputDirectories.Count);
foreach (var directoryArtifact in pip.DeclaredInputDirectories)
{
if (m_dynamicDirectoryContent.TryGetValue(directoryArtifact, out var directoryContent))
@ -243,6 +314,8 @@ namespace BuildXL.Execution.Analyzer
}
totalDeclaredInputDirectories += pip.DeclaredInputDirectories.Count;
Interlocked.Add(ref m_statistics.ConsumedFileCount, pip.ConsumedFiles.Count);
foreach (var path in pip.ConsumedFiles)
{
if (m_fileSizes.TryGetValue(path, out var size))
@ -261,6 +334,10 @@ namespace BuildXL.Execution.Analyzer
});
}
}
else
{
Interlocked.Increment(ref m_statistics.ConsumedFileUnknownSizeCount);
}
}
totalConsumedFiles += pip.ConsumedFiles.Count;
@ -279,6 +356,8 @@ namespace BuildXL.Execution.Analyzer
Console.WriteLine($"FileConsumptionAnalyzer: Analyzed {totalActualProcessPips} executed process pips ({totalDeclaredInputFiles} declared input files, {totalDeclaredInputDirectories} declared input directories, {totalConsumedFiles} consumed files) at {DateTime.Now}.");
m_statistics.DecidedFileCount = m_producedFiles.Count;
// set file producer
Parallel.ForEach(
m_producedFiles.Keys,
@ -305,6 +384,16 @@ namespace BuildXL.Execution.Analyzer
}
}
if (producer.IsValid)
{
Interlocked.Increment(ref m_statistics.DecidedFileValidProducerCount);
}
if (outputFile.Producer.IsValid)
{
Interlocked.Increment(ref m_statistics.DecidedFileProducerConflictCount);
}
outputFile.Producer = producer;
});
@ -316,6 +405,10 @@ namespace BuildXL.Execution.Analyzer
worker.Complete();
}
File.WriteAllText(
Path.Combine(OutputDirectoryPath, "statistics.json"),
Newtonsoft.Json.JsonConvert.SerializeObject(m_statistics, Newtonsoft.Json.Formatting.Indented));
const int MaxConsumers = 10;
m_writerFiles.WriteLine($"PathId,Path,Size,Producer,Consumers,Hash");
foreach (var path in m_producedFiles.Keys.ToListSorted(PathTable.ExpandedPathComparer))

Просмотреть файл

@ -1,37 +1,66 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using BuildXL.Utilities.PackedTable;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Diagnostics.ContractsLight;
using BuildXL.Utilities.PackedTable;
namespace BuildXL.Utilities.PackedExecution
{
/// <summary>
/// Boilerplate ID type to avoid ID confusion in code.
/// IDs of directories; corresponds to BuildXL DirectoryArtifact.
/// </summary>
public struct DirectoryId : Id<DirectoryId>, IEqualityComparer<DirectoryId>
#pragma warning disable CS0660 // Type defines operator == or operator != but does not override Object.Equals(object o)
#pragma warning disable CS0661 // Type defines operator == or operator != but does not override Object.GetHashCode()
public struct DirectoryId : Id<DirectoryId>
#pragma warning restore CS0661 // Type defines operator == or operator != but does not override Object.GetHashCode()
#pragma warning restore CS0660 // Type defines operator == or operator != but does not override Object.Equals(object o)
{
/// <summary>Value as int.</summary>
public readonly int Value;
/// <summary>Constructor.</summary>
public DirectoryId(int value) { Id<StringId>.CheckNotZero(value); Value = value; }
/// <summary>Eliminator.</summary>
public int FromId() => Value;
/// <summary>Introducer.</summary>
public DirectoryId ToId(int value) => new DirectoryId(value);
/// <summary>Debugging.</summary>
/// <nodoc />
public readonly struct EqualityComparer : IEqualityComparer<DirectoryId>
{
/// <nodoc />
public bool Equals(DirectoryId x, DirectoryId y) => x.Value == y.Value;
/// <nodoc />
public int GetHashCode(DirectoryId obj) => obj.Value;
}
private readonly int m_value;
/// <nodoc />
public int Value => m_value;
/// <nodoc />
public DirectoryId(int value)
{
Id<DirectoryId>.CheckValidId(value);
m_value = value;
}
/// <nodoc />
public DirectoryId CreateFrom(int value) => new(value);
/// <nodoc />
public override string ToString() => $"DirectoryId[{Value}]";
/// <summary>Comparison.</summary>
public bool Equals([AllowNull] DirectoryId x, [AllowNull] DirectoryId y) => x.Value == y.Value;
/// <summary>Hashing.</summary>
public int GetHashCode([DisallowNull] DirectoryId obj) => obj.Value;
/// <nodoc />
public static bool operator ==(DirectoryId x, DirectoryId y) => x.Value == y.Value;
/// <nodoc />
public static bool operator !=(DirectoryId x, DirectoryId y) => !(x == y);
/// <nodoc />
public IEqualityComparer<DirectoryId> Comparer => default(EqualityComparer);
/// <nodoc />
public int CompareTo([AllowNull] DirectoryId other) => Value.CompareTo(other.Value);
}
/// <summary>
/// Information about a single file.
/// Information about a single directory.
/// </summary>
public struct DirectoryEntry
public readonly struct DirectoryEntry
{
/// <summary>
/// The directory path.
@ -45,37 +74,69 @@ namespace BuildXL.Utilities.PackedExecution
/// The content flags for this directory.
/// </summary>
public readonly ContentFlags ContentFlags;
/// <summary>
/// This corresponds exactly to the (internal) DirectoryArtifact field with the same name.
/// </summary>
public readonly uint IsSharedOpaquePlusPartialSealId;
private const byte IsSharedOpaqueShift = 31;
private const uint IsSharedOpaqueBit = 1U << IsSharedOpaqueShift;
private const uint PartialSealIdMask = (1U << IsSharedOpaqueShift) - 1;
/// <summary>
/// Construct a DirectoryEntry.
/// </summary>
public DirectoryEntry(NameId path, PipId producerPip, ContentFlags contentFlags)
{
public DirectoryEntry(NameId path, PipId producerPip, ContentFlags contentFlags, bool isSharedOpaque, uint partialSealId)
{
Contract.Requires(!isSharedOpaque || (partialSealId > 0), "A shared opaque directory should always have a proper seal id");
Contract.Requires((partialSealId & ~PartialSealIdMask) == 0, "The most significant bit of a partial seal id should not be used");
Path = path;
ProducerPip = producerPip;
ContentFlags = contentFlags;
IsSharedOpaquePlusPartialSealId = partialSealId | (isSharedOpaque ? IsSharedOpaqueBit : 0);
}
/// <summary>
/// Construct a DirectoryEntry with an already-encoded partial seal field.
/// </summary>
public DirectoryEntry(NameId path, PipId producerPip, ContentFlags contentFlags, uint isSharedOpaquePlusPartialSealId)
{
Path = path;
ProducerPip = producerPip;
ContentFlags = contentFlags;
IsSharedOpaquePlusPartialSealId = isSharedOpaquePlusPartialSealId;
}
/// <summary>
/// Construct a DirectoryEntry with replaced content flags.
/// </summary>
public DirectoryEntry WithContentFlags(ContentFlags contentFlags) { return new DirectoryEntry(Path, ProducerPip, contentFlags); }
public DirectoryEntry WithContentFlags(ContentFlags contentFlags)
=> new DirectoryEntry(Path, ProducerPip, contentFlags, IsSharedOpaquePlusPartialSealId);
/// <summary>
/// Equality comparison.
/// The unique id for partially sealed directories
/// </summary>
public uint PartialSealId => IsSharedOpaquePlusPartialSealId & PartialSealIdMask;
/// <summary>
/// Whether this directory represents a shared opaque directory
/// </summary>
public bool IsSharedOpaque => (IsSharedOpaquePlusPartialSealId & IsSharedOpaqueBit) != 0;
/// <nodoc />
public struct EqualityComparer : IEqualityComparer<DirectoryEntry>
{
/// <summary>
/// Equality.
/// </summary>
public bool Equals(DirectoryEntry x, DirectoryEntry y) => x.Path.Equals(y.Path);
/// <summary>
/// Hashing.
/// </summary>
/// <param name="obj"></param>
/// <returns></returns>
public int GetHashCode([DisallowNull] DirectoryEntry obj) => obj.Path.GetHashCode();
/// <nodoc />
public bool Equals(DirectoryEntry x, DirectoryEntry y)
=> x.Path.Equals(y.Path)
&& x.IsSharedOpaquePlusPartialSealId == y.IsSharedOpaquePlusPartialSealId;
/// <nodoc />
public int GetHashCode([DisallowNull] DirectoryEntry obj)
=> obj.Path.GetHashCode() ^ obj.IsSharedOpaquePlusPartialSealId.GetHashCode();
}
}
@ -88,10 +149,10 @@ namespace BuildXL.Utilities.PackedExecution
public class DirectoryTable : SingleValueTable<DirectoryId, DirectoryEntry>
{
/// <summary>
/// The names of files in this DirectoryTable.
/// The pathnames of directories in this DirectoryTable.
/// </summary>
/// <remarks>
/// This sub-table is owned by this DirectoryTable; the DirectoryTable constructs it, and saves and loads it.
/// This table is shared between this table and the FileTable.
/// </remarks>
public readonly NameTable PathTable;
@ -129,9 +190,19 @@ namespace BuildXL.Utilities.PackedExecution
/// The only time that value can be set is when adding a new file not previously recorded.
/// TODO: consider failing if this happens?
/// </remarks>
public DirectoryId GetOrAdd(string directoryPath, PipId producerPip, ContentFlags contentFlags)
public DirectoryId GetOrAdd(
string directoryPath,
PipId producerPip,
ContentFlags contentFlags,
bool isSharedOpaque,
uint partialSealId)
{
DirectoryEntry entry = new DirectoryEntry(PathTableBuilder.GetOrAdd(directoryPath), producerPip, contentFlags);
DirectoryEntry entry = new DirectoryEntry(
PathTableBuilder.GetOrAdd(directoryPath),
producerPip,
contentFlags,
isSharedOpaque,
partialSealId);
return GetOrAdd(entry);
}
}

Просмотреть файл

@ -1,89 +1,140 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using BuildXL.Utilities.PackedTable;
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using BuildXL.Utilities.PackedTable;
namespace BuildXL.Utilities.PackedExecution
{
/// <summary>
/// Boilerplate ID type to avoid ID confusion in code.
/// IDs of files; corresponds to BuildXL FileArtifact.
/// </summary>
public struct FileId : Id<FileId>, IEqualityComparer<FileId>
#pragma warning disable CS0660 // Type defines operator == or operator != but does not override Object.Equals(object o)
#pragma warning disable CS0661 // Type defines operator == or operator != but does not override Object.GetHashCode()
public readonly struct FileId : Id<FileId>
#pragma warning restore CS0661 // Type defines operator == or operator != but does not override Object.GetHashCode()
#pragma warning restore CS0660 // Type defines operator == or operator != but does not override Object.Equals(object o)
{
/// <nodoc />
public struct EqualityComparer : IEqualityComparer<FileId>
{
/// <nodoc />
public bool Equals(FileId x, FileId y) => x.Value == y.Value;
/// <nodoc />
public int GetHashCode(FileId obj) => obj.Value;
}
private readonly int m_value;
/// <summary>Value as int.</summary>
public readonly int Value;
/// <summary>Constructor.</summary>
public FileId(int value) { Id<FileId>.CheckNotZero(value); Value = value; }
/// <summary>Eliminator.</summary>
public int FromId() => Value;
/// <summary>Introducer.</summary>
public FileId ToId(int value) => new FileId(value);
/// <summary>Debugging.</summary>
public int Value => m_value;
/// <nodoc />
public FileId(int value)
{
Id<FileId>.CheckValidId(value);
m_value = value;
}
/// <nodoc />
public FileId CreateFrom(int value) => new(value);
/// <nodoc />
public override string ToString() => $"FileId[{Value}]";
/// <summary>Comparison.</summary>
public bool Equals(FileId x, FileId y) => x.Value == y.Value;
/// <summary>Hashing.</summary>
public int GetHashCode(FileId obj) => obj.Value;
/// <nodoc />
public static bool operator ==(FileId x, FileId y) => x.Value == y.Value;
/// <nodoc />
public static bool operator !=(FileId x, FileId y) => !(x == y);
/// <nodoc />
public IEqualityComparer<FileId> Comparer => default(EqualityComparer);
/// <nodoc />
public int CompareTo([AllowNull] FileId other) => Value.CompareTo(other.Value);
}
/// <summary>
/// 256-bit (max) file hash, encoded as four ulongs.
/// </summary>
/// <remarks>
/// It appears that the VSO0 33-byte hash actually has zero as the last byte almost all the time, so 32 bytes
/// seems adequate in practice.
/// </remarks>
public readonly struct FileHash
{
    /// <summary>First 64 bits of hash.</summary>
    public readonly ulong Data0;
    /// <summary>Second 64 bits of hash.</summary>
    public readonly ulong Data1;
    /// <summary>Third 64 bits of hash.</summary>
    public readonly ulong Data2;
    /// <summary>Fourth 64 bits of hash.</summary>
    public readonly ulong Data3;

    /// <summary>Construct a FileHash from the first four elements of a ulong array.</summary>
    /// <param name="hashBuffer">Buffer holding at least four ulongs of hash data.</param>
    /// <exception cref="System.ArgumentNullException">If <paramref name="hashBuffer"/> is null.</exception>
    /// <exception cref="System.ArgumentException">If <paramref name="hashBuffer"/> has fewer than four elements.</exception>
    public FileHash(ulong[] hashBuffer)
    {
        // Validate explicitly rather than letting indexing throw NullReferenceException /
        // IndexOutOfRangeException, which would obscure the actual caller mistake.
        if (hashBuffer == null)
        {
            throw new ArgumentNullException(nameof(hashBuffer));
        }

        if (hashBuffer.Length < 4)
        {
            throw new ArgumentException("Hash buffer must contain at least four ulongs", nameof(hashBuffer));
        }

        Data0 = hashBuffer[0];
        Data1 = hashBuffer[1];
        Data2 = hashBuffer[2];
        Data3 = hashBuffer[3];
    }
}
/// <summary>
/// Information about a single file.
/// </summary>
public struct FileEntry
public readonly struct FileEntry
{
/// <summary>
/// The file's path.
/// </summary>
/// <summary>The file's path.</summary>
/// <remarks>
/// Since paths are long hierarchical names with lots of sharing with other paths, we use a
/// NameTable to store them, and hence the path is identified by NameId.
/// </remarks>
public readonly NameId Path;
/// <summary>
/// File size in bytes.
/// </summary>
/// <summary>File size in bytes.</summary>
public readonly long SizeInBytes;
/// <summary>
/// The pip that produced the file.
/// </summary>
public readonly PipId ProducerPip;
/// <summary>
/// The file's content flags.
/// </summary>
/// <summary>The file's content flags.</summary>
public readonly ContentFlags ContentFlags;
/// <summary>The file's content hash.</summary>
public readonly FileHash Hash;
/// <summary>The file's rewrite count (see the corresponding BuildXL FileArtifact.RewriteCount field).</summary>
public readonly int RewriteCount;
/// <summary>
/// Construct a FileEntry.
/// </summary>
public FileEntry(NameId name, long sizeInBytes, PipId producerPip, ContentFlags contentFlags)
public FileEntry(NameId name, long sizeInBytes, ContentFlags contentFlags, FileHash hash, int rewriteCount)
{
Path = name;
SizeInBytes = sizeInBytes;
ProducerPip = producerPip;
ContentFlags = contentFlags;
Hash = hash;
RewriteCount = rewriteCount;
}
/// <summary>
/// Construct a new FileEntry with a new producer pip.
/// Create a clone of this FileEntry with updated content flags.
/// </summary>
public FileEntry WithProducerPip(PipId producerPip) { return new FileEntry(Path, SizeInBytes, producerPip, ContentFlags); }
/// <summary>
/// Construct a new FileEntry with new content flags.
/// </summary>
public FileEntry WithContentFlags(ContentFlags contentFlags) { return new FileEntry(Path, SizeInBytes, ProducerPip, contentFlags); }
public FileEntry WithContentFlags(ContentFlags contentFlags)
=> new FileEntry(Path, SizeInBytes, contentFlags, Hash, RewriteCount);
/// <summary>
/// Equality comparison.
/// Equality comparison (based on path, not hash).
/// </summary>
public struct EqualityComparer : IEqualityComparer<FileEntry>
{
/// <summary>
/// Equality.
/// </summary>
public bool Equals(FileEntry x, FileEntry y) => x.Path.Equals(y.Path);
public bool Equals(FileEntry x, FileEntry y) => x.Path == y.Path && x.RewriteCount == y.RewriteCount;
/// <summary>
/// Hashing.
/// </summary>
public int GetHashCode([DisallowNull] FileEntry obj) => obj.Path.GetHashCode();
public int GetHashCode([DisallowNull] FileEntry obj) => obj.Path.GetHashCode() ^ obj.RewriteCount.GetHashCode();
}
}
@ -96,7 +147,7 @@ namespace BuildXL.Utilities.PackedExecution
public class FileTable : SingleValueTable<FileId, FileEntry>
{
/// <summary>
/// The names of files in this FileTable.
/// The pathnames of files in this FileTable.
/// </summary>
/// <remarks>
/// This table is shared between this table and the DirectoryTable.
@ -133,9 +184,6 @@ namespace BuildXL.Utilities.PackedExecution
bool eitherMaterializedFromCache = ((oldFlags & ContentFlags.MaterializedFromCache) != 0 || (newFlags & ContentFlags.MaterializedFromCache) != 0);
bool eitherMaterialized = ((oldFlags & ContentFlags.Materialized) != 0 || (newFlags & ContentFlags.Materialized) != 0);
// System should never tell us the file was both produced and materialized from cache
//Contract.Assert(!(eitherProduced && eitherMaterializedFromCache));
return newEntry.WithContentFlags(
eitherProduced
? ContentFlags.Produced
@ -158,30 +206,35 @@ namespace BuildXL.Utilities.PackedExecution
/// Get or add an entry for the given file path.
/// </summary>
/// <remarks>
/// If the entry already exists, the sizeInBytes value passed here will be ignored!
/// The only time that value can be set is when adding a new file not previously recorded.
/// TODO: consider failing if this happens?
/// If the entry already exists, its existing data will be retained, and the arguments passed here
/// will be ignored.
/// </remarks>
public FileId GetOrAdd(string filePath, long sizeInBytes, PipId producerPip, ContentFlags contentFlags)
public FileId GetOrAdd(string filePath, long sizeInBytes, ContentFlags contentFlags, FileHash hash, int rewriteCount)
{
FileEntry entry = new FileEntry(
PathTableBuilder.GetOrAdd(filePath),
sizeInBytes,
producerPip,
contentFlags);
contentFlags,
hash,
rewriteCount);
return GetOrAdd(entry);
}
/// <summary>
/// Update or add an entry for the given file path.
/// </summary>
public FileId UpdateOrAdd(string filePath, long sizeInBytes, PipId producerPip, ContentFlags contentFlags)
/// <remarks>
/// If the entry already exists, its content flags will be merged with these, and the other file attributes
/// will be updated to the values passed here.
/// </remarks>
public FileId UpdateOrAdd(string filePath, long sizeInBytes, ContentFlags contentFlags, FileHash hash, int rewriteCount)
{
FileEntry entry = new FileEntry(
PathTableBuilder.GetOrAdd(filePath),
sizeInBytes,
producerPip,
contentFlags);
contentFlags,
hash,
rewriteCount);
return UpdateOrAdd(entry, m_mergeFunc);
}
}

Просмотреть файл

@ -0,0 +1,39 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
namespace BuildXL.Utilities.PackedExecution
{
/// <summary>
/// I/O counters for both process pips and individual processes.
/// </summary>
/// <remarks>
/// Immutable value type; all six counters are fixed at construction time.
/// </remarks>
public readonly struct IOCounters
{
    /// <summary>Number of read operations performed.</summary>
    public readonly ulong ReadOperationCount;
    /// <summary>Number of bytes read.</summary>
    public readonly ulong ReadByteCount;
    /// <summary>Number of write operations performed.</summary>
    public readonly ulong WriteOperationCount;
    /// <summary>Number of bytes written.</summary>
    public readonly ulong WriteByteCount;
    /// <summary>Number of I/O operations that were neither reads nor writes.</summary>
    public readonly ulong OtherOperationCount;
    /// <summary>Number of bytes transferred by those other operations.</summary>
    public readonly ulong OtherByteCount;

    /// <summary>
    /// Construct an IOCounters from the six underlying counter values.
    /// </summary>
    public IOCounters(
        ulong readOpCount, ulong readByteCount,
        ulong writeOpCount, ulong writeByteCount,
        ulong otherOpCount, ulong otherByteCount)
    {
        // Assignments grouped by operation kind: reads, then writes, then everything else.
        ReadOperationCount = readOpCount;
        ReadByteCount = readByteCount;

        WriteOperationCount = writeOpCount;
        WriteByteCount = writeByteCount;

        OtherOperationCount = otherOpCount;
        OtherByteCount = otherByteCount;
    }
}
}

Просмотреть файл

@ -0,0 +1,34 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
namespace BuildXL.Utilities.PackedExecution
{
/// <summary>Memory counters for process pips.</summary>
/// <remarks>
/// Immutable value type; all four counters are fixed at construction time.
/// </remarks>
public readonly struct MemoryCounters
{
    /// <summary>Average commit size (in MB) considering all processes.</summary>
    public readonly int AverageCommitSizeMb;
    /// <summary>Average working set (in MB) considering all processes.</summary>
    public readonly int AverageWorkingSetMb;
    /// <summary>Peak commit size (in MB) considering all processes (highest point-in-time sum of the memory usage of the process tree).</summary>
    public readonly int PeakCommitSizeMb;
    /// <summary>Peak working set (in MB) considering all processes (highest point-in-time sum of the memory usage of the process tree).</summary>
    public readonly int PeakWorkingSetMb;

    /// <summary>Construct MemoryCounters.</summary>
    /// <param name="avgCommit">Average commit size in MB.</param>
    /// <param name="avgWorking">Average working set in MB.</param>
    /// <param name="peakCommit">Peak commit size in MB.</param>
    /// <param name="peakWorking">Peak working set in MB.</param>
    public MemoryCounters(
        int avgCommit,
        int avgWorking,
        int peakCommit,
        int peakWorking)
    {
        AverageCommitSizeMb = avgCommit;
        AverageWorkingSetMb = avgWorking;
        PeakCommitSizeMb = peakCommit;
        PeakWorkingSetMb = peakWorking;
    }
}
}

Просмотреть файл

@ -1,8 +1,8 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using BuildXL.Utilities.PackedTable;
using System.IO;
using BuildXL.Utilities.PackedTable;
namespace BuildXL.Utilities.PackedExecution
{
@ -18,81 +18,55 @@ namespace BuildXL.Utilities.PackedExecution
{
#region Tables
/// <summary>
/// The directories.
/// </summary>
/// <summary>The directories.</summary>
public readonly DirectoryTable DirectoryTable;
/// <summary>
/// The files.
/// </summary>
/// <summary>The files.</summary>
public readonly FileTable FileTable;
/// <summary>
/// The paths.
/// </summary>
/// <remarks>
/// Shared by FileTable and DirectoryTable.
/// </remarks>
/// <summary>The paths.</summary>
/// <remarks>Shared by FileTable and DirectoryTable.</remarks>
public readonly NameTable PathTable;
/// <summary>
/// The pips.
/// </summary>
/// <summary>The pips.</summary>
public readonly PipTable PipTable;
/// <summary>
/// The pip executions.
/// </summary>
/// <remarks>
/// Currently this is stored sparsely -- most entries will be empty (uninitialized), since most pips are not
/// process pips (and if they are, may not get executed).
///
/// TODO: keep an eye on space usage here, and either support sparse derived tables, or make this data its own
/// base table and add a joining relation to the pip table.
/// </remarks>
/// <summary>The pip executions.</summary>
public readonly PipExecutionTable PipExecutionTable;
/// <summary>
/// The strings.
/// </summary>
/// <remarks>
/// Shared by everything that contains strings (mainly PathTable).
/// </remarks>
/// <summary>The process executions.</summary>
public readonly ProcessExecutionTable ProcessExecutionTable;
/// <summary>The process pip executions.</summary>
public readonly ProcessPipExecutionTable ProcessPipExecutionTable;
/// <summary>The strings.</summary>
/// <remarks>Shared by everything that contains strings (mainly PathTable and PipTable.PipNameTable).</remarks>
public readonly PackedTable.StringTable StringTable;
/// <summary>
/// The workers.
/// </summary>
/// <summary>The workers.</summary>
public readonly WorkerTable WorkerTable;
#endregion
#region Relations
/// <summary>
/// The produced file relation (from executed pips towards the files they produced).
/// </summary>
/// <summary>The file producer relation (from each file to the single pip that produced it).</summary>
public SingleValueTable<FileId, PipId> FileProducer { get; private set; }
/// <summary>The consumed file relation (from executed pips to the files they consumed).</summary>
public RelationTable<PipId, FileId> ConsumedFiles { get; private set; }
/// <summary>
/// The static input directory relation (from executed pips towards their statically declared input directories).
/// </summary>
/// <summary>The static input directory relation (from executed pips to their statically declared input directories).</summary>
public RelationTable<PipId, DirectoryId> DeclaredInputDirectories { get; private set; }
/// <summary>
/// The static input file relation (from executed pips towards their statically declared input files).
/// </summary>
/// <summary>The static input file relation (from executed pips to their statically declared input files).</summary>
public RelationTable<PipId, FileId> DeclaredInputFiles { get; private set; }
/// <summary>
/// The directory contents relation (from directories towards the files they contain).
/// </summary>
/// <summary>The directory contents relation (from directories to the files they contain).</summary>
public RelationTable<DirectoryId, FileId> DirectoryContents { get; private set; }
/// <summary>
/// The pip dependency relation (from the dependent pip, towards the dependency pip).
/// </summary>
/// <summary>The pip dependency relation (from the dependent pip, to the dependency pip).</summary>
public RelationTable<PipId, PipId> PipDependencies { get; private set; }
#endregion
@ -113,6 +87,8 @@ namespace BuildXL.Utilities.PackedExecution
FileTable = new FileTable(PathTable);
PipTable = new PipTable(StringTable);
PipExecutionTable = new PipExecutionTable(PipTable);
ProcessExecutionTable = new ProcessExecutionTable(PipTable);
ProcessPipExecutionTable = new ProcessPipExecutionTable(PipTable);
WorkerTable = new WorkerTable(StringTable);
}
@ -121,22 +97,22 @@ namespace BuildXL.Utilities.PackedExecution
private static readonly string s_pathTableFileName = $"{nameof(PathTable)}.bin";
private static readonly string s_pipTableFileName = $"{nameof(PipTable)}.bin";
private static readonly string s_pipExecutionTableFileName = $"{nameof(PipExecutionTable)}.bin";
private static readonly string s_processExecutionTableFileName = $"{nameof(ProcessExecutionTable)}.bin";
private static readonly string s_processPipExecutionTableFileName = $"{nameof(ProcessPipExecutionTable)}.bin";
private static readonly string s_stringTableFileName = $"{nameof(StringTable)}.bin";
private static readonly string s_workerTableFileName = $"{nameof(WorkerTable)}.bin";
private static readonly string s_fileProducerFileName = $"{nameof(FileProducer)}.bin";
private static readonly string s_consumedFilesFileName = $"{nameof(ConsumedFiles)}.bin";
private static readonly string s_declaredInputDirectoriesFileName = $"{nameof(DeclaredInputDirectories)}.bin";
private static readonly string s_declaredInputFilesFileName = $"{nameof(DeclaredInputFiles)}.bin";
private static readonly string s_directoryContentsFileName = $"{nameof(DirectoryContents)}.bin";
private static readonly string s_pipDependenciesFileName = $"{nameof(PipDependencies)}.bin";
/// <summary>
/// After the base tables are populated, construct the (now properly sized) relation tables.
/// </summary>
/// <summary>After the base tables are populated, construct the (now properly sized) relation tables.</summary>
public void ConstructRelationTables()
{
//System.Diagnostics.ContractsLight.Contract.Requires(ConsumedFiles == null, "Must only construct relation tables once");
FileProducer = new SingleValueTable<FileId, PipId>(FileTable);
ConsumedFiles = new RelationTable<PipId, FileId>(PipTable, FileTable);
DeclaredInputDirectories = new RelationTable<PipId, DirectoryId>(PipTable, DirectoryTable);
DeclaredInputFiles = new RelationTable<PipId, FileId>(PipTable, FileTable);
@ -144,9 +120,7 @@ namespace BuildXL.Utilities.PackedExecution
PipDependencies = new RelationTable<PipId, PipId>(PipTable, PipTable);
}
/// <summary>
/// Save the whole data set as a series of files in the given directory.
/// </summary>
/// <summary>Save all tables to the given directory.</summary>
public void SaveToDirectory(string directory)
{
DirectoryTable.SaveToFile(directory, s_directoryTableFileName);
@ -154,9 +128,12 @@ namespace BuildXL.Utilities.PackedExecution
PathTable.SaveToFile(directory, s_pathTableFileName);
PipTable.SaveToFile(directory, s_pipTableFileName);
PipExecutionTable.SaveToFile(directory, s_pipExecutionTableFileName);
ProcessExecutionTable.SaveToFile(directory, s_processExecutionTableFileName);
ProcessPipExecutionTable.SaveToFile(directory, s_processPipExecutionTableFileName);
StringTable.SaveToFile(directory, s_stringTableFileName);
WorkerTable.SaveToFile(directory, s_workerTableFileName);
FileProducer?.SaveToFile(directory, s_fileProducerFileName);
ConsumedFiles?.SaveToFile(directory, s_consumedFilesFileName);
DeclaredInputDirectories?.SaveToFile(directory, s_declaredInputDirectoriesFileName);
DeclaredInputFiles?.SaveToFile(directory, s_declaredInputFilesFileName);
@ -164,9 +141,7 @@ namespace BuildXL.Utilities.PackedExecution
PipDependencies?.SaveToFile(directory, s_pipDependenciesFileName);
}
/// <summary>
/// Load the whole data set from a series of files in the given directory.
/// </summary>
/// <summary>Load all tables from the given directory.</summary>
public void LoadFromDirectory(string directory)
{
DirectoryTable.LoadFromFile(directory, s_directoryTableFileName);
@ -174,11 +149,18 @@ namespace BuildXL.Utilities.PackedExecution
PathTable.LoadFromFile(directory, s_pathTableFileName);
PipTable.LoadFromFile(directory, s_pipTableFileName);
PipExecutionTable.LoadFromFile(directory, s_pipExecutionTableFileName);
ProcessExecutionTable.LoadFromFile(directory, s_processExecutionTableFileName);
ProcessPipExecutionTable.LoadFromFile(directory, s_processPipExecutionTableFileName);
StringTable.LoadFromFile(directory, s_stringTableFileName);
WorkerTable.LoadFromFile(directory, s_workerTableFileName);
ConstructRelationTables();
if (File.Exists(Path.Combine(directory, s_fileProducerFileName)))
{
FileProducer.LoadFromFile(directory, s_fileProducerFileName);
}
loadRelationTableIfExists(directory, s_consumedFilesFileName, ConsumedFiles);
loadRelationTableIfExists(directory, s_declaredInputDirectoriesFileName, DeclaredInputDirectories);
loadRelationTableIfExists(directory, s_declaredInputFilesFileName, DeclaredInputFiles);
@ -196,68 +178,41 @@ namespace BuildXL.Utilities.PackedExecution
}
}
/// <summary>
/// Build an entire PackedExecution by collecting all the builders for each piece.
/// </summary>
/// <summary>Build a PackedExecution (by providing builders for all its tables).</summary>
public class Builder
{
/// <summary>
/// The PackedExecution being built.
/// </summary>
/// <summary>The full data set.</summary>
public readonly PackedExecution PackedExecution;
/// <summary>
/// Builder for DirectoryTable.
/// </summary>
/// <nodoc />
public readonly DirectoryTable.CachingBuilder DirectoryTableBuilder;
/// <summary>
/// Builder for FileTable.
/// </summary>
/// <nodoc />
public readonly FileTable.CachingBuilder FileTableBuilder;
/// <summary>
/// Builder for PathTable.
/// </summary>
/// <nodoc />
public readonly NameTable.Builder PathTableBuilder;
/// <summary>
/// Builder for PipTable.
/// </summary>
/// <nodoc />
public readonly PipTable.Builder PipTableBuilder;
// There is deliberately no PipExecutionTableBuilder; just call FillToBaseTableCount on it and then set values in it.
/// <summary>
/// Builder for StringTable..
/// </summary>
/// <nodoc />
public readonly PipExecutionTable.Builder<PipExecutionTable> PipExecutionTableBuilder;
/// <nodoc />
public readonly ProcessExecutionTable.Builder<ProcessExecutionTable> ProcessExecutionTableBuilder;
/// <nodoc />
public readonly ProcessPipExecutionTable.Builder<ProcessPipExecutionTable> ProcessPipExecutionTableBuilder;
/// <nodoc />
public readonly PackedTable.StringTable.CachingBuilder StringTableBuilder;
/// <summary>
/// Builder for WOrkerTable.
/// </summary>
/// <nodoc />
public readonly WorkerTable.CachingBuilder WorkerTableBuilder;
/// <summary>
/// Builder for ConsumedFiles relation.
/// </summary>
/// <nodoc />
public readonly RelationTable<PipId, FileId>.Builder ConsumedFilesBuilder;
/// <summary>
/// Builder for DeclaredInputDirectories relation.
/// </summary>
/// <nodoc />
public readonly RelationTable<PipId, DirectoryId>.Builder DeclaredInputDirectoriesBuilder;
/// <summary>
/// Builder for DeclaredInputFiles relation.
/// </summary>
/// <nodoc />
public readonly RelationTable<PipId, FileId>.Builder DeclaredInputFilesBuilder;
/// <summary>
/// Builder for DirectoryContents relation.
/// </summary>
/// <nodoc />
public readonly RelationTable<DirectoryId, FileId>.Builder DirectoryContentsBuilder;
/// <summary>
/// Builder for PipDependencies relation.
/// </summary>
public readonly RelationTable<PipId, PipId>.Builder PipDependenciesBuilder;
/// <summary>Construct a Builder.</summary>
public Builder(PackedExecution packedExecution)
{
PackedExecution = packedExecution;
@ -267,7 +222,11 @@ namespace BuildXL.Utilities.PackedExecution
PathTableBuilder = new NameTable.Builder(PackedExecution.PathTable, StringTableBuilder);
DirectoryTableBuilder = new DirectoryTable.CachingBuilder(PackedExecution.DirectoryTable, PathTableBuilder);
FileTableBuilder = new FileTable.CachingBuilder(PackedExecution.FileTable, PathTableBuilder);
PipTableBuilder = new PipTable.Builder(PackedExecution.PipTable, StringTableBuilder);
PipTableBuilder = new PipTable.Builder(PackedExecution.PipTable, StringTableBuilder);
PipExecutionTableBuilder = new PipExecutionTable.Builder<PipExecutionTable>(PackedExecution.PipExecutionTable);
ProcessExecutionTableBuilder = new ProcessExecutionTable.Builder<ProcessExecutionTable>(PackedExecution.ProcessExecutionTable);
ProcessPipExecutionTableBuilder = new ProcessPipExecutionTable.Builder<ProcessPipExecutionTable>(PackedExecution.ProcessPipExecutionTable);
PipTableBuilder = new PipTable.Builder(PackedExecution.PipTable, StringTableBuilder);
WorkerTableBuilder = new WorkerTable.CachingBuilder(PackedExecution.WorkerTable, StringTableBuilder);
if (packedExecution.ConsumedFiles != null)
@ -276,7 +235,27 @@ namespace BuildXL.Utilities.PackedExecution
DeclaredInputDirectoriesBuilder = new RelationTable<PipId, DirectoryId>.Builder(packedExecution.DeclaredInputDirectories);
DeclaredInputFilesBuilder = new RelationTable<PipId, FileId>.Builder(packedExecution.DeclaredInputFiles);
DirectoryContentsBuilder = new RelationTable<DirectoryId, FileId>.Builder(packedExecution.DirectoryContents);
PipDependenciesBuilder = new RelationTable<PipId, PipId>.Builder(packedExecution.PipDependencies);
}
}
/// <summary>
/// Complete the builders that need completing.
/// </summary>
/// <remarks>
/// The builders that need completing are MultiValueTable.Builders, or builders derived therefrom.
/// </remarks>
public void Complete()
{
    // The execution-table builders always exist, so they are always completed.
    PipExecutionTableBuilder.Complete();
    ProcessExecutionTableBuilder.Complete();
    ProcessPipExecutionTableBuilder.Complete();

    // The relation builders may be absent; if ConsumedFilesBuilder exists, the
    // other relation builders are expected to exist as well (mirrors the guard
    // used when they were constructed).
    if (ConsumedFilesBuilder == null)
    {
        return;
    }

    ConsumedFilesBuilder.Complete();
    DeclaredInputFilesBuilder.Complete();
    DeclaredInputDirectoriesBuilder.Complete();
    DirectoryContentsBuilder.Complete();
}
}

Просмотреть файл

@ -0,0 +1,104 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using BuildXL.Utilities.PackedTable;
namespace BuildXL.Utilities.PackedExecution
{
/// <summary>
/// Supplementary data structure providing indices over a PackedExecution.
/// </summary>
/// <remarks>
/// Constructed from a PackedExecution, but actually populating the index requires calling (and
/// awaiting) InitializeAsync, because the pieces are all built in parallel.
/// </remarks>
public class PackedIndex
{
    #region Fields
    /// <summary>The underlying execution data.</summary>
    public PackedExecution PackedExecution { get; private set; }
    /// <summary>The index of all Strings.</summary>
    public StringIndex StringIndex { get; private set; }
    /// <summary>Pip name index.</summary>
    public NameIndex PipNameIndex { get; private set; }
    /// <summary>File path index.</summary>
    public NameIndex PathIndex { get; private set; }
    /// <summary>Derived relationship: the inverse of PipDependencies.</summary>
    public RelationTable<PipId, PipId> PipDependents { get; private set; }
    /// <summary>Derived relationship: the inverse of ConsumedFiles.</summary>
    public RelationTable<FileId, PipId> FileConsumers { get; private set; }
    /// <summary>Derived relationship: the inverse of FileProducer.</summary>
    public RelationTable<PipId, FileId> ProducedFiles { get; private set; }
    /// <summary>Derived relationship: the inverse of DeclaredInputFiles.</summary>
    public RelationTable<FileId, PipId> InputFileDeclarers { get; private set; }
    /// <summary>Derived relationship: the inverse of DeclaredInputDirectories.</summary>
    public RelationTable<DirectoryId, PipId> InputDirectoryDeclarers { get; private set; }
    /// <summary>Derived relationship: the inverse of DirectoryContents.</summary>
    public RelationTable<FileId, DirectoryId> ParentDirectories { get; private set; }
    #endregion

    /// <summary>Construct an initially empty PackedIndex; you must await InitializeAsync() for the data to be populated.</summary>
    public PackedIndex(PackedExecution packedExecution)
    {
        PackedExecution = packedExecution;
    }

    /// <summary>
    /// Initialize all the elements of this index, as concurrently as possible.
    /// </summary>
    /// <param name="progressAction">Action called (from an arbitrary Task thread) to report completion of various index parts.</param>
    /// <returns>A task which is complete when the index is fully built.</returns>
    public Task InitializeAsync(Action<string> progressAction)
    {
        // Each step pairs a progress message with the work that populates one piece of the index.
        // Kept as an explicit list so the steps can easily be run serially when debugging
        // (e.g. when the hacky Span sort code you found online starts hitting stack overflows,
        // whereas the .NET 5 Span.Sort method just works...).
        List<(string Message, Action Populate)> steps = new List<(string, Action)>
        {
            ("Sorted strings", () => StringIndex = new StringIndex(PackedExecution.StringTable)),
            ("Indexed pip names", () => PipNameIndex = new NameIndex(PackedExecution.PipTable.PipNameTable)),
            ("Indexed paths", () => PathIndex = new NameIndex(PackedExecution.PathTable)),
            ("Indexed pip dependents", () => PipDependents = PackedExecution.PipDependencies.Invert()),
            ("Indexed file consumers", () => FileConsumers = PackedExecution.ConsumedFiles.Invert()),
            ("Indexed produced files", () => ProducedFiles =
                RelationTable<FileId, PipId>
                    .FromSingleValueTable(PackedExecution.FileProducer, PackedExecution.PipTable)
                    .Invert()),
            ("Indexed input-file-declaring pips", () => InputFileDeclarers = PackedExecution.DeclaredInputFiles.Invert()),
            ("Indexed input-directory-declaring pips", () => InputDirectoryDeclarers = PackedExecution.DeclaredInputDirectories.Invert()),
            ("Indexed parent directories", () => ParentDirectories = PackedExecution.DirectoryContents.Invert())
        };

        // Launch every step on the thread pool; the returned task completes when all have finished.
        var tasks = new List<Task>(steps.Count);
        foreach ((string message, Action populate) in steps)
        {
            tasks.Add(Task.Run(() =>
            {
                populate();
                progressAction(message);
            }));
        }

        return Task.WhenAll(tasks);
    }
}
}

Просмотреть файл

@ -1,53 +1,70 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System;
using BuildXL.Utilities.PackedTable;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
namespace BuildXL.Utilities.PackedExecution
{
/// <summary>
/// Information about a process pip's execution.
/// </summary>
/// <remarks>
/// Right now this is the most rudimentary information imaginable.
/// </remarks>
public struct PipExecutionEntry
/// <summary>Indicates the manner in which a pip executed.</summary>
/// <remarks>Stored in each PipExecutionEntry's ExecutionLevel field.</remarks>
public enum PipExecutionLevel
{
    /// <summary>The pip's full work was performed.</summary>
    Executed,
    /// <summary>The pip was cached, and some work was performed to deploy it from cache.</summary>
    Cached,
    /// <summary>The pip was fully up to date.</summary>
    UpToDate,
    /// <summary>The pip failed.</summary>
    Failed,
}
/// <summary>Information about a pip's execution.</summary>
/// <remarks>Based on the BuildXL PipExecutionPerformance type.</remarks>
public readonly struct PipExecutionEntry
{
    /// <summary>
    /// Indicates the manner in which a pip executed.
    /// </summary>
    public readonly PipExecutionLevel ExecutionLevel;
    /// <summary>
    /// Start time in UTC.
    /// </summary>
    public readonly DateTime ExecutionStart;
    /// <summary>
    /// Stop time in UTC.
    /// </summary>
    public readonly DateTime ExecutionStop;
    /// <summary>Worker identifier.</summary>
    public readonly WorkerId WorkerId;

    /// <summary>Construct a PipExecutionEntry.</summary>
    public PipExecutionEntry(
        PipExecutionLevel executionLevel,
        DateTime executionStart,
        DateTime executionStop,
        WorkerId workerId)
    {
        ExecutionLevel = executionLevel;
        ExecutionStart = executionStart;
        ExecutionStop = executionStop;
        WorkerId = workerId;
    }
}
/// <summary>
/// Table of pip execution data.
/// </summary>
/// <summary>Table of pip execution data.</summary>
/// <remarks>
/// Since this table has the master PipTable as its base table, this table will have as many entries as
/// the PipTable. Since most pips in the overall graph are not process pips, that means most entries in
/// this table will be empty (e.g.
/// This will generally have exactly one entry per pip, so it could be a SingleValueTable, but it is more
/// convenient to construct as a MultiValueTable.
/// </remarks>
public class PipExecutionTable : SingleValueTable<PipId, PipExecutionEntry>
public class PipExecutionTable : MultiValueTable<PipId, PipExecutionEntry>
{
/// <summary>
/// Construct a PipExecutionTable.

Просмотреть файл

@ -1,9 +1,10 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using BuildXL.Utilities.PackedTable;
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using BuildXL.Utilities.PackedTable;
namespace BuildXL.Utilities.PackedExecution
{
@ -71,29 +72,56 @@ namespace BuildXL.Utilities.PackedExecution
/// <summary>
/// Boilerplate ID type to avoid ID confusion in code.
/// </summary>
#pragma warning disable CS0660 // Type defines operator == or operator != but does not override Object.Equals(object o)
#pragma warning disable CS0661 // Type defines operator == or operator != but does not override Object.GetHashCode()
public readonly struct PipId : Id<PipId>, IComparable<PipId>
#pragma warning restore CS0661 // Type defines operator == or operator != but does not override Object.GetHashCode()
#pragma warning restore CS0660 // Type defines operator == or operator != but does not override Object.Equals(object o)
{
    /// <summary>Comparer.</summary>
    public struct EqualityComparer : IEqualityComparer<PipId>
    {
        /// <summary>Comparison.</summary>
        public bool Equals(PipId x, PipId y) => x.Value == y.Value;
        /// <summary>Hashing.</summary>
        public int GetHashCode(PipId obj) => obj.Value;
    }

    private readonly int m_value;

    /// <summary>The underlying 1-based integer value.</summary>
    public int Value => m_value;

    /// <summary>Constructor; checks that the value is a valid (nonzero) ID value.</summary>
    public PipId(int value)
    {
        Id<PipId>.CheckValidId(value);
        m_value = value;
    }

    /// <summary>Introducer: create a PipId from a 1-based integer value.</summary>
    public PipId CreateFrom(int value) => new(value);

    /// <summary>Debugging.</summary>
    public override string ToString() => $"PipId[{Value}]";

    /// <nodoc/>
    public static bool operator ==(PipId x, PipId y) => x.Value == y.Value;

    /// <nodoc/>
    public static bool operator !=(PipId x, PipId y) => !(x == y);

    /// <summary>A comparer for this ID type; avoid calling in tight loops in case the struct result is boxed.</summary>
    public IEqualityComparer<PipId> Comparer => default(EqualityComparer);

    /// <summary>Comparison, by underlying integer value.</summary>
    public int CompareTo([AllowNull] PipId other) => Value.CompareTo(other.Value);
}
/// <summary>
/// Core data about a pip in the pip graph.
/// </summary>
public struct PipEntry
public readonly struct PipEntry
{
/// <summary>
/// Semi-stable hash.
@ -123,18 +151,12 @@ namespace BuildXL.Utilities.PackedExecution
PipType = type;
}
/// <summary>
/// Compare PipEntries by SemiStableHash value.
/// </summary>
public struct EqualityComparer : IEqualityComparer<PipEntry>
{
    /// <summary>Equality: PipEntries are equal when their SemiStableHashes are equal.</summary>
    public bool Equals(PipEntry x, PipEntry y) => x.SemiStableHash.Equals(y.SemiStableHash);
    /// <summary>Hashing: derived from the SemiStableHash alone.</summary>
    public int GetHashCode([DisallowNull] PipEntry obj) => obj.SemiStableHash.GetHashCode();
}
}

Просмотреть файл

@ -0,0 +1,80 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System;
using BuildXL.Utilities.PackedTable;
namespace BuildXL.Utilities.PackedExecution
{
/// <summary>Information about a process's execution.</summary>
/// <remarks>Based on the BuildXL ReportedProcess class; please keep fields sorted alphabetically.</remarks>
public readonly struct ProcessExecutionEntry
{
    /// <summary>The time this reported process object was created.</summary>
    public readonly DateTime CreationTime;
    /// <summary>The process exit code. 0xBAAAAAAD means DllProcessDetach was not called on DetoursServices.dll, so the value is not initialized.</summary>
    public readonly uint ExitCode;
    /// <summary>The time this reported process object exited.</summary>
    public readonly DateTime ExitTime;
    /// <summary>The IO this process is responsible for.</summary>
    public readonly IOCounters IOCounters;
    /// <summary>The amount of time the process spent in kernel mode code.</summary>
    public readonly TimeSpan KernelTime;
    /// <summary>The process Id of the current process's parent.</summary>
    public readonly uint ParentProcessId;
    /// <summary>The path of the executable file of the process.</summary>
    public readonly NameId Path;
    /// <summary>The (not necessarily unique) process id.</summary>
    public readonly uint ProcessId;
    /* TODO: not capturing process args yet for fear they will blow up in storage. Look at a better encoding?
    /// <summary>The command line arguments of the process</summary>
    public readonly string ProcessArgs;
    */
    /// <summary>The amount of time the process spent in user mode code.</summary>
    public readonly TimeSpan UserTime;
    /// <summary>Constructor; arguments are in alphabetical order, matching the field order.</summary>
    public ProcessExecutionEntry(
        DateTime creationTime,
        uint exitCode,
        DateTime exitTime,
        IOCounters ioCounters,
        TimeSpan kernelTime,
        uint parentProcessId,
        NameId path,
        uint processId,
        TimeSpan userTime)
    {
        CreationTime = creationTime;
        ExitCode = exitCode;
        ExitTime = exitTime;
        IOCounters = ioCounters;
        KernelTime = kernelTime;
        ParentProcessId = parentProcessId;
        Path = path;
        ProcessId = processId;
        UserTime = userTime;
    }
}
/// <summary>Table of process execution data.</summary>
/// <remarks>
/// Since most pips in the overall graph are not process pips, most entries in this table will be empty.
/// Backed by the PipTable (so there is one potential entry per pip).
/// </remarks>
public class ProcessExecutionTable : MultiValueTable<PipId, ProcessExecutionEntry>
{
    /// <summary>
    /// Construct a ProcessExecutionTable over the given PipTable.
    /// </summary>
    public ProcessExecutionTable(PipTable pipTable) : base(pipTable)
    {
    }
}
}

Просмотреть файл

@ -0,0 +1,81 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System;
using BuildXL.Utilities.PackedTable;
namespace BuildXL.Utilities.PackedExecution
{
/// <summary>Information about a process pip's execution.</summary>
/// <remarks>
/// Based on the BuildXL ProcessPipExecutionPerformance type; note that some fields are not yet supported.
///
/// Please keep fields sorted alphabetically to ease maintenance.
/// </remarks>
public readonly struct ProcessPipExecutionEntry
{
    /// <summary>I/O counters.</summary>
    public readonly IOCounters IOCounters;
    /// <summary>Kernel-mode execution time. Note that this counter increases as threads in the process tree execute (it is not equivalent to wall clock time).</summary>
    public readonly TimeSpan KernelTime;
    /// <summary>Memory counters.</summary>
    public readonly MemoryCounters MemoryCounters;
    /// <summary>Process count launched by this pip.</summary>
    public readonly uint NumberOfProcesses;
    /// <summary>Time spent executing the entry-point process (possibly zero, such as if this execution was cached).</summary>
    public readonly TimeSpan ProcessExecutionTime;
    /// <summary>Processor used in % (150 means one processor fully used and the other half used).</summary>
    public readonly ushort ProcessorsInPercents;
    /// <summary>Suspended duration in ms.</summary>
    public readonly long SuspendedDurationMs;
    /// <summary>User-mode execution time. Note that this counter increases as threads in the process tree execute (it is not equivalent to wall clock time).</summary>
    public readonly TimeSpan UserTime;
    /// <summary>
    /// Construct a ProcessPipExecutionEntry; arguments are in alphabetical order, matching the field order.
    /// </summary>
    public ProcessPipExecutionEntry(
        IOCounters ioCounters,
        TimeSpan kernelTime,
        MemoryCounters memoryCounters,
        uint numberOfProcesses,
        TimeSpan processExecutionTime,
        ushort processorsInPercents,
        long suspendedDurationMs,
        TimeSpan userTime)
    {
        IOCounters = ioCounters;
        KernelTime = kernelTime;
        MemoryCounters = memoryCounters;
        NumberOfProcesses = numberOfProcesses;
        ProcessExecutionTime = processExecutionTime;
        ProcessorsInPercents = processorsInPercents;
        SuspendedDurationMs = suspendedDurationMs;
        UserTime = userTime;
    }
}
/// <summary>
/// Table of process pip execution data.
/// </summary>
/// <remarks>
/// This will generally have zero or one ProcessPipExecutionEntries per pip; if it has more than one,
/// it indicates BuildXL produced more than one, which is unusual and which we should discuss with 1ES.
/// </remarks>
public class ProcessPipExecutionTable : MultiValueTable<PipId, ProcessPipExecutionEntry>
{
    /// <summary>
    /// Construct a ProcessPipExecutionTable over the given PipTable.
    /// </summary>
    public ProcessPipExecutionTable(PipTable pipTable) : base(pipTable)
    {
    }
}
}

Просмотреть файл

@ -1,31 +1,59 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using BuildXL.Utilities.PackedTable;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using BuildXL.Utilities.PackedTable;
namespace BuildXL.Utilities.PackedExecution
{
/// <summary>
/// Boilerplate ID type to avoid ID confusion in code.
/// </summary>
#pragma warning disable CS0660 // Type defines operator == or operator != but does not override Object.Equals(object o)
#pragma warning disable CS0661 // Type defines operator == or operator != but does not override Object.GetHashCode()
public readonly struct WorkerId : Id<WorkerId>
#pragma warning restore CS0661 // Type defines operator == or operator != but does not override Object.GetHashCode()
#pragma warning restore CS0660 // Type defines operator == or operator != but does not override Object.Equals(object o)
{
    /// <nodoc/>
    public readonly struct EqualityComparer : IEqualityComparer<WorkerId>
    {
        /// <nodoc/>
        public bool Equals(WorkerId x, WorkerId y) => x.Value == y.Value;
        /// <nodoc/>
        public int GetHashCode(WorkerId obj) => obj.Value;
    }

    private readonly int m_value;

    /// <summary>The underlying 1-based integer value.</summary>
    public int Value => m_value;

    /// <summary>Constructor; checks that the value is a valid ID value.</summary>
    public WorkerId(int value)
    {
        Id<WorkerId>.CheckValidId(value);
        m_value = value;
    }

    /// <summary>Introducer: create a WorkerId from a 1-based integer value.</summary>
    public WorkerId CreateFrom(int value) => new(value);

    /// <summary>Debugging.</summary>
    public override string ToString() => $"WorkerId[{Value}]";

    /// <nodoc/>
    public static bool operator ==(WorkerId x, WorkerId y) => x.Value == y.Value;

    /// <nodoc/>
    public static bool operator !=(WorkerId x, WorkerId y) => !(x == y);

    /// <summary>A comparer for this ID type; avoid calling in tight loops in case the struct result is boxed.</summary>
    public IEqualityComparer<WorkerId> Comparer => default(EqualityComparer);

    /// <summary>Comparison, by underlying integer value.</summary>
    public int CompareTo([AllowNull] WorkerId other) => Value.CompareTo(other.Value);
}
/// <summary>
@ -55,7 +83,7 @@ namespace BuildXL.Utilities.PackedExecution
/// <summary>
/// Build a WorkerTable by caching worker machine names.
/// </summary>
public class CachingBuilder : CachingBuilder<PackedTable.StringId>
public class CachingBuilder : CachingBuilder<PackedTable.StringId.EqualityComparer>
{
private readonly PackedTable.StringTable.CachingBuilder m_stringTableBuilder;

Просмотреть файл

@ -1,4 +1,7 @@
using System;
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
@ -68,7 +71,10 @@ namespace BuildXL.Utilities.PackedTable
/// </remarks>
public CharSpan(StringId s)
{
if (s.Equals(default)) { throw new ArgumentException("Cannot construct CharSpan from default StringId"); }
if (s == default)
{
throw new ArgumentException("Cannot construct CharSpan from default StringId");
}
m_string = default;
m_stringId = s;
m_start = default;
@ -77,9 +83,17 @@ namespace BuildXL.Utilities.PackedTable
/// <summary>Validate that (start, length) denotes a subrange of string s; throws ArgumentException if not.</summary>
private static void Check(string s, int start, int length)
{
    if (s == null)
    {
        throw new ArgumentException($"String may not be null");
    }
    // Fix: the original only tested start, but the message (and the contract) requires both to be non-negative.
    if (start < 0 || length < 0)
    {
        throw new ArgumentException($"Both start {start} and length {length} must be >= 0");
    }
    if (s.Length < start + length)
    {
        throw new ArgumentException($"String length {s.Length} must be <= the sum of start {start} and length {length}");
    }
}
/// <summary>
/// Get a ReadOnlySpan over this CharSpan's characters.
/// </summary>
/// <param name="table">The table to look up strings in, if this is a StringId CharSpan.</param>
public ReadOnlySpan<char> AsSpan(StringTable table)
{
    // A CharSpan wraps either a plain string slice or a StringId resolved via the table.
    if (m_string != null)
    {
        return m_string.AsSpan().Slice(m_start, m_length);
    }
    else
    {
        return table[m_stringId];
    }
}
/// <summary>

Просмотреть файл

@ -7,12 +7,7 @@ using System.Runtime.InteropServices;
namespace BuildXL.Utilities.PackedTable
{
/// <summary>
/// Generic Span[T] methods for saving and loading spans of unmanaged values.
/// </summary>
/// <remarks>
/// Actually amazingly high-performance, given how convenient and generic the code pattern is.
/// </remarks>
/// <summary>Generic Span[T] methods for saving and loading spans of unmanaged values.</summary>
public static class FileSpanUtilities
{
/// <summary>

Просмотреть файл

@ -2,6 +2,7 @@
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
namespace BuildXL.Utilities.PackedTable
{
@ -15,30 +16,60 @@ namespace BuildXL.Utilities.PackedTable
/// Note that ID values are 1-based; IDs range from [1 .. Count] inclusive, rather than [0 .. Count-1] inclusive
/// as with zero-based indices. This is deliberate, to allow the default ID value to indicate "no ID" (and
/// to catch bugs relating to uninitialized IDs).
///
/// Note that a subtlety of struct types implementing interfaces is that the default Object methods, especially
/// Equals, behave differently. Code style warnings at one point advised implementing Object.Equals on these
/// struct ID types. Unfortunately it turned out this breaks this assertion:
///
/// SomeId defaultSomeId = default(SomeId);
/// Assert.True(defaultSomeId.Equals(default));
///
/// If SomeId is a struct type, this will wind up comparing default(SomeId) to default(object), in other
/// words comparing a struct to null. Which is always false, whereas this comparison was expected to return
/// true.
///
/// Note that this is fine:
///
/// Assert.True(defaultSomeId.Equals(default(SomeId)));
///
/// But the pit of failure above from just using "default" is real (we fell into it).
///
/// So we deliberately do not implement Object.Equals (or Object.GetHashCode) on struct types implementing
/// this interface. Instead, all ID types define IEqualityComparer on themselves; this turns out to be much
/// more convenient for comparing instances in generic code. It doesn't prevent the user from writing
/// "SomeId.Equals(default)" but at least it provides a better pattern instead.
/// </remarks>
public interface Id<TId>
public interface Id<TId> : IComparable<TId>
where TId : unmanaged
{
/// <summary>
/// Convert the ID to an integer value.
/// The underlying 1-based integer value.
/// </summary>
/// <remarks>
/// Note that the return value is still 1-based; to convert into an array index, you must subtract 1.
/// To convert into an array index, you must subtract 1.
/// </remarks>
public int FromId();
public int Value { get; }
/// <summary>
/// Convert an integer value to this type of ID.
/// Convert a 1-based integer value to this type of ID.
/// </summary>
/// <remarks>
/// This is a bit roundabout but is a generic way of constructing the appropriate kind of ID struct.
/// Note that the value here must be 1-based; this method does not modify the value.
/// Note that the argument here must be 1-based; this method does not modify the value.
/// </remarks>
public TId ToId(int value);
public TId CreateFrom(int value);
/// <summary>
/// Get a comparer usable on this type of ID.
/// </summary>
/// <remarks>
/// Best not to call this in a tight loop in case the result struct is boxed.
/// </remarks>
public IEqualityComparer<TId> Comparer { get; }
/// <summary>
/// Check that the value is not zero; throw ArgumentException if it is.
/// </summary>
public static void CheckNotZero(int value) { if (value == 0) { throw new ArgumentException("Cannot create ID with value 0 (use default instead)"); } }
public static void CheckValidId(int value) { if (value < 1) { throw new ArgumentException("Cannot create ID with zero or negative value (use default instead)"); } }
}
}

Просмотреть файл

@ -4,6 +4,7 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
namespace BuildXL.Utilities.PackedTable
{
@ -13,15 +14,25 @@ namespace BuildXL.Utilities.PackedTable
/// <remarks>
/// The MultiValueTable's state is three lists:
/// - Values: the per-id count of how many relationships each TFromId has.
/// - m_offsets: the per-id index into m_relations for each TFromId; calculated by a scan over Values.
/// - m_relations: the collection of all TToIds for all relationships; sorted by TFromId then TToId.
/// - Offsets: the per-id index into m_relations for each TFromId; calculated by a scan over Values.
/// - MultiValues: the collection of all TValues.
///
/// For example, if we have ID 1 with values 10 and 11, ID 2 with no values, and ID 3 with values 12 and 13:
/// SingleValues: [2, 0, 2]
/// m_offsets: [0, 2, 2]
/// m_multiValues: [10, 11, 12, 13]
/// Offsets: [0, 2, 2]
/// MultiValues: [10, 11, 12, 13]
///
/// Note that SingleValues is used just as implementation and isn't exposed publicly at all.
/// Note that there are three ways to construct a MultiValueTable:
///
/// 1. The Add method can be called on an empty table (with zero Count). This lets a table be constructed
/// in ID order, such that it can be saved to disk.
///
/// 2. The AddUnordered method can be called on a filled table (one which has had FillToBaseTableCount()
/// called on it). This supports adding data out of strict ID order, but results in a table that can't
/// be saved directly to disk.
///
/// 3. The Builder class allows data to be accumulated in any order, and then added all at once in the
/// Complete() method; however, the data can't be queried before Complete() is called.
/// </remarks>
public class MultiValueTable<TId, TValue> : Table<TId, int>, IMultiValueTable<TId, TValue>
where TId : unmanaged, Id<TId>
@ -31,9 +42,10 @@ namespace BuildXL.Utilities.PackedTable
/// List of offsets per ID.
/// </summary>
/// <remarks>
/// Computed from a scan over SingleValues, which is the per-ID count of MultiValues per element.
/// When building incrementally, this list grows progressively; if this list has fewer elements
/// than Count, it means only a prefix of all IDs have had their relations added yet.
/// In the common (ordered) case, this is computed from a scan over SingleValues, which is the
/// per-ID count of MultiValues per element. When building incrementally with Add, this list grows
/// progressively; if this list has fewer elements than Count, it means only a prefix of all IDs
/// have had their relations added yet.
/// </remarks>
protected readonly SpannableList<int> Offsets;
@ -41,10 +53,16 @@ namespace BuildXL.Utilities.PackedTable
/// List of all per-ID values.
/// </summary>
/// <remarks>
/// Stored in order of ID..
/// Stored in order of ID in the default case; if AddUnordered has been called, these may be in any order.
/// </remarks>
protected readonly SpannableList<TValue> MultiValues;
/// <summary>
/// Set to true if data has been appended to this MultiValueTable in a non-ordered way
/// (e.g. using AddUnordered).
/// </summary>
public bool MayBeUnordered { get; private set; }
/// <summary>
/// Construct a MultiValueTable.
/// </summary>
@ -89,8 +107,15 @@ namespace BuildXL.Utilities.PackedTable
/// </summary>
public override void SaveToFile(string directory, string name)
{
    // Only save it if it's known to be ordered.
    if (MayBeUnordered)
    {
        // TODO: make a way to reorder it!
        // InvalidOperationException rather than bare Exception: this is a misuse of the
        // object's current state, not an unknown failure (CA2201).
        throw new InvalidOperationException("Can't save MultiValueTable that may contain unordered data");
    }

    base.SaveToFile(directory, name);

    // We don't need to save Offsets since it is calculated from the counts in SingleValues.
    FileSpanUtilities.SaveToFile(directory, InsertSuffix(name, "MultiValue"), MultiValues);
}
@ -110,6 +135,9 @@ namespace BuildXL.Utilities.PackedTable
CalculateOffsets();
}
/// <summary>Read-only view over the entire backing list of values.</summary>
public ReadOnlySpan<TValue> MultiValueSpan
{
    get { return MultiValues.AsSpan(); }
}
/// <summary>
/// Calculate all the offsets for all IDs based on their counts.
/// </summary>
@ -124,10 +152,10 @@ namespace BuildXL.Utilities.PackedTable
/// <summary>
/// Get a span of values at the given ID.
/// </summary>
protected Span<TValue> GetSpan(TId id)
private Span<TValue> GetSpan(TId id)
{
int offset = Offsets[id.FromId() - 1];
int count = SingleValues[id.FromId() - 1];
int offset = Offsets[id.Value - 1];
int count = SingleValues[id.Value - 1];
return MultiValues.AsSpan().Slice(offset, count);
}
@ -161,6 +189,11 @@ namespace BuildXL.Utilities.PackedTable
/// </remarks>
public virtual TId Add(ReadOnlySpan<TValue> multiValues)
{
if (BaseTableOpt != null && SingleValues.Count == BaseTableOpt.Count)
{
throw new Exception("Can't add to end of a table that has already been filled");
}
if (SingleValues.Count > 0)
{
Offsets.Add(Offsets[Count - 1] + SingleValues[Count - 1]);
@ -174,7 +207,48 @@ namespace BuildXL.Utilities.PackedTable
MultiValues.AddRange(multiValues);
return default(TId).ToId(Count);
return default(TId).CreateFrom(Count);
}
/// <summary>
/// Add a set of values for an ID that has no values yet, regardless of whether the ID is next in order
/// to be added.
/// </summary>
/// <remarks>
/// This results in out-of-order data, e.g. MayBeUnordered is set to true when this method is called.
/// TODO: create a way to re-order a MultiValueTable.
///
/// Note that this also breaks other assumptions of the Add method above. Specifically, this method
/// requires that FillToBaseTableCount() has been called on this table already, whereas the normal
/// Add method requires the opposite. In fact, this method requires having a BaseTable.
/// </remarks>
public virtual void AddUnordered(TId id, ReadOnlySpan<TValue> multiValues)
{
    if (BaseTableOpt == null)
    {
        // There must be a base table or we don't know how big this table should be, and hence can't safely
        // add random IDs to it.
        throw new InvalidOperationException("Can only call AddUnordered on a table with a base table");
    }

    if (SingleValues.Count < BaseTableOpt.Count)
    {
        // We must have already filled to the base table count.
        throw new InvalidOperationException("MultiValueTable must be filled to base table count before calling AddUnordered");
    }

    int index = id.Value - 1;
    if (SingleValues[index] > 0)
    {
        // Can only add actual data for a given index once.
        throw new InvalidOperationException($"MultiValueTable.AddUnordered can't add data twice for the same ID {id}");
    }

    // Mark the table as possibly unordered only after all validation succeeds, so a failed call
    // doesn't permanently prevent an otherwise-untouched table from being saved to disk.
    MayBeUnordered = true;

    Offsets[index] = MultiValueCount;
    SingleValues[index] = multiValues.Length;
    MultiValues.AddRange(multiValues);
}
/// <summary>
@ -184,14 +258,14 @@ namespace BuildXL.Utilities.PackedTable
{
base.FillToBaseTableCount();
if (BaseTableOpt.Count > Count)
if (BaseTableOpt.Count > Offsets.Count)
{
int lastOffsetValue = 0;
if (Offsets.Count > 0)
{
lastOffsetValue = Offsets[^1];
}
Offsets.Fill(BaseTableOpt.Count - Count, lastOffsetValue);
Offsets.Fill(BaseTableOpt.Count - Offsets.Count, lastOffsetValue);
}
}
@ -208,8 +282,115 @@ namespace BuildXL.Utilities.PackedTable
/// </summary>
public IEnumerable<TValue> Enumerate(TId id)
{
int index = id.FromId() - 1;
int index = id.Value - 1;
return MultiValues.Enumerate(Offsets[index], SingleValues[index]);
}
/// <summary>
/// Enumerate all values at the given ID as (id, value) tuples; handy for LINQ pipelines.
/// </summary>
public IEnumerable<(TId, TValue)> EnumerateWithId(TId id)
{
    foreach (TValue value in Enumerate(id))
    {
        yield return (id, value);
    }
}
/// <summary>
/// Enumerate the entire table's contents as a flat sequence of (id, value) tuples.
/// </summary>
public IEnumerable<(TId, TValue)> EnumerateWithIds()
{
    foreach (TId id in Ids)
    {
        foreach ((TId, TValue) tuple in EnumerateWithId(id))
        {
            yield return tuple;
        }
    }
}
/// <summary>Build a MultiValueTable by adding unordered tuples and finally completing, which sorts by ID and builds the final table.</summary>
public class Builder<TTable>
    where TTable : MultiValueTable<TId, TValue>
{
    /// <summary>The table being built.</summary>
    public readonly TTable Table;

    // Accumulates every (id, value) pair passed to Add(); sorted in place by Complete().
    private readonly SpannableList<(TId id, TValue value)> m_list;

    /// <summary>Construct a Builder.</summary>
    /// <param name="table">The table to populate when Complete() is called; must not be null.</param>
    /// <param name="capacity">Initial capacity of the internal accumulation list.</param>
    public Builder(TTable table, int capacity = DefaultCapacity)
    {
        Table = table ?? throw new ArgumentException("Table argument must not be null");
        m_list = new SpannableList<(TId, TValue)>(capacity);
    }

    /// <summary>Add this datum.</summary>
    /// <remarks>May be called in any ID order; Complete() sorts everything at the end.</remarks>
    public void Add(TId id, TValue value)
    {
        m_list.Add((id, value));
    }

    /// <summary>Compare TValues if they need to be sorted.</summary>
    /// <remarks>
    /// This is never the case for an ordinary MultiValueTable.Builder, but the subtype RelationTable.Builder
    /// does sort its values.
    /// </remarks>
    public virtual int Compare(TValue value1, TValue value2) => 0;

    /// <summary>Are these two values distinct?</summary>
    /// <remarks>
    /// If so, then the second value will be added. This allows subclasses of this builder to deduplicate;
    /// the default is not to deduplicate (e.g. consider all values distinct, without checking).
    /// </remarks>
    public virtual bool IsConsideredDistinct(TValue value1, TValue value2) => true;

    /// <summary>Static comparer instance to avoid possible repeated allocation.</summary>
    private static readonly IEqualityComparer<TId> s_idComparer = default(TId).Comparer;

    /// <summary>
    /// All relationships have been added; sort them all and build the final relation table.
    /// </summary>
    public void Complete()
    {
        // Order tuples by ID first (their 1-based Value), then by the subclass-defined value
        // ordering (a no-op for this base builder, since Compare returns 0).
        Comparison<(TId id, TValue value)> tupleComparison =
            (tuple1, tuple2) =>
            {
                int fromIdCompare = tuple1.id.Value.CompareTo(tuple2.id.Value);
                if (fromIdCompare != 0)
                {
                    return fromIdCompare;
                }
                return Compare(tuple1.value, tuple2.value);
            };
        m_list.AsSpan().Sort(tupleComparison);

        // and bin them by groups
        int listIndex = 0;
        SpannableList<TValue> buffer = new SpannableList<TValue>();
        int listCount = m_list.Count;
        Table.SetMultiValueCapacity(listCount);

        // Walk every base-table ID in order; since m_list is now sorted by ID, each ID's
        // tuples form a contiguous run starting at listIndex.
        foreach (TId baseId in Table.BaseTableOpt.Ids)
        {
            // Count up how many are for id.
            int count = 0;
            buffer.Clear();
            while (listIndex + count < m_list.Count)
            {
                var (id, value) = m_list[listIndex + count];
                if (s_idComparer.Equals(baseId, id))
                {
                    // Adjacent equal values may be collapsed by subclasses via IsConsideredDistinct;
                    // count still advances past skipped duplicates.
                    if (buffer.Count == 0 || IsConsideredDistinct(buffer[buffer.Count - 1], value))
                    {
                        buffer.Add(value);
                    }
                    count++;
                }
                else
                {
                    // ok we're done with this baseId, let's move to the next one.
                    break;
                }
            }
            // Add this ID's (possibly empty) group; Add assigns the next ID in order.
            Table.Add(buffer.AsSpan());
            listIndex += count;
        }

        // and finish by filling out to the full number of (blank) entries.
        Table.FillToBaseTableCount();
    }
}
}
}

Просмотреть файл

@ -0,0 +1,211 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
namespace BuildXL.Utilities.PackedTable
{
/// <summary>
/// Create a filter that searches for entries by (potentially multi-part) name.
/// </summary>
/// <remarks>
/// Specifically the filter supports filtering entries by either:
/// - one substring of one atom, or
/// - multiple atoms, matched by suffix on the first atom, by prefix on the last, and
///   exactly for all in between.
///
/// Effectively, the filter string is matched as if it were a substring of the entire
/// expanded name.
///
/// No wildcards are supported yet.
///
/// So for example, in the pip name scenario where the delimiter is '.':
/// - Searching for "a" will find all pips that have any atoms with "a" or "A"
///   in their names.
/// - Searching for "3.x" will find all pips that have an atom ending with "3",
///   followed by an atom starting with "x" or "X".
/// - Searching for "foo.bar.pass3" will find all pips that have an atom ending
///   with "foo", followed by the atom "bar", followed by an atom starting with "pass3"
///   (all case-insensitively).
/// </remarks>
public class NameFilter<TId>
    where TId : unmanaged, Id<TId>
{
    /// <summary>Type of substring match, based on location of the substring in the overall match string.</summary>
    private enum MatchType
    {
        /// <summary>Match the atom exactly.</summary>
        Equals,

        /// <summary>Match the end of the atom.</summary>
        EndsWith,

        /// <summary>Match the start of the atom.</summary>
        StartsWith,

        /// <summary>Match anywhere in the atom.</summary>
        Contains,
    }

    /// <summary>The base table we're filtering.</summary>
    private readonly ITable<TId> m_table;

    /// <summary>The name index we're filtering.</summary>
    private readonly NameIndex m_nameIndex;

    /// <summary>Mapping from a table ID to the name of that table entry.</summary>
    private readonly Func<TId, NameId> m_namerFunc;

    /// <summary>The delimiter between parts of a name.</summary>
    /// <remarks>Typically either '.' (for pip names) or '\' (for paths)</remarks>
    private readonly char m_delimiter;

    /// <summary>The string being matched.</summary>
    private readonly string m_matchString;

    /// <summary>Construct a NameFilter.</summary>
    /// <param name="table">The table with the entries being filtered.</param>
    /// <param name="nameIndex">The name index containing the names we will filter.</param>
    /// <param name="namerFunc">Function to obtain a name ID from a table ID.</param>
    /// <param name="delimiter">The character delimiter applicable to this filter.</param>
    /// <param name="matchString">The (possibly delimited) match string.</param>
    public NameFilter(ITable<TId> table, NameIndex nameIndex, Func<TId, NameId> namerFunc, char delimiter, string matchString)
    {
        m_table = table;
        m_nameIndex = nameIndex;
        m_namerFunc = namerFunc;
        m_delimiter = delimiter;
        m_matchString = matchString;
    }

    /// <summary>Actually perform the filtering and return the result.</summary>
    /// <remarks>
    /// NOTE: the returned sequence is a lazy PLINQ query over the table's IDs; it is
    /// evaluated when the caller enumerates it.
    /// </remarks>
    public IEnumerable<TId> Filter()
    {
        // Break up the match string into delimited pieces, and get all string atoms matching each piece.
        IEnumerable<IEnumerable<StringId>> matches = GetMatchingAtoms();

        // Now we have a LIST of bags of StringIds. We now want to filter all names in the index for names
        // which have a sequence of atoms that are contained in each respective bag in the sequence.
        // We therefore really want HashSets here rather than ConcurrentBags.
        List<HashSet<StringId>> matchSets = new List<HashSet<StringId>>();
        foreach (IEnumerable<StringId> bag in matches)
        {
            matchSets.Add(new HashSet<StringId>(bag));
        }

        // Now, in parallel (and unordered), traverse all names in the index to find ones which match.
        // matchSets is only read (never mutated) inside the parallel lambda, so sharing it is safe.
        HashSet<NameId> matchingNames = m_nameIndex.Ids
            .AsParallel()
            .Where(nid =>
            {
                ReadOnlySpan<NameEntry> atoms = m_nameIndex[nid];
                if (atoms.Length < matchSets.Count)
                {
                    // not enough atoms to be a match; continue
                    return false;
                }

                // we match on the matchSets starting at the end of the name. This is because we expect names to
                // be more similar towards their beginnings (e.g. names share prefixes more than suffixes), so
                // matching from the end should reject more names more quickly.
                // i is the index into the start of the atoms subsequence being matched; j is the index into matchSets.
                for (int i = atoms.Length - matchSets.Count; i >= 0; i--)
                {
                    bool isMatch = true;
                    for (int j = matchSets.Count - 1; j >= 0; j--)
                    {
                        HashSet<StringId> matchSet = matchSets[j];
                        isMatch = matchSet.Contains(atoms[i + j].Atom);
                        if (!isMatch)
                        {
                            break;
                        }
                    }
                    if (isMatch)
                    {
                        return true;
                    }
                }

                // no match found
                return false;
            })
            .ToHashSet();

        // Now we need to traverse the whole original table, finding the IDs for the names we matched.
        IEnumerable<TId> result = m_table.Ids
            .AsParallel()
            .Where(id => matchingNames.Contains(m_namerFunc(id)));
        return result;
    }

    /// <summary>Split the delimited match string, and find all the atoms that match each part of the split.</summary>
    /// <returns>One bag of matching StringIds per piece of the split match string.</returns>
    private IEnumerable<IEnumerable<StringId>> GetMatchingAtoms()
    {
        // First decompose the match string by delimiter.
        string[] matchPieces = m_matchString.Trim().Split(m_delimiter);

        // The sub-pieces of each match: the kind of match, the string to match, and the bag to store the matching StringIds.
        // Note that since we will filter each string only once, we will wind up with no duplicates in any bags.
        List<(MatchType matchType, string toMatch, ConcurrentBag<StringId> bag)> matches
            = new List<(MatchType, string, ConcurrentBag<StringId>)>();

        // NOTE(review): string.Split never returns an empty array for a non-null input, so this
        // branch looks unreachable in practice; kept as defensive code.
        if (matchPieces.Length == 0)
        {
            // we treat this as "no matches" (more useful than "match everything")
            // but really this should be caught in the UI
            return new List<ConcurrentBag<StringId>>();
        }

        if (matchPieces.Length == 1)
        {
            // match a substring of any atom
            matches.Add((MatchType.Contains, matchPieces[0], new ConcurrentBag<StringId>()));
        }
        else
        {
            // Multi-piece match: suffix-match the first piece, prefix-match the last,
            // and require exact matches for everything in between.
            for (int i = 0; i < matchPieces.Length; i++)
            {
                MatchType matchType = i == 0
                    ? MatchType.EndsWith
                    : i == matchPieces.Length - 1
                        ? MatchType.StartsWith
                        : MatchType.Equals;

                matches.Add((matchType, matchPieces[i], new ConcurrentBag<StringId>()));
            }
        }

        // Now scan the whole string table in parallel.
        // The ConcurrentBags absorb concurrent Adds from the parallel ForAll below.
        StringTable stringTable = m_nameIndex.NameTable.StringTable;
        stringTable.Ids.AsParallel().ForAll(sid =>
        {
            ReadOnlySpan<char> atom = stringTable[sid];
            for (int i = 0; i < matches.Count; i++)
            {
                bool isMatch = matches[i].matchType switch
                {
                    MatchType.Contains => MemoryExtensions.Contains(atom, matches[i].toMatch.AsSpan(), StringComparison.InvariantCultureIgnoreCase),
                    MatchType.StartsWith => MemoryExtensions.StartsWith(atom, matches[i].toMatch.AsSpan(), StringComparison.InvariantCultureIgnoreCase),
                    MatchType.EndsWith => MemoryExtensions.EndsWith(atom, matches[i].toMatch.AsSpan(), StringComparison.InvariantCultureIgnoreCase),
                    MatchType.Equals => MemoryExtensions.Equals(atom, matches[i].toMatch.AsSpan(), StringComparison.InvariantCultureIgnoreCase),
                    _ => throw new InvalidOperationException($"Unknown MatchType {matches[i].Item1}"),
                };
                if (isMatch)
                {
                    matches[i].bag.Add(sid);
                }
            }
        });

        // now all the bags have any and all matching string IDs for those atoms in the name.
        // Return only the bags.
        return matches.Select(tuple => tuple.bag);
    }
}
}

Просмотреть файл

@ -0,0 +1,140 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
namespace BuildXL.Utilities.PackedTable
{
/// <summary>Flatten the names in the table, such that each atom in each name is O(1) accessible;
/// optionally also build a sorted name index.</summary>
/// <remarks>
/// The base NameTable representation is basically a "linked list" of atoms (a suffix
/// table). This "index" flattens the list, such that each NameId maps to the list of
/// in-order StringIds directly.
/// This trades more memory usage for faster O(1) lookup of any atom in a name.
/// </remarks>
public class NameIndex : MultiValueTable<NameId, NameEntry>, IComparer<NameId>
{
    /// <summary>Initial capacity (in atoms) of the scratch buffer used while flattening names.</summary>
    /// <remarks>This is only a starting size; the buffer grows on demand for deeper names.</remarks>
    private const int InitialNameLength = 100;

    /// <summary>For each name, its sort order; null until Sort() is called.</summary>
    private SingleValueTable<NameId, int> m_nameSortOrder;

    /// <summary>Construct a NameIndex over a base table of names.</summary>
    public NameIndex(NameTable baseTable) : base(baseTable)
    {
        NameEntry[] entryArray = new NameEntry[InitialNameLength];
        foreach (NameId id in baseTable.Ids)
        {
            // The length of each name is one longer than the length of its prefix.
            // Since the name table is constructed in prefix order (e.g. the prefix of a name
            // must always already exist before a suffix referencing that prefix can be added),
            // we can rely on the atom length of the prefix already being in this table.
            NameEntry entry = baseTable[id];
            if (entry.Prefix == default)
            {
                // this is a root name
                entryArray[0] = entry;
                Add(entryArray.AsSpan(0, 1));
            }
            else
            {
                // copy from the existing prefix
                ReadOnlySpan<NameEntry> prefixEntries = this[entry.Prefix];

                // Grow the scratch buffer if this name is deeper than it can hold; the previous
                // fixed-size buffer threw IndexOutOfRangeException for names with > 100 atoms.
                if (prefixEntries.Length + 1 > entryArray.Length)
                {
                    entryArray = new NameEntry[Math.Max(entryArray.Length * 2, prefixEntries.Length + 1)];
                }

                prefixEntries.CopyTo(entryArray.AsSpan());

                // add this atom
                entryArray[prefixEntries.Length] = entry;
                Add(entryArray.AsSpan(0, prefixEntries.Length + 1));
            }
        }
    }

    /// <summary>The underlying NameTable indexed by this object.</summary>
    public NameTable NameTable => (NameTable)BaseTableOpt;

    /// <summary>Internal comparer used during sorting.</summary>
    private class NameSortComparer : IComparer<NameId>
    {
        private readonly NameIndex m_nameIndex;
        private readonly StringIndex m_stringIndex;

        internal NameSortComparer(NameIndex nameIndex, StringIndex stringIndex)
        {
            m_nameIndex = nameIndex;
            m_stringIndex = stringIndex;
        }

        /// <summary>Compare two names atom by atom; default (zero) NameIds sort before all real IDs.</summary>
        public int Compare(NameId x, NameId y)
        {
            // Default IDs sort first so the unused slot 0 of the sort array stays in place.
            if (x == default)
            {
                return y == default ? 0 : -1;
            }
            else if (y == default)
            {
                return 1;
            }

            int minLength = Math.Min(m_nameIndex[x].Length, m_nameIndex[y].Length);
            for (int i = 0; i < minLength; i++)
            {
                int atomComparison = m_stringIndex.Compare(m_nameIndex[x][i].Atom, m_nameIndex[y][i].Atom);
                if (atomComparison != 0)
                {
                    return atomComparison;
                }
            }

            // All shared atoms are equal; the shorter name sorts first.
            return m_nameIndex[x].Length.CompareTo(m_nameIndex[y].Length);
        }
    }

    /// <summary>Sort this NameIndex.</summary>
    /// <remarks>Sorting at the name level isn't needed by all apps, so it is optional;
    /// after calling this, HasBeenSorted becomes true and Compare can be called.</remarks>
    public void Sort(StringIndex stringIndex)
    {
        if (HasBeenSorted)
        {
            // only need to sort once
            return;
        }

        // ids[0] is deliberately left as default(NameId); the comparer sorts it first,
        // so the real IDs receive sort orders 1..Count.
        NameId[] ids = new NameId[BaseTableOpt.Count + 1];
        for (int i = 1; i <= BaseTableOpt.Count; i++)
        {
            ids[i] = new NameId(i);
        }

        NameSortComparer comparer = new NameSortComparer(this, stringIndex);
        ids.AsMemory().ParallelSort(comparer);

        // build out the table of sort orders by id
        m_nameSortOrder = new SingleValueTable<NameId, int>(NameTable);
        m_nameSortOrder.FillToBaseTableCount();
        for (int i = 1; i < ids.Length; i++)
        {
            m_nameSortOrder[ids[i]] = i;
        }
    }

    /// <summary>True iff this NameIndex has had Sort() called on it.</summary>
    public bool HasBeenSorted => m_nameSortOrder != null;

    /// <summary>Compare two NameIds from this index; requires HasBeenSorted.</summary>
    public int Compare(NameId x, NameId y)
    {
        if (!HasBeenSorted)
        {
            throw new InvalidOperationException("Must call Sort() on NameIndex before calling Compare()");
        }
        return m_nameSortOrder[x].CompareTo(m_nameSortOrder[y]);
    }
}
}

Просмотреть файл

@ -11,23 +11,38 @@ namespace BuildXL.Utilities.PackedTable
/// <summary>
/// Boilerplate ID type to avoid ID confusion in code.
/// </summary>
#pragma warning disable CS0660 // Type defines operator == or operator != but does not override Object.Equals(object o)
#pragma warning disable CS0661 // Type defines operator == or operator != but does not override Object.GetHashCode()
public struct NameId : Id<NameId>
#pragma warning restore CS0661 // Type defines operator == or operator != but does not override Object.GetHashCode()
#pragma warning restore CS0660 // Type defines operator == or operator != but does not override Object.Equals(object o)
{
/// <summary>Comparer.</summary>
public struct EqualityComparer : IEqualityComparer<NameId>
{
/// <summary>Comparison.</summary>
public bool Equals(NameId x, NameId y) => x.Value == y.Value;
/// <summary>Hashing.</summary>
public int GetHashCode(NameId obj) => obj.Value;
}
private readonly int m_value;
/// <summary>Value as int.</summary>
public readonly int Value;
public int Value => m_value;
/// <summary>Constructor.</summary>
internal NameId(int value) { Id<NameId>.CheckNotZero(value); Value = value; }
/// <summary>Eliminator.</summary>
public int FromId() => Value;
/// <summary>Introducer.</summary>
public NameId ToId(int value) => new NameId(value);
public NameId(int value) { Id<NameId>.CheckValidId(value); m_value = value; }
/// <summary>Constructor via interface.</summary>
public NameId CreateFrom(int value) => new(value);
/// <summary>Debugging.</summary>
public override string ToString() => $"NameId[{Value}]";
/// <summary>Comparison.</summary>
public bool Equals(NameId other) => Value == other.Value;
/// <summary>Hashing.</summary>
public int GetHashCode(NameId obj) => obj.Value;
public static bool operator ==(NameId x, NameId y) => x.Value == y.Value;
/// <summary>Comparison.</summary>
public static bool operator !=(NameId x, NameId y) => !(x == y);
/// <summary>Comparer usable in generic code.</summary>
public IEqualityComparer<NameId> Comparer => default(EqualityComparer);
/// <summary>Comparison via IComparable.</summary>
public int CompareTo([AllowNull] NameId other) => Value.CompareTo(other.Value);
}
/// <summary>
@ -57,7 +72,7 @@ namespace BuildXL.Utilities.PackedTable
/// <summary>
/// Equality.
/// </summary>
public bool Equals(NameEntry x, NameEntry y) => x.Prefix.Equals(y.Prefix) && x.Atom.Equals(y.Atom);
public bool Equals(NameEntry x, NameEntry y) => x.Prefix == y.Prefix && x.Atom == y.Atom;
/// <summary>
/// Hashing.
/// </summary>
@ -120,10 +135,10 @@ namespace BuildXL.Utilities.PackedTable
{
// Walk up the prefix chain to the end.
entry = this[id];
if (entry.Atom.Equals(default)) { throw new Exception($"Invalid atom for id {entry.Atom}"); }
if (entry.Atom == default) { throw new Exception($"Invalid atom for id {entry.Atom}"); }
// Are we at the end yet?
atEnd = entry.Prefix.Equals(default);
atEnd = entry.Prefix == default;
len += StringTable[entry.Atom].Length;
@ -148,11 +163,11 @@ namespace BuildXL.Utilities.PackedTable
{
NameEntry entry = this[nameId];
ReadOnlySpan<char> prefixSpan;
if (!entry.Prefix.Equals(default))
if (entry.Prefix != default)
{
// recurse on the prefix, which will result in it getting written into the first part of span
prefixSpan = GetText(entry.Prefix, span);
// add the separator
// add the separator
span[prefixSpan.Length] = Separator;
prefixSpan = span.Slice(0, prefixSpan.Length + 1);

Просмотреть файл

@ -0,0 +1,222 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading.Tasks;
namespace BuildXL.Utilities.PackedTable
{
/// <summary>
/// Parallel sorting extension method for Memory[T]
/// </summary>
/// <remarks>
/// Hand coded using our weak assumptions of "OK to temporarily allocate an O(N) output buffer".
/// </remarks>
public static class ParallelMemorySortExtensions
{
internal delegate void SpanSort<T>(Span<T> span);
/// <summary>
/// Sort the contents of this Memory in parallel, using the given comparer.
/// </summary>
public static void ParallelSort<T>(this Memory<T> memory, IComparer<T> comparer, int minimumSubspanSize = 1024)
{
    var helper = new ParallelSortHelper<T>(memory, minimumSubspanSize);
    helper.Sort(span => span.Sort(comparer), (x, y) => comparer.Compare(x, y));
}
/// <summary>
/// Sort the contents of this Memory in parallel, using the given comparison delegate.
/// </summary>
public static void ParallelSort<T>(this Memory<T> memory, Comparison<T> comparison, int minimumSubspanSize = 1024)
{
    var helper = new ParallelSortHelper<T>(memory, minimumSubspanSize);
    helper.Sort(span => span.Sort(comparison), comparison);
}
/// <summary>
/// Helper class for parallel sorting.
/// </summary>
internal class ParallelSortHelper<T>
{
/// <summary>
/// The minimum size of subspans (to avoid merge overhead when small spans are being sorted).
/// </summary>
/// <remarks>
/// This is not hardcoded because setting a small value here is useful when testing.
/// </remarks>
int minimumSubspanSize;
/// <summary>
/// The Memory we're sorting.
/// </summary>
private readonly Memory<T> memory;
/// <summary>
/// The number of subspans we'll concurrently sort.
/// </summary>
private readonly int subspanCount;
/// <summary>
/// The length of each subspan.
/// </summary>
private readonly int elementsPerSubspan;
/// <summary>
/// The start indices of each subspan (when merging).
/// </summary>
private readonly List<int> subspanStartOffsets;
/// <summary>
/// The ordered subspan indices during merging.
/// </summary>
private readonly List<int> subspanSortedIndices;
/// <summary>
/// Construct a helper that sorts the given memory in subspans of at least minimumSubspanSize elements.
/// </summary>
/// <param name="memory">The memory to sort; must not be empty.</param>
/// <param name="minimumSubspanSize">Lower bound on subspan size; must be positive.</param>
internal ParallelSortHelper(Memory<T> memory, int minimumSubspanSize)
{
    // ArgumentOutOfRangeException's single-string constructor takes the PARAMETER NAME, not
    // a message; the previous code passed the message there, producing a confusing exception.
    if (memory.IsEmpty)
    {
        throw new ArgumentOutOfRangeException(nameof(memory), "Memory must not be empty");
    }
    if (minimumSubspanSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(minimumSubspanSize), $"Minimum subspan size {minimumSubspanSize} must be greater than 0");
    }

    this.minimumSubspanSize = minimumSubspanSize;
    this.memory = memory;

    // Choose the subspan count: one per processor, unless that would make subspans smaller
    // than the minimum size, in which case use fewer, larger subspans.
    int length = memory.Length;
    int minimumSizedSubspanCount = (int)Math.Ceiling((float)length / minimumSubspanSize);
    subspanCount = Math.Min(minimumSizedSubspanCount, Environment.ProcessorCount);

    elementsPerSubspan = (int)Math.Ceiling((float)memory.Length / subspanCount);

    // List of the start index in each subspan (e.g. how many elements of that subspan have been merged).
    subspanStartOffsets = new List<int>(subspanCount);

    // Sorted list of subspan indices, ordered by first item of each subspan.
    subspanSortedIndices = new List<int>(subspanCount);

    // initialize subspanIndexOrder
    for (int i = 0; i < subspanCount; i++)
    {
        subspanStartOffsets.Add(0);
        subspanSortedIndices.Add(i);
    }
}
/// <summary>
/// Sort the whole memory: sort each subspan in parallel, then merge the sorted subspans
/// into a temporary buffer and copy the result back.
/// </summary>
/// <param name="subspanSortDelegate">Delegate that sorts a single subspan in place.</param>
/// <param name="comparison">Element comparison consistent with the subspan sort.</param>
internal void Sort(SpanSort<T> subspanSortDelegate, Comparison<T> comparison)
{
    // Sort each subspan, in parallel.
    Parallel.For(0, subspanCount, i => subspanSortDelegate(GetSubspan(i)));

    // Now memory consists of N subspans, which are all sorted.
    // We want to perform an in-place merge sort over all of these subspans.
    // The optimal space algorithm is http://akira.ruc.dk/~keld/teaching/algoritmedesign_f04/Artikler/04/Huang88.pdf
    // but for our purposes we can afford to allocate a whole copy of the input, so we don't have to merge in place.
    Memory<T> output = new Memory<T>(new T[memory.Length]);

    // sort subspan indices in order of their first items
    subspanSortedIndices.Sort((i, j) => comparison(GetFirstSubspanElement(i), GetFirstSubspanElement(j)));

    // now walk through all of memory, merging into it as we go
    for (int i = 0; i < memory.Length; i++)
    {
        // the first element of subspanSortedIndices is the next subspan to pick from
        int firstSubspanIndex = subspanSortedIndices[0];
        T firstSubspanElement = GetFirstSubspanElement(firstSubspanIndex);
        output.Span[i] = firstSubspanElement;
        subspanStartOffsets[firstSubspanIndex]++;

        // Now firstSubspan may be out of order in the subspanSortedIndices list, or it may even be empty.
        if (subspanStartOffsets[firstSubspanIndex] == GetSubspanLength(firstSubspanIndex))
        {
            // just remove it, that subspan's done
            subspanSortedIndices.RemoveAt(0);
        }
        else
        {
            // There are more elements in firstSubspanIndex.
            T firstSubspanNextElement = GetFirstSubspanElement(firstSubspanIndex);

            // Find the next subspan with a first element that is bigger than firstSubspanNextElement,
            // and move firstSubspanIndex to that location in subspanIndexOrder.
            for (int j = 1; j <= subspanSortedIndices.Count; j++)
            {
                if (j == subspanSortedIndices.Count)
                {
                    // we reached the end and all subspans had first elements that were smaller than firstSubspanElement.
                    // So, move firstSubspanIndex to the end, and we're done.
                    // (No break needed: the loop condition fails on the next increment of j.)
                    subspanSortedIndices.RemoveAt(0);
                    subspanSortedIndices.Add(firstSubspanIndex);
                }
                else
                {
                    T nextSubspanFirstElement = GetFirstSubspanElement(subspanSortedIndices[j]);
                    if (comparison(firstSubspanNextElement, nextSubspanFirstElement) <= 0)
                    {
                        // We found a subspan with a bigger first element.
                        // Move firstSubspanIndex to be just before it.
                        // (If j == 1 here, then we don't actually need to move firstSubspanIndex.)
                        // Insert before RemoveAt(0) so index j is still valid; after the removal
                        // the moved entry lands just before the bigger element, as intended.
                        if (j > 1)
                        {
                            subspanSortedIndices.Insert(j, firstSubspanIndex);
                            subspanSortedIndices.RemoveAt(0);
                        }
                        break;
                    }
                }
            }
        }

        // invariant check: first element of first subspan must be equal to or greater than element we just added
        // NOTE(review): this sanity check runs (and can throw a bare Exception) in release builds
        // as well as debug — consider Debug.Assert or a specific exception type.
        if (i < memory.Length - 1)
        {
            // there should still be a first subspan
            T firstSubspanElementAfterReordering = GetFirstSubspanElement(subspanSortedIndices[0]);
            int order = comparison(firstSubspanElement, firstSubspanElementAfterReordering);
            if (order > 0)
            {
                // oops, we have a bug
                throw new Exception("Wrong order");
            }
        }
    }

    // now copy output back to memory
    output.Span.CopyTo(memory.Span);
    // and we're done!
}
/// <summary>Get the span of elements covered by the subspan at the given index.</summary>
internal Span<T> GetSubspan(int subspanIndex)
{
    // Subspans are laid out contiguously, each elementsPerSubspan wide (the last may be shorter).
    int offset = elementsPerSubspan * subspanIndex;
    return memory.Span.Slice(offset, GetSubspanLength(subspanIndex));
}
/// <summary>Get the element count of the subspan at the given index.</summary>
internal int GetSubspanLength(int subspanIndex)
{
    // Every subspan but the last holds exactly elementsPerSubspan elements;
    // the last holds whatever remains of memory.
    return subspanIndex < subspanCount - 1
        ? elementsPerSubspan
        : memory.Length - (elementsPerSubspan * (subspanCount - 1));
}
/// <summary>Get the first not-yet-merged element of the subspan at the given index.</summary>
internal T GetFirstSubspanElement(int subspanIndex)
{
    // subspanStartOffsets tracks how far into each subspan the merge has consumed.
    int unmergedStart = subspanStartOffsets[subspanIndex];
    Span<T> subspan = GetSubspan(subspanIndex);
    return subspan[unmergedStart];
}
}
}
}

Просмотреть файл

@ -2,6 +2,7 @@
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
namespace BuildXL.Utilities.PackedTable
{
@ -33,6 +34,38 @@ namespace BuildXL.Utilities.PackedTable
RelatedTable = relatedTable;
}
private static readonly IEqualityComparer<TToId> s_toComparer = default(TToId).Comparer;
/// <summary>
/// Construct a RelationTable from a one-to-one SingleValueTable.
/// </summary>
/// <remarks>
/// Each base table entry becomes a single-element relation, or an empty relation when the
/// entry is the default value. The only real point of doing this is to be able to invert
/// the resulting relation.
/// </remarks>
public static RelationTable<TFromId, TToId> FromSingleValueTable(
    SingleValueTable<TFromId, TToId> baseTable,
    ITable<TToId> relatedTable)
{
    RelationTable<TFromId, TToId> result = new RelationTable<TFromId, TToId>(baseTable, relatedTable);
    // Reusable one-element buffer avoids allocating an array per entry.
    TToId[] buffer = new TToId[1];
    TToId[] empty = Array.Empty<TToId>();
    foreach (TFromId id in baseTable.Ids)
    {
        // Read the value once instead of going through the (validating) indexer twice.
        TToId value = baseTable[id];
        if (!s_toComparer.Equals(value, default))
        {
            buffer[0] = value;
            result.Add(buffer);
        }
        else
        {
            // A default value means "no relation" for this ID.
            result.Add(empty);
        }
    }
    return result;
}
/// <summary>
/// Get the span of IDs related to the given ID.
/// </summary>
@ -68,8 +101,8 @@ namespace BuildXL.Utilities.PackedTable
// Ensure newRelations are sorted.
for (int i = 1; i < newRelations.Length; i++)
{
int previous = newRelations[i - 1].FromId();
int current = newRelations[i].FromId();
int previous = newRelations[i - 1].Value;
int current = newRelations[i].Value;
if (previous >= current)
{
throw new ArgumentException($"Cannot add unsorted and/or duplicate data to RelationTable: data[{i - 1}] = {previous}; data[{i}] = {current}");
@ -100,7 +133,7 @@ namespace BuildXL.Utilities.PackedTable
{
foreach (TToId relatedId in this[id])
{
result.SingleValues[relatedId.FromId() - 1]++;
result.SingleValues[relatedId.Value - 1]++;
sum++;
}
}
@ -121,8 +154,8 @@ namespace BuildXL.Utilities.PackedTable
{
foreach (TToId relatedId in this[id])
{
int relatedIdInt = relatedId.FromId() - 1;
int idInt = id.FromId() - 1;
int relatedIdInt = relatedId.Value - 1;
int idInt = id.Value - 1;
int offset = result.Offsets[relatedIdInt];
int position = positions[relatedIdInt];
int relationIndex = result.Offsets[relatedIdInt] + positions[relatedIdInt];
@ -139,7 +172,7 @@ namespace BuildXL.Utilities.PackedTable
// all the relations for this ID are known. now, we have to sort them.
Span<TFromId> finalSpan =
result.MultiValues.AsSpan().Slice(result.Offsets[relatedIdInt], result.SingleValues[relatedIdInt]);
SpanUtilities.Sort(finalSpan, (id1, id2) => id1.FromId().CompareTo(id2.FromId()));
finalSpan.Sort((id1, id2) => id1.Value.CompareTo(id2.Value));
}
}
}
@ -151,93 +184,27 @@ namespace BuildXL.Utilities.PackedTable
/// <summary>
/// Build a Relation by adding unordered (from, to) tuples, and then finally completing the collection, which sorts
/// and populates the Relation.
/// and deduplicates the Relation.
/// </summary>
public class Builder
/// <remarks>
/// This type is more strict than the MultiValueTable.Builder it derives from; it ensures that all relations are sorted
/// by TToId, and it deduplicates to ensure no duplicated TToId entries.
/// </remarks>
public class Builder : Builder<RelationTable<TFromId, TToId>>
{
/// <summary>
/// The table being built.
/// </summary>
public readonly RelationTable<TFromId, TToId> Table;
private readonly SpannableList<(TFromId fromId, TToId toId)> m_list;
/// <summary>
/// Construct a Builder.
/// </summary>
public Builder(RelationTable<TFromId, TToId> table, int capacity = DefaultCapacity)
: base(table, capacity)
{
Table = table ?? throw new ArgumentException("Table argument must not be null");
m_list = new SpannableList<(TFromId, TToId)>(capacity);
}
/// <summary>
/// Add this relationship.
/// </summary>
public void Add(TFromId fromId, TToId toId)
{
m_list.Add((fromId, toId));
}
/// <summary>Compare these values; for relation tables, these must be sorted.</summary>
public override int Compare(TToId value1, TToId value2) => value1.Value.CompareTo(value2.Value);
/// <summary>
/// All relationships have been added; sort them all and build the final relation table.
/// </summary>
public void Complete()
{
m_list.AsSpan().Sort((tuple1, tuple2) =>
{
int fromIdCompare = tuple1.fromId.FromId().CompareTo(tuple2.fromId.FromId());
if (fromIdCompare != 0)
{
return fromIdCompare;
}
return tuple1.toId.FromId().CompareTo(tuple2.toId.FromId());
});
// and bin them by groups
int listIndex = 0;
SpannableList<TToId> buffer = new SpannableList<TToId>();
int listCount = m_list.Count;
Table.SetMultiValueCapacity(listCount);
foreach (TFromId id in Table.BaseTableOpt.Ids)
{
if (listIndex >= m_list.Count)
{
// ran outta entries, rest all 0
break;
}
// Count up how many are for id.
int count = 0;
buffer.Clear();
// create a to-ID that will never equal any other ID (even default)
TToId lastToId = default(TToId).ToId(-1);
while (listIndex + count < m_list.Count)
{
var (fromId, toId) = m_list[listIndex + count];
if (fromId.Equals(id))
{
// drop duplicates (silently...)
// TODO: are duplicates here a logic bug? Because they do happen in practice.
if (!toId.Equals(lastToId))
{
buffer.Add(toId);
}
count++;
lastToId = toId;
continue;
}
// ok we're done
break;
}
Table.Add(buffer.AsSpan());
listIndex += count;
}
}
/// <summary>Detect any duplicates; relation tables must not contain duplicate entries.</summary>
public override bool IsConsideredDistinct(TToId value1, TToId value2) => !value1.Comparer.Equals(value1, value2);
}
}
}

Просмотреть файл

@ -0,0 +1,150 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.Linq;
namespace BuildXL.Utilities.PackedTable
{
/// <summary>
/// The four kinds of outcomes from a traversal filter.
/// </summary>
/// <remarks>
/// When traversing a recursive relation, there are two independent choices:
/// 1) should this element be included in the results of the traversal?
/// 2) should the traversal continue to the relations of this element?
///
/// This enum captures all four possibilities for the answers to these two
/// questions.
/// </remarks>
public enum TraversalFilterResult
{
/// <summary>Include the element in the results, and continue traversing its relations.</summary>
AcceptAndContinue,
/// <summary>Include the element in the results, but do not traverse its relations.</summary>
AcceptAndHalt,
/// <summary>Exclude the element from the results, but continue traversing its relations.</summary>
RejectAndContinue,
/// <summary>Exclude the element from the results, and do not traverse its relations.</summary>
RejectAndHalt,
}
/// <summary>
/// Extension methods for RelationTable.
/// </summary>
public static class RelationTableExtensions
{
    /// <summary>
    /// Maximum number of traversal iterations before deciding we are stuck in a cycle.
    /// </summary>
    public static readonly int MaximumTraverseLimit = 1000;

    /// <summary>Is this an Accept result?</summary>
    public static bool IsAccept(this TraversalFilterResult result)
    {
        return result == TraversalFilterResult.AcceptAndContinue
            || result == TraversalFilterResult.AcceptAndHalt;
    }

    /// <summary>Is this a Continue result?</summary>
    public static bool IsContinue(this TraversalFilterResult result)
    {
        return result == TraversalFilterResult.AcceptAndContinue
            || result == TraversalFilterResult.RejectAndContinue;
    }

    /// <summary>
    /// Recursively traverse this self-relationship, collecting all IDs accepted by the filter.
    /// </summary>
    /// <remarks>
    /// Note that this extension method only applies to relations from a given ID type to itself.
    /// That is how the relation can be traversed multiple times.
    ///
    /// This can be used, for instance, to collect all (and only) the process pips that are dependencies of a
    /// given pip: just traverse the PipDependencies relation looking for PipType.Process pips only.
    ///
    /// The traversal stops once there are no more IDs to traverse, because the traversal reached a
    /// Halt result for all nodes, and/or because the traversal reached the end of the graph. If there
    /// is a cycle in the graph, the algorithm will fail after MaximumTraverseLimit iterations.
    /// </remarks>
    /// <typeparam name="TId">The ID type of the self-relationship.</typeparam>
    /// <param name="relation">The relation.</param>
    /// <param name="initial">The initial value to start the iteration; it is not itself filtered.</param>
    /// <param name="filter">Returns whether to accept or reject the value, and whether to continue or halt the traversal.</param>
    /// <returns>The collection of all IDs accepted by the filter, transitively from the initial ID.</returns>
    public static IEnumerable<TId> Traverse<TId>(
        this RelationTable<TId, TId> relation,
        TId initial,
        Func<TId, TraversalFilterResult> filter)
        where TId : unmanaged, Id<TId>
    {
        return relation.Traverse(new TId[] { initial }, filter);
    }

    /// <summary>
    /// Recursively traverse this self-relationship, collecting all IDs accepted by the filter.
    /// </summary>
    /// <remarks>
    /// Note that this extension method only applies to relations from a given ID type to itself.
    /// That is how the relation can be traversed multiple times.
    ///
    /// The traversal stops once there are no more IDs to traverse, because the traversal reached a
    /// Halt result for all nodes, and/or because the traversal reached the end of the graph. If there
    /// is a cycle in the graph, the algorithm will fail after MaximumTraverseLimit iterations.
    /// </remarks>
    /// <typeparam name="TId">The ID type of the self-relationship.</typeparam>
    /// <param name="relation">The relation.</param>
    /// <param name="initialValues">The initial values to seed the iteration; they are not themselves filtered.</param>
    /// <param name="filter">Returns whether to accept or reject the value, and whether to continue or halt the traversal.</param>
    /// <returns>The collection of all IDs accepted by the filter, transitively from the initial IDs.</returns>
    public static IEnumerable<TId> Traverse<TId>(
        this RelationTable<TId, TId> relation,
        IEnumerable<TId> initialValues,
        Func<TId, TraversalFilterResult> filter)
        where TId : unmanaged, Id<TId>
    {
        var prospects = new HashSet<TId>(initialValues, default(TId).Comparer);
        var results = new HashSet<TId>(default(TId).Comparer);
        var nextProspects = new HashSet<TId>(default(TId).Comparer);
        var traverseCount = 0;
        while (prospects.Count > 0)
        {
            nextProspects.Clear();
            // NOTE(review): a node reachable along multiple paths is filtered once per wavefront
            // it appears in; results deduplicate, but filter may be invoked more than once per node.
            foreach (var next in prospects.SelectMany(p => relation.Enumerate(p)))
            {
                TraversalFilterResult result = filter(next);
                if (result.IsAccept())
                {
                    results.Add(next);
                }
                if (result.IsContinue())
                {
                    nextProspects.Add(next);
                }
            }
            // Swap the prospect sets rather than allocating a new set per wavefront.
            HashSet<TId> temp = prospects;
            prospects = nextProspects;
            nextProspects = temp;
            if (++traverseCount > MaximumTraverseLimit)
            {
                // Throw a specific exception type rather than bare System.Exception (CA2201);
                // InvalidOperationException derives from Exception, so existing catch blocks still match.
                throw new InvalidOperationException(
                    $"Exceeded maximum relation traversal depth of {MaximumTraverseLimit}, probably due to cycle in data");
            }
        }
        return results;
    }
}
}

Просмотреть файл

@ -42,12 +42,12 @@ namespace BuildXL.Utilities.PackedTable
get
{
CheckValid(id);
return SingleValues[id.FromId() - 1];
return SingleValues[id.Value - 1];
}
set
{
CheckValid(id);
SingleValues[id.FromId() - 1] = value;
SingleValues[id.Value - 1] = value;
}
}
@ -57,7 +57,7 @@ namespace BuildXL.Utilities.PackedTable
public TId Add(TValue value)
{
SingleValues.Add(value);
return default(TId).ToId(Count);
return default(TId).CreateFrom(Count);
}
/// <summary>
@ -88,7 +88,7 @@ namespace BuildXL.Utilities.PackedTable
// Prepopulate the dictionary that does the caching
for (int i = 0; i < ValueTable.Count; i++)
{
TId id = default(TId).ToId(i + 1);
TId id = default(TId).CreateFrom(i + 1);
Entries.Add(ValueTable[id], id);
}
}
@ -115,7 +115,6 @@ namespace BuildXL.Utilities.PackedTable
/// </summary>
/// <param name="value">The updated value to use now.</param>
/// <param name="optCombiner">Function to combine old and new values to determine final updated value.</param>
/// <returns></returns>
public virtual TId UpdateOrAdd(TValue value, Func<TValue, TValue, TValue> optCombiner = null)
{
if (Entries.TryGetValue(value, out TId id))

The file diff is not shown because of its large size. Download the diff.

Просмотреть файл

@ -60,91 +60,5 @@ namespace BuildXL.Utilities.PackedTable
return hash;
}
/// <summary>
/// Sorting for Spans.
/// </summary>
/// <remarks>
/// source: https://github.com/kevin-montrose/Cesil/blob/master/Cesil/Common/Utils.cs#L870
/// via https://github.com/dotnet/runtime/issues/19969
/// todo: once MemoryExtensions.Sort() lands we can remove all of this (tracking issue: https://github.com/kevin-montrose/Cesil/issues/29)
/// coming as part of .NET 5, as a consequence of https://github.com/dotnet/runtime/issues/19969
///
/// Editor: This is a suboptimal implementation and we don't currently guarantee in PackedExecution that we *don't* call this
/// on unsorted data.
/// </remarks>
public static void Sort<T>(this Span<T> span, Comparison<T> comparer)
{
    int length = span.Length;

    // Zero or one element: already sorted.
    if (length < 2)
    {
        return;
    }

    // Exactly two elements: a single compare-and-swap suffices.
    if (length == 2)
    {
        if (comparer(span[0], span[1]) > 0)
        {
            (span[0], span[1]) = (span[1], span[0]);
        }
        return;
    }

    // Simple quicksort using a Lomuto partition (pivot = last element); see
    // https://en.wikipedia.org/wiki/Quicksort#Lomuto_partition_scheme
    int pivotIndex = Partition(span, comparer);
    Sort(span[..pivotIndex], comparer);
    Sort(span[(pivotIndex + 1)..], comparer);

    // Reorders subSpan so every item before the returned index compares
    // less than the item at the returned index (the pivot).
    static int Partition(Span<T> subSpan, Comparison<T> comparer)
    {
        int lastIndex = subSpan.Length - 1;
        T pivot = subSpan[lastIndex];
        int boundary = 0;
        for (int scan = 0; scan < subSpan.Length; scan++)
        {
            if (comparer(subSpan[scan], pivot) < 0)
            {
                (subSpan[boundary], subSpan[scan]) = (subSpan[scan], subSpan[boundary]);
                boundary++;
            }
        }
        // Put the pivot in its final position, just past the smaller items.
        (subSpan[boundary], subSpan[lastIndex]) = (subSpan[lastIndex], subSpan[boundary]);
        return boundary;
    }
}
}
}

Просмотреть файл

@ -12,9 +12,10 @@ namespace BuildXL.Utilities.PackedTable
/// A List implementation that allows Spans to be built over its backing store.
/// </summary>
/// <remarks>
/// This should clearly be in the framework
/// Note that this is not actually an IList[T] because of the indexer; this type uses
/// a ref-returning indexer, which IList[T] does not have.
/// </remarks>
public class SpannableList<T> : IList<T>
public class SpannableList<T> : ICollection<T>
where T : unmanaged
{
private T[] m_elements;
@ -24,7 +25,7 @@ namespace BuildXL.Utilities.PackedTable
/// </summary>
public SpannableList(int capacity = 100)
{
if (capacity <= 0) { throw new ArgumentException($"Capacity {capacity} must be >= 0)"); }
if (capacity < 0) { throw new ArgumentException($"Capacity {capacity} must be >= 0)"); }
m_elements = new T[capacity];
}
@ -36,20 +37,17 @@ namespace BuildXL.Utilities.PackedTable
}
/// <summary>
/// Accessor.
/// Ref accessor.
/// </summary>
public T this[int index]
/// <remarks>
/// Note that this breaks compatibility with IList[T] which does not have a ref indexer.
/// </remarks>
public ref T this[int index]
{
get
{
CheckIndex(index);
return m_elements[index];
}
set
{
CheckIndex(index);
m_elements[index] = value;
return ref m_elements[index];
}
}
@ -65,6 +63,7 @@ namespace BuildXL.Utilities.PackedTable
private const float GrowthFactor = 1.4f; // 2 would eat too much when list gets very big
/// <summary>Ensure there is capacity to hold numItems more items.</summary>
private void EnsureCapacity(int numItems)
{
int nextSize = m_elements.Length;
@ -254,6 +253,18 @@ namespace BuildXL.Utilities.PackedTable
Count--;
}
/// <summary>
/// Remove <paramref name="count"/> items starting at the given index.
/// </summary>
/// <param name="index">Index of the first item to remove.</param>
/// <param name="count">Number of items to remove.</param>
public void RemoveRange(int index, int count)
{
    // Validate up front: without this, a bad range silently corrupts the list
    // (e.g. a negative Count, or tail elements copied from past the logical end).
    if (index < 0 || count < 0 || index + count > Count)
    {
        throw new ArgumentException($"RemoveRange({index}, {count}) is out of range for list of count {Count}");
    }
    // Shift the tail of the list down over the removed range.
    for (int i = index; i < Count - count; i++)
    {
        m_elements[i] = m_elements[i + count];
    }
    Count -= count;
}
/// <summary>
/// Legacy GetEnumerator method.
/// </summary>

Просмотреть файл

@ -0,0 +1,157 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
namespace BuildXL.Utilities.PackedTable
{
/// <summary>
/// Sorts a list of StringIds, and allows accessing the sort order.
/// </summary>
public class StringIndex : IComparer<StringId>
{
    /// <summary>
    /// Compares StringIds by the text they denote; default (zero) IDs sort before all real IDs.
    /// </summary>
    private class StringIdComparer : IComparer<StringId>
    {
        private readonly StringTable m_stringTable;

        public StringIdComparer(StringTable stringTable)
        {
            m_stringTable = stringTable;
        }

        public int Compare(StringId x, StringId y)
        {
            // Default IDs sort first (m_sortedStringIds[0] is the default ID).
            if (x == default)
            {
                return y == default ? 0 : -1;
            }
            else if (y == default)
            {
                return 1;
            }
            ReadOnlySpan<char> xSpan = m_stringTable[x];
            ReadOnlySpan<char> ySpan = m_stringTable[y];
            return xSpan.CompareTo(ySpan, StringComparison.InvariantCulture);
        }
    }

    private readonly StringTable m_stringTable;

    // All StringIds known to this index, in sorted order of their strings;
    // index 0 holds default(StringId) since valid ID values start at 1.
    private readonly StringId[] m_sortedStringIds;

    /// <summary>
    /// List indexed by StringId value, containing the sort ordering of each StringId.
    /// </summary>
    private readonly int[] m_stringIdSortOrder;

    /// <summary>
    /// Construct an index over the given table.
    /// </summary>
    public StringIndex(StringTable stringTable)
    {
        m_stringTable = stringTable;

        // First, we sort all the strings in the StringTable, resulting in a list of StringIds
        // in the sorted order of their strings.
        // Then we invert that relation, giving us a list of sort-order values indexed by StringId.
        // We then use that list when doing StringId comparisons.

        // list of all string IDs in sorted order (slot 0 remains default)
        m_sortedStringIds = new StringId[stringTable.Count + 1];
        int i;
        for (i = 1; i < stringTable.Count + 1; i++)
        {
            m_sortedStringIds[i] = new StringId(i);
        }

        // Sort this in parallel since it's the biggest table and takes the longest.
        StringIdComparer stringIdComparer = new StringIdComparer(stringTable);
        Memory<StringId> sortedStringIdMemory = m_sortedStringIds.AsMemory();
        sortedStringIdMemory.ParallelSort(stringIdComparer);

        // list of the sort order for each string ID
        m_stringIdSortOrder = new int[stringTable.Count + 1];
        for (i = 0; i < m_sortedStringIds.Length; i++)
        {
            m_stringIdSortOrder[m_sortedStringIds[i].Value] = i;
        }
    }

    /// <summary>
    /// Get the sort order for this StringId (relative to all other StringIds known to this object).
    /// </summary>
    /// <returns>the sorted order of the string with this ID</returns>
    public int this[StringId stringId] => m_stringIdSortOrder[stringId.Value];

    /// <summary>
    /// Compare two StringIds based on their sort order.
    /// </summary>
    public int Compare(StringId x, StringId y) => this[x].CompareTo(this[y]);

    /// <summary>
    /// This comparer compares entries of m_sortedStringIds, with the reserved FindId
    /// equating to the string we're looking for.
    /// </summary>
    /// <remarks>
    /// Basically this lets us use the BinarySearch method on the m_sortedStringIds array,
    /// so we can binary search for an arbitrary string without interning it first.
    /// </remarks>
    private class FindComparer : IComparer<StringId>
    {
        internal static readonly StringId FindId = new StringId(int.MaxValue);

        private readonly StringIndex m_parent;
        private readonly string m_toFind;
        private readonly StringComparison m_stringComparison;

        internal FindComparer(
            StringIndex parent,
            string toFind,
            StringComparison stringComparison = StringComparison.InvariantCulture)
        {
            m_parent = parent;
            m_toFind = toFind;
            m_stringComparison = stringComparison;
        }

        public int Compare(StringId x, StringId y)
        {
            bool xDefault = x == default;
            bool yDefault = y == default;
            if (xDefault && yDefault)
            {
                return 0;
            }
            // BUGFIX: default IDs must sort *before* all real IDs here, exactly as in
            // StringIdComparer. m_sortedStringIds was sorted with default at index 0, and
            // Array.BinarySearch requires this comparer to agree with the array's ordering;
            // treating default as greatest made the array appear unsorted, so lookups could
            // miss existing strings (e.g. the only string in a one-entry table).
            if (xDefault)
            {
                return -1;
            }
            if (yDefault)
            {
                return 1;
            }
            ReadOnlySpan<char> xSpan = x == FindId ? m_toFind.AsSpan() : m_parent.m_stringTable[x];
            ReadOnlySpan<char> ySpan = y == FindId ? m_toFind.AsSpan() : m_parent.m_stringTable[y];
            return xSpan.CompareTo(ySpan, m_stringComparison);
        }
    }

    /// <summary>
    /// Try to find this string in the table; return default if not found.
    /// </summary>
    public StringId Find(string toFind)
    {
        int index = Array.BinarySearch(m_sortedStringIds, FindComparer.FindId, new FindComparer(this, toFind));
        return index >= 0 ? m_sortedStringIds[index] : default;
    }
}
}

Просмотреть файл

@ -10,22 +10,38 @@ namespace BuildXL.Utilities.PackedTable
/// <summary>
/// Boilerplate ID type to avoid ID confusion in code.
/// </summary>
public struct StringId : Id<StringId>, IEqualityComparer<StringId>
#pragma warning disable CS0660 // Type defines operator == or operator != but does not override Object.Equals(object o)
#pragma warning disable CS0661 // Type defines operator == or operator != but does not override Object.GetHashCode()
public struct StringId : Id<StringId>
#pragma warning restore CS0661 // Type defines operator == or operator != but does not override Object.GetHashCode()
#pragma warning restore CS0660 // Type defines operator == or operator != but does not override Object.Equals(object o)
{
/// <summary>Value-equality comparer for StringId, for use in hash-based collections.</summary>
public struct EqualityComparer : IEqualityComparer<StringId>
{
/// <summary>Two StringIds are equal iff their underlying int values are equal.</summary>
public bool Equals(StringId x, StringId y) => x.Value == y.Value;
/// <summary>The ID's int value itself serves as the hash code.</summary>
public int GetHashCode(StringId obj) => obj.Value;
}
private readonly int m_value;
/// <summary>Value as int.</summary>
public readonly int Value;
public int Value => m_value;
/// <summary>Constructor.</summary>
public StringId(int value) { Id<StringId>.CheckNotZero(value); Value = value; }
/// <summary>Eliminator.</summary>
public int FromId() => Value;
/// <summary>Introducer.</summary>
public StringId ToId(int value) => new StringId(value);
public StringId(int value) { Id<StringId>.CheckValidId(value); m_value = value; }
/// <summary>Constructor via interface.</summary>
public StringId CreateFrom(int value) => new(value);
/// <summary>Debugging.</summary>
public override string ToString() => $"StringId[{Value}]";
/// <summary>Comparison.</summary>
public bool Equals(StringId x, StringId y) => x.Value == y.Value;
/// <summary>Hashing.</summary>
public int GetHashCode(StringId obj) => obj.Value;
public static bool operator ==(StringId x, StringId y) => x.Equals(y);
/// <summary>Comparison.</summary>
public static bool operator !=(StringId x, StringId y) => !x.Equals(y);
/// <summary>Comparison.</summary>
public IEqualityComparer<StringId> Comparer => default(EqualityComparer);
/// <summary>Comparison via IComparable.</summary>
public int CompareTo([AllowNull] StringId other) => Value.CompareTo(other.Value);
}
/// <summary>

Просмотреть файл

@ -63,7 +63,7 @@ namespace BuildXL.Utilities.PackedTable
/// <remarks>
/// Mainly useful for testing.
/// </remarks>
public IEnumerable<TId> Ids => Enumerable.Range(1, Count).Select(v => default(TId).ToId(v));
public IEnumerable<TId> Ids => Enumerable.Range(1, Count).Select(v => default(TId).CreateFrom(v));
/// <summary>
/// The base table (if any) that defines this table's ID range.
@ -78,7 +78,7 @@ namespace BuildXL.Utilities.PackedTable
/// </summary>
public bool IsValid(TId id)
{
return id.FromId() > 0 && id.FromId() <= Count;
return id.Value > 0 && id.Value <= Count;
}
/// <summary>

Просмотреть файл

@ -0,0 +1,66 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System.IO;
using BuildXL.Pips.Operations;
using BuildXL.Scheduler.Tracing;
using BuildXL.Utilities;
using BuildXL.Utilities.PackedExecution;
using Test.BuildXL.Executables.TestProcess;
using Test.BuildXL.Scheduler;
using Xunit.Abstractions;
namespace Test.Tool.Analyzers
{
/// <summary>
/// Tests for <see cref="PackedExecutionExporter"/>.
/// The construction and disposal of these tests rely on the fact that
/// Xunit uses a unique class instance for each test.
/// /// </summary>
public class LogPackedExecutionTests : AnalyzerTestBase
{
public LogPackedExecutionTests(ITestOutputHelper output) : base(output)
{
// the key feature under test
Configuration.Logging.LogExecution = true;
Configuration.Logging.LogPackedExecution = true;
}
// TODO: determine whether it is practical to get this style of test to work for what this is trying to cover,
// specifically XLG and PXL log creation. Right now this test does not work (the Scheduler invocation fails
// and the Configuration.Logging.ExecutionLog property is Invalid), and it is not clear how feasible it is to
// fix it.
//[Fact]
public void TestLogPackedExecution()
{
FileArtifact srcA = CreateSourceFile();
FileArtifact outA = CreateOutputFileArtifact();
Process pipA = CreateAndSchedulePipBuilder(new Operation[]
{
Operation.ReadFile(srcA),
Operation.WriteFile(outA)
}).Process;
// Make pipB dependent on pipA
FileArtifact srcB = CreateSourceFile();
Process pipB = CreateAndSchedulePipBuilder(new Operation[]
{
Operation.ReadFile(srcB),
Operation.ReadFile(outA),
Operation.WriteFile(CreateOutputFileArtifact())
}).Process;
System.Diagnostics.Debugger.Launch();
ScheduleRunResult result = RunScheduler(); // .AssertCacheMiss(pipA.PipId, pipB.PipId);
AbsolutePath executionLogPath = Configuration.Logging.ExecutionLog;
string packedExecutionPath = Path.ChangeExtension(executionLogPath.ToString(Context.PathTable), "PXL"); // Packed eXecution Log
// Try reading it
PackedExecution pex = new PackedExecution();
pex.LoadFromDirectory(packedExecutionPath);
}
}
}

Просмотреть файл

@ -0,0 +1,32 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using BuildXL.Utilities.PackedExecution;
using Test.BuildXL.TestUtilities.Xunit;
using Xunit;
using Xunit.Abstractions;
namespace Test.Tool.Analyzers
{
public class PackedExecutionConsistentEnumTests : TemporaryStorageTestBase
{
public PackedExecutionConsistentEnumTests(ITestOutputHelper output) : base(output)
{
}
[Fact]
public void PackedExecution_PipType_enum_matches_BuildXL()
{
XAssert.AreEqual((int)PipType.CopyFile, (int)global::BuildXL.Pips.Operations.PipType.CopyFile);
XAssert.AreEqual((int)PipType.HashSourceFile, (int)global::BuildXL.Pips.Operations.PipType.HashSourceFile);
XAssert.AreEqual((int)PipType.Ipc, (int)global::BuildXL.Pips.Operations.PipType.Ipc);
XAssert.AreEqual((int)PipType.Max, (int)global::BuildXL.Pips.Operations.PipType.Max);
XAssert.AreEqual((int)PipType.Module, (int)global::BuildXL.Pips.Operations.PipType.Module);
XAssert.AreEqual((int)PipType.Process, (int)global::BuildXL.Pips.Operations.PipType.Process);
XAssert.AreEqual((int)PipType.SealDirectory, (int)global::BuildXL.Pips.Operations.PipType.SealDirectory);
XAssert.AreEqual((int)PipType.SpecFile, (int)global::BuildXL.Pips.Operations.PipType.SpecFile);
XAssert.AreEqual((int)PipType.Value, (int)global::BuildXL.Pips.Operations.PipType.Value);
XAssert.AreEqual((int)PipType.WriteFile, (int)global::BuildXL.Pips.Operations.PipType.WriteFile);
}
}
}

Просмотреть файл

@ -4,6 +4,7 @@
using BuildXL.Utilities.PackedExecution;
using BuildXL.Utilities.PackedTable;
using System;
using System.Collections.Generic;
using System.Linq;
using Test.BuildXL.TestUtilities.Xunit;
using Xunit;
@ -95,31 +96,77 @@ namespace Test.Tool.Analyzers
PackedExecution.Builder packedExecutionBuilder = new PackedExecution.Builder(packedExecution);
long hash = 1;
string name = "ShellCommon.Shell.ShellCommon.Shell.Merged.Winmetadata";
PipId pipId = packedExecutionBuilder.PipTableBuilder.Add(hash, name, PipType.Process);
PipId pipId1 = packedExecutionBuilder.PipTableBuilder.Add(hash, name, PipType.Process);
PipId pipId2 = packedExecutionBuilder.PipTableBuilder.Add(hash + 1, $"{name}2", PipType.Process);
PipId pipId3 = packedExecutionBuilder.PipTableBuilder.Add(hash + 2, $"{name}3", PipType.Process);
PipId pipId3 = packedExecutionBuilder.PipTableBuilder.Add(hash + 2, $"{name}3", PipType.SealDirectory);
PipId pipId4 = packedExecutionBuilder.PipTableBuilder.Add(hash + 3, $"{name}4", PipType.Process);
packedExecution.ConstructRelationTables();
RelationTable<PipId, PipId> relationTable = packedExecution.PipDependencies;
RelationTable<PipId, PipId>.Builder builder = new RelationTable<PipId, PipId>.Builder(relationTable);
// add relations in any order
builder.Add(pipId3, pipId2);
builder.Add(pipId3, pipId);
builder.Add(pipId, pipId3);
builder.Add(pipId, pipId2);
// add relations in any order (but without cycles)
// 1 <- 2
// 1 <- 3
// 2 <- 4
// 3 <- 4
builder.Add(pipId2, pipId1);
builder.Add(pipId3, pipId1);
builder.Add(pipId4, pipId2);
builder.Add(pipId4, pipId3);
// done adding relations; flush to table
builder.Complete();
XAssert.AreArraysEqual(new[] { pipId2, pipId3 }, relationTable[pipId].ToArray(), true);
XAssert.AreArraysEqual(new PipId[0], relationTable[pipId2].ToArray(), true);
XAssert.AreArraysEqual(new[] { pipId, pipId2 }, relationTable[pipId3].ToArray(), true);
CheckRelation(packedExecution, pipId1, pipId2, pipId3, pipId4, relationTable);
}
XAssert.AreArraysEqual(new[] { pipId2, pipId3 }, relationTable.Enumerate(pipId).ToArray(), true);
XAssert.AreArraysEqual(new PipId[0], relationTable.Enumerate(pipId2).ToArray(), true);
XAssert.AreArraysEqual(new[] { pipId, pipId2 }, relationTable.Enumerate(pipId3).ToArray(), true);
private static void CheckRelation(PackedExecution packedExecution, PipId pipId1, PipId pipId2, PipId pipId3, PipId pipId4, RelationTable<PipId, PipId> relationTable)
{
    // Expected dependency shape (built by the callers):
    // 1 <- 2
    // 1 <- 3
    // 2 <- 4
    // 3 <- 4

    // Indexer access returns each pip's dependencies.
    XAssert.AreArraysEqual(new PipId[] { }, relationTable[pipId1].ToArray(), true);
    XAssert.AreArraysEqual(new[] { pipId1 }, relationTable[pipId2].ToArray(), true);
    XAssert.AreArraysEqual(new[] { pipId1 }, relationTable[pipId3].ToArray(), true);
    XAssert.AreArraysEqual(new[] { pipId2, pipId3 }, relationTable[pipId4].ToArray(), true);

    // Enumerate must agree with the indexer.
    XAssert.AreArraysEqual(new PipId[] { }, relationTable.Enumerate(pipId1).ToArray(), true);
    XAssert.AreArraysEqual(new[] { pipId1 }, relationTable.Enumerate(pipId2).ToArray(), true);
    XAssert.AreArraysEqual(new[] { pipId2, pipId3 }, relationTable.Enumerate(pipId4).ToArray(), true);

    // Traverse from pipId4, accepting (and halting at) process pips and skipping
    // past anything else; pipId3 is a seal directory, so traversal continues
    // through it and reaches pipId1 as well as pipId2.
    TraversalFilterResult AcceptProcesses(PipId p) =>
        packedExecution.PipTable[p].PipType == PipType.Process
            ? TraversalFilterResult.AcceptAndHalt
            : TraversalFilterResult.RejectAndContinue;

    PipId[] reached = relationTable
        .Traverse(pipId4, AcceptProcesses)
        .OrderBy(p => p.Value)
        .ToArray();
    XAssert.AreArraysEqual(new[] { pipId1, pipId2 }, reached, true);
}
[Fact]
public void RelationTable_can_be_built_unordered()
{
    // Build a four-pip execution; pip 3 is a seal directory, the rest are processes.
    var packedExecution = new PackedExecution();
    var builder = new PackedExecution.Builder(packedExecution);

    const long hash = 1;
    const string name = "ShellCommon.Shell.ShellCommon.Shell.Merged.Winmetadata";
    PipId pipId1 = builder.PipTableBuilder.Add(hash, name, PipType.Process);
    PipId pipId2 = builder.PipTableBuilder.Add(hash + 1, $"{name}2", PipType.Process);
    PipId pipId3 = builder.PipTableBuilder.Add(hash + 2, $"{name}3", PipType.SealDirectory);
    PipId pipId4 = builder.PipTableBuilder.Add(hash + 3, $"{name}4", PipType.Process);

    packedExecution.ConstructRelationTables();
    RelationTable<PipId, PipId> dependencies = packedExecution.PipDependencies;
    dependencies.FillToBaseTableCount();

    // Add relations dependents-first; AddUnordered tolerates any insertion order.
    dependencies.AddUnordered(pipId4, new[] { pipId2, pipId3 });
    dependencies.AddUnordered(pipId3, new[] { pipId1 });
    dependencies.AddUnordered(pipId2, new[] { pipId1 });

    XAssert.IsTrue(dependencies.MayBeUnordered);

    CheckRelation(packedExecution, pipId1, pipId2, pipId3, pipId4, dependencies);
}
}
}

Просмотреть файл

@ -58,7 +58,7 @@ namespace Test.Tool.Analyzers
PackedExecution.Builder packedExecutionBuilder = new PackedExecution.Builder(packedExecution);
string path = "d:\\os\\bin\\shellcommon\\shell\\merged\\winmetadata\\appresolverux.winmd";
FileId id = packedExecutionBuilder.FileTableBuilder.GetOrAdd(path, 1024 * 1024, default, default);
FileId id = packedExecutionBuilder.FileTableBuilder.GetOrAdd(path, 1024 * 1024, default, default, default);
XAssert.AreEqual(0, packedExecution.PipTable.Count);
XAssert.AreEqual(1, packedExecution.FileTable.Count);
@ -96,7 +96,7 @@ namespace Test.Tool.Analyzers
string name = "ShellCommon.Shell.ShellCommon.Shell.Merged.Winmetadata";
PipId pipId = packedExecutionBuilder.PipTableBuilder.Add(hash, name, PipType.Process);
string path = "d:\\os\\bin\\shellcommon\\shell\\merged\\winmetadata\\appresolverux.winmd";
packedExecutionBuilder.FileTableBuilder.GetOrAdd(path, 1024 * 1024, pipId, default);
packedExecutionBuilder.FileTableBuilder.GetOrAdd(path, 1024 * 1024, default, default, default);
string workerName = "BIGWORKER";
packedExecutionBuilder.WorkerTableBuilder.GetOrAdd(workerName);
@ -118,7 +118,6 @@ namespace Test.Tool.Analyzers
FileId fileId2 = packedExecution2.FileTable.Ids.First();
XAssert.AreEqual(path, packedExecution2.FileTable.PathTable.GetText(packedExecution2.FileTable[fileId2].Path));
XAssert.AreEqual(pipId2, packedExecution2.FileTable[fileId2].ProducerPip);
WorkerId workerId2 = packedExecution2.WorkerTable.Ids.First();
XAssert.AreEqual(workerName, new string(packedExecution2.StringTable[packedExecution2.WorkerTable[workerId2]]));

Просмотреть файл

@ -0,0 +1,120 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using BuildXL.Utilities.PackedTable;
using BuildXL.Utilities.PackedExecution;
using Test.BuildXL.TestUtilities.Xunit;
using Xunit;
using Xunit.Abstractions;
using System.Collections.Generic;
using System.Linq;
namespace Test.Tool.Analyzers
{
/// <summary>
/// Tests for <c>NameFilter</c> substring matching over dotted pip names.
/// </summary>
public class PackedTableNameFilterTests : TemporaryStorageTestBase
{
    public PackedTableNameFilterTests(ITestOutputHelper output) : base(output)
    {
    }

    /// <summary>
    /// Construct a small execution with three dotted pip names sharing the "alpha" prefix.
    /// Pip IDs are assigned in order of addition (the asserts below rely on IDs 1..3).
    /// </summary>
    public static PackedExecution.Builder ConstructExecution()
    {
        PackedExecution packedExecution = new PackedExecution();
        PackedExecution.Builder builder = new PackedExecution.Builder(packedExecution);
        builder.PipTableBuilder.Add(0, "alpha.bravo.charlie", PipType.Process);
        builder.PipTableBuilder.Add(1, "alpha.bravo.delta.echo", PipType.Process);
        builder.PipTableBuilder.Add(2, "alpha.foxtrot.golf.hotel", PipType.Process);
        return builder;
    }

    /// <summary>
    /// A filter string falling inside a single atom ("rav" within "bravo") matches
    /// every name containing that atom, and matching is case-insensitive.
    /// </summary>
    [Fact]
    public void NameFilter_filters_substrings_correctly()
    {
        PackedExecution.Builder builder = ConstructExecution();
        NameIndex nameIndex = new NameIndex(builder.PackedExecution.PipTable.PipNameTable);

        // "rav" occurs in the "bravo" atom of the first two pips only.
        NameFilter<PipId> nameFilter = new NameFilter<PipId>(
            builder.PackedExecution.PipTable,
            nameIndex,
            pid => builder.PackedExecution.PipTable[pid].Name,
            '.',
            "rav");
        PipId[] results = nameFilter.Filter().OrderBy(pid => pid).ToArray();
        XAssert.AreEqual(2, results.Length);
        XAssert.AreEqual(new PipId(1), results.First());
        XAssert.AreEqual(new PipId(2), results.Last());

        // The same query uppercased must yield identical results.
        NameFilter<PipId> nameFilter2 = new NameFilter<PipId>(
            builder.PackedExecution.PipTable,
            nameIndex,
            pid => builder.PackedExecution.PipTable[pid].Name,
            '.',
            "RAV");
        PipId[] results2 = nameFilter2.Filter().OrderBy(pid => pid).ToArray();
        XAssert.AreArraysEqual(results, results2, true);
    }

    /// <summary>
    /// A filter spanning a separator ("a.b") matches names in which an atom ending
    /// with "a" is immediately followed by an atom starting with "b".
    /// </summary>
    [Fact]
    public void NameFilter_filters_starts_and_ends_correctly()
    {
        PackedExecution.Builder builder = ConstructExecution();
        NameIndex nameIndex = new NameIndex(builder.PackedExecution.PipTable.PipNameTable);

        // Should match the two "alpha.bravo..." pips.
        NameFilter<PipId> nameFilter = new NameFilter<PipId>(
            builder.PackedExecution.PipTable,
            nameIndex,
            pid => builder.PackedExecution.PipTable[pid].Name,
            '.',
            "a.b");
        PipId[] results = nameFilter.Filter().OrderBy(pid => pid).ToArray();
        XAssert.AreEqual(2, results.Length);
        XAssert.AreEqual(new PipId(1), results.First());
        XAssert.AreEqual(new PipId(2), results.Last());
    }

    /// <summary>
    /// Interior atoms of a multi-atom filter must match a name's atom exactly; only
    /// the first and last filter parts match as suffix/prefix of an atom.
    /// </summary>
    [Fact]
    public void NameFilter_filters_internal_atoms_by_equality()
    {
        PackedExecution.Builder builder = ConstructExecution();
        NameIndex nameIndex = new NameIndex(builder.PackedExecution.PipTable.PipNameTable);

        // "a.bravo.d": interior "bravo" must match an atom exactly, so only
        // "alpha.bravo.delta.echo" (pip 2) qualifies.
        NameFilter<PipId> nameFilter = new NameFilter<PipId>(
            builder.PackedExecution.PipTable,
            nameIndex,
            pid => builder.PackedExecution.PipTable[pid].Name,
            '.',
            "a.bravo.d");
        PipId[] results = nameFilter.Filter().ToArray();
        XAssert.AreEqual(1, results.Length);
        XAssert.AreEqual(new PipId(2), results.First());

        // "t.golf.h": suffix "t" + exact "golf" + prefix "h" matches only
        // "alpha.foxtrot.golf.hotel" (pip 3).
        NameFilter<PipId> nameFilter2 = new NameFilter<PipId>(
            builder.PackedExecution.PipTable,
            nameIndex,
            pid => builder.PackedExecution.PipTable[pid].Name,
            '.',
            "t.golf.h");
        PipId[] results2 = nameFilter2.Filter().ToArray();
        XAssert.AreEqual(1, results2.Length);
        XAssert.AreEqual(new PipId(3), results2.First());
    }
}
}

Просмотреть файл

@ -0,0 +1,56 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using BuildXL.Utilities.PackedTable;
using System;
using System.Diagnostics;
using System.Linq;
using Test.BuildXL.TestUtilities.Xunit;
using Xunit;
using Xunit.Abstractions;
namespace Test.Tool.Analyzers
{
/// <summary>
/// Tests for <c>NameIndex</c>, which indexes the atoms of every name in a NameTable.
/// </summary>
public class PackedTableNameIndexTests : TemporaryStorageTestBase
{
    public PackedTableNameIndexTests(ITestOutputHelper output) : base(output)
    {
    }

    /// <summary>
    /// Building a NameIndex over a populated NameTable indexes every name and
    /// exposes each name's atoms in order.
    /// </summary>
    [Fact]
    public void NameIndex_contains_all_names()
    {
        var stringTable = new StringTable();
        var cachingBuilder = new StringTable.CachingBuilder(stringTable);
        var nameTable = new NameTable('.', stringTable);
        var nameBuilder = new NameTable.Builder(nameTable, cachingBuilder);

        NameId abc = nameBuilder.GetOrAdd("a.b.c");
        NameId abde = nameBuilder.GetOrAdd("a.b.d.e");
        NameId afgh = nameBuilder.GetOrAdd("a.f.g.h");

        var nameIndex = new NameIndex(nameTable);

        // Eight name entries overall; each indexed name's Length matches its atom count.
        XAssert.AreEqual(8, nameIndex.Count);
        XAssert.AreEqual(3, nameIndex[abc].Length);
        XAssert.AreEqual(4, nameIndex[abde].Length);
        XAssert.AreEqual(4, nameIndex[afgh].Length);

        // String IDs are handed out as atoms are first added, and the names above
        // introduce atoms a..h in lexical order, so IDs 1..8 correspond to a..h.
        StringId a = new StringId(1);
        StringId b = new StringId(2);
        StringId c = new StringId(3);
        StringId d = new StringId(4);
        StringId e = new StringId(5);
        StringId f = new StringId(6);
        StringId g = new StringId(7);
        StringId h = new StringId(8);

        XAssert.AreArraysEqual(new[] { a, b, c }, nameIndex.Enumerate(abc).Select(entry => entry.Atom).ToArray(), true);
        XAssert.AreArraysEqual(new[] { a, b, d, e }, nameIndex.Enumerate(abde).Select(entry => entry.Atom).ToArray(), true);
        XAssert.AreArraysEqual(new[] { a, f, g, h }, nameIndex.Enumerate(afgh).Select(entry => entry.Atom).ToArray(), true);
    }
}
}

Просмотреть файл

@ -3,6 +3,7 @@
using BuildXL.Utilities.PackedTable;
using System;
using System.Diagnostics;
using System.Linq;
using Test.BuildXL.TestUtilities.Xunit;
using Xunit;
@ -16,6 +17,24 @@ namespace Test.Tool.Analyzers
{
}
[Fact]
public void NameId_equality_is_consistent()
{
    // Exercise ==/!= for NameId against itself, the default value, and another value.
    NameId oneId = new(1);
    NameId defaultId = default;

#pragma warning disable CS1718 // Comparison made to same variable
    XAssert.IsTrue(oneId == oneId);
    XAssert.IsFalse(oneId != oneId);
#pragma warning restore CS1718 // Comparison made to same variable

    // default compares equal to default...
    XAssert.IsTrue(default(NameId) == default(NameId));
    XAssert.IsTrue(defaultId == default);
    XAssert.IsFalse(defaultId != default);

    // ...and unequal to a real value, symmetrically.
    XAssert.IsFalse(oneId == default);
    XAssert.IsTrue(oneId != default);
    XAssert.IsFalse(defaultId == oneId);
    XAssert.IsTrue(defaultId != oneId);
}
[Fact]
public void NameTable_can_store_one_singular_element()
{

Просмотреть файл

@ -0,0 +1,79 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading.Tasks;
using BuildXL.Utilities.PackedTable;
using Test.BuildXL.TestUtilities.Xunit;
using Xunit;
using Xunit.Abstractions;
namespace Test.Tool.Analyzers
{
/// <summary>
/// Tests for the parallel sorting extension method for Memory[T].
/// </summary>
public class ParallelMemorySortExtensionsTests : XunitBuildXLTest
{
    public ParallelMemorySortExtensionsTests(ITestOutputHelper output) : base(output)
    {
    }

    /// <summary>Throw with a diagnostic message if data is not sorted ascending.</summary>
    public static void ConfirmSorted(int[] data)
    {
        for (int i = 1; i < data.Length; i++)
        {
            if (data[i - 1] > data[i])
            {
                // Include the offending index and values so a failure is actionable.
                throw new Exception($"Sorting failure at index {i}: {data[i - 1]} > {data[i]}");
            }
        }
    }

    /// <summary>
    /// Sort copies of the input with different minimum subspan sizes and confirm each result;
    /// the original array is left untouched.
    /// </summary>
    public static void TestSort(int[] data)
    {
        // minimumSubspanSize: 1 forces maximal subdivision; 3 exercises mixed subspan sizes.
        foreach (int minimumSubspanSize in new[] { 1, 3 })
        {
            int[] copy = (int[])data.Clone();
            ParallelMemorySortExtensions.ParallelSort<int>(copy, (i, j) => i.CompareTo(j), minimumSubspanSize: minimumSubspanSize);

            // Sorting must not add or drop elements.
            XAssert.AreEqual(data.Length, copy.Length);
            ConfirmSorted(copy);
        }
    }

    /// <summary>Test sorting already-sorted data</summary>
    [Fact]
    public void TestSortedSort()
    {
        TestSort(new int[] { 1 });
        TestSort(new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 });
    }

    /// <summary>Test sorting antisorted data</summary>
    [Fact]
    public void TestReverseSort()
    {
        TestSort(new int[] { 9, 8, 7, 6, 5, 4, 3, 2, 1 });
    }

    /// <summary>Test sorting interleaved data</summary>
    [Fact]
    public void TestInterleavedSort()
    {
        TestSort(new int[] { 1, 4, 7, 2, 5, 8, 3, 6, 9 });
    }

    /// <summary>Test sorting disordered data</summary>
    [Fact]
    public void TestRandomSort()
    {
        TestSort(new int[] { 8, 4, 3, 5, 6, 1, 9, 2, 7 });
    }
}
}

Просмотреть файл

@ -14,15 +14,28 @@ namespace PackedExecution {
sources: globR(d`.`, "*.cs"),
references: [
Core.dll,
TestProcess.exe,
TestUtilities.dll,
TestUtilities.XUnit.dll,
importFrom("BuildXL.Cache.ContentStore").Hashing.dll,
importFrom("BuildXL.Core.UnitTests").EngineTestUtilities.dll,
importFrom("BuildXL.Core.UnitTests").Scheduler.dll,
importFrom("BuildXL.Core.UnitTests").Scheduler.IntegrationTest.dll,
importFrom("BuildXL.Engine").Cache.dll,
importFrom("BuildXL.Engine").Engine.dll,
importFrom("BuildXL.Engine").Processes.dll,
importFrom("BuildXL.Engine").Scheduler.dll,
importFrom("BuildXL.Pips").dll,
importFrom("BuildXL.Tools").Execution.Analyzer.exe,
importFrom("BuildXL.Tools.UnitTests").Test.Tool.Analyzers.dll,
importFrom("BuildXL.Utilities").dll,
importFrom("BuildXL.Utilities").Native.dll,
importFrom("BuildXL.Utilities").Interop.dll,
importFrom("BuildXL.Utilities").PackedTable.dll,
importFrom("BuildXL.Utilities").PackedExecution.dll,
importFrom("BuildXL.Utilities").ToolSupport.dll,
importFrom("BuildXL.Utilities").Collections.dll,
importFrom("BuildXL.Utilities").Configuration.dll,
],
importFrom("BuildXL.Utilities").Interop.dll,
importFrom("BuildXL.Utilities").Native.dll,
importFrom("BuildXL.Utilities").PackedExecution.dll,
importFrom("BuildXL.Utilities").PackedTable.dll,
importFrom("BuildXL.Utilities").ToolSupport.dll,
]
});
}

Просмотреть файл

@ -17,7 +17,7 @@ if "%1" == "-usedev" (
)
echo.
REM Running the XLG BuildXL analyzer: %BxlAnalyzerPath%\bxlanalyzer.exe
@echo Running the XLG BuildXL analyzer: %BxlAnalyzerPath%\bxlanalyzer.exe
echo.
%BxlAnalyzerPath%\bxlanalyzer.exe %1 %2 %3 %4 %5 %6 %7 %8 %9
if %ERRORLEVEL% NEQ 0 (