From dbbcb4769572472915b92c62aa9ef3d122a673b4 Mon Sep 17 00:00:00 2001 From: Iman Narasamdya Date: Tue, 10 Dec 2019 23:59:22 +0000 Subject: [PATCH] Merged PR 523144: Improve cache miss analysis presentation This is the first phase of giving more love to cache miss analysis: improving the analysis presentation. The current presentation has the following issues: - Analysis outputs are hard to understand, and look cryptic. - Cache miss analysis relies on a buggy third-party algorithm that can spit out a lot of irrelevant outputs. - Analysis of weak fingerprint do not respect order-independent entries. - Etc. The new presentation is still Json based and is similar to (but more refined than) the legacy cache miss analysis (which customers like). Example of new presentation (for mismatched strong fingerprints): ``` { "StrongFingerprint": { "Old": "471383773F3C19EAC1F4EF4DDD496E2076428406", "New": "FA15D762D7BFE52570BDFC070917B4103E0EE713" }, "Paths": { "Changed": { "b:\\out\\objects\\7\\0\\907noclwl174l942adk4eeeedjrh2a\\t_1\\fingerprints73945ac\\0\\obj\\readonly\\src_0": { "Old": "AccessType: DirectoryEnumeration | Hash: 3100D2E3F4", "New": "AccessType: DirectoryEnumeration | Hash: C675A28412", "Members": { "Added": [ "src_3", "src_4" ] } } } } } ``` The old presentation is preserved because some customer (namely Eric) relies on the old format. 
Related work items: #1520132, #1567602, #1572558 --- .../Advanced-Features/Cache-Miss-Analysis.md | 5 +- Public/Src/App/Bxl/Args.cs | 3 + Public/Src/App/Bxl/HelpText.cs | 5 + Public/Src/App/Bxl/Strings.resx | 57 +- .../Engine/Cache/Serialization/JsonTree.cs | 30 +- .../Scheduler/Fingerprints/ObservedPathSet.cs | 21 + .../Fingerprints/PipFingerprintField.cs | 41 ++ .../Fingerprints/PipFingerprinter.cs | 106 +-- .../Fingerprints/PipFingerprintingVersion.cs | 3 +- .../Tracing/CacheMissAnalysisUtilities.cs | 99 ++- .../Scheduler/Tracing/FingerprintDiff.cs | 327 +++++++++ .../Tracing/FingerprintStoreReader.cs | 153 +++- .../Scheduler/Tracing/JsonFingerprintDiff.cs | 683 ++++++++++++++++++ ...ProcessStrongFingerprintComputationData.cs | 6 +- .../Tracing/RuntimeCacheMissAnalyzer.cs | 18 +- .../FingerprintStore/FingerprintDiffTests.cs | 78 ++ Public/Src/Pips/Dll/Operations/Process.cs | 13 + .../CacheMiss/FingerprintStoreAnalyzer.cs | 18 +- .../FingerprintStoreAnalyzerTests.cs | 17 +- .../Configuration/CacheMissAnalysisOption.cs | 24 + .../Configuration/ILoggingConfiguration.cs | 5 + .../Mutable/LoggingConfiguration.cs | 5 + 22 files changed, 1562 insertions(+), 155 deletions(-) create mode 100644 Public/Src/Engine/Scheduler/Fingerprints/PipFingerprintField.cs create mode 100644 Public/Src/Engine/Scheduler/Tracing/FingerprintDiff.cs create mode 100644 Public/Src/Engine/Scheduler/Tracing/JsonFingerprintDiff.cs create mode 100644 Public/Src/Engine/UnitTests/FingerprintStore/FingerprintDiffTests.cs diff --git a/Documentation/Wiki/Advanced-Features/Cache-Miss-Analysis.md b/Documentation/Wiki/Advanced-Features/Cache-Miss-Analysis.md index f1dcc9c81..610f6f179 100644 --- a/Documentation/Wiki/Advanced-Features/Cache-Miss-Analysis.md +++ b/Documentation/Wiki/Advanced-Features/Cache-Miss-Analysis.md @@ -28,12 +28,15 @@ The "analysis.txt" file in the output directory shows the first pip in each depe ### Diff Format -Both cache miss analyzers use *JsonDiffPatch* to diff 
*WeakFingerprint* and *StrongFingerprint* json files. If you are not familiar with json diff syntax, you can find the reference in the following links: +The new cache miss analyzer produces diff outputs in the form of Json. The new cache miss analyzer offers two different diff formats. The first format, called *CustomJsonDiff*, is a custom diff format resulting from our own diff algorithm that understands the semantics of weak and strong fingerprints. + +The second diff format is *JsonDiffPatch*. This format shows the diff as a delta between two Json representations. To output this format, BuildXL relies on an external diff algorithm. References about the algorithm and the diff syntax can be found in the following links: [General diff syntax reference](https://github.com/benjamine/jsondiffpatch/blob/master/docs/deltas.md) [Array diff syntax reference](https://github.com/benjamine/jsondiffpatch/blob/master/docs/arrays.md) +The default diff format is CustomJsonDiff. One can explicitly specify the diff format to use with `/cacheMissDiffFormat:` #### Known Limitations The cache miss analyzer works correctly under the assumption that the two builds being compared shared the same graph scope and processed all of the same pips through the full scheduling algorithm. 
When this assumption is false, the analyzer may produce the following messages: diff --git a/Public/Src/App/Bxl/Args.cs b/Public/Src/App/Bxl/Args.cs index fb565e028..e2b95e3ca 100644 --- a/Public/Src/App/Bxl/Args.cs +++ b/Public/Src/App/Bxl/Args.cs @@ -251,6 +251,9 @@ namespace BuildXL OptionHandlerFactory.CreateBoolOptionWithValue( "cacheMiss", (opt, sign) => ParseCacheMissAnalysisOption(opt, sign, loggingConfiguration, pathTable)), + OptionHandlerFactory.CreateOption( + "cacheMissDiffFormat", + opt => CommandLineUtilities.ParseEnumOption(opt)), OptionHandlerFactory.CreateOption( "cacheSessionName", opt => cacheConfiguration.CacheSessionName = CommandLineUtilities.ParseStringOption(opt)), diff --git a/Public/Src/App/Bxl/HelpText.cs b/Public/Src/App/Bxl/HelpText.cs index b5dee57d0..6ffa9b1e5 100644 --- a/Public/Src/App/Bxl/HelpText.cs +++ b/Public/Src/App/Bxl/HelpText.cs @@ -358,6 +358,11 @@ namespace BuildXL Strings.HelpText_DisplayHelp_CacheMiss, HelpLevel.Verbose); + hw.WriteOption( + "/cacheMissDiffFormat:[format]", + Strings.HelpText_DisplayHelp_CacheMissDiffFormat, + HelpLevel.Verbose); + hw.WriteOption( "/scriptShowSlowest[+|-]", Strings.HelpText_DisplayHelp_ScriptShowSlowest, diff --git a/Public/Src/App/Bxl/Strings.resx b/Public/Src/App/Bxl/Strings.resx index 221df64bc..c1a09541b 100644 --- a/Public/Src/App/Bxl/Strings.resx +++ b/Public/Src/App/Bxl/Strings.resx @@ -1,17 +1,17 @@  - @@ -1033,4 +1033,7 @@ Example: ad2d42d2ec5d2ca0c0b7ad65402d07c7ef40b91e Validates the cgmanifest.json file at the specified path. This file should contain up-to-date names and versions of all Nuget packages used within BuildXL for Component Governance. Any mismatch will cause the Build to fail. Updated file can be created using the /generateCgManifestForNugets:<path> + + Diff format for cache miss analysis. Allowed values are CustomJsonDiff and JsonPatchDiff. 
Defaults to CustomJsonDiff + \ No newline at end of file diff --git a/Public/Src/Engine/Cache/Serialization/JsonTree.cs b/Public/Src/Engine/Cache/Serialization/JsonTree.cs index bdca26825..63ef45b85 100644 --- a/Public/Src/Engine/Cache/Serialization/JsonTree.cs +++ b/Public/Src/Engine/Cache/Serialization/JsonTree.cs @@ -68,14 +68,14 @@ namespace BuildXL.Engine.Cache.Serialization return false; } - if (this.Values.Count != other.Values.Count) + if (Values.Count != other.Values.Count) { return false; } - for (int i = 0; i < this.Values.Count; ++i) + for (int i = 0; i < Values.Count; ++i) { - if (this.Values[i] != other.Values[i]) + if (Values[i] != other.Values[i]) { return false; } @@ -119,7 +119,7 @@ namespace BuildXL.Engine.Cache.Serialization /// public static class JsonTree { - private static JsonDiffPatch s_jdp = null; + private static readonly JsonDiffPatch s_jdp = null; static JsonTree() { @@ -156,7 +156,7 @@ namespace BuildXL.Engine.Cache.Serialization Parent = parentNode, Name = reader.Value.ToString() }; - parentNode.Children.AddFirst(currentNode); + parentNode.Children.AddLast(currentNode); break; case JsonToken.String: currentNode.Values.Add(reader.Value.ToString()); @@ -191,7 +191,7 @@ namespace BuildXL.Engine.Cache.Serialization // If the root is being used to just point to a bunch of child nodes, skip printing it if (string.IsNullOrEmpty(root.Name)) { - for (var it = root.Children.Last; it != null; it = it.Previous) + for (var it = root.Children.First; it != null; it = it.Next) { BuildStringHelper(it.Value, wr); } @@ -230,7 +230,7 @@ namespace BuildXL.Engine.Cache.Serialization collectionWriter.Add(value); } - for (var it = n.Children.Last; it != null; it = it.Previous) + for (var it = n.Children.First; it != null; it = it.Next) { BuildStringHelper(it.Value, collectionWriter); } @@ -384,5 +384,21 @@ namespace BuildXL.Engine.Cache.Serialization } } } + + /// + /// Visits tree. 
+ /// + public static void VisitTree(JsonNode root, Action processNode, bool recurse) + { + for (var it = root.Children.First; it != null; it = it.Next) + { + JsonNode node = it.Value; + processNode(node); + if (recurse) + { + VisitTree(node, processNode, recurse); + } + } + } } } diff --git a/Public/Src/Engine/Scheduler/Fingerprints/ObservedPathSet.cs b/Public/Src/Engine/Scheduler/Fingerprints/ObservedPathSet.cs index 783a6c043..312c7ca07 100644 --- a/Public/Src/Engine/Scheduler/Fingerprints/ObservedPathSet.cs +++ b/Public/Src/Engine/Scheduler/Fingerprints/ObservedPathSet.cs @@ -23,6 +23,27 @@ namespace BuildXL.Scheduler.Fingerprints [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Performance", "CA1815:OverrideEqualsAndOperatorEqualsOnValueTypes")] public readonly struct ObservedPathSet { + /// + /// Constants used for labeling. + /// + public readonly struct Labels + { + /// + /// Label for . + /// + public const string UnsafeOptions = nameof(ObservedPathSet.UnsafeOptions); + + /// + /// Label for . + /// + public const string ObservedAccessedFileNames = nameof(ObservedPathSet.ObservedAccessedFileNames); + + /// + /// Label for . + /// + public const string Paths = nameof(ObservedPathSet.Paths); + } + /// /// Failure describing why deserialization of a path set failed (). /// diff --git a/Public/Src/Engine/Scheduler/Fingerprints/PipFingerprintField.cs b/Public/Src/Engine/Scheduler/Fingerprints/PipFingerprintField.cs new file mode 100644 index 000000000..3f5f9efee --- /dev/null +++ b/Public/Src/Engine/Scheduler/Fingerprints/PipFingerprintField.cs @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +namespace BuildXL.Scheduler.Fingerprints +{ + /// + /// Struct naming common fields in pip fingerprint. 
+ /// + public struct PipFingerprintField + { + /// + public const string ExecutionAndFingerprintOptionsHash = nameof(ExecutionAndFingerprintOptionsHash); + + /// + public const string ContentHashAlgorithmName = nameof(ContentHashAlgorithmName); + + /// + public const string PipType = nameof(PipType); + + /// + public struct Process + { + /// + public const string SourceChangeAffectedInputList = nameof(SourceChangeAffectedInputList); + } + + /// + public struct FileDependency + { + /// + public const string PathNormalizedWriteFileContent = nameof(PathNormalizedWriteFileContent); + } + + /// + public struct FileOutput + { + /// + public const string Attributes = nameof(Attributes); + } + } +} diff --git a/Public/Src/Engine/Scheduler/Fingerprints/PipFingerprinter.cs b/Public/Src/Engine/Scheduler/Fingerprints/PipFingerprinter.cs index 39ac800f3..7a15b90b6 100644 --- a/Public/Src/Engine/Scheduler/Fingerprints/PipFingerprinter.cs +++ b/Public/Src/Engine/Scheduler/Fingerprints/PipFingerprinter.cs @@ -174,12 +174,12 @@ namespace BuildXL.Scheduler.Fingerprints Contract.Requires(fingerprinter != null); Contract.Requires(pip != null); - fingerprinter.Add("ExecutionAndFingerprintOptionsHash", m_extraFingerprintSalts.CalculatedSaltsFingerprint); + fingerprinter.Add(PipFingerprintField.ExecutionAndFingerprintOptionsHash, m_extraFingerprintSalts.CalculatedSaltsFingerprint); // Fingerprints must change when outputs are hashed with a different algorithm. 
- fingerprinter.Add("ContentHashAlgorithmName", s_outputContentHashAlgorithmName); + fingerprinter.Add(PipFingerprintField.ContentHashAlgorithmName, s_outputContentHashAlgorithmName); - fingerprinter.Add("PipType", GetHashMarker(pip)); + fingerprinter.Add(PipFingerprintField.PipType, GetHashMarker(pip)); switch (pip.PipType) { @@ -212,7 +212,7 @@ namespace BuildXL.Scheduler.Fingerprints Contract.Requires(fingerprinter != null); Contract.Requires(hashSourceFile != null); - fingerprinter.Add("File", hashSourceFile.Artifact); + fingerprinter.Add(nameof(HashSourceFile.Artifact), hashSourceFile.Artifact); } /// @@ -223,8 +223,8 @@ namespace BuildXL.Scheduler.Fingerprints Contract.Requires(fingerprinter != null); Contract.Requires(copyFile != null); - AddFileDependency(fingerprinter, "Source", copyFile.Source); - AddFileOutput(fingerprinter, "Destination", copyFile.Destination); + AddFileDependency(fingerprinter, nameof(CopyFile.Source), copyFile.Source); + AddFileOutput(fingerprinter, nameof(CopyFile.Destination), copyFile.Destination); } /// @@ -235,9 +235,9 @@ namespace BuildXL.Scheduler.Fingerprints Contract.Requires(fingerprinter != null); Contract.Requires(writeFile != null); - AddFileOutput(fingerprinter, "Destination", writeFile.Destination); - AddPipData(fingerprinter, "Contents", writeFile.Contents); - fingerprinter.Add("Encoding", (byte)writeFile.Encoding); + AddFileOutput(fingerprinter, nameof(WriteFile.Destination), writeFile.Destination); + AddPipData(fingerprinter, nameof(WriteFile.Contents), writeFile.Contents); + fingerprinter.Add(nameof(WriteFile.Encoding), (byte)writeFile.Encoding); } /// @@ -248,17 +248,17 @@ namespace BuildXL.Scheduler.Fingerprints Contract.Requires(fingerprinter != null); Contract.Requires(sealDirectory != null); - fingerprinter.Add("Path", sealDirectory.DirectoryRoot); - fingerprinter.Add("Kind", sealDirectory.Kind.ToString()); - fingerprinter.Add("Scrub", sealDirectory.Scrub.ToString()); + 
fingerprinter.Add(nameof(SealDirectory.DirectoryRoot), sealDirectory.DirectoryRoot); + fingerprinter.Add(nameof(SealDirectory.Kind), sealDirectory.Kind.ToString()); + fingerprinter.Add(nameof(SealDirectory.Scrub), sealDirectory.Scrub.ToString()); // Sort the contents based on their members' expanded paths so that they are stable across different path tables. var sortedContents = SortedReadOnlyArray.CloneAndSort(sealDirectory.Contents, m_expandedPathFileArtifactComparer); - fingerprinter.AddCollection>("Contents", sortedContents, (fp, f) => AddFileDependency(fp, f)); - fingerprinter.AddCollection>("Patterns", sealDirectory.Patterns, (fp, p) => fp.Add(p)); - fingerprinter.Add("IsComposite", sealDirectory.IsComposite.ToString()); - fingerprinter.AddCollection>("ComposedDirectories", sealDirectory.ComposedDirectories, (fp, d) => AddDirectoryDependency(fp, d)); + fingerprinter.AddCollection>(nameof(SealDirectory.Contents), sortedContents, (fp, f) => AddFileDependency(fp, f)); + fingerprinter.AddCollection>(nameof(SealDirectory.Patterns), sealDirectory.Patterns, (fp, p) => fp.Add(p)); + fingerprinter.Add(nameof(SealDirectory.IsComposite), sealDirectory.IsComposite.ToString()); + fingerprinter.AddCollection>(nameof(SealDirectory.ComposedDirectories), sealDirectory.ComposedDirectories, (fp, d) => AddDirectoryDependency(fp, d)); } /// @@ -266,62 +266,62 @@ namespace BuildXL.Scheduler.Fingerprints /// protected virtual void AddWeakFingerprint(IFingerprinter fingerprinter, Process process) { - fingerprinter.Add("Executable", process.Executable); - fingerprinter.Add("WorkingDirectory", process.WorkingDirectory); + fingerprinter.Add(nameof(Process.Executable), process.Executable); + fingerprinter.Add(nameof(Process.WorkingDirectory), process.WorkingDirectory); if (process.StandardInput.IsData) { // We only add standard input if it is data. If it is a file, then it is guaranteed to be in the dependency list. 
- AddPipData(fingerprinter, "StandardInputData", process.StandardInput.Data); + AddPipData(fingerprinter, nameof(Process.StandardInputData), process.StandardInput.Data); } - AddFileOutput(fingerprinter, "StandardError", process.StandardError); - AddFileOutput(fingerprinter, "StandardOutput", process.StandardOutput); + AddFileOutput(fingerprinter, nameof(Process.StandardError), process.StandardError); + AddFileOutput(fingerprinter, nameof(Process.StandardOutput), process.StandardOutput); // Files within untrackedPaths and untrackedScopes are irrelevent to the weak fingerprint and are removed from the fingerprint ReadOnlyArray relevantDependencies = process.Dependencies.Where(d => !IsUntracked(process, d.Path)).ToReadOnlyArray(); - fingerprinter.AddOrderIndependentCollection>("Dependencies", relevantDependencies, (fp, f) => AddFileDependency(fp, f), m_expandedPathFileArtifactComparer); - fingerprinter.AddOrderIndependentCollection>("DirectoryDependencies", process.DirectoryDependencies, (fp, d) => AddDirectoryDependency(fp, d), DirectoryComparer); + fingerprinter.AddOrderIndependentCollection>(nameof(Process.Dependencies), relevantDependencies, (fp, f) => AddFileDependency(fp, f), m_expandedPathFileArtifactComparer); + fingerprinter.AddOrderIndependentCollection>(nameof(Process.DirectoryDependencies), process.DirectoryDependencies, (fp, d) => AddDirectoryDependency(fp, d), DirectoryComparer); - fingerprinter.AddOrderIndependentCollection>("Outputs", process.FileOutputs, (fp, f) => AddFileOutput(fp, f), m_expandedPathFileArtifactWithAttributesComparer); - fingerprinter.AddOrderIndependentCollection>("DirectoryOutputs", process.DirectoryOutputs, (h, p) => h.Add(p.Path), DirectoryComparer); + fingerprinter.AddOrderIndependentCollection>(nameof(Process.FileOutputs), process.FileOutputs, (fp, f) => AddFileOutput(fp, f), m_expandedPathFileArtifactWithAttributesComparer); + fingerprinter.AddOrderIndependentCollection>(nameof(Process.DirectoryOutputs), 
process.DirectoryOutputs, (h, p) => h.Add(p.Path), DirectoryComparer); - fingerprinter.AddOrderIndependentCollection>("UntrackedPaths", process.UntrackedPaths, (h, p) => h.Add(p), m_pathTable.ExpandedPathComparer); - fingerprinter.AddOrderIndependentCollection>("UntrackedScopes", process.UntrackedScopes, (h, p) => h.Add(p), m_pathTable.ExpandedPathComparer); + fingerprinter.AddOrderIndependentCollection>(nameof(Process.UntrackedPaths), process.UntrackedPaths, (h, p) => h.Add(p), m_pathTable.ExpandedPathComparer); + fingerprinter.AddOrderIndependentCollection>(nameof(Process.UntrackedScopes), process.UntrackedScopes, (h, p) => h.Add(p), m_pathTable.ExpandedPathComparer); - fingerprinter.AddOrderIndependentCollection>("PreserveOutputWhitelist", process.PreserveOutputWhitelist, (h, p) => h.Add(p), m_pathTable.ExpandedPathComparer); + fingerprinter.AddOrderIndependentCollection>(nameof(Process.PreserveOutputWhitelist), process.PreserveOutputWhitelist, (h, p) => h.Add(p), m_pathTable.ExpandedPathComparer); - fingerprinter.Add("HasUntrackedChildProcesses", process.HasUntrackedChildProcesses ? 1 : 0); - fingerprinter.Add("AllowUndeclaredSourceReads", process.AllowUndeclaredSourceReads ? 1 : 0); - fingerprinter.Add("AbsentPathProbeUnderOpaquesMode", (byte)process.ProcessAbsentPathProbeInUndeclaredOpaquesMode); + fingerprinter.Add(nameof(Process.HasUntrackedChildProcesses), process.HasUntrackedChildProcesses ? 1 : 0); + fingerprinter.Add(nameof(Process.AllowUndeclaredSourceReads), process.AllowUndeclaredSourceReads ? 1 : 0); + fingerprinter.Add(nameof(Process.ProcessAbsentPathProbeInUndeclaredOpaquesMode), (byte)process.ProcessAbsentPathProbeInUndeclaredOpaquesMode); // When DisableCacheLookup is set, the pip is marked as perpetually dirty for incremental scheduling. // It must also go to the weak fingerprint so IS will get a miss when you change from the DisableCacheLookup = false // to DisableCacheLookup = true. 
if (process.DisableCacheLookup) { - fingerprinter.Add("DisableCacheLookup", ContentHashingUtilities.CreateRandom()); + fingerprinter.Add(nameof(Process.DisableCacheLookup), ContentHashingUtilities.CreateRandom()); } - fingerprinter.Add("DoubleWritePolicy", (byte)process.DoubleWritePolicy); + fingerprinter.Add(nameof(Process.DoubleWritePolicy), (byte)process.DoubleWritePolicy); if (process.RequiresAdmin) { - fingerprinter.Add("RequiresAdmin", 1); + fingerprinter.Add(nameof(Process.RequiresAdmin), 1); } - fingerprinter.Add("NeedsToRunInContainer", process.NeedsToRunInContainer ? 1 : 0); - fingerprinter.Add("ContainerIsolationLevel", (byte)process.ContainerIsolationLevel); + fingerprinter.Add(nameof(Process.NeedsToRunInContainer), process.NeedsToRunInContainer ? 1 : 0); + fingerprinter.Add(nameof(Process.ContainerIsolationLevel), (byte)process.ContainerIsolationLevel); - AddPipData(fingerprinter, "Arguments", process.Arguments); + AddPipData(fingerprinter, nameof(Process.Arguments), process.Arguments); if (process.ResponseFileData.IsValid) { - AddPipData(fingerprinter, "ResponseFileData", process.ResponseFileData); + AddPipData(fingerprinter, nameof(Process.ResponseFileData), process.ResponseFileData); } fingerprinter.AddOrderIndependentCollection>( - "EnvironmentVariables", + nameof(Process.EnvironmentVariables), process.EnvironmentVariables, (fCollection, env) => { @@ -337,33 +337,37 @@ namespace BuildXL.Scheduler.Fingerprints m_environmentVariableComparer ); - fingerprinter.Add("WarningTimeout", process.WarningTimeout.HasValue ? process.WarningTimeout.Value.Ticks : -1); - fingerprinter.Add("Timeout", process.Timeout.HasValue ? process.Timeout.Value.Ticks : -1); + fingerprinter.Add(nameof(Process.WarningTimeout), process.WarningTimeout.HasValue ? process.WarningTimeout.Value.Ticks : -1); + fingerprinter.Add(nameof(Process.Timeout), process.Timeout.HasValue ? 
process.Timeout.Value.Ticks : -1); if (process.WarningRegex.IsValid) { - fingerprinter.Add("WarningRegex.Pattern", process.WarningRegex.Pattern); - fingerprinter.Add("WarningRegex.Options", (int)process.WarningRegex.Options); + fingerprinter.Add(nameof(Process.WarningRegexPattern), process.WarningRegex.Pattern); + fingerprinter.Add(nameof(Process.WarningRegexOptions), (int)process.WarningRegex.Options); } if (process.ErrorRegex.IsValid) { - fingerprinter.Add("ErrorRegex.Pattern", process.ErrorRegex.Pattern); - fingerprinter.Add("ErrorRegex.Options", (int)process.ErrorRegex.Options); + fingerprinter.Add(nameof(Process.ErrorRegexPattern), process.ErrorRegex.Pattern); + fingerprinter.Add(nameof(Process.ErrorRegexOptions), (int)process.ErrorRegex.Options); } - fingerprinter.AddCollection>("SuccessExitCodes", process.SuccessExitCodes, (h, i) => h.Add(i)); + fingerprinter.AddOrderIndependentCollection>(nameof(Process.SuccessExitCodes), process.SuccessExitCodes, (h, i) => h.Add(i), Comparer.Default); if (process.ChangeAffectedInputListWrittenFile.IsValid) { - fingerprinter.AddOrderIndependentCollection>("SourceChangeAffectedInputList", m_sourceChangeAffectedInputsLookup(process).ToReadOnlyArray(), (h, p) => h.Add(p), m_pathTable.ExpandedPathComparer); - fingerprinter.Add("ChangeAffectedInputListWrittenFile", process.ChangeAffectedInputListWrittenFile); + fingerprinter.AddOrderIndependentCollection>( + PipFingerprintField.Process.SourceChangeAffectedInputList, + m_sourceChangeAffectedInputsLookup(process).ToReadOnlyArray(), + (h, p) => h.Add(p), + m_pathTable.ExpandedPathComparer); + fingerprinter.Add(nameof(Process.ChangeAffectedInputListWrittenFile), process.ChangeAffectedInputListWrittenFile); } if (process.ChildProcessesToBreakawayFromSandbox != null) { fingerprinter.AddOrderIndependentCollection>( - "ChildProcessesToBreakawayFromSandbox", + nameof(Process.ChildProcessesToBreakawayFromSandbox), process.ChildProcessesToBreakawayFromSandbox.Select(processName => 
processName.StringId).ToReadOnlyArray(), (h, p) => h.Add(p), m_pathTable.StringTable.OrdinalComparer); @@ -409,7 +413,7 @@ namespace BuildXL.Scheduler.Fingerprints // into write file outputs so we could get their "path normalized" content(ie with paths tokenized). fingerprinter.AddNested( fileArtifact.Path, - fp => AddPipData(fp, "PathNormalizedWriteFileContent", filePipData)); + fp => AddPipData(fp, PipFingerprintField.FileDependency.PathNormalizedWriteFileContent, filePipData)); } else { @@ -460,7 +464,7 @@ namespace BuildXL.Scheduler.Fingerprints Contract.Requires(fingerprinter != null); // For attributed file artifacts both path and attributes are critical for fingerprinting - fingerprinter.AddNested(fileArtifact.Path, fp => fp.Add("Attributes", (int)fileArtifact.FileExistence)); + fingerprinter.AddNested(fileArtifact.Path, fp => fp.Add(PipFingerprintField.FileOutput.Attributes, (int)fileArtifact.FileExistence)); } /// diff --git a/Public/Src/Engine/Scheduler/Fingerprints/PipFingerprintingVersion.cs b/Public/Src/Engine/Scheduler/Fingerprints/PipFingerprintingVersion.cs index 4199eb2cc..432dac241 100644 --- a/Public/Src/Engine/Scheduler/Fingerprints/PipFingerprintingVersion.cs +++ b/Public/Src/Engine/Scheduler/Fingerprints/PipFingerprintingVersion.cs @@ -42,7 +42,8 @@ namespace BuildXL.Scheduler.Fingerprints /// 68: Added ChildProcessesToBreakawayFromSandbox /// 69: Added dynamic existing probe. /// 70: Removed duplicates from ObservedAccessedFileNames. + /// 71: Rename fields in weak fingerprint. 
/// - TwoPhaseV2 = 70, + TwoPhaseV2 = 71, } } \ No newline at end of file diff --git a/Public/Src/Engine/Scheduler/Tracing/CacheMissAnalysisUtilities.cs b/Public/Src/Engine/Scheduler/Tracing/CacheMissAnalysisUtilities.cs index 6fef57c32..a2fa86190 100644 --- a/Public/Src/Engine/Scheduler/Tracing/CacheMissAnalysisUtilities.cs +++ b/Public/Src/Engine/Scheduler/Tracing/CacheMissAnalysisUtilities.cs @@ -5,6 +5,8 @@ using System; using System.Diagnostics.ContractsLight; using System.IO; using BuildXL.Engine.Cache.Serialization; +using BuildXL.Utilities.Configuration; +using Newtonsoft.Json.Linq; using static BuildXL.Scheduler.Tracing.FingerprintStoreReader; namespace BuildXL.Scheduler.Tracing @@ -26,6 +28,9 @@ namespace BuildXL.Scheduler.Tracing /// WeakFingerprintMismatch, + /// + PathSetHashMismatch, + /// StrongFingerprintMismatch, @@ -57,7 +62,8 @@ namespace BuildXL.Scheduler.Tracing TextWriter writer, PipCacheMissInfo missInfo, Func oldSessionFunc, - Func newSessionFunc) + Func newSessionFunc, + CacheMissDiffFormat diffFormat) { Contract.Requires(oldSessionFunc != null); Contract.Requires(newSessionFunc != null); @@ -71,7 +77,7 @@ namespace BuildXL.Scheduler.Tracing case PipCacheMissType.MissForDescriptorsDueToWeakFingerprints: case PipCacheMissType.MissForDescriptorsDueToStrongFingerprints: // Compute the pip unique output hash to use as the primary lookup key for fingerprint store entries - return AnalyzeFingerprints(oldSessionFunc, newSessionFunc, writer); + return AnalyzeFingerprints(oldSessionFunc, newSessionFunc, writer, diffFormat); // We had a weak and strong fingerprint match, but couldn't retrieve correct data from the cache case PipCacheMissType.MissForCacheEntry: @@ -106,7 +112,8 @@ namespace BuildXL.Scheduler.Tracing private static CacheMissAnalysisResult AnalyzeFingerprints( Func oldSessionFunc, Func newSessionFunc, - TextWriter writer) + TextWriter writer, + CacheMissDiffFormat diffFormat) { var result = CacheMissAnalysisResult.Invalid; @@ -158,19 
+165,30 @@ namespace BuildXL.Scheduler.Tracing if (oldPipSession.FormattedSemiStableHash != newPipSession.FormattedSemiStableHash) { // Make trivial json so the print looks like the rest of the diff - var oldNode = new JsonNode + if (diffFormat == CacheMissDiffFormat.CustomJsonDiff) { - Name = RepeatedStrings.FormattedSemiStableHashChanged - }; - oldNode.Values.Add(oldPipSession.FormattedSemiStableHash); - - var newNode = new JsonNode + var diff = new JProperty("SemiStableHash", + new JObject( + new JProperty("Old", oldPipSession.FormattedSemiStableHash), + new JProperty("New", newPipSession.FormattedSemiStableHash))); + WriteLine(new JObject(diff).ToString(), writer); + } + else { - Name = RepeatedStrings.FormattedSemiStableHashChanged - }; - newNode.Values.Add(newPipSession.FormattedSemiStableHash); + var oldNode = new JsonNode + { + Name = RepeatedStrings.FormattedSemiStableHashChanged + }; + oldNode.Values.Add(oldPipSession.FormattedSemiStableHash); - WriteLine(JsonTree.PrintTreeDiff(oldNode, newNode), writer); + var newNode = new JsonNode + { + Name = RepeatedStrings.FormattedSemiStableHashChanged + }; + newNode.Values.Add(newPipSession.FormattedSemiStableHash); + + WriteLine(JsonTree.PrintTreeDiff(oldNode, newNode), writer); + } } // Diff based off the actual fingerprints instead of the PipCacheMissType @@ -186,13 +204,47 @@ namespace BuildXL.Scheduler.Tracing if (oldPipSession.WeakFingerprint != newPipSession.WeakFingerprint) { WriteLine("WeakFingerprint", writer); - WriteLine(JsonTree.PrintTreeDiff(oldPipSession.GetWeakFingerprintTree(), newPipSession.GetWeakFingerprintTree()), writer); + + if (diffFormat == CacheMissDiffFormat.CustomJsonDiff) + { + WriteLine(oldPipSession.DiffWeakFingerprint(newPipSession).ToString(), writer); + } + else + { + WriteLine(JsonTree.PrintTreeDiff(oldPipSession.GetWeakFingerprintTree(), newPipSession.GetWeakFingerprintTree()), writer); + } + result = CacheMissAnalysisResult.WeakFingerprintMismatch; } + else if 
(oldPipSession.PathSetHash != newPipSession.PathSetHash) + { + WriteLine($"PathSet", writer); + + if (diffFormat == CacheMissDiffFormat.CustomJsonDiff) + { + WriteLine(oldPipSession.DiffPathSet(newPipSession).ToString(), writer); + } + else + { + // JsonPatchDiff does not have pathset comparison. + WriteLine(JsonTree.PrintTreeDiff(oldPipSession.GetStrongFingerprintTree(), newPipSession.GetStrongFingerprintTree()), writer); + } + + result = CacheMissAnalysisResult.PathSetHashMismatch; + } else if (oldPipSession.StrongFingerprint != newPipSession.StrongFingerprint) { WriteLine("StrongFingerprint", writer); - WriteLine(JsonTree.PrintTreeDiff(oldPipSession.GetStrongFingerprintTree(), newPipSession.GetStrongFingerprintTree()), writer); + + if (diffFormat == CacheMissDiffFormat.CustomJsonDiff) + { + WriteLine(oldPipSession.DiffStrongFingerprint(newPipSession).ToString(), writer); + } + else + { + WriteLine(JsonTree.PrintTreeDiff(oldPipSession.GetStrongFingerprintTree(), newPipSession.GetStrongFingerprintTree()), writer); + } + result = CacheMissAnalysisResult.StrongFingerprintMismatch; } else @@ -237,7 +289,22 @@ namespace BuildXL.Scheduler.Tracing /// Formatted semi stable hash changed. /// public const string FormattedSemiStableHashChanged - = "FormattedSemiStableHash"; + = "SemiStableHash"; + + /// + /// Marker indicating that a value is not specified. + /// + public const string UnspecifiedValue = "[Unspecified value]"; + + /// + /// Marker indicating that a value is specified. + /// + public const string ExistentValue = "[Value exists]"; + + /// + /// Marker indicating that an expected value is missing, which indicates a bug in the cache miss analysis. 
+ /// + public const string MissingValue = "Missing value"; } } } diff --git a/Public/Src/Engine/Scheduler/Tracing/FingerprintDiff.cs b/Public/Src/Engine/Scheduler/Tracing/FingerprintDiff.cs new file mode 100644 index 000000000..8284ed5a0 --- /dev/null +++ b/Public/Src/Engine/Scheduler/Tracing/FingerprintDiff.cs @@ -0,0 +1,327 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using BuildXL.Scheduler.Fingerprints; +using BuildXL.Utilities; + +namespace BuildXL.Scheduler.Tracing +{ + /// + /// Utilities class for diff-ing fingerprints irrespective of the stored data and of the diff data representation. + /// + internal static class FingerprintDiff + { + #region Extraction + private static readonly List s_emptyList = new List(); + + /// + /// Extracts differences between two unordered maps (or dictionaries). + /// + internal static bool ExtractUnorderedMapDiff( + IReadOnlyDictionary oldData, + IReadOnlyDictionary newData, + Func equalValue, + out IReadOnlyList added, + out IReadOnlyList removed, + out IReadOnlyList changed) + { + bool hasDiff = ExtractUnorderedListDiff(oldData.Keys, newData.Keys, out added, out removed); + List mutableChanged = s_emptyList; + + foreach (var kvp in oldData) + { + if (newData.TryGetValue(kvp.Key, out var newValue) && !equalValue(kvp.Value, newValue)) + { + if (mutableChanged == s_emptyList) + { + mutableChanged = new List(); + } + + mutableChanged.Add(kvp.Key); + } + } + + changed = mutableChanged; + + return hasDiff || changed.Count > 0; + } + + /// + /// Extracts differences between two unordered lists (or sets). 
+ /// + internal static bool ExtractUnorderedListDiff( + IEnumerable oldData, + IEnumerable newData, + out IReadOnlyList added, + out IReadOnlyList removed) + { + bool oldAny = oldData.Any(); + bool newAny = newData.Any(); + + if (!oldAny && !newAny) + { + added = removed = s_emptyList; + } + else if (oldAny && !newAny) + { + added = s_emptyList; + removed = oldData.ToHashSet().ToList(); + } + else if (!oldAny && newAny) + { + removed = s_emptyList; + added = newData.ToHashSet().ToList(); + } + else + { + var newSet = newData.ToHashSet(); + var oldSet = oldData.ToHashSet(); + newSet.ExceptWith(oldData); + oldSet.ExceptWith(newData); + added = newSet.ToList(); + removed = oldSet.ToList(); + } + + return added.Count > 0 || removed.Count > 0; + } + + #endregion Extraction + + #region Internal fingerprint data + + /// + /// Observed input data. + /// + internal struct ObservedInputData : IEquatable + { + /// + /// Path. + /// + public readonly string Path; + + /// + /// Flags. + /// + public readonly string Flags; + + /// + /// Pattern. + /// + public readonly string Pattern; + + /// + /// Access type. + /// + public readonly string AccessType; + + /// + /// Content hash or membership hash. + /// + public readonly string Hash; + + /// + /// Creates an instance of . + /// + public ObservedInputData( + string path, + string flags, + string pattern, + string hashMarker, + string hash) + { + Path = path; + Flags = flags; + Pattern = pattern; + AccessType = hashMarker; + Hash = hash; + } + + /// + /// Creates an instance of . 
+ /// + public ObservedInputData(string path, string flags, string pattern) : this(path, flags, pattern, null, null) { } + + /// + public bool Equals(ObservedInputData other) => + Path == other.Path && Flags == other.Flags && Pattern == other.Pattern && AccessType == other.AccessType && Hash == other.Hash; + + /// + public override bool Equals(object obj) => StructUtilities.Equals(this, obj); + + /// + public override int GetHashCode() + { + return HashCodeHelper.Combine(hashCode(Path), hashCode(Flags), hashCode(Pattern), hashCode(AccessType), hashCode(Hash)); + + static int hashCode(string s) => s != null ? EqualityComparer.Default.GetHashCode(s) : 0; + } + + /// + /// Describes diff with respect to other instance of . + /// + /// + /// + public string DescribeDiffWithoutPath(ObservedInputData data) => + string.Join( + " | ", + (new[] { + Prefix(nameof(AccessType), ObservedInputConstants.ToExpandedString(AccessType)), + Flags == data.Flags ? null : Prefix(nameof(Flags), Flags), + Pattern == data.Pattern ? null : Prefix(nameof(Pattern), Pattern), + Hash == data.Hash ? null : Prefix(nameof(Hash), Hash) }).Where(s => !string.IsNullOrEmpty(s))); + + private string Prefix(string prefix, string item) => string.IsNullOrEmpty(item) ? null : prefix + ": " + item; + } + + /// + /// Input file data. + /// + internal struct InputFileData : IEquatable + { + /// + /// Path. + /// + public readonly string Path; + + /// + /// Content hash or content itself (in case of being written by a write-file pip). + /// + public readonly string HashOrContent; + + /// + /// Creates an instance of . 
+ /// + public InputFileData(string path, string hashOrContent) + { + Path = path; + HashOrContent = hashOrContent; + } + + /// + public bool Equals(InputFileData other) => Path == other.Path && HashOrContent == other.HashOrContent; + + /// + public override bool Equals(object obj) => StructUtilities.Equals(this, obj); + + /// + public override int GetHashCode() + { + return HashCodeHelper.Combine(hashCode(Path), hashCode(HashOrContent)); + + static int hashCode(string s) => s != null ? EqualityComparer.Default.GetHashCode(s) : 0; + } + } + + /// + /// Output file data. + /// + internal struct OutputFileData : IEquatable + { + /// + /// Path. + /// + public readonly string Path; + + /// + /// Attributes. + /// + public readonly string Attributes; + + /// + /// Creates an instance of . + /// + public OutputFileData(string path, string attributes) + { + Path = path; + Attributes = attributes; + } + + /// + public bool Equals(OutputFileData other) => Path == other.Path && Attributes == other.Attributes; + + /// + public override bool Equals(object obj) => StructUtilities.Equals(this, obj); + + /// + public override int GetHashCode() + { + return HashCodeHelper.Combine(hashCode(Path), hashCode(Attributes)); + + static int hashCode(string s) => s != null ? EqualityComparer.Default.GetHashCode(s) : 0; + } + } + + /// + /// Environment variable data. + /// + internal struct EnvironmentVariableData : IEquatable + { + /// + /// Name. + /// + public readonly string Name; + + /// + /// Value.s + /// + public readonly string Value; + + /// + /// Creates an instance of . 
+ /// + public EnvironmentVariableData(string name, string value) + { + Name = name; + Value = value; + } + + /// + public bool Equals(EnvironmentVariableData other) => Name == other.Name && Value == other.Value; + + /// + public override bool Equals(object obj) => StructUtilities.Equals(this, obj); + + /// + public override int GetHashCode() + { + return HashCodeHelper.Combine(hashCode(Name), hashCode(Value)); + + static int hashCode(string s) => s != null ? EqualityComparer.Default.GetHashCode(s) : 0; + } + } + + #endregion Internal fingerprint data + + #region Pools + + private static ObjectPool> CreateMapPool() => + new ObjectPool>( + () => new Dictionary(), + map => { map.Clear(); return map; }); + + /// + /// Pool for . + /// + public static ObjectPool> InputFileDataMapPool { get; } = CreateMapPool(); + + /// + /// Pool for . + /// + public static ObjectPool> OutputFileDataMapPool { get; } = CreateMapPool(); + + /// + /// Pool for . + /// + public static ObjectPool> EnvironmentVariableDataMapPool { get; } = CreateMapPool(); + + /// + /// Pool for . + /// + public static ObjectPool> ObservedInputDataMapPool { get; } = CreateMapPool(); + + #endregion Pools + } +} diff --git a/Public/Src/Engine/Scheduler/Tracing/FingerprintStoreReader.cs b/Public/Src/Engine/Scheduler/Tracing/FingerprintStoreReader.cs index 241caaadc..2bc339732 100644 --- a/Public/Src/Engine/Scheduler/Tracing/FingerprintStoreReader.cs +++ b/Public/Src/Engine/Scheduler/Tracing/FingerprintStoreReader.cs @@ -10,6 +10,7 @@ using BuildXL.Engine.Cache.Serialization; using BuildXL.Pips.Operations; using BuildXL.Scheduler.Fingerprints; using BuildXL.Utilities; +using Newtonsoft.Json.Linq; using static BuildXL.Scheduler.Tracing.FingerprintStore; namespace BuildXL.Scheduler.Tracing @@ -150,6 +151,30 @@ namespace BuildXL.Scheduler.Tracing } } + /// + /// Path set hash of the entry. 
+ /// + public string PathSetHash + { + get + { + Contract.Assert(EntryExists); + return m_entry.StrongFingerprintEntry.PathSetHashToInputs.Key; + } + } + + /// + /// Get path set value of the entry. + /// + public string PathSetValue + { + get + { + Contract.Assert(EntryExists); + return m_entry.StrongFingerprintEntry.PathSetHashToInputs.Value; + } + } + /// /// Constructor /// @@ -172,22 +197,57 @@ namespace BuildXL.Scheduler.Tracing /// /// Get weak fingerprint tree for the entry /// - public JsonNode GetWeakFingerprintTree() - { - return JsonTree.Deserialize(m_entry.WeakFingerprintToInputs.Value); - } + public JsonNode GetWeakFingerprintTree() => JsonTree.Deserialize(m_entry.WeakFingerprintToInputs.Value); /// /// Get strong fingerprint tree for the entry /// - public JsonNode GetStrongFingerprintTree() - { - var strongEntry = m_entry.StrongFingerprintEntry; - var strongFingerprintTree = JsonTree.Deserialize(strongEntry.StrongFingerprintToInputs.Value); - var pathSetTree = JsonTree.Deserialize(strongEntry.PathSetHashToInputs.Value); + public JsonNode GetStrongFingerprintTree() => MergeStrongFingerprintAndPathSetTrees(GetStrongFingerpintInputTree(), GetPathSetTree()); - return MergeStrongFingerprintAndPathSetTrees(strongFingerprintTree, pathSetTree); - } + /// + /// Get pathset tree. + /// + public JsonNode GetPathSetTree() => JsonTree.Deserialize(m_entry.StrongFingerprintEntry.PathSetHashToInputs.Value); + + private JsonNode GetStrongFingerpintInputTree() => JsonTree.Deserialize(m_entry.StrongFingerprintEntry.StrongFingerprintToInputs.Value); + + /// + /// Diff pathsets. 
+ /// + public JObject DiffPathSet(PipRecordingSession otherSession) => + JsonFingerprintDiff.DiffPathSets( + PathSetHash, + GetPathSetTree(), + GetStrongFingerpintInputTree(), + otherSession.PathSetHash, + otherSession.GetPathSetTree(), + otherSession.GetStrongFingerpintInputTree(), + directoryMembershipHash => GetDirectoryMembership(m_store, directoryMembershipHash), + otherDirectoryMembershipHash => GetDirectoryMembership(otherSession.m_store, otherDirectoryMembershipHash)); + + /// + /// Diff strong fingerprints. + /// + public JObject DiffStrongFingerprint(PipRecordingSession otherSession) => + JsonFingerprintDiff.DiffStrongFingerprints( + StrongFingerprint, + GetPathSetTree(), + GetStrongFingerpintInputTree(), + otherSession.StrongFingerprint, + otherSession.GetPathSetTree(), + otherSession.GetStrongFingerpintInputTree(), + directoryMembershipHash => GetDirectoryMembership(m_store, directoryMembershipHash), + otherDirectoryMembershipHash => GetDirectoryMembership(otherSession.m_store, otherDirectoryMembershipHash)); + + /// + /// Diff weak fingerprints. + /// + public JObject DiffWeakFingerprint(PipRecordingSession otherSession) => + JsonFingerprintDiff.DiffWeakFingerprints( + WeakFingerprint, + GetWeakFingerprintTree(), + otherSession.WeakFingerprint, + otherSession.GetWeakFingerprintTree()); /// /// Path set hash inputs are stored separately from the strong fingerprint inputs. 
@@ -210,11 +270,11 @@ namespace BuildXL.Scheduler.Tracing /// /// From path set hash /// - /// [4] "PathSet":"" + /// [4] "Paths":"" /// [5] "Path":"B:/out/objects/n/x/qbkexxlc8je93wycw7yrlw0a305n7k/xunit-out/CacheMissAnaAD836B23/3/obj/readonly/src_0" /// [6] "Flags":"IsDirectoryPath, DirectoryEnumeration, DirectoryEnumerationWithAllPattern" /// [7] "EnumeratePatternRegex":"^.*$" - /// + /// /// And end with: /// /// [1] "PathSet":"VSO0:7E2E49845EC0AE7413519E3EE605272078AF0B1C2911C021681D1D9197CC134A00" @@ -238,10 +298,9 @@ namespace BuildXL.Scheduler.Tracing // In preparation for merging with observed inputs nodes, // remove the path set node's branch from the path set tree - // [4] "PathSet":"" - var pathSetNode = JsonTree.FindNodeByName(pathSetTree, ObservedPathEntryConstants.PathSet); + // [4] "Paths":"" + var pathSetNode = JsonTree.FindNodeByName(pathSetTree, ObservedPathSet.Labels.Paths); JsonTree.EmancipateBranch(pathSetNode); - JsonNode currPathNode = null; JsonNode currFlagNode = null; JsonNode currRegexNode = null; @@ -252,23 +311,17 @@ namespace BuildXL.Scheduler.Tracing switch (child.Name) { case ObservedPathEntryConstants.Path: + if (currPathNode != null) + { + mergePathSetNode(parentPathNode, currPathNode, currFlagNode, currRegexNode, observedInputIt.Value); + observedInputIt = observedInputsNode.Children.First; + currPathNode = null; + currFlagNode = null; + currRegexNode = null; + } + currPathNode = child; - // Switch from literal string "path" to actual file system path - // [5'] "B:/out/objects/n/x/qbkexxlc8je93wycw7yrlw0a305n7k/xunit-out/CacheMissAnaAD836B23/3/obj/readonly/src_0":"" - currPathNode.Name = currPathNode.Values[0]; - // The name captures the node's value, so clear the values to avoid extraneous value comparison when diffing - currPathNode.Values.Clear(); - JsonTree.ReparentBranch(currPathNode, parentPathNode); - - // [6'] "Flags":"IsDirectoryPath, DirectoryEnumeration, DirectoryEnumerationWithAllPattern" - 
JsonTree.ReparentBranch(currFlagNode, currPathNode); - // [7'] "EnumeratePatternRegex":"^.*$" - JsonTree.ReparentBranch(currRegexNode, currPathNode); - - // [3'] "ObservedInput":"E:VSO0:E0C5007DC8CF2D331236F156F136C50CACE2A5D549CD132D9B44ABD1F13D50CC00" - // [8] "Members":"[src_1, src_2]" - ReparentObservedInput(observedInputIt.Value, currPathNode); - observedInputIt = observedInputsNode.Children.First; + JsonTree.EmancipateBranch(currPathNode); break; case ObservedPathEntryConstants.Flags: // [6] "Flags":"IsDirectoryPath, DirectoryEnumeration, DirectoryEnumerationWithAllPattern" @@ -285,6 +338,11 @@ namespace BuildXL.Scheduler.Tracing } } + if (currPathNode != null) + { + mergePathSetNode(parentPathNode, currPathNode, currFlagNode, currRegexNode, observedInputIt.Value); + } + // Re-parent any other branches of the path set tree to the strong fingerprint tree // so they are still in a full strong fingerprint tree comparison. // We re-parent under parentPathNode because branches of pathSetTree are elements of PathSet @@ -296,6 +354,26 @@ namespace BuildXL.Scheduler.Tracing } return strongFingerprintTree; + + void mergePathSetNode(JsonNode parentNode, JsonNode pathNode, JsonNode flagNode, JsonNode regexNode, JsonNode observedInputNode) + { + // Switch from literal string "path" to actual file system path + // [5'] "B:/out/objects/n/x/qbkexxlc8je93wycw7yrlw0a305n7k/xunit-out/CacheMissAnaAD836B23/3/obj/readonly/src_0":"" + pathNode.Name = pathNode.Values[0]; + + // The name captures the node's value, so clear the values to avoid extraneous value comparison when diffing + pathNode.Values.Clear(); + JsonTree.ReparentBranch(pathNode, parentNode); + + // [6'] "Flags":"IsDirectoryPath, DirectoryEnumeration, DirectoryEnumerationWithAllPattern" + JsonTree.ReparentBranch(flagNode, pathNode); + // [7'] "EnumeratePatternRegex":"^.*$" + JsonTree.ReparentBranch(regexNode, pathNode); + + // [3'] 
"ObservedInput":"E:VSO0:E0C5007DC8CF2D331236F156F136C50CACE2A5D549CD132D9B44ABD1F13D50CC00" + // [8] "Members":"[src_1, src_2]" + ReparentObservedInput(observedInputNode, pathNode); + } } /// @@ -390,6 +468,17 @@ namespace BuildXL.Scheduler.Tracing } } + private static IReadOnlyList GetDirectoryMembership(FingerprintStore store, string directoryFingerprint) + { + if(!store.TryGetContentHashValue(directoryFingerprint, out string storedValue)) + { + return null; + } + + var directoryMembershipTree = JsonTree.Deserialize(storedValue); + return directoryMembershipTree.Children.First.Value.Values; + } + /// /// Writes a message to a specific pip's file. /// diff --git a/Public/Src/Engine/Scheduler/Tracing/JsonFingerprintDiff.cs b/Public/Src/Engine/Scheduler/Tracing/JsonFingerprintDiff.cs new file mode 100644 index 000000000..566b6f98d --- /dev/null +++ b/Public/Src/Engine/Scheduler/Tracing/JsonFingerprintDiff.cs @@ -0,0 +1,683 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Diagnostics.ContractsLight; +using System.Linq; +using BuildXL.Engine.Cache.Serialization; +using BuildXL.Pips.Operations; +using BuildXL.Scheduler.Fingerprints; +using BuildXL.Utilities; +using Newtonsoft.Json.Linq; +using static BuildXL.Scheduler.Tracing.FingerprintDiff; + +namespace BuildXL.Scheduler.Tracing +{ + /// + /// Class for diff-ing weak and strong fingerprints, and present the result as Json object. + /// + internal static class JsonFingerprintDiff + { + #region Diff-ing + + /// + /// Diffs weak fingerprints. + /// + /// Weak fingerprint. + /// Weak fingerprint tree. + /// Other weak fingerprint. + /// Other weak fingerprint tree. 
+ /// + public static JObject DiffWeakFingerprints( + string weakFingerprint, + JsonNode weakFingerprintTree, + string otherWeakFingerprint, + JsonNode otherWeakFingerprintTree) + { + JObject result = new JObject(); + + if (weakFingerprint == otherWeakFingerprint) + { + return result; + } + + // { + // WeakFingerprint: { Old: old_weak_fingerprint, New: new_weak_fingerprint } + // } + AddPropertyIfNotNull(result, RenderSingleValueDiff("WeakFingerprint", weakFingerprint, otherWeakFingerprint)); + + using (var weakFingerprintDataPool = JsonNodeMapPool.GetInstance()) + using (var otherWeakFingerprintDataPool = JsonNodeMapPool.GetInstance()) + { + var weakFingerprintData = weakFingerprintDataPool.Instance; + var otherWeakFingerprintData = otherWeakFingerprintDataPool.Instance; + + JsonTree.VisitTree(weakFingerprintTree, wfNode => weakFingerprintData[wfNode.Name] = wfNode, recurse: false); + JsonTree.VisitTree(otherWeakFingerprintTree, wfNode => otherWeakFingerprintData[wfNode.Name] = wfNode, recurse: false); + + var fields = new HashSet(weakFingerprintData.Keys.Concat(otherWeakFingerprintData.Keys)); + + foreach (var field in fields) + { + bool getFieldNode = weakFingerprintData.TryGetValue(field, out JsonNode fieldNode); + bool getOtherFieldNode = otherWeakFingerprintData.TryGetValue(field, out JsonNode otherFieldNode); + + if (getFieldNode != getOtherFieldNode) + { + string fieldValue = getFieldNode + ? (fieldNode.Values != null && fieldNode.Values.Count == 1 + ? fieldNode.Values[0] + : CacheMissAnalysisUtilities.RepeatedStrings.ExistentValue) + : CacheMissAnalysisUtilities.RepeatedStrings.UnspecifiedValue; + string otherFieldValue = getOtherFieldNode + ? (otherFieldNode.Values != null && otherFieldNode.Values.Count == 1 + ? 
otherFieldNode.Values[0] + : CacheMissAnalysisUtilities.RepeatedStrings.ExistentValue) + : CacheMissAnalysisUtilities.RepeatedStrings.UnspecifiedValue; + + AddPropertyIfNotNull(result, RenderSingleValueDiff(field, fieldValue, otherFieldValue)); + } + else if (getFieldNode && getOtherFieldNode) + { + Contract.Assert(fieldNode != null); + Contract.Assert(otherFieldNode != null); + + AddPropertyIfNotNull(result, DiffWeakFingerprintField(fieldNode, otherFieldNode)); + } + } + } + + Contract.Assert(result.Count > 0); + + return result; + + } + + /// + /// Diffs strong fingerprints. + /// + /// Strong fingerprint. + /// Pathset tree. + /// Strong fingerprint input tree. + /// Other strong fingerprint. + /// Other pathset tree. + /// Other strong fingerprint input tree. + /// Delegate for getting directory membership. + /// Delegate for getting other directory membership. + /// + public static JObject DiffStrongFingerprints( + string strongFingerprint, + JsonNode pathSetTree, + JsonNode strongFingerprintInputTree, + string otherStrongFingerprint, + JsonNode otherPathSetTree, + JsonNode otherStrongFingerprintInputTree, + Func> getDirectoryMembership, + Func> getOtherDirectoryMembership) + { + JObject result = new JObject(); + + if (strongFingerprint == otherStrongFingerprint) + { + return result; + } + + // { + // StrongFingerprint: { Old: old_strong_fingerprint, New: new_strong_fingerprint } + // } + AddPropertyIfNotNull(result, RenderSingleValueDiff("StrongFingerprint", strongFingerprint, otherStrongFingerprint)); + + AddPropertyIfNotNull( + result, + DiffObservedPaths( + pathSetTree, + strongFingerprintInputTree, + otherPathSetTree, + otherStrongFingerprintInputTree, + getDirectoryMembership, + getOtherDirectoryMembership)); + + Contract.Assert(result.Count > 0); + + return result; + } + + /// + /// Diffs strong fingerprints. + /// + /// Pathset hash. + /// Pathset tree. + /// Strong fingerprint input tree. + /// Other pathset hash. + /// Other pathset tree. 
+ /// Other strong fingerprint input tree. + /// Delegate for getting directory membership. + /// Delegate for getting other directory membership. + /// + public static JObject DiffPathSets( + string pathSetHash, + JsonNode pathSetTree, + JsonNode strongFingerprintInputTree, + string otherPathSetHash, + JsonNode otherPathSetTree, + JsonNode otherStrongFingerprintInputTree, + Func> getDirectoryMembership, + Func> getOtherDirectoryMembership) + { + JObject result = new JObject(); + + if (pathSetHash == otherPathSetHash) + { + return result; + } + + // { + // PathSetHash: { Old: old_path_set_hash, New: new_path_set_hash } + // } + AddPropertyIfNotNull(result, RenderSingleValueDiff("PathSetHash", pathSetHash, otherPathSetHash)); + + JsonNode unsafeOptionsNode = JsonTree.FindNodeByName(pathSetTree, ObservedPathSet.Labels.UnsafeOptions); + JsonNode otherUnsafeOptionsNode = JsonTree.FindNodeByName(otherPathSetTree, ObservedPathSet.Labels.UnsafeOptions); + + // This is less ideal because we can't see the difference. + // TODO: dump unsafe option data to the fingerprint store so that we can analyze the content. 
+ // { + // UnsafeOptions: { Old: old_bits, New: new_bits: } + // } + AddPropertyIfNotNull(result, RenderSingleValueDiff(ObservedPathSet.Labels.UnsafeOptions, unsafeOptionsNode.Values[0], otherUnsafeOptionsNode.Values[0])); + + AddPropertyIfNotNull( + result, + DiffObservedPaths( + pathSetTree, + strongFingerprintInputTree, + otherPathSetTree, + otherStrongFingerprintInputTree, + getDirectoryMembership, + getOtherDirectoryMembership)); + + JsonNode obsFileNameNode = JsonTree.FindNodeByName(pathSetTree, ObservedPathSet.Labels.ObservedAccessedFileNames); + JsonNode otherObsFileNameNode = JsonTree.FindNodeByName(otherPathSetTree, ObservedPathSet.Labels.ObservedAccessedFileNames); + + bool hasDiff = ExtractUnorderedListDiff(obsFileNameNode.Values, otherObsFileNameNode.Values, out var addedFileNames, out var removedFileName); + + if (hasDiff) + { + result.Add(new JProperty( + ObservedPathSet.Labels.ObservedAccessedFileNames, + RenderUnorderedListDiff(addedFileNames, removedFileName, RenderPath))); + } + + Contract.Assert(result.Count > 0); + + return result; + } + + private static JProperty DiffWeakFingerprintField(JsonNode fieldNode, JsonNode otherFieldNode) + { + Contract.Requires(fieldNode != null); + Contract.Requires(otherFieldNode != null); + Contract.Requires(fieldNode.Name == otherFieldNode.Name); + + switch (fieldNode.Name) + { + case nameof(Process.Dependencies): + { + using (var inputFileDataPool = InputFileDataMapPool.GetInstance()) + using (var otherInputFileDataPool = InputFileDataMapPool.GetInstance()) + { + var inputFileData = inputFileDataPool.Instance; + var otherInputFileData = otherInputFileDataPool.Instance; + populateInputFileData(fieldNode, inputFileData); + populateInputFileData(otherFieldNode, otherInputFileData); + return ExtractUnorderedMapDiff( + inputFileData, + otherInputFileData, + (dOld, dNew) => dOld.Equals(dNew), + out var added, + out var removed, + out var changed) + ? 
new JProperty(fieldNode.Name, RenderUnorderedMapDiff( + inputFileData, + otherInputFileData, + added, + removed, + changed, + RenderPath, + (dataA, dataB) => dataA.HashOrContent)) + : null; + } + } + + case nameof(Process.FileOutputs): + { + using (var outputFileDataPool = OutputFileDataMapPool.GetInstance()) + using (var otherOutputFileDataPool = OutputFileDataMapPool.GetInstance()) + { + var outputFileData = outputFileDataPool.Instance; + var otherOutputFileData = otherOutputFileDataPool.Instance; + populateOutputFileData(fieldNode, outputFileData); + populateOutputFileData(otherFieldNode, otherOutputFileData); + return ExtractUnorderedMapDiff( + outputFileData, + otherOutputFileData, + (dOld, dNew) => dOld.Equals(dNew), + out var added, + out var removed, + out var changed) + ? new JProperty(fieldNode.Name, RenderUnorderedMapDiff( + outputFileData, + otherOutputFileData, + added, + removed, + changed, + RenderPath, + (dataA, dataB) => dataA.Attributes)) + : null; + } + } + + case nameof(Process.EnvironmentVariables): + { + using (var envVarDataPool = EnvironmentVariableDataMapPool.GetInstance()) + using (var otherEnvVarDataPool = EnvironmentVariableDataMapPool.GetInstance()) + { + var envVarData = envVarDataPool.Instance; + var otherEnvVarData = otherEnvVarDataPool.Instance; + populateEnvironmentVariableData(fieldNode, envVarData); + populateEnvironmentVariableData(otherFieldNode, otherEnvVarData); + return ExtractUnorderedMapDiff( + envVarData, + otherEnvVarData, + (dOld, dNew) => dOld.Equals(dNew), + out var added, + out var removed, + out var changed) + ? 
new JProperty(fieldNode.Name, RenderUnorderedMapDiff( + envVarData, + otherEnvVarData, + added, + removed, + changed, + k => k, + (dataA, dataB) => dataA.Value)) + : null; + } + } + + case nameof(Process.DirectoryDependencies): + case nameof(Process.DirectoryOutputs): + case nameof(Process.UntrackedPaths): + case nameof(Process.UntrackedScopes): + case nameof(Process.PreserveOutputWhitelist): + case nameof(Process.SuccessExitCodes): + case PipFingerprintField.Process.SourceChangeAffectedInputList: + case nameof(Process.ChildProcessesToBreakawayFromSandbox): + { + var data = fieldNode.Values; + var otherData = otherFieldNode.Values; + return ExtractUnorderedListDiff(data, otherData, out var added, out var removed) + ? new JProperty(fieldNode.Name, RenderUnorderedListDiff(added, removed, RenderPath)) + : null; + } + default: + return RenderSingleValueDiff(fieldNode.Name, getSingleValueNode(fieldNode), getSingleValueNode(otherFieldNode)); + + } + + string getSingleValueNode(JsonNode node) => + node.Values.Count > 0 + ? 
node.Values[0] + : CacheMissAnalysisUtilities.RepeatedStrings.MissingValue; + + void populateInputFileData(JsonNode dependencyNode, Dictionary inputFileData) + { + JsonTree.VisitTree( + dependencyNode, + node => + { + string value = CacheMissAnalysisUtilities.RepeatedStrings.MissingValue; + if (node.Values.Count > 0) + { + value = node.Values[0]; + } + else if (node.Children.First != null + && node.Children.First.Value.Name == PipFingerprintField.FileDependency.PathNormalizedWriteFileContent + && node.Children.First.Value.Values.Count > 0) + { + value = node.Children.First.Value.Values[0]; + } + + inputFileData[node.Name] = new InputFileData(node.Name, value); + }, + recurse: false); + } + + void populateOutputFileData(JsonNode outputNode, Dictionary outputFileData) + { + JsonTree.VisitTree( + outputNode, + node => + { + string value = CacheMissAnalysisUtilities.RepeatedStrings.MissingValue; + if (node.Children.First != null + && node.Children.First.Value.Name == PipFingerprintField.FileOutput.Attributes + && node.Children.First.Value.Values.Count > 0) + { + value = node.Children.First.Value.Values[0]; + } + + outputFileData[node.Name] = new OutputFileData(node.Name, value); + }, + recurse: false); + } + + void populateEnvironmentVariableData(JsonNode environmentVariableNode, Dictionary environmentVariableData) + { + JsonTree.VisitTree( + environmentVariableNode, + node => + { + environmentVariableData[node.Name] = new EnvironmentVariableData( + node.Name, + node.Values.Count > 0 ? 
node.Values[0] : CacheMissAnalysisUtilities.RepeatedStrings.MissingValue); + }, + recurse: false); + } + } + + private static JProperty DiffObservedPaths( + JsonNode pathSetTree, + JsonNode strongFingerprintInputTree, + JsonNode otherPathSetTree, + JsonNode otherStrongFingerprintInputTree, + Func> getDirectoryMembership, + Func> getOtherDirectoryMembership) + { + JsonNode pathsNode = JsonTree.FindNodeByName(pathSetTree, ObservedPathSet.Labels.Paths); + JsonNode otherPathsNode = JsonTree.FindNodeByName(otherPathSetTree, ObservedPathSet.Labels.Paths); + + JsonNode observedInputsTree = JsonTree.FindNodeByName(strongFingerprintInputTree, ObservedInputConstants.ObservedInputs); + JsonNode otherObservedInputsTree = JsonTree.FindNodeByName(otherStrongFingerprintInputTree, ObservedInputConstants.ObservedInputs); + + using (var pathSetDataPool = ObservedInputDataMapPool.GetInstance()) + using (var otherPathSetDataPool = ObservedInputDataMapPool.GetInstance()) + { + var pathSetData = pathSetDataPool.Instance; + var otherPathSetData = otherPathSetDataPool.Instance; + traversePathSetPaths(pathsNode, observedInputsTree, pathSetData); + traversePathSetPaths(otherPathsNode, otherObservedInputsTree, otherPathSetData); + + bool hasDiff = ExtractUnorderedMapDiff( + pathSetData, + otherPathSetData, + (data, otherData) => data.Equals(otherData), + out var added, + out var removed, + out var changed); + + if (hasDiff) + { + // { + // Paths: { + // Added : [..paths..], + // Removed: [..paths..], + // Changed: { + // path: { Old: ..., New: ... 
} + // }: + // } + // } + return new JProperty( + ObservedPathSet.Labels.Paths, + RenderUnorderedMapDiff( + pathSetData, + otherPathSetData, + added, + removed, + changed, + RenderPath, + (dataA, dataB) => dataA.DescribeDiffWithoutPath(dataB), + c => diffDirectoryIfApplicable(pathSetData, otherPathSetData, c))); + } + + return null; + } + + JProperty diffDirectoryIfApplicable(Dictionary obsInputData, Dictionary otherObsInputData, string possiblyChangeDirectory) + { + // { + // Members: { + // Added : [..file..], + // Removed : [..file..] + // } + // } + const string MembersLabel = "Members"; + + var change = obsInputData[possiblyChangeDirectory]; + var otherChange = otherObsInputData[possiblyChangeDirectory]; + if (change.AccessType == ObservedInputConstants.DirectoryEnumeration + && otherChange.AccessType == ObservedInputConstants.DirectoryEnumeration + && change.Pattern == otherChange.Pattern) + { + var members = getDirectoryMembership(change.Hash); + + if (members == null) + { + return new JProperty(MembersLabel, $"{CacheMissAnalysisUtilities.RepeatedStrings.MissingDirectoryMembershipFingerprint} ({nameof(ObservedInputData.Hash)}: {change.Hash})"); + } + + var otherMembers = getOtherDirectoryMembership(otherChange.Hash); + + if (otherMembers == null) + { + return new JProperty(MembersLabel, $"{CacheMissAnalysisUtilities.RepeatedStrings.MissingDirectoryMembershipFingerprint} ({nameof(ObservedInputData.Hash)}: {otherChange.Hash})"); + } + + bool hasDiff = ExtractUnorderedListDiff(members, otherMembers, out var addedMembers, out var removedMembers); + + if (hasDiff) + { + return new JProperty(MembersLabel, RenderUnorderedListDiff(addedMembers, removedMembers, RenderPath)); + } + } + + return null; + } + + void traversePathSetPaths( + JsonNode pathSetTree, + JsonNode strongFingerprintInputTree, + Dictionary populatedData) + { + TraversePathSetPaths(pathSetTree, strongFingerprintInputTree, data => populatedData[data.Path] = data); + } + } + + #endregion Diff-ing + + 
#region Rendering + + private static JObject RenderUnorderedMapDiff( + IReadOnlyDictionary oldData, + IReadOnlyDictionary newData, + IReadOnlyList added, + IReadOnlyList removed, + IReadOnlyList changed, + Func renderKey, + Func describeValueDiff, + Func extraDiffChange = null) + { + JObject result = RenderUnorderedListDiff(added, removed, renderKey); + + JProperty changedProperty = null; + + if (changed != null && changed.Count > 0) + { + changedProperty = new JProperty( + "Changed", + new JObject(changed.Select(c => RenderSingleValueDiff( + renderKey(c), + describeValueDiff(oldData[c], newData[c]), + describeValueDiff(newData[c], oldData[c]), + extraDiffChange)).ToArray())); + } + + if (result == null && changedProperty == null) + { + return null; + } + + if (result == null) + { + result = new JObject(); + } + + if (changedProperty != null) + { + result.Add(changedProperty); + } + + return result; + } + + private static JObject RenderUnorderedListDiff( + IReadOnlyList added, + IReadOnlyList removed, + Func renderItem) + { + JProperty addedProperty = added != null && added.Count > 0 ? new JProperty("Added", new JArray(added.Select(a => renderItem(a)).ToArray())) : null; + JProperty removedProperty = removed != null && removed.Count > 0 ? 
new JProperty("Removed", new JArray(removed.Select(a => renderItem(a)).ToArray())) : null; + + if (addedProperty == null && removedProperty == null) + { + return null; + } + + JObject result = new JObject(); + + addToResult(addedProperty); + addToResult(removedProperty); + + return result; + + void addToResult(JProperty p) + { + if (p != null) + { + result.Add(p); + } + } + } + + private static JProperty RenderSingleValueDiff(string key, string oldValue, string newValue, Func extraDiff = null) + { + if (oldValue == newValue) + { + return null; + } + + var diff = new [] + { + new JProperty("Old", oldValue), + new JProperty("New", newValue) + }; + + var diffObject = new JObject(diff); + + if (extraDiff != null) + { + JProperty extra = extraDiff(key); + if (extra != null) + { + diffObject.Add(extra); + } + } + return new JProperty(key, diffObject); + } + + private static string RenderPath(string path) => path; + + #endregion Rendering + + #region Traversal + + private static void TraversePathSetPaths( + JsonNode pathSetPathsNode, + JsonNode observedInputs, + Action action) + { + string path = null; + string flags = null; + string pattern = null; + + string hashMarker = null; + string hash = null; + + var obIt = observedInputs?.Children.First; + + for (var it = pathSetPathsNode.Children.First; it != null; it = it.Next) + { + var elem = it.Value; + switch (elem.Name) + { + case ObservedPathEntryConstants.Path: + if (path != null) + { + action(new ObservedInputData(path, flags, pattern, hashMarker, hash)); + path = null; + flags = null; + pattern = null; + hashMarker = null; + hash = null; + } + + path = elem.Values[0]; + + if (obIt != null) + { + hashMarker = obIt.Value.Name; + hash = obIt.Value.Values[0]; + obIt = obIt.Next; + } + + break; + case ObservedPathEntryConstants.Flags: + Contract.Assert(path != null); + flags = elem.Values[0]; + break; + case ObservedPathEntryConstants.EnumeratePatternRegex: + Contract.Assert(path != null); + pattern = elem.Values[0]; + 
break; + default: + break; + } + } + + if (path != null) + { + action(new ObservedInputData(path, flags, pattern, hashMarker, hash)); + } + } + + #endregion Traversal + + #region Utilities + + private static void AddPropertyIfNotNull(JObject o, JProperty p) + { + if (p != null) + { + o.Add(p); + } + } + + private static ObjectPool> JsonNodeMapPool { get; } = + new ObjectPool>( + () => new Dictionary(), + map => { map.Clear(); return map; }); + + #endregion Utilities + } +} diff --git a/Public/Src/Engine/Scheduler/Tracing/ProcessStrongFingerprintComputationData.cs b/Public/Src/Engine/Scheduler/Tracing/ProcessStrongFingerprintComputationData.cs index 55fbfa769..cee4926b8 100644 --- a/Public/Src/Engine/Scheduler/Tracing/ProcessStrongFingerprintComputationData.cs +++ b/Public/Src/Engine/Scheduler/Tracing/ProcessStrongFingerprintComputationData.cs @@ -278,12 +278,12 @@ namespace BuildXL.Scheduler.Tracing logStats: false)) { UnsafeOptions.Serialize(buildXLWriter); - writer.Add("SerializedUnsafeOptions", System.BitConverter.ToString(stream.ToArray())); + writer.Add(ObservedPathSet.Labels.UnsafeOptions, System.BitConverter.ToString(stream.ToArray())); } } var thisRef = this; - writer.AddNested(ObservedPathEntryConstants.PathSet, w => + writer.AddNested(ObservedPathSet.Labels.Paths, w => { foreach (var p in thisRef.PathEntries) { @@ -300,7 +300,7 @@ namespace BuildXL.Scheduler.Tracing }); writer.AddCollection>( - "ObservedAccessedFileNames", + ObservedPathSet.Labels.ObservedAccessedFileNames, ObservedAccessedFileNames, (w, v) => w.Add(v)); diff --git a/Public/Src/Engine/Scheduler/Tracing/RuntimeCacheMissAnalyzer.cs b/Public/Src/Engine/Scheduler/Tracing/RuntimeCacheMissAnalyzer.cs index 887e542e5..248e8e982 100644 --- a/Public/Src/Engine/Scheduler/Tracing/RuntimeCacheMissAnalyzer.cs +++ b/Public/Src/Engine/Scheduler/Tracing/RuntimeCacheMissAnalyzer.cs @@ -96,7 +96,14 @@ namespace BuildXL.Scheduler.Tracing if (possibleStore.Succeeded) { 
Logger.Log.SuccessLoadFingerprintStoreToCompare(loggingContext, option.Mode.ToString(), possibleStore.Result.StoreDirectory); - return new RuntimeCacheMissAnalyzer(logTarget, loggingContext, context, possibleStore.Result, graph, runnablePipPerformance); + return new RuntimeCacheMissAnalyzer( + logTarget, + loggingContext, + context, + possibleStore.Result, + graph, + runnablePipPerformance, + configuration.Logging.CacheMissDiffFormat); } Logger.Log.GettingFingerprintStoreTrace(loggingContext, I($"Failed to read the fingerprint store to compare. Mode: {option.Mode.ToString()} Failure: {possibleStore.Failure.DescribeIncludingInnerFailures()}")); @@ -126,13 +133,16 @@ namespace BuildXL.Scheduler.Tracing /// public FingerprintStore PreviousFingerprintStore { get; } + private readonly CacheMissDiffFormat m_cacheMissDiffFormat; + private RuntimeCacheMissAnalyzer( FingerprintStoreExecutionLogTarget logTarget, LoggingContext loggingContext, PipExecutionContext context, FingerprintStore previousFingerprintStore, IReadonlyDirectedGraph graph, - IDictionary runnablePipPerformance) + IDictionary runnablePipPerformance, + CacheMissDiffFormat cacheMissDiffFormat) { m_loggingContext = loggingContext; m_logTarget = logTarget; @@ -142,6 +152,7 @@ namespace BuildXL.Scheduler.Tracing m_changedPips = new VisitationTracker(graph); m_pipCacheMissesDict = new ConcurrentDictionary(); m_runnablePipPerformance = runnablePipPerformance; + m_cacheMissDiffFormat = cacheMissDiffFormat; } internal void AddCacheMiss(PipCacheMissInfo cacheMissInfo) @@ -199,7 +210,8 @@ namespace BuildXL.Scheduler.Tracing writer, missInfo, () => new FingerprintStoreReader.PipRecordingSession(PreviousFingerprintStore, oldEntry), - () => new FingerprintStoreReader.PipRecordingSession(m_logTarget.ExecutionFingerprintStore, newEntry)); + () => new FingerprintStoreReader.PipRecordingSession(m_logTarget.ExecutionFingerprintStore, newEntry), + m_cacheMissDiffFormat); // The diff sometimes contains several empty new lines 
at the end. var reason = writer.ToString().TrimEnd(Environment.NewLine.ToCharArray()); diff --git a/Public/Src/Engine/UnitTests/FingerprintStore/FingerprintDiffTests.cs b/Public/Src/Engine/UnitTests/FingerprintStore/FingerprintDiffTests.cs new file mode 100644 index 000000000..f333e8fdd --- /dev/null +++ b/Public/Src/Engine/UnitTests/FingerprintStore/FingerprintDiffTests.cs @@ -0,0 +1,78 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System.Collections.Generic; +using System.Linq; +using Test.BuildXL.TestUtilities.Xunit; +using Xunit; +using Xunit.Abstractions; +using static BuildXL.Scheduler.Tracing.FingerprintDiff; + +namespace Test.BuildXL.FingerprintStore +{ + public class FingerprintDiffTests : XunitBuildXLTest + { + public FingerprintDiffTests(ITestOutputHelper output) : base(output) + { + } + + [Fact] + public void ExtractObservedInputsDiff() + { + var commonData = new[] + { + new ObservedInputData(A("X", "Y", "f"), "flagF", "*.cs", "F", "XYf123"), + new ObservedInputData(A("X", "Y", "g"), "flagG", "*.cc", "G", "XYg123"), + }; + + var oldData = new Dictionary(commonData.ToDictionary(k => k.Path)) + { + [A("X", "Y", "a")] = new ObservedInputData(A("X", "Y", "a"), "flagA", "*.cs", "A", "XYa123"), + [A("X", "Y", "b")] = new ObservedInputData(A("X", "Y", "b"), "flagB", "*.cb", "B", "XYb123"), + [A("X", "Y", "c")] = new ObservedInputData(A("X", "Y", "c"), "flagC", "*.cc", "C", "XYc123"), + [A("X", "Y", "d")] = new ObservedInputData(A("X", "Y", "d"), "flagD", "*.cd", "D", "XYd123"), + }; + + var newData = new Dictionary(commonData.ToDictionary(k => k.Path)) + { + [A("X", "Y", "p")] = new ObservedInputData(A("X", "Y", "p"), "flagP", "*.cp", "P", "XYp123"), + [A("X", "Y", "q")] = new ObservedInputData(A("X", "Y", "q"), "flagQ", "*.cq", "Q", "XYq123"), + [A("X", "Y", "c")] = new ObservedInputData(A("X", "Y", "c"), "flagC1", "*.cc", "C", "XYc123"), + 
[A("X", "Y", "d")] = new ObservedInputData(A("X", "Y", "d"), "flagD", "*.cd", "D", "XYd124"), + }; + + bool hasDiff = ExtractUnorderedMapDiff( + oldData, + newData, + (o, n) => o.Equals(n), + out var added, + out var removed, + out var changed); + + XAssert.IsTrue(hasDiff); + XAssert.AreEqual(2, added.Count); + XAssert.AreEqual(2, removed.Count); + XAssert.AreEqual(2, changed.Count); + + XAssert.Contains(added, A("X", "Y", "p"), A("X", "Y", "q")); + XAssert.Contains(removed, A("X", "Y", "a"), A("X", "Y", "b")); + XAssert.Contains(changed, A("X", "Y", "c"), A("X", "Y", "d")); + } + + [Fact] + public void ExtractFilesDiff() + { + var oldData = new[] { A("X", "Y", "a"), A("X", "Y", "b"), A("X", "Y", "c"), A("X", "Y", "d") }; + var newData = new[] { A("X", "Y", "a"), A("X", "Y", "b"), A("X", "Y", "f"), A("X", "Y", "g") }; + + bool hasDiff = ExtractUnorderedListDiff(oldData, newData, out var added, out var removed); + + XAssert.IsTrue(hasDiff); + XAssert.AreEqual(2, added.Count); + XAssert.AreEqual(2, removed.Count); + + XAssert.Contains(added, A("X", "Y", "f"), A("X", "Y", "g")); + XAssert.Contains(removed, A("X", "Y", "c"), A("X", "Y", "d")); + } + } +} diff --git a/Public/Src/Pips/Dll/Operations/Process.cs b/Public/Src/Pips/Dll/Operations/Process.cs index 7e06da430..bc8145521 100644 --- a/Public/Src/Pips/Dll/Operations/Process.cs +++ b/Public/Src/Pips/Dll/Operations/Process.cs @@ -5,6 +5,7 @@ using System; using System.Diagnostics.CodeAnalysis; using System.Diagnostics.ContractsLight; using System.Linq; +using System.Text.RegularExpressions; using BuildXL.Native.Processes; using BuildXL.Utilities; using BuildXL.Utilities.Collections; @@ -252,12 +253,24 @@ namespace BuildXL.Pips.Operations [PipCaching(FingerprintingRole = FingerprintingRole.Semantic)] public RegexDescriptor WarningRegex { get; } + /// + public StringId WarningRegexPattern => WarningRegex.Pattern; + + /// + public RegexOptions WarningRegexOptions => WarningRegex.Options; + /// /// Optional regular 
expression to detect errors in console error / output. /// [PipCaching(FingerprintingRole = FingerprintingRole.Semantic)] public RegexDescriptor ErrorRegex { get; } + /// + public StringId ErrorRegexPattern => ErrorRegex.Pattern; + + /// + public RegexOptions ErrorRegexOptions => ErrorRegex.Options; + /// /// When false (or not set): process output is scanned for error messages line by line; /// 'errorRegex' is applied to each line and if ANY match is found the ENTIRE line is reported. diff --git a/Public/Src/Tools/Execution.Analyzer/Analyzers.Core/CacheMiss/FingerprintStoreAnalyzer.cs b/Public/Src/Tools/Execution.Analyzer/Analyzers.Core/CacheMiss/FingerprintStoreAnalyzer.cs index 4a50f14d6..fb4de6417 100644 --- a/Public/Src/Tools/Execution.Analyzer/Analyzers.Core/CacheMiss/FingerprintStoreAnalyzer.cs +++ b/Public/Src/Tools/Execution.Analyzer/Analyzers.Core/CacheMiss/FingerprintStoreAnalyzer.cs @@ -24,6 +24,8 @@ namespace BuildXL.Execution.Analyzer bool allPips = false; bool noBanner = false; long sshValue = -1; + CacheMissDiffFormat cacheMissDiffFormat = CacheMissDiffFormat.CustomJsonDiff; + foreach (var opt in AnalyzerOptions) { if (opt.Name.Equals("outputDirectory", StringComparison.OrdinalIgnoreCase) || @@ -44,6 +46,10 @@ namespace BuildXL.Execution.Analyzer { noBanner = ParseBooleanOption(opt); } + else if (opt.Name.Equals("cacheMissDiffFormat", StringComparison.OrdinalIgnoreCase)) + { + cacheMissDiffFormat = ParseEnumOption(opt); + } else { throw Error("Unknown option for cache miss analysis: {0}", opt.Name); @@ -76,6 +82,7 @@ namespace BuildXL.Execution.Analyzer writer.WriteOption("outputDirectory", "Required. The directory where to write the results", shortName: "o"); writer.WriteOption("allPips", "Optional. Defaults to false."); writer.WriteOption("pipId", "Optional. Run for specific pip.", shortName: "p"); + writer.WriteOption("diffFormat", "Optional. Diff format. Allowed values are JsonDiff and JsonPatchDiff. 
Defaults to CustomJsonDiff"); } } @@ -106,6 +113,11 @@ namespace BuildXL.Execution.Analyzer /// public long SemiStableHashToRun; + /// + /// Diff format. + /// + public CacheMissDiffFormat CacheMissDiffFormat = CacheMissDiffFormat.CustomJsonDiff; + /// /// Analysis model based on the new build. /// @@ -320,7 +332,8 @@ namespace BuildXL.Execution.Analyzer writer, miss, () => m_oldReader.StartPipRecordingSession(pip, pipUniqueOutputHashStr), - () => m_newCacheLookupReader.StartPipRecordingSession(pip, pipUniqueOutputHashStr)); + () => m_newCacheLookupReader.StartPipRecordingSession(pip, pipUniqueOutputHashStr), + CacheMissDiffFormat); } else { @@ -328,7 +341,8 @@ namespace BuildXL.Execution.Analyzer m_writer, miss, () => m_oldReader.StartPipRecordingSession(pip, pipUniqueOutputHash.ToString()), - () => m_newReader.StartPipRecordingSession(pip, pipUniqueOutputHash.ToString())); + () => m_newReader.StartPipRecordingSession(pip, pipUniqueOutputHash.ToString()), + CacheMissDiffFormat); } if (analysisResult == CacheMissAnalysisResult.MissingFromOldBuild) diff --git a/Public/Src/Tools/UnitTests/Analyzers/FingerprintStoreAnalyzerTests.cs b/Public/Src/Tools/UnitTests/Analyzers/FingerprintStoreAnalyzerTests.cs index 47ed569a7..3fd4dacc7 100644 --- a/Public/Src/Tools/UnitTests/Analyzers/FingerprintStoreAnalyzerTests.cs +++ b/Public/Src/Tools/UnitTests/Analyzers/FingerprintStoreAnalyzerTests.cs @@ -138,7 +138,7 @@ namespace Test.Tool.Analyzers ScheduleRunResult buildB = RunScheduler().AssertCacheMiss(pip.PipId); messages = new string[] { ArtifactToPrint(dir), ObservedInputType.AbsentPathProbe.ToString(), ObservedInputType.DirectoryEnumeration.ToString() }; - + RunAnalyzer(buildA, buildB).AssertPipMiss(pip, PipCacheMissType.MissForDescriptorsDueToStrongFingerprints, messages); // Strong fingerprint miss: Added new files to enumerated directory @@ -147,13 +147,8 @@ namespace Test.Tool.Analyzers ScheduleRunResult buildC = RunScheduler().AssertCacheMiss(pip.PipId); messages = new 
string[] { ArtifactToPrint(dir), Path.GetFileName(ArtifactToPrint(addedFile)), Path.GetFileName(ArtifactToPrint(victimFile)) }; - - RunAnalyzer(buildB, buildC).AssertPipMiss( - pip, - PipCacheMissType.MissForDescriptorsDueToStrongFingerprints, - ArtifactToPrint(dir), - Path.GetFileName(ArtifactToPrint(addedFile)), - Path.GetFileName(ArtifactToPrint(victimFile))); + + RunAnalyzer(buildB, buildC).AssertPipMiss(pip, PipCacheMissType.MissForDescriptorsDueToStrongFingerprints, messages); // Strong fingerprint miss: Deleted file in enumerated directory File.Delete(ArtifactToPrint(victimFile)); @@ -360,7 +355,7 @@ namespace Test.Tool.Analyzers // Reset the graph and re-schedule the same pip but with an added command line arg ResetPipGraphBuilder(); - var mismatchingPipBuilder = this.CreatePipBuilder(new Operation[] + var mismatchingPipBuilder = CreatePipBuilder(new Operation[] { outOp }); @@ -420,14 +415,12 @@ namespace Test.Tool.Analyzers cacheLookupStore.RemoveContentHashForTesting(directoryMembershipFingerprint); } - var result = RunAnalyzer(buildA, buildB).AssertPipMiss( + RunAnalyzer(buildA, buildB).AssertPipMiss( pip, PipCacheMissType.MissForDescriptorsDueToStrongFingerprints, ArtifactToPrint(dir), ObservedInputType.AbsentPathProbe.ToString(), ObservedInputType.DirectoryEnumeration.ToString()); - - result.AssertAnalyzerOutput(CacheMissAnalysisUtilities.RepeatedStrings.MissingDirectoryMembershipFingerprint); } /// diff --git a/Public/Src/Utilities/Configuration/CacheMissAnalysisOption.cs b/Public/Src/Utilities/Configuration/CacheMissAnalysisOption.cs index c4fc6ee85..2ee75005a 100644 --- a/Public/Src/Utilities/Configuration/CacheMissAnalysisOption.cs +++ b/Public/Src/Utilities/Configuration/CacheMissAnalysisOption.cs @@ -88,4 +88,28 @@ namespace BuildXL.Utilities.Configuration /// CustomPath } + + /// + /// Cache miss diff format. + /// + public enum CacheMissDiffFormat + { + /// + /// Custom (i.e., non-standard) Json diff format. 
+ /// + CustomJsonDiff, + + /// + /// Json patch diff format. + /// + /// + /// This format will soon be deprecated because + /// - the format is not easy to understand and looks cryptic, and + /// - it relies on a buggy third-party package. + /// However, some customers have already played around with this format. Thus, + /// to avoid breaking customers hard, this format is preserved, but needs to be selected explicitly, + /// as the default will be <see cref="CustomJsonDiff"/>. + /// + JsonPatchDiff, + } } diff --git a/Public/Src/Utilities/Configuration/ILoggingConfiguration.cs b/Public/Src/Utilities/Configuration/ILoggingConfiguration.cs index d5069afd1..7a9382fcb 100644 --- a/Public/Src/Utilities/Configuration/ILoggingConfiguration.cs +++ b/Public/Src/Utilities/Configuration/ILoggingConfiguration.cs @@ -309,6 +309,11 @@ namespace BuildXL.Utilities.Configuration /// CacheMissAnalysisOption CacheMissAnalysisOption { get; } + /// + /// Diff format for cache miss analysis. + /// + CacheMissDiffFormat CacheMissDiffFormat { get; } + /// /// Whether console output should be optimized for Azure DevOps output. 
/// diff --git a/Public/Src/Utilities/Configuration/Mutable/LoggingConfiguration.cs b/Public/Src/Utilities/Configuration/Mutable/LoggingConfiguration.cs index 090841a66..134bc3e52 100644 --- a/Public/Src/Utilities/Configuration/Mutable/LoggingConfiguration.cs +++ b/Public/Src/Utilities/Configuration/Mutable/LoggingConfiguration.cs @@ -43,6 +43,7 @@ namespace BuildXL.Utilities.Configuration.Mutable FailPipOnFileAccessError = true; UseCustomPipDescriptionOnConsole = true; CacheMissAnalysisOption = CacheMissAnalysisOption.Disabled(); + CacheMissDiffFormat = CacheMissDiffFormat.CustomJsonDiff; RedirectedLogsDirectory = AbsolutePath.Invalid; } @@ -124,6 +125,7 @@ namespace BuildXL.Utilities.Configuration.Mutable template.CacheMissAnalysisOption.Mode, new List(template.CacheMissAnalysisOption.Keys), pathRemapper.Remap(template.CacheMissAnalysisOption.CustomPath)); + CacheMissDiffFormat = template.CacheMissDiffFormat; OptimizeConsoleOutputForAzureDevOps = template.OptimizeConsoleOutputForAzureDevOps; InvocationExpandedCommandLineArguments = template.InvocationExpandedCommandLineArguments; OptimizeProgressUpdatingForAzureDevOps = template.OptimizeProgressUpdatingForAzureDevOps; @@ -314,6 +316,9 @@ namespace BuildXL.Utilities.Configuration.Mutable /// public CacheMissAnalysisOption CacheMissAnalysisOption { get; set; } + /// + public CacheMissDiffFormat CacheMissDiffFormat { get; set; } + /// public bool OptimizeConsoleOutputForAzureDevOps { get; set; }