diff --git a/AppInspector.RulesEngine/TextContainer.cs b/AppInspector.RulesEngine/TextContainer.cs index 1b631e6..a93a361 100644 --- a/AppInspector.RulesEngine/TextContainer.cs +++ b/AppInspector.RulesEngine/TextContainer.cs @@ -172,11 +172,13 @@ public class TextContainer } /// - /// If this file is a JSON, XML or YML file, returns the string contents of the specified path. + /// If this file is XML, attempts to return the string contents of the specified XPath applied to the file. /// If the path does not exist, or the file is not JSON, XML or YML returns null. + /// Method contains some heuristic behavior and may not cover all cases. + /// Please report any issues with a sample XML and XPATH to reproduce. /// - /// - /// + /// XPath to query document with + /// Enumeration of string and Boundary tuples for the XPath matches. Boundary locations refer to the locations in the original document on disk. internal IEnumerable<(string, Boundary)> GetStringFromXPath(string Path, Dictionary xpathNameSpaces) { lock (_xpathLock) @@ -221,26 +223,56 @@ public class TextContainer continue; } - // First we find the name + // We have to heuristically calculate the original indexes of the locations in the original document because the internal representation differs + // For example it will convert to + + // First we find the name, absolute position index var nameIndex = FullContent[minIndex..].IndexOf(nodeIter.Current.Name, StringComparison.Ordinal) + minIndex; - // Then we grab the index of the end of this tag. - // We can't use OuterXML because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text. 
- var endTagIndex = FullContent[nameIndex..].IndexOf('>'); - // We also look for self-closing tag - var selfClosedTag = FullContent[endTagIndex-1] == '/'; - // If the tag is self closing innerxml will be empty string, so the finding is located at the end of the tag and is empty string - // Otherwise the finding is the content of the xml tag - var offset = selfClosedTag ? endTagIndex : FullContent[nameIndex..].IndexOf(nodeIter.Current.InnerXml, StringComparison.Ordinal) + nameIndex; - // Move the minimum index up in case there are multiple instances of identical OuterXML - // This ensures we won't re-find the same one - var totalOffset = minIndex + nameIndex + endTagIndex; - minIndex = totalOffset; - var location = new Boundary + // Then we calculate the absolute index of the end of the tag. + // We can't use OuterXML property because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text. + var endTagIndex = FullContent[nameIndex..].IndexOf('>', StringComparison.Ordinal) + nameIndex; + // If we are matching a tag itself, the previous char should be the open tag + // | + // v + // + // If it's a property it won't be + // | + // v + // + var isProp = FullContent[(nameIndex - 1)] != '<'; + // Check for self-closing tag + var selfClosedTag = FullContent[endTagIndex - 1] == '/'; + + // This is for when we're capturing the value of a property of the tag rather than the tag itself + if (isProp) { - Index = offset, - Length = nodeIter.Current.InnerXml.Length - }; - yield return (nodeIter.Current.Value, location); + // Find the index of character after the next end tag index after the name + var nextClosingIndexAfterName = endTagIndex+1; + // If we have a self closing tag, we can use that index, otherwise we need the closure of this tag + var offset = selfClosedTag ? 
endTagIndex : FullContent[nextClosingIndexAfterName..].IndexOf('>') + nextClosingIndexAfterName + 1; + // Move the minimum index up to the end of the closing tag to avoid additional matches of the same values + minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1; + var location = new Boundary + { + // +2 for the \" before the value for the property + Index = nameIndex + nodeIter.Current.Name.Length + 2, + Length = nodeIter.Current.InnerXml.Length + }; + yield return (nodeIter.Current.Value, location); + } + else + { + // Move the offset to the end of the opening tag + var offset = selfClosedTag ? endTagIndex : FullContent[nameIndex..].IndexOf(nodeIter.Current.InnerXml, StringComparison.Ordinal) + nameIndex; + // Move the minimum index up to the end of the closing tag + minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1; + var location = new Boundary + { + Index = offset, + Length = nodeIter.Current.InnerXml.Length + }; + yield return (nodeIter.Current.Value, location); + } } } diff --git a/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs b/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs index d942cf0..29725d9 100644 --- a/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs +++ b/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs @@ -83,6 +83,82 @@ public class XmlAndJsonTests } ]"; + private const string xmlStringRuleForPropWithData = @"[ + { + ""id"": ""SA000005"", + ""name"": ""Testing.Rules.XML"", + ""tags"": [ + ""Testing.Rules.XML"" + ], + ""severity"": ""Critical"", + ""description"": ""This rule checks the value of the property property to be true"", + ""patterns"": [ + { + ""pattern"": ""true"", + ""type"": ""string"", + ""confidence"": ""High"", + ""scopes"": [ + ""code"" + ], + ""xpaths"" : [""/bookstore/book/title/@*[name()='property']""] + } + ], + ""_comment"": """" + } +]"; + + private const string xmlStringRuleForPropWithDataForData = @"[ + { + ""id"": ""SA000005"", + ""name"": 
""Testing.Rules.XML"", + ""tags"": [ + ""Testing.Rules.XML"" + ], + ""severity"": ""Critical"", + ""description"": ""This rule checks the value of the title tag when it has a property"", + ""patterns"": [ + { + ""pattern"": ""Franklin"", + ""type"": ""regex"", + ""confidence"": ""High"", + ""scopes"": [ + ""code"" + ], + ""xpaths"" : [""/bookstore/book/title""] + } + ], + ""_comment"": """" + } +]"; + + private const string xmlDataPropsWithTagValue = + @" + + + The Autobiography of Benjamin Franklin + + Benjamin + Franklin + + 8.99 + + + The Confidence Man + + Herman + Melville + + 11.99 + + + The Gorgias + + Plato + + 9.99 + + "; + private const string jsonData = @"{ ""books"": @@ -228,14 +304,14 @@ public class XmlAndJsonTests { ""xpaths"": [""system.web/trace/@enabled""], ""pattern"": ""true"", - ""type"": ""regex"" + ""type"": ""string"" } ], ""must-match"": [ - ""\n\n"" + ""\n\n"" ], ""must-not-match"": [ - ""\n\n"" + ""\n\n"" ] }]"; RuleSet rules = new(); @@ -268,6 +344,28 @@ public class XmlAndJsonTests Assert.Fail(); } } + [DataRow(xmlStringRuleForPropWithDataForData, "Franklin", 212)] + [DataRow(xmlStringRuleForPropWithData, "true", 176)] + [DataTestMethod] + public void XmlTagWithPropsAndValue(string rule, string expectedValue, int expectedIndex) + { + RuleSet rules = new(); + rules.AddString(rule, "XmlTestRules"); + Microsoft.ApplicationInspector.RulesEngine.RuleProcessor processor = new(rules, + new RuleProcessorOptions { AllowAllTagsInBuildFiles = true }); + if (_languages.FromFileNameOut("test.xml", out var info)) + { + var matches = processor.AnalyzeFile(xmlDataPropsWithTagValue, new FileEntry("test.xml", new MemoryStream()), info); + Assert.AreEqual(1, matches.Count); + var match = matches[0]; + Assert.AreEqual(expectedValue, match.Sample); + Assert.AreEqual(expectedIndex, match.Boundary.Index); + } + else + { + Assert.Fail(); + } + } [DataRow(xmlStringRule)] [DataRow(jsonAndXmlStringRule)]