diff --git a/AppInspector.RulesEngine/TextContainer.cs b/AppInspector.RulesEngine/TextContainer.cs
index 1b631e6..a93a361 100644
--- a/AppInspector.RulesEngine/TextContainer.cs
+++ b/AppInspector.RulesEngine/TextContainer.cs
@@ -172,11 +172,13 @@ public class TextContainer
}
///
- /// If this file is a JSON, XML or YML file, returns the string contents of the specified path.
+ /// If this file is XML, attempts to return the the string contents of the specified XPath applied to the file.
/// If the path does not exist, or the file is not JSON, XML or YML returns null.
+ /// Method contains some heuristic behavior and may not cover all cases.
+ /// Please report any issues with a sample XML and XPATH to reproduce.
///
- ///
- ///
+ /// XPath to query document with
+ /// Enumeration of string and Boundary tuples for the XPath matches. Boundary locations refer to the locations in the original document on disk.
internal IEnumerable<(string, Boundary)> GetStringFromXPath(string Path, Dictionary xpathNameSpaces)
{
lock (_xpathLock)
@@ -221,26 +223,56 @@ public class TextContainer
continue;
}
- // First we find the name
+ // We have to heuristically calculate the original indexes of the locations in the original document because the internal representation differs
+ // For example it will convert to
+
+ // First we find the name, absolute position index
var nameIndex = FullContent[minIndex..].IndexOf(nodeIter.Current.Name, StringComparison.Ordinal) + minIndex;
- // Then we grab the index of the end of this tag.
- // We can't use OuterXML because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text.
- var endTagIndex = FullContent[nameIndex..].IndexOf('>');
- // We also look for self-closing tag
- var selfClosedTag = FullContent[endTagIndex-1] == '/';
- // If the tag is self closing innerxml will be empty string, so the finding is located at the end of the tag and is empty string
- // Otherwise the finding is the content of the xml tag
- var offset = selfClosedTag ? endTagIndex : FullContent[nameIndex..].IndexOf(nodeIter.Current.InnerXml, StringComparison.Ordinal) + nameIndex;
- // Move the minimum index up in case there are multiple instances of identical OuterXML
- // This ensures we won't re-find the same one
- var totalOffset = minIndex + nameIndex + endTagIndex;
- minIndex = totalOffset;
- var location = new Boundary
+ // Then we calculate the absolute index of the end of the tag.
+ // We can't use OuterXML property because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text.
+ var endTagIndex = FullContent[nameIndex..].IndexOf('>', StringComparison.Ordinal) + nameIndex;
+ // If we are matching a tag itself, the previous char should be the open tag
+ // |
+ // v
+ //
+ // If its a property it won't be
+ // |
+ // v
+ //
+ var isProp = FullContent[(nameIndex - 1)] != '<';
+ // Check for self-closing tag
+ var selfClosedTag = FullContent[endTagIndex - 1] == '/';
+
+ // This is for when we're capturing the value of a property of the tag rather than the tag itself
+ if (isProp)
{
- Index = offset,
- Length = nodeIter.Current.InnerXml.Length
- };
- yield return (nodeIter.Current.Value, location);
+ // Find the index of character after the next end tag index after the name
+ var nextClosingIndexAfterName = endTagIndex+1;
+ // If we have a self closing tag, we can use that index, otherwise we need the closure of this tag
+ var offset = selfClosedTag ? endTagIndex : FullContent[nextClosingIndexAfterName..].IndexOf('>') + nextClosingIndexAfterName + 1;
+ // Move the minimum index up to the end of the closing tag to avoid additioanl matches of the same values
+ minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1;
+ var location = new Boundary
+ {
+ // +2 for the \" before the value for the property
+ Index = nameIndex + nodeIter.Current.Name.Length + 2,
+ Length = nodeIter.Current.InnerXml.Length
+ };
+ yield return (nodeIter.Current.Value, location);
+ }
+ else
+ {
+ // Move the offset to the end of the opening tag
+ var offset = selfClosedTag ? endTagIndex : FullContent[nameIndex..].IndexOf(nodeIter.Current.InnerXml, StringComparison.Ordinal) + nameIndex;
+ // Move the minimum index up to the end of the closing tag
+ minIndex = selfClosedTag ? offset : FullContent[offset..].IndexOf('>') + offset + 1;
+ var location = new Boundary
+ {
+ Index = offset,
+ Length = nodeIter.Current.InnerXml.Length
+ };
+ yield return (nodeIter.Current.Value, location);
+ }
}
}
diff --git a/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs b/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
index d942cf0..29725d9 100644
--- a/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
+++ b/AppInspector.Tests/RuleProcessor/XmlAndJsonTests.cs
@@ -83,6 +83,82 @@ public class XmlAndJsonTests
}
]";
+ private const string xmlStringRuleForPropWithData = @"[
+ {
+ ""id"": ""SA000005"",
+ ""name"": ""Testing.Rules.XML"",
+ ""tags"": [
+ ""Testing.Rules.XML""
+ ],
+ ""severity"": ""Critical"",
+ ""description"": ""This rule checks the value of the property property to be true"",
+ ""patterns"": [
+ {
+ ""pattern"": ""true"",
+ ""type"": ""string"",
+ ""confidence"": ""High"",
+ ""scopes"": [
+ ""code""
+ ],
+ ""xpaths"" : [""/bookstore/book/title/@*[name()='property']""]
+ }
+ ],
+ ""_comment"": """"
+ }
+]";
+
+ private const string xmlStringRuleForPropWithDataForData = @"[
+ {
+ ""id"": ""SA000005"",
+ ""name"": ""Testing.Rules.XML"",
+ ""tags"": [
+ ""Testing.Rules.XML""
+ ],
+ ""severity"": ""Critical"",
+ ""description"": ""This rule checks the value of the title tag when it has a property"",
+ ""patterns"": [
+ {
+ ""pattern"": ""Franklin"",
+ ""type"": ""regex"",
+ ""confidence"": ""High"",
+ ""scopes"": [
+ ""code""
+ ],
+ ""xpaths"" : [""/bookstore/book/title""]
+ }
+ ],
+ ""_comment"": """"
+ }
+]";
+
+ private const string xmlDataPropsWithTagValue =
+ @"
+
+
+ The Autobiography of Benjamin Franklin
+
+ Benjamin
+ Franklin
+
+ 8.99
+
+
+ The Confidence Man
+
+ Herman
+ Melville
+
+ 11.99
+
+
+ The Gorgias
+
+ Plato
+
+ 9.99
+
+ ";
+
private const string jsonData =
@"{
""books"":
@@ -228,14 +304,14 @@ public class XmlAndJsonTests
{
""xpaths"": [""system.web/trace/@enabled""],
""pattern"": ""true"",
- ""type"": ""regex""
+ ""type"": ""string""
}
],
""must-match"": [
- ""\n\n""
+ ""\n\n""
],
""must-not-match"": [
- ""\n\n""
+ ""\n\n""
]
}]";
RuleSet rules = new();
@@ -268,6 +344,28 @@ public class XmlAndJsonTests
Assert.Fail();
}
}
+ [DataRow(xmlStringRuleForPropWithDataForData, "Franklin", 212)]
+ [DataRow(xmlStringRuleForPropWithData, "true", 176)]
+ [DataTestMethod]
+ public void XmlTagWithPropsAndValue(string rule, string expectedValue, int expectedIndex)
+ {
+ RuleSet rules = new();
+ rules.AddString(rule, "XmlTestRules");
+ Microsoft.ApplicationInspector.RulesEngine.RuleProcessor processor = new(rules,
+ new RuleProcessorOptions { AllowAllTagsInBuildFiles = true });
+ if (_languages.FromFileNameOut("test.xml", out var info))
+ {
+ var matches = processor.AnalyzeFile(xmlDataPropsWithTagValue, new FileEntry("test.xml", new MemoryStream()), info);
+ Assert.AreEqual(1, matches.Count);
+ var match = matches[0];
+ Assert.AreEqual(expectedValue, match.Sample);
+ Assert.AreEqual(expectedIndex, match.Boundary.Index);
+ }
+ else
+ {
+ Assert.Fail();
+ }
+ }
[DataRow(xmlStringRule)]
[DataRow(jsonAndXmlStringRule)]