Workaround for namespace based parsing. (#499)

This commit is contained in:
Gabe Stocco 2022-08-10 23:34:54 -07:00 коммит произвёл GitHub
Родитель 1a083e10a0
Коммит d7bf96f101
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 86 добавлений и 3 удалений

Просмотреть файл

@ -148,8 +148,13 @@ namespace Microsoft.ApplicationInspector.RulesEngine
{
if (nodeIter.Current is not null)
{
var outerLoc = FullContent[minIndex..].IndexOf(nodeIter.Current.OuterXml);
var offset = FullContent[outerLoc..].IndexOf(nodeIter.Current.InnerXml) + outerLoc + minIndex;
// First we find the name
var nameIndex = FullContent[minIndex..].IndexOf(nodeIter.Current.Name);
// Then we grab the index of the end of this tag.
// We can't search for OuterXml here: when a namespace is present the parser injects it into OuterXml, so it no longer matches the original text.
var endTagIndex = FullContent[nameIndex..].IndexOf('>');
var totalOffset = nameIndex + endTagIndex + minIndex;
var offset = FullContent[totalOffset..].IndexOf(nodeIter.Current.InnerXml) + totalOffset;
// Move the minimum index up in case there are multiple instances of identical OuterXML
// This ensures we won't re-find the same one
minIndex = offset;

Просмотреть файл

@ -1,4 +1,5 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
@ -220,6 +221,83 @@ http://
Assert.AreEqual(2, matches.Count);
}
}
/// <summary>
/// Verifies that a regex rule scoped by an XPath built from <c>local-name()</c> steps produces
/// exactly one match for a pom.xml document, both when the root element declares XML
/// namespaces and when it declares none.
/// </summary>
[TestMethod]
public void TestXmlWithAndWithoutNamespace()
{
    // Document whose root element declares a default namespace plus the xsi namespace.
    var content = @"<?xml version=""1.0"" encoding=""UTF-8""?>
<project xmlns=""http://maven.apache.org/POM/4.0.0"" xmlns:xsi=""http://www.w3.org/2001/XMLSchema-instance"" xsi:schemaLocation=""http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"">
<modelVersion>4.0.0</modelVersion>
<groupId>xxx</groupId>
<artifactId>xxx</artifactId>
<version>0.1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<name>${project.groupId}:${project.artifactId}</name>
<description />
<properties>
<java.version>17</java.version>
</properties>
</project>";
    // The same as above but with no namespace specified
    var noNamespaceContent = @"<?xml version=""1.0"" encoding=""UTF-8""?>
<project>
<modelVersion>4.0.0</modelVersion>
<groupId>xxx</groupId>
<artifactId>xxx</artifactId>
<version>0.1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<name>${project.groupId}:${project.artifactId}</name>
<description />
<properties>
<java.version>17</java.version>
</properties>
</project>";
    // Rule matching the literal "17" inside /project/properties/java.version; the XPath uses
    // local-name() on every step so it does not depend on a namespace prefix.
    var rule = @"[{
""name"": ""Source code: Java 17"",
""id"": ""CODEJAVA000000"",
""description"": ""Java 17 maven configuration"",
""applies_to_file_regex"": [
""pom.xml""
],
""tags"": [
""Code.Java.17""
],
""severity"": ""critical"",
""patterns"": [
{
""pattern"": ""17"",
""xpaths"" : [""/*[local-name(.)='project']/*[local-name(.)='properties']/*[local-name(.)='java.version']""],
""type"": ""regex"",
""scopes"": [
""code""
],
""modifiers"": [
""i""
],
""confidence"": ""high""
}
]
}]";
    RuleSet rules = new(null);
    var originalSource = "TestRules";
    rules.AddString(rule, originalSource);
    var analyzer = new Microsoft.ApplicationInspector.RulesEngine.RuleProcessor(rules, new RuleProcessorOptions(){Parallel = false, AllowAllTagsInBuildFiles = true});

    // Fail loudly when the language lookup does not succeed. Previously the assertions were
    // nested inside the 'if', so a failed lookup made the test pass without analyzing anything.
    if (!_languages.FromFileNameOut("pom.xml", out LanguageInfo info))
    {
        Assert.Fail("Could not resolve language information for pom.xml.");
        return;
    }

    // Namespaced document: expect exactly one match.
    var matches = analyzer.AnalyzeFile(content, new Microsoft.CST.RecursiveExtractor.FileEntry("pom.xml", new MemoryStream()), info);
    Assert.AreEqual(1, matches.Count);

    // Namespace-free document: expect the same single match.
    matches = analyzer.AnalyzeFile(noNamespaceContent, new Microsoft.CST.RecursiveExtractor.FileEntry("pom.xml", new MemoryStream()), info);
    Assert.AreEqual(1, matches.Count);
}
[DataRow(true, 1, new[] { 2 })]
[DataRow(false, 1, new[] { 3 })]