Initial commit

2020-07-31 08:49:16 -07:00 · 2020-07-31 08:49:16 -07:00 · a2acd2635b
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,5 @@
+RecursiveExtractor/bin
+RecursiveExtractor/obj
+RecursiveExtractor.Tests/bin
+RecursiveExtractor.Tests/obj
+.vs
--- a/21
+++ b/21
@ -1,21 +0,0 @@
-    MIT License
-
-    Copyright (c) Microsoft Corporation.
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to deal
-    in the Software without restriction, including without limitation the rights
-    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-    copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be included in all
-    copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-    SOFTWARE
--- a/PRIVACY.md
+++ b/PRIVACY.md
@ -0,0 +1,8 @@
+# Privacy and Telemetry Notice
+
+## Data Collection
+
+The Recursive Extractor software itself does *not* collect information about your
+use of the software, and therefore does not send such information to any source,
+including to Microsoft. Nevertheless, our privacy statement is located at
+https://go.microsoft.com/fwlink/?LinkID=824704. 
--- a/RecursiveExtractor.Tests/ExtractorTests.cs
+++ b/RecursiveExtractor.Tests/ExtractorTests.cs
@ -0,0 +1,203 @@
+// Copyright (c) Microsoft Corporation. Licensed under the MIT License.
+
+using Microsoft.CST.OpenSource.RecursiveExtractor;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using NLog;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+
+namespace Microsoft.CST.OpenSource.Tests
+{
+    [TestClass]
+    public class ExtractorTests
+    {
+        /// <summary>
+        /// Extracts each sample file from the TestData directory by path and verifies the number
+        /// of entries returned by <c>Extractor.ExtractFile</c>.
+        /// </summary>
+        /// <param name="fileName">File name inside the TestData directory of the test output.</param>
+        /// <param name="parallel">Passed through as the second argument of <c>ExtractFile</c>.</param>
+        /// <param name="expectedNumFiles">Number of entries the extraction is expected to return.</param>
+        [DataTestMethod]
+        [DataRow("Shared.zip", false)]
+        [DataRow("Shared.zip", true)]
+        [DataRow("Shared.7z", false)]
+        [DataRow("Shared.7z", true)]
+        // The fixture is committed as "Shared.tar"; the casing must match on case-sensitive file systems.
+        [DataRow("Shared.tar", false)]
+        [DataRow("Shared.tar", true)]
+        [DataRow("Shared.rar", false)]
+        [DataRow("Shared.rar", true)]
+        [DataRow("Shared.rar4", false)]
+        [DataRow("Shared.rar4", true)]
+        [DataRow("Shared.tar.bz2", false)]
+        [DataRow("Shared.tar.bz2", true)]
+        [DataRow("Shared.tar.gz", false)]
+        [DataRow("Shared.tar.gz", true)]
+        [DataRow("Shared.tar.xz", false)]
+        [DataRow("Shared.tar.xz", true)]
+        [DataRow("sysvbanner_1.0-17fakesync1_amd64.deb", true, 6)]
+        [DataRow("sysvbanner_1.0-17fakesync1_amd64.deb", false, 6)]
+        [DataRow("Shared.a", false, 1)]
+        [DataRow("Shared.a", true, 1)]
+        [DataRow("Shared.deb", false)]
+        [DataRow("Shared.deb", true)]
+        [DataRow("Shared.ar", false)]
+        [DataRow("Shared.ar", true)]
+        [DataRow("Shared.iso", false)]
+        [DataRow("Shared.iso", true)]
+        [DataRow("Shared.vhd", false, 29)] // 26 + Some invisible system files
+        [DataRow("Shared.vhd", true, 29)]
+        [DataRow("Shared.vhdx", false)]
+        [DataRow("Shared.vhdx", true)]
+        [DataRow("Shared.wim", false)]
+        [DataRow("Shared.wim", true)]
+        [DataRow("Empty.vmdk", false, 0)]
+        [DataRow("Empty.vmdk", true, 0)]
+        [DataRow("TextFile.md", false, 1)]
+        [DataRow("TextFile.md", true, 1)]
+        public void ExtractArchive(string fileName, bool parallel, int expectedNumFiles = 26)
+        {
+            var extractor = new Extractor();
+            var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", fileName);
+            var results = extractor.ExtractFile(path, parallel).ToList();
+
+            // AreEqual reports both the expected and the actual count when the check fails.
+            Assert.AreEqual(expectedNumFiles, results.Count);
+        }
+
+        /// <summary>
+        /// Opens each sample file from the TestData directory as a stream and verifies the number
+        /// of entries returned by <c>Extractor.ExtractStream</c>.
+        /// </summary>
+        /// <param name="fileName">File name inside the TestData directory of the test output.</param>
+        /// <param name="parallel">Passed through as the third argument of <c>ExtractStream</c>.</param>
+        /// <param name="expectedNumFiles">Number of entries the extraction is expected to return.</param>
+        [DataTestMethod]
+        [DataRow("Shared.zip", false)]
+        [DataRow("Shared.zip", true)]
+        [DataRow("Shared.7z", false)]
+        [DataRow("Shared.7z", true)]
+        // The fixture is committed as "Shared.tar"; the casing must match on case-sensitive file systems.
+        [DataRow("Shared.tar", false)]
+        [DataRow("Shared.tar", true)]
+        [DataRow("Shared.rar", false)]
+        [DataRow("Shared.rar", true)]
+        [DataRow("Shared.rar4", false)]
+        [DataRow("Shared.rar4", true)]
+        [DataRow("Shared.tar.bz2", false)]
+        [DataRow("Shared.tar.bz2", true)]
+        [DataRow("Shared.tar.gz", false)]
+        [DataRow("Shared.tar.gz", true)]
+        [DataRow("Shared.tar.xz", false)]
+        [DataRow("Shared.tar.xz", true)]
+        [DataRow("sysvbanner_1.0-17fakesync1_amd64.deb", true, 6)]
+        [DataRow("sysvbanner_1.0-17fakesync1_amd64.deb", false, 6)]
+        [DataRow("Shared.a", false, 1)]
+        [DataRow("Shared.a", true, 1)]
+        [DataRow("Shared.deb", false)]
+        [DataRow("Shared.deb", true)]
+        [DataRow("Shared.ar", false)]
+        [DataRow("Shared.ar", true)]
+        [DataRow("Shared.iso", false)]
+        [DataRow("Shared.iso", true)]
+        [DataRow("Shared.vhd", false, 29)] // 26 + Some invisible system files
+        [DataRow("Shared.vhd", true, 29)]
+        [DataRow("Shared.vhdx", false)]
+        [DataRow("Shared.vhdx", true)]
+        [DataRow("Shared.wim", false)]
+        [DataRow("Shared.wim", true)]
+        [DataRow("Empty.vmdk", false, 0)]
+        [DataRow("Empty.vmdk", true, 0)]
+        [DataRow("TextFile.md", false, 1)]
+        [DataRow("TextFile.md", true, 1)]
+        public void ExtractArchiveFromStream(string fileName, bool parallel, int expectedNumFiles = 26)
+        {
+            var extractor = new Extractor();
+            var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", fileName);
+
+            // Open the fixture read-only: the test only reads it, and the two-argument FileStream
+            // constructor would otherwise request read/write access. The using declaration
+            // disposes the stream when the method exits, so no explicit Close() is needed.
+            using var stream = new FileStream(path, FileMode.Open, FileAccess.Read);
+            var results = extractor.ExtractStream(path, stream, parallel).ToList();
+
+            // AreEqual reports both the expected and the actual count when the check fails.
+            Assert.AreEqual(expectedNumFiles, results.Count);
+        }
+
+        /// <summary>
+        /// Extracts an archive that contains further archives and verifies the total number of
+        /// entries returned by <c>Extractor.ExtractFile</c>.
+        /// </summary>
+        /// <param name="fileName">File name inside the TestData directory of the test output.</param>
+        /// <param name="parallel">Passed through as the second argument of <c>ExtractFile</c>.</param>
+        /// <param name="expectedNumFiles">Number of entries the extraction is expected to return.</param>
+        [DataTestMethod]
+        // The fixture is committed as "Nested.zip"; the casing must match on case-sensitive file systems.
+        [DataRow("Nested.zip", false, 26 * 8)]
+        [DataRow("Nested.zip", true, 26 * 8)]
+        public void ExtractNestedArchive(string fileName, bool parallel, int expectedNumFiles)
+        {
+            var extractor = new Extractor();
+            var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", fileName);
+            var results = extractor.ExtractFile(path, parallel).ToList();
+
+            // AreEqual reports both the expected and the actual count when the check fails.
+            Assert.AreEqual(expectedNumFiles, results.Count);
+        }
+
+        /// <summary>
+        /// Verifies <c>MiniMagic.DetectFileType</c> for each sample file: first from content alone,
+        /// then with the stream positioned away from the start, and finally from the file name
+        /// alone when the content is empty.
+        /// </summary>
+        /// <param name="fileName">File name inside the TestData directory of the test output.</param>
+        /// <param name="expectedArchiveFileType">Type the detection is expected to report.</param>
+        [DataTestMethod]
+        [DataRow("Shared.zip", ArchiveFileType.ZIP)]
+        [DataRow("Shared.7z", ArchiveFileType.P7ZIP)]
+        // The fixture is committed as "Shared.tar"; the casing must match on case-sensitive file systems.
+        [DataRow("Shared.tar", ArchiveFileType.TAR)]
+        [DataRow("Shared.rar", ArchiveFileType.RAR)]
+        [DataRow("Shared.rar4", ArchiveFileType.RAR)]
+        [DataRow("Shared.tar.bz2", ArchiveFileType.BZIP2)]
+        [DataRow("Shared.tar.gz", ArchiveFileType.GZIP)]
+        [DataRow("Shared.tar.xz", ArchiveFileType.XZ)]
+        [DataRow("sysvbanner_1.0-17fakesync1_amd64.deb", ArchiveFileType.DEB)]
+        [DataRow("Shared.a", ArchiveFileType.UNKNOWN)]
+        [DataRow("Shared.deb", ArchiveFileType.DEB)]
+        [DataRow("Shared.ar", ArchiveFileType.AR)]
+        [DataRow("Shared.iso", ArchiveFileType.ISO_9660)]
+        [DataRow("Shared.vhd", ArchiveFileType.VHD)]
+        [DataRow("Shared.vhdx", ArchiveFileType.VHDX)]
+        [DataRow("Shared.wim", ArchiveFileType.WIM)]
+        [DataRow("Empty.vmdk", ArchiveFileType.VMDK)]
+        [DataRow("TextFile.md", ArchiveFileType.UNKNOWN)]
+        public void TestMiniMagic(string fileName, ArchiveFileType expectedArchiveFileType)
+        {
+            var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", fileName);
+
+            // Open the fixture read-only: the test only reads it, and the two-argument FileStream
+            // constructor would otherwise request read/write access.
+            using FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read);
+
+            // Test just based on the content
+            var fileEntry = new FileEntry("NoName", fs);
+
+            Assert.AreEqual(expectedArchiveFileType, MiniMagic.DetectFileType(fileEntry));
+            // Detection must leave the stream position where it found it.
+            Assert.AreEqual(0L, fileEntry.Content.Position);
+
+            // Should also work if the stream doesn't start at 0
+            fileEntry.Content.Position = 10;
+            Assert.AreEqual(expectedArchiveFileType, MiniMagic.DetectFileType(fileEntry));
+            Assert.AreEqual(10L, fileEntry.Content.Position);
+
+            // We should also detect just on file names if the content doesn't match
+            var nameOnlyEntry = new FileEntry(fileName, new MemoryStream());
+            Assert.AreEqual(expectedArchiveFileType, MiniMagic.DetectFileType(nameOnlyEntry));
+        }
+
+        /// <summary>
+        /// Verifies that extracting a quine or decompression-bomb sample is aborted with an
+        /// <see cref="OverflowException"/>. Completing the extraction, or failing with any other
+        /// exception, fails the test.
+        /// </summary>
+        /// <param name="fileName">File name inside the TestData directory of the test output.</param>
+        /// <param name="parallel">Passed through as the second argument of <c>ExtractFile</c>.</param>
+        [DataTestMethod]
+        [DataRow("droste.zip", false)]
+        [DataRow("droste.zip", true)]
+        [DataRow("10GB.7z.bz2", false)]
+        [DataRow("10GB.7z.bz2", true)]
+        [DataRow("10GB.gz.bz2", false)]
+        [DataRow("10GB.gz.bz2", true)]
+        [DataRow("10GB.rar.bz2", false)]
+        [DataRow("10GB.rar.bz2", true)]
+        [DataRow("10GB.xz.bz2", false)]
+        [DataRow("10GB.xz.bz2", true)]
+        [DataRow("10GB.zip.bz2", false)]
+        [DataRow("10GB.zip.bz2", true)]
+        [DataRow("zblg.zip", false)]
+        [DataRow("zblg.zip", true)]
+        [DataRow("zbsm.zip", false)]
+        [DataRow("zbsm.zip", true)]
+        [DataRow("zbxl.zip", false)]
+        [DataRow("zbxl.zip", true)]
+        public void TestQuineBombs(string fileName, bool parallel)
+        {
+            var extractor = new Extractor();
+            var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", fileName);
+            try
+            {
+                // ToList() enumerates the complete result; the entries themselves are not needed.
+                _ = extractor.ExtractFile(path, parallel).ToList();
+            }
+            // We should throw an overflow exception when we detect a quine or bomb
+            catch (OverflowException)
+            {
+                return;
+            }
+            catch (Exception e)
+            {
+                Logger.Debug(e, "Shouldn't hit other exceptions in this test.");
+                // Surface the unexpected exception in the test result instead of only logging it.
+                Assert.Fail($"Expected an OverflowException for '{fileName}' but got {e.GetType().FullName}: {e.Message}");
+            }
+            // Getting here means we didnt catch the bomb
+            Assert.Fail($"Extraction of '{fileName}' completed without throwing an OverflowException.");
+        }
+
+        // NLog logger for this test class, obtained from LogManager.GetCurrentClassLogger().
+        protected static readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();
+    }
+}
--- a/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj
+++ b/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj
@ -0,0 +1,124 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>netcoreapp3.1</TargetFramework>
+    <IsPackable>false</IsPackable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <!-- NOTE(review): Microsoft.NET.Test.Sdk, MSTest.TestAdapter and MSTest.TestFramework are each
+         referenced twice in this group with different versions; keep a single reference of each. -->
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.5.0" />
+    <PackageReference Include="MSTest.TestAdapter" Version="2.1.0" />
+    <PackageReference Include="MSTest.TestFramework" Version="2.1.0" />
+    <PackageReference Include="DiscUtils.Btrfs" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.HfsPlus" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.SquashFs" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.Xfs" Version="0.15.1-ci0002" />
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.7.0-preview-20200428-01" />
+    <PackageReference Include="MSTest.TestAdapter" Version="2.1.1" />
+    <PackageReference Include="MSTest.TestFramework" Version="2.1.1" />
+    <PackageReference Include="coverlet.collector" Version="1.2.1">
+      <PrivateAssets>all</PrivateAssets>
+      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
+    </PackageReference>
+    <PackageReference Include="Sarif.Sdk" Version="2.2.5" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\RecursiveExtractor\RecursiveExtractor.csproj" />
+  </ItemGroup>
+  <ItemGroup>
+    <None Update="TestData\10GB.7z.bz2">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\10GB.bz2">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\10GB.gz.bz2">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\10GB.rar.bz2">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\10GB.xz.bz2">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\10GB.zip.bz2">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\droste.zip">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Empty.vmdk">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Nested.zip">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\TextFile.md">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.7z">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.a">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.ar">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.deb">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.iso">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.rar">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.rar4">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.tar">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.tar.bz2">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.tar.gz">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.tar.xz">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.vhd">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.vhdx">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.wim">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.zip">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\sysvbanner_1.0-17fakesync1_amd64.deb">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\zblg.zip">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\Shared.bsd.ar">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\zbsm.zip">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="TestData\zbxl.zip">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
+  <ItemGroup>
+    <PackageReference Update="Nerdbank.GitVersioning" Version="3.1.91" />
+  </ItemGroup>
+</Project>
--- a/RecursiveExtractor.Tests/TestData/10GB.7z.bz2
+++ b/RecursiveExtractor.Tests/TestData/10GB.7z.bz2
--- a/RecursiveExtractor.Tests/TestData/10GB.bz2
+++ b/RecursiveExtractor.Tests/TestData/10GB.bz2
--- a/RecursiveExtractor.Tests/TestData/10GB.gz.bz2
+++ b/RecursiveExtractor.Tests/TestData/10GB.gz.bz2
--- a/RecursiveExtractor.Tests/TestData/10GB.rar.bz2
+++ b/RecursiveExtractor.Tests/TestData/10GB.rar.bz2
--- a/RecursiveExtractor.Tests/TestData/10GB.xz.bz2
+++ b/RecursiveExtractor.Tests/TestData/10GB.xz.bz2
--- a/RecursiveExtractor.Tests/TestData/10GB.zip.bz2
+++ b/RecursiveExtractor.Tests/TestData/10GB.zip.bz2
--- a/RecursiveExtractor.Tests/TestData/Empty.vmdk
+++ b/RecursiveExtractor.Tests/TestData/Empty.vmdk
--- a/RecursiveExtractor.Tests/TestData/Nested.zip
+++ b/RecursiveExtractor.Tests/TestData/Nested.zip
--- a/RecursiveExtractor.Tests/TestData/Shared.7z
+++ b/RecursiveExtractor.Tests/TestData/Shared.7z
--- a/RecursiveExtractor.Tests/TestData/Shared.a
+++ b/RecursiveExtractor.Tests/TestData/Shared.a
--- a/RecursiveExtractor.Tests/TestData/Shared.ar
+++ b/RecursiveExtractor.Tests/TestData/Shared.ar
--- a/RecursiveExtractor.Tests/TestData/Shared.bsd.ar
+++ b/RecursiveExtractor.Tests/TestData/Shared.bsd.ar
--- a/RecursiveExtractor.Tests/TestData/Shared.deb
+++ b/RecursiveExtractor.Tests/TestData/Shared.deb
--- a/RecursiveExtractor.Tests/TestData/Shared.iso
+++ b/RecursiveExtractor.Tests/TestData/Shared.iso
--- a/RecursiveExtractor.Tests/TestData/Shared.rar
+++ b/RecursiveExtractor.Tests/TestData/Shared.rar
--- a/RecursiveExtractor.Tests/TestData/Shared.rar4
+++ b/RecursiveExtractor.Tests/TestData/Shared.rar4
--- a/RecursiveExtractor.Tests/TestData/Shared.tar
+++ b/RecursiveExtractor.Tests/TestData/Shared.tar
--- a/RecursiveExtractor.Tests/TestData/Shared.tar.bz2
+++ b/RecursiveExtractor.Tests/TestData/Shared.tar.bz2
--- a/RecursiveExtractor.Tests/TestData/Shared.tar.gz
+++ b/RecursiveExtractor.Tests/TestData/Shared.tar.gz
--- a/RecursiveExtractor.Tests/TestData/Shared.tar.xz
+++ b/RecursiveExtractor.Tests/TestData/Shared.tar.xz
--- a/RecursiveExtractor.Tests/TestData/Shared.vhd
+++ b/RecursiveExtractor.Tests/TestData/Shared.vhd
--- a/RecursiveExtractor.Tests/TestData/Shared.vhdx
+++ b/RecursiveExtractor.Tests/TestData/Shared.vhdx
--- a/RecursiveExtractor.Tests/TestData/Shared.wim
+++ b/RecursiveExtractor.Tests/TestData/Shared.wim
--- a/RecursiveExtractor.Tests/TestData/Shared.zip
+++ b/RecursiveExtractor.Tests/TestData/Shared.zip
--- a/RecursiveExtractor.Tests/TestData/TextFile.md
+++ b/RecursiveExtractor.Tests/TestData/TextFile.md
@ -0,0 +1,57 @@
+## RecursiveExtractor
+
+RecursiveExtractor is a general-purpose file extractor.
+
+### Format Support
+
+RecursiveExtractor supports extracting the following types of archives:
+
+* GNU AR
+* BZip2
+* [deb](https://en.wikipedia.org/wiki/Deb_(file_format))
+* ISO
+* tar
+* VHD
+* VHDX
+* VMDK
+* WIM
+* XZip
+* zip
+
+## Using RecursiveExtractor
+
+To use RecursiveExtractor, just instantiate an `Extractor` object and call the `ExtractFile`
+method with either a filename or a byte array. This method will return an IEnumerable
+of FileEntry objects, each one of which will contain the name of the file and its 
+contents, plus some additional metadata. 
+
+```
+using Microsoft.CST.OpenSource.RecursiveExtractor;
+
+...
+
+// Initialize the RecursiveExtractor extractor
+var extractor = new Extractor();
+
+// Extract from an existing file
+foreach (var fileEntry in extractor.ExtractFile("test.zip"))
+{
+    Console.WriteLine(fileEntry.FullPath);
+}
+
+// Extract from a byte array
+byte[] bytes = ...;
+// The "nonexistent.zip" name doesn't really matter, but is used as part of the
+// FileEntry.FullPath string.
+foreach (var fileEntry in extractor.ExtractFile("nonexistent.zip", bytes))
+{
+    Console.WriteLine(fileEntry.FullPath);
+}
+```
+
+## Issues
+
+If you find any issues with RecursiveExtractor, please [open an issue](https://github.com/Microsoft/OSSGadget/issues/new)
+in the [Microsoft/OSSGadget](https://github.com/Microsoft/OSSGadget) repository.
+
+
--- a/RecursiveExtractor.Tests/TestData/droste.zip
+++ b/RecursiveExtractor.Tests/TestData/droste.zip
--- a/RecursiveExtractor.Tests/TestData/sysvbanner_1.0-17fakesync1_amd64.deb
+++ b/RecursiveExtractor.Tests/TestData/sysvbanner_1.0-17fakesync1_amd64.deb
--- a/RecursiveExtractor.Tests/TestData/zblg.zip
+++ b/RecursiveExtractor.Tests/TestData/zblg.zip
--- a/RecursiveExtractor.Tests/TestData/zbsm.zip
+++ b/RecursiveExtractor.Tests/TestData/zbsm.zip
--- a/RecursiveExtractor.Tests/TestData/zbxl.zip
+++ b/RecursiveExtractor.Tests/TestData/zbxl.zip
--- a/RecursiveExtractor.sln
+++ b/RecursiveExtractor.sln
@ -0,0 +1,31 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.30309.148
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "RecursiveExtractor", "RecursiveExtractor\RecursiveExtractor.csproj", "{A7F7492B-60E0-468C-B267-BA60EC131E86}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "RecursiveExtractor.Tests", "RecursiveExtractor.Tests\RecursiveExtractor.Tests.csproj", "{BB4A44C9-47E4-4BF5-A04A-D3A65E46D115}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{A7F7492B-60E0-468C-B267-BA60EC131E86}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{A7F7492B-60E0-468C-B267-BA60EC131E86}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{A7F7492B-60E0-468C-B267-BA60EC131E86}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{A7F7492B-60E0-468C-B267-BA60EC131E86}.Release|Any CPU.Build.0 = Release|Any CPU
+		{BB4A44C9-47E4-4BF5-A04A-D3A65E46D115}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{BB4A44C9-47E4-4BF5-A04A-D3A65E46D115}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{BB4A44C9-47E4-4BF5-A04A-D3A65E46D115}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{BB4A44C9-47E4-4BF5-A04A-D3A65E46D115}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {38234B7F-8828-462C-8C2A-747A4A195D7F}
+	EndGlobalSection
+EndGlobal
--- a/RecursiveExtractor/ArFile.cs
+++ b/RecursiveExtractor/ArFile.cs
@ -0,0 +1,302 @@
+// Copyright (c) Microsoft Corporation. Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Microsoft.CST.OpenSource.RecursiveExtractor
+{
+    /**
+     * Gnu Ar file parser.  Supports SystemV style lookup tables in both 32 and 64 bit mode as well as BSD and GNU formatted .ars.
+     */
+
+    public static class ArFile
+    {
+        // Simple method which returns the file entries. We can't make this a continuation because we're
+        // using spans.
+        public static IEnumerable<FileEntry> GetFileEntries(FileEntry fileEntry)
+        {
+            if (fileEntry == null)
+            {
+                yield break;
+            }
+            // First, cut out the file signature (8 bytes)
+            fileEntry.Content.Position = 8;
+            var filenameLookup = new Dictionary<int, string>();
+            byte[] headerBuffer = new byte[60];
+            while (true)
+            {
+                if (fileEntry.Content.Length - fileEntry.Content.Position < 60)  // The header for each file is 60 bytes
+                {
+                    break;
+                }
+
+                fileEntry.Content.Read(headerBuffer, 0, 60);
+
+                if (long.TryParse(Encoding.ASCII.GetString(headerBuffer[48..58]), out long size))// header size in bytes
+                {
+                    var filename = Encoding.ASCII.GetString(headerBuffer[0..16]).Trim();
+
+                    // Header with list of file names
+                    if (filename.StartsWith("//"))
+                    {
+                        // This should just be a list of names, size should be safe to load in memory and cast
+                        // to int
+                        var fileNamesBytes = new byte[size];
+                        fileEntry.Content.Read(fileNamesBytes, 0, (int)size);
+
+                        var name = new StringBuilder();
+                        var index = 0;
+                        for (int i = 0; i < fileNamesBytes.Length; i++)
+                        {
+                            if (fileNamesBytes[i] == '/')
+                            {
+                                filenameLookup.Add(index, name.ToString());
+                                name.Clear();
+                            }
+                            else if (fileNamesBytes[i] == '\n')
+                            {
+                                // The next filename would start on the next line
+                                index = i + 1;
+                            }
+                            else
+                            {
+                                name.Append((char)fileNamesBytes[i]);
+                            }
+                        }
+                    }
+                    else if (filename.StartsWith("#1/"))
+                    {
+                        // We should be positioned right after the header
+                        if (int.TryParse(filename.Substring(3), out int nameLength))
+                        {
+                            byte[] nameSpan = new byte[nameLength];
+                            // This should move us right to the file
+                            fileEntry.Content.Read(nameSpan,0,nameLength);
+
+                            var entryStream = new FileStream(Path.GetTempFileName(), FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite, 4096, FileOptions.DeleteOnClose);
+
+                            // The name length is included in the total size reported in the header
+                            CopyStreamBytes(fileEntry.Content, entryStream, size - nameLength);
+
+                            yield return new FileEntry(Encoding.ASCII.GetString(nameSpan), entryStream, fileEntry, true);
+                        }
+                    }
+                    else if (filename.Equals('/'))
+                    {
+                        // System V symbol lookup table N = 32 bit big endian integers (entries in table) then
+                        // N 32 bit big endian integers representing positions in archive then N \0
+                        // terminated strings "symbol name" (possibly filename)
+
+                        var tableContents = new byte[size];
+                        fileEntry.Content.Read(tableContents,0,(int)size);
+
+                        var numEntries = IntFromBigEndianBytes(tableContents[0..4]);
+                        var filePositions = new int[numEntries];
+                        for (int i = 0; i < numEntries; i++)
+                        {
+                            var start = (i + 1) * 4;
+                            var end = start + 4;
+                            filePositions[i] = IntFromBigEndianBytes(tableContents[start..end]);
+                        }
+
+                        var index = 0;
+                        var sb = new StringBuilder();
+                        var fileEntries = new List<(int, string)>();
+
+                        for (int i = 0; i < tableContents.Length; i++)
+                        {
+                            if (tableContents[i] == '\0')
+                            {
+                                fileEntries.Add((filePositions[index++], sb.ToString()));
+                                sb.Clear();
+                            }
+                            else
+                            {
+                                sb.Append(tableContents[i]);
+                            }
+                        }
+
+                        foreach (var entry in fileEntries)
+                        {
+                            fileEntry.Content.Position = entry.Item1;
+                            fileEntry.Content.Read(headerBuffer, 0, 60);
+
+                            if (long.TryParse(Encoding.ASCII.GetString(headerBuffer[48..58]), out long innerSize))// header size in bytes
+                            {
+                                if (filename.StartsWith("/"))
+                                {
+                                    if (int.TryParse(filename[1..], out int innerIndex))
+                                    {
+                                        try
+                                        {
+                                            filename = filenameLookup[innerIndex];
+                                        }
+                                        catch (Exception)
+                                        {
+                                            Logger.Debug("Expected to find a filename at index {0}", innerIndex);
+                                        }
+                                    }
+                                }
+                                else
+                                {
+                                    filename = entry.Item2;
+                                }
+
+                                var entryStream = new FileStream(Path.GetTempFileName(), FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite, 4096, FileOptions.DeleteOnClose);
+                                CopyStreamBytes(fileEntry.Content, entryStream, innerSize);
+                                yield return new FileEntry(filename, entryStream, fileEntry);
+                            }
+                        }
+                        fileEntry.Content.Position = fileEntry.Content.Length - 1;
+                    }
+                    else if (filename.Equals("/SYM64/"))
+                    {
+                        // https://en.wikipedia.org/wiki/Ar_(Unix)#System_V_(or_GNU)_variant GNU lookup table
+                        // (archives larger than 4GB) N = 64 bit big endian integers (entries in table) then N
+                        // 64 bit big endian integers representing positions in archive then N \0 terminated
+                        // strings "symbol name" (possibly filename)
+
+                        var buffer = new byte[8];
+                        fileEntry.Content.Read(buffer, 0, 8);
+
+                        var numEntries = Int64FromBigEndianBytes(buffer);
+                        var filePositions = new long[numEntries];
+
+                        for (int i = 0; i < numEntries; i++)
+                        {
+                            fileEntry.Content.Read(buffer, 0, 8);
+                            filePositions[i] = Int64FromBigEndianBytes(buffer);
+                        }
+
+                        var index = 0;
+                        var sb = new StringBuilder();
+                        var fileEntries = new List<(long, string)>();
+
+                        while (fileEntry.Content.Position < size)
+                        {
+                            fileEntry.Content.Read(buffer, 0, 1);
+                            if (buffer[0] == '\0')
+                            {
+                                fileEntries.Add((filePositions[index++], sb.ToString()));
+                                sb.Clear();
+                            }
+                            else
+                            {
+                                sb.Append(buffer[0]);
+                            }
+                        }
+
+                        foreach (var innerEntry in fileEntries)
+                        {
+                            fileEntry.Content.Position = innerEntry.Item1;
+
+                            fileEntry.Content.Read(headerBuffer, 0, 60);
+
+                            if (long.TryParse(Encoding.ASCII.GetString(headerBuffer[48..58]), out long innerSize))// header size in bytes
+                            {
+                                if (filename.StartsWith("/"))
+                                {
+                                    if (int.TryParse(filename[1..], out int innerIndex))
+                                    {
+                                        try
+                                        {
+                                            filename = filenameLookup[innerIndex];
+                                        }
+                                        catch (Exception)
+                                        {
+                                            Logger.Debug("Expected to find a filename at index {0}", innerIndex);
+                                        }
+                                    }
+                                }
+                                else
+                                {
+                                    filename = innerEntry.Item2;
+                                }
+                                var entryStream = new FileStream(Path.GetTempFileName(), FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite, 4096, FileOptions.DeleteOnClose);
+                                CopyStreamBytes(fileEntry.Content, entryStream, innerSize);
+                                yield return new FileEntry(filename, entryStream, fileEntry);
+                            }
+                        }
+                        fileEntry.Content.Position = fileEntry.Content.Length - 1;
+                    }
+                    else if (filename.StartsWith("/"))
+                    {
+                        if (int.TryParse(filename[1..], out int index))
+                        {
+                            try
+                            {
+                                filename = filenameLookup[index];
+                            }
+                            catch (Exception)
+                            {
+                                Logger.Debug("Expected to find a filename at index {0}", index);
+                            }
+                        }
+
+                        var entryStream = new FileStream(Path.GetTempFileName(), FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite, 4096, FileOptions.DeleteOnClose);
+                        CopyStreamBytes(fileEntry.Content, entryStream, size);
+
+                        yield return new FileEntry(filename, entryStream, fileEntry, true);
+                    }
+                    else
+                    {
+                        var entryStream = new FileStream(Path.GetTempFileName(), FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite, 4096, FileOptions.DeleteOnClose);
+                        CopyStreamBytes(fileEntry.Content, entryStream, size);
+
+                        yield return new FileEntry(filename, entryStream, fileEntry, true);
+                    }
+                }
+                else
+                {
+                    // Not a valid header, we couldn't parse the file size.
+                    yield break;
+                }
+
+                // Entries are padded on even byte boundaries https://docs.oracle.com/cd/E36784_01/html/E36873/ar.h-3head.html
+                fileEntry.Content.Position = fileEntry.Content.Position % 2 == 1 ? fileEntry.Content.Position + 1 : fileEntry.Content.Position;
+            }
+        }
+
+        /// <summary>
+        ///     Decodes an 8-byte array as a big-endian (most significant byte first) signed 64-bit
+        ///     integer. The input array is left untouched.
+        /// </summary>
+        /// <param name="value"> Exactly 8 bytes, most significant byte first. </param>
+        /// <returns>
+        ///     The decoded value, or -1 when <paramref name="value" /> is null or is not exactly 8 bytes long.
+        /// </returns>
+        public static long Int64FromBigEndianBytes(byte[] value)
+        {
+            if (value == null || value.Length != 8)
+            {
+                return -1;
+            }
+
+            // Fold the bytes instead of reversing the array in place: the in-place reversal used
+            // previously scrambled the caller's buffer on little-endian hosts.
+            long result = 0;
+            foreach (var b in value)
+            {
+                result = (result << 8) | b;
+            }
+            return result;
+        }
+
+        /// <summary>
+        ///     Decodes a 4-byte array as a big-endian (most significant byte first) signed 32-bit
+        ///     integer. The input array is left untouched.
+        /// </summary>
+        /// <param name="value"> Exactly 4 bytes, most significant byte first. </param>
+        /// <returns>
+        ///     The decoded value, or -1 when <paramref name="value" /> is null or is not exactly 4 bytes long.
+        /// </returns>
+        public static int IntFromBigEndianBytes(byte[] value)
+        {
+            if (value == null || value.Length != 4)
+            {
+                return -1;
+            }
+
+            // Fold the bytes instead of reversing the array in place, so the caller's buffer is
+            // never modified regardless of host endianness.
+            int result = 0;
+            foreach (var b in value)
+            {
+                result = (result << 8) | b;
+            }
+            return result;
+        }
+
+        /// <summary>
+        ///     Copies up to <paramref name="bytes" /> bytes from the current position of
+        ///     <paramref name="input" /> to <paramref name="output" />, stopping early if the input
+        ///     runs out of data.
+        /// </summary>
+        /// <param name="input"> Stream to read from. </param>
+        /// <param name="output"> Stream to write to. </param>
+        /// <param name="bytes"> Maximum number of bytes to transfer. </param>
+        internal static void CopyStreamBytes(Stream input, Stream output, long bytes)
+        {
+            var chunk = new byte[32768];
+            var remaining = bytes;
+
+            while (remaining > 0)
+            {
+                // Never request more than what is left of the byte budget.
+                var wanted = remaining < chunk.Length ? (int)remaining : chunk.Length;
+                var got = input.Read(chunk, 0, wanted);
+                if (got <= 0)
+                {
+                    // Input exhausted before the budget was used up.
+                    break;
+                }
+
+                output.Write(chunk, 0, got);
+                remaining -= got;
+            }
+        }
+
+        private static NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();
+    }
+}
--- a/RecursiveExtractor/DebArchiveFile.cs
+++ b/RecursiveExtractor/DebArchiveFile.cs
@ -0,0 +1,48 @@
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Microsoft.CST.OpenSource.RecursiveExtractor
+{
+    /**
+     * Very simple implementation of a .deb format parser, needed for Debian .deb archives.
+     * See: https://en.wikipedia.org/wiki/Deb_(file_format)#/media/File:Deb_File_Structure.svg
+     */
+
+    public static class DebArchiveFile
+    {
+        /// <summary>
+        ///     Enumerates the members stored in a .deb archive, starting at byte offset 72 of the
+        ///     archive content. Each member is a 60-byte header (16-byte name, decimal size in bytes
+        ///     48..57) followed by the member data.
+        /// </summary>
+        /// <param name="fileEntry"> The entry holding the .deb archive. Its Content stream is repositioned while enumerating. </param>
+        /// <returns>
+        ///     One <see cref="FileEntry" /> per member. Enumeration stops at the first header whose size
+        ///     field is not a non-negative integer, or when fewer than 60 bytes remain. Nothing is
+        ///     returned when <paramref name="fileEntry" /> is null.
+        /// </returns>
+        public static IEnumerable<FileEntry> GetFileEntries(FileEntry fileEntry)
+        {
+            if (fileEntry == null)
+            {
+                yield break;
+            }
+
+            // Skip the 8 byte file signature plus the first member (60 byte header and the 4 byte
+            // "2.0\n" version string that file type detection looks for at offset 68).
+            fileEntry.Content.Position = 72;
+            var headerBytes = new byte[60];
+
+            // The header for each file is 60 bytes
+            while (fileEntry.Content.Length - fileEntry.Content.Position >= 60)
+            {
+                if (ReadFully(fileEntry.Content, headerBytes, 60) < 60)
+                {
+                    break;
+                }
+
+                var filename = Encoding.ASCII.GetString(headerBytes, 0, 16).Trim();  // filename is 16 bytes
+                var fileSizeText = Encoding.ASCII.GetString(headerBytes, 48, 10).Trim(); // File size is decimal-encoded, 10 bytes long
+                if (!int.TryParse(fileSizeText, out int fileSize) || fileSize < 0)
+                {
+                    break;
+                }
+
+                // The declared size comes from the (untrusted) archive; never allocate or read more
+                // than what is actually left in the stream.
+                var remaining = fileEntry.Content.Length - fileEntry.Content.Position;
+                var toRead = fileSize > remaining ? (int)remaining : fileSize;
+                var entryContent = new byte[toRead];
+                var read = ReadFully(fileEntry.Content, entryContent, toRead);
+
+                // ar members are padded to even byte boundaries, so skip the pad byte after an
+                // odd-sized member; otherwise the next header would be read one byte off.
+                if (fileEntry.Content.Position % 2 == 1)
+                {
+                    fileEntry.Content.Position += 1;
+                }
+
+                using var stream = new MemoryStream(entryContent, 0, read);
+                yield return new FileEntry(filename, stream, fileEntry);
+            }
+        }
+
+        /// <summary>
+        ///     Reads from <paramref name="source" /> until <paramref name="count" /> bytes have been
+        ///     stored in <paramref name="target" /> or the stream ends.
+        /// </summary>
+        /// <returns> The number of bytes actually read. </returns>
+        private static int ReadFully(Stream source, byte[] target, int count)
+        {
+            var total = 0;
+            while (total < count)
+            {
+                var got = source.Read(target, total, count - total);
+                if (got <= 0)
+                {
+                    break;
+                }
+                total += got;
+            }
+            return total;
+        }
+    }
+}
--- a/RecursiveExtractor/Extractor.cs
+++ b/RecursiveExtractor/Extractor.cs
--- a/RecursiveExtractor/ExtractorOptions.cs
+++ b/RecursiveExtractor/ExtractorOptions.cs
@ -0,0 +1,40 @@
+using System;
+
+namespace Microsoft.CST.OpenSource.RecursiveExtractor
+{
+    /// <summary>
+    ///     Settings bag for the extractor; every property has a default and can be overridden.
+    /// </summary>
+    public class ExtractorOptions
+    {
+        /// <summary>
+        ///     Upper bound on how many items are taken at once when parallelization is enabled.
+        ///     Defaults to 50.
+        /// </summary>
+        public int BatchSize { get; set; } = 50;
+
+        /// <summary>
+        ///     Turns the processing time limit on. Defaults to false.
+        /// </summary>
+        public bool EnableTiming { get; set; }
+
+        /// <summary>
+        ///     When an archive cannot be extracted, return a single file entry for the archive itself.
+        ///     Defaults to true.
+        /// </summary>
+        public bool ExtractSelfOnFail { get; set; } = true;
+
+        /// <summary>
+        ///     Maximum number of bytes to extract from the archive and all embedded archives; 0 removes
+        ///     the limit. Note that <see cref="MaxExtractedBytesRatio" /> may also apply. Defaults to 0.
+        /// </summary>
+        public long MaxExtractedBytes { get; set; }
+
+        /// <summary>
+        ///     By default, extraction stops once the total number of bytes seen exceeds this multiple of
+        ///     the original archive size. Guards against denial of service (zip bombs and the like).
+        ///     Defaults to 60.
+        /// </summary>
+        public double MaxExtractedBytesRatio { get; set; } = 60d;
+
+        /// <summary>
+        ///     When timing is enabled, processing stops after this time span. Guards against denial of
+        ///     service (zip bombs and the like). Defaults to 300 seconds.
+        /// </summary>
+        public TimeSpan Timeout { get; set; } = TimeSpan.FromMinutes(5);
+    }
+}
--- a/RecursiveExtractor/FileEntry.cs
+++ b/RecursiveExtractor/FileEntry.cs
@ -0,0 +1,121 @@
+// Copyright (c) Microsoft Corporation. Licensed under the MIT License.
+
+using System;
+using System.IO;
+using System.Threading.Tasks;
+
+namespace Microsoft.CST.OpenSource.RecursiveExtractor
+{
+    /// <summary>
+    ///     A named stream of file content, optionally linked to the entry it was extracted from.
+    /// </summary>
+    public class FileEntry
+    {
+        // Shared by all instances; resolving the class logger once avoids repeating the lookup for
+        // every entry that is created.
+        private static readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();
+
+        /// <summary>
+        ///     Constructs a FileEntry object from a Stream. If passthroughStream is set to true, and the
+        ///     stream is seekable, it will directly use inputStream (rewound to position 0). Otherwise
+        ///     the full contents of inputStream are copied to a new internal temporary FileStream and
+        ///     the position of a seekable inputStream is restored afterwards. A stream that is not
+        ///     readable (and not passed through) results in empty Content. The finalizer for this
+        ///     class Disposes the contained Stream unless it was passed through.
+        /// </summary>
+        /// <param name="name"> Name of the entry; also the last segment of <see cref="FullPath" />. </param>
+        /// <param name="inputStream"> Stream providing the content. Must not be null. </param>
+        /// <param name="parent"> The entry this one was extracted from, or null for a top level entry. </param>
+        /// <param name="passthroughStream"> When true and inputStream is seekable, use inputStream directly instead of copying it. </param>
+        /// <exception cref="ArgumentNullException"> <paramref name="inputStream" /> is null. </exception>
+        public FileEntry(string name, Stream inputStream, FileEntry? parent = null, bool passthroughStream = false)
+        {
+            if (inputStream == null)
+            {
+                throw new ArgumentNullException(nameof(inputStream));
+            }
+
+            Parent = parent;
+            Name = name;
+            Passthrough = passthroughStream;
+
+            if (parent == null)
+            {
+                ParentPath = null;
+                FullPath = Name;
+            }
+            else
+            {
+                ParentPath = parent.FullPath;
+                // Nesting is expressed with Path.PathSeparator between the parent path and the name.
+                FullPath = $"{ParentPath}{Path.PathSeparator}{Name}";
+            }
+
+            // We want to be able to seek, so ensure any passthrough stream is Seekable
+            if (passthroughStream && inputStream.CanSeek)
+            {
+                Content = inputStream;
+                if (Content.Position != 0)
+                {
+                    Content.Position = 0;
+                }
+            }
+            else if (!inputStream.CanRead)
+            {
+                // Nothing can be copied from an unreadable stream; expose empty content instead of
+                // creating a temporary file and attempting a copy that is bound to fail.
+                Content = new MemoryStream();
+            }
+            else
+            {
+                // Back with a temporary filestream, this is optimized to be cached in memory when possible
+                // automatically by .NET
+                Content = new FileStream(Path.GetTempFileName(), FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite, 4096, FileOptions.DeleteOnClose);
+                long? initialPosition = null;
+
+                if (inputStream.CanSeek)
+                {
+                    initialPosition = inputStream.Position;
+                    if (inputStream.Position != 0)
+                    {
+                        inputStream.Position = 0;
+                    }
+                }
+
+                try
+                {
+                    inputStream.CopyTo(Content);
+                }
+                catch (NotSupportedException)
+                {
+                    try
+                    {
+                        // Fall back to the asynchronous copy and block until it completes.
+                        // RunSynchronously cannot be used here: it throws InvalidOperationException
+                        // for tasks that were not created from a delegate, such as this one.
+                        inputStream.CopyToAsync(Content).GetAwaiter().GetResult();
+                    }
+                    catch (Exception f)
+                    {
+                        Logger.Debug("Failed to copy stream from {0} ({1}:{2})", FullPath, f.GetType(), f.Message);
+                    }
+                }
+                catch (Exception e)
+                {
+                    Logger.Debug("Failed to copy stream from {0} ({1}:{2})", FullPath, e.GetType(), e.Message);
+                }
+
+                // Put a seekable input back where the caller had it.
+                if (initialPosition.HasValue)
+                {
+                    inputStream.Position = initialPosition.Value;
+                }
+
+                Content.Position = 0;
+            }
+        }
+
+        /// <summary> The entry's data, positioned at 0 after construction. </summary>
+        public Stream Content { get; }
+
+        /// <summary> <see cref="ParentPath" /> and <see cref="Name" /> joined by Path.PathSeparator, or just Name for a top level entry. </summary>
+        public string FullPath { get; }
+
+        /// <summary> The name given to this entry. </summary>
+        public string Name { get; }
+
+        /// <summary> The entry this one was extracted from, if any. </summary>
+        public FileEntry? Parent { get; }
+
+        /// <summary> FullPath of <see cref="Parent" />, or null when there is no parent. </summary>
+        public string? ParentPath { get; }
+
+        /// <summary> The passthroughStream value supplied to the constructor. </summary>
+        public bool Passthrough { get; }
+
+        // Streams that were passed through belong to the caller and are left alone; anything else
+        // was created by this entry and is released here.
+        ~FileEntry()
+        {
+            if (!Passthrough)
+            {
+                Content?.Dispose();
+            }
+        }
+    }
+}
--- a/RecursiveExtractor/MiniMagic.cs
+++ b/RecursiveExtractor/MiniMagic.cs
@ -0,0 +1,260 @@
+// Copyright (c) Microsoft Corporation. Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+
+namespace Microsoft.CST.OpenSource.RecursiveExtractor
+{
+    /// <summary>
+    ///     The kinds of archive files that file type detection in this module can report.
+    /// </summary>
+    public enum ArchiveFileType
+    {
+        /// <summary> No known signature or file extension matched. </summary>
+        UNKNOWN = 0,
+
+        /// <summary> Zip archive (also used for the APK, IPA, JAR, EAR and WAR extensions). </summary>
+        ZIP = 1,
+
+        /// <summary> Tar archive (also used for the GEM extension). </summary>
+        TAR = 2,
+
+        /// <summary> XZ compressed file. </summary>
+        XZ = 3,
+
+        /// <summary> Gzip compressed file (also used for the TGZ extension). </summary>
+        GZIP = 4,
+
+        /// <summary> Bzip2 compressed file. </summary>
+        BZIP2 = 5,
+
+        /// <summary> RAR archive. </summary>
+        RAR = 6,
+
+        /// <summary> 7-Zip archive. </summary>
+        P7ZIP = 7,
+
+        /// <summary> Debian package. </summary>
+        DEB = 8,
+
+        /// <summary> Unix ar archive. </summary>
+        AR = 9,
+
+        /// <summary> ISO 9660 disc image. </summary>
+        ISO_9660 = 10,
+
+        /// <summary> VHDX virtual hard disk. </summary>
+        VHDX = 11,
+
+        /// <summary> VHD virtual hard disk. </summary>
+        VHD = 12,
+
+        /// <summary> Windows Imaging Format file. </summary>
+        WIM = 13,
+
+        /// <summary> VMDK virtual disk. </summary>
+        VMDK = 14
+    }
+
+    /// <summary>
+    ///     MiniMagic is a tiny implementation of a file type identifier based on binary signatures.
+    /// </summary>
+    public static class MiniMagic
+    {
+        /// <summary>
+        ///     Fallback using file extensions in case the binary signature doesn't match.
+        ///     Keys are upper-case extensions without the leading dot.
+        /// </summary>
+        private static readonly Dictionary<string, ArchiveFileType> FileExtensionMap = new Dictionary<string, ArchiveFileType>()
+        {
+            {"ZIP", ArchiveFileType.ZIP },
+            {"APK", ArchiveFileType.ZIP },
+            {"IPA", ArchiveFileType.ZIP },
+            {"JAR", ArchiveFileType.ZIP },
+            {"EAR", ArchiveFileType.ZIP },
+            {"WAR", ArchiveFileType.ZIP },
+
+            {"GZ", ArchiveFileType.GZIP },
+            {"TGZ", ArchiveFileType.GZIP },
+
+            {"TAR", ArchiveFileType.TAR },
+            {"GEM", ArchiveFileType.TAR },
+
+            {"XZ", ArchiveFileType.XZ },
+
+            {"BZ2", ArchiveFileType.BZIP2 },
+
+            {"RAR", ArchiveFileType.RAR },
+            {"RAR4", ArchiveFileType.RAR },
+
+            {"7Z", ArchiveFileType.P7ZIP },
+
+            {"DEB", ArchiveFileType.DEB },
+
+            {"AR", ArchiveFileType.AR },
+
+            {"ISO", ArchiveFileType.ISO_9660 },
+
+            {"VHDX", ArchiveFileType.VHDX },
+
+            {"VHD", ArchiveFileType.VHD },
+
+            {"WIM", ArchiveFileType.WIM },
+
+            {"VMDK", ArchiveFileType.VMDK }
+        };
+
+        /// <summary>
+        ///     Detects the type of the file at the given path.
+        /// </summary>
+        /// <param name="filename"> Path of the file to inspect. </param>
+        /// <returns> The detected type, or <see cref="ArchiveFileType.UNKNOWN" /> when nothing matches. </returns>
+        public static ArchiveFileType DetectFileType(string filename)
+        {
+            // Detection only reads, so open read-only; the default access for this constructor is
+            // read/write, which fails for files that cannot be written.
+#pragma warning disable SEC0116 // Path Tampering Unvalidated File Path
+            using var fs = new FileStream(filename, FileMode.Open, FileAccess.Read);
+#pragma warning restore SEC0116 // Path Tampering Unvalidated File Path
+
+            // If you don't pass passthroughStream: true here it will read the entire file into the stream in
+            // FileEntry This way it will only read the bytes minimagic uses
+            var fileEntry = new FileEntry(filename, fs, null, passthroughStream: true);
+            return DetectFileType(fileEntry);
+        }
+
+        /// <summary>
+        ///     Detects the type of a file from its binary signature, falling back to the extension of
+        ///     the entry name. The position of the content stream is restored after every probe.
+        /// </summary>
+        /// <param name="fileEntry"> FileEntry containing the file data. </param>
+        /// <returns>
+        ///     The detected type, or <see cref="ArchiveFileType.UNKNOWN" /> when fileEntry is null or
+        ///     neither a signature nor the extension matches.
+        /// </returns>
+        public static ArchiveFileType DetectFileType(FileEntry fileEntry)
+        {
+            if (fileEntry == null)
+            {
+                return ArchiveFileType.UNKNOWN;
+            }
+
+            var content = fileEntry.Content;
+            var initialPosition = content.Position;
+            byte[] buffer = new byte[9];
+
+            if (content.Length >= 9 && ReadAt(content, 0, buffer, 9, initialPosition))
+            {
+                if (buffer[0] == 0x50 && buffer[1] == 0x4B && buffer[2] == 0x03 && buffer[3] == 0x04)
+                {
+                    return ArchiveFileType.ZIP;
+                }
+
+                if (buffer[0] == 0x1F && buffer[1] == 0x8B)
+                {
+                    return ArchiveFileType.GZIP;
+                }
+
+                if (buffer[0] == 0xFD && buffer[1] == 0x37 && buffer[2] == 0x7A && buffer[3] == 0x58 && buffer[4] == 0x5A && buffer[5] == 0x00)
+                {
+                    return ArchiveFileType.XZ;
+                }
+                if (buffer[0] == 0x42 && buffer[1] == 0x5A && buffer[2] == 0x68)
+                {
+                    return ArchiveFileType.BZIP2;
+                }
+                if ((buffer[0] == 0x52 && buffer[1] == 0x61 && buffer[2] == 0x72 && buffer[3] == 0x21 && buffer[4] == 0x1A && buffer[5] == 0x07 && buffer[6] == 0x00) ||
+                    (buffer[0] == 0x52 && buffer[1] == 0x61 && buffer[2] == 0x72 && buffer[3] == 0x21 && buffer[4] == 0x1A && buffer[5] == 0x07 && buffer[6] == 0x01 && buffer[7] == 0x00))
+                {
+                    return ArchiveFileType.RAR;
+                }
+                if (buffer[0] == 0x37 && buffer[1] == 0x7A && buffer[2] == 0xBC && buffer[3] == 0xAF && buffer[4] == 0x27 && buffer[5] == 0x1C)
+                {
+                    return ArchiveFileType.P7ZIP;
+                }
+
+                var firstEight = Encoding.ASCII.GetString(buffer, 0, 8);
+                if (firstEight == "MSWIM\0\0\0" || firstEight == "WLPWM\0\0\0")
+                {
+                    return ArchiveFileType.WIM;
+                }
+                if (Encoding.ASCII.GetString(buffer, 0, 4) == "KDMV")
+                {
+                    byte[] secondToken = new byte[21];
+                    if (ReadAt(content, 512, secondToken, 21, initialPosition) &&
+                        Encoding.ASCII.GetString(secondToken) == "# Disk DescriptorFile")
+                    {
+                        return ArchiveFileType.VMDK;
+                    }
+                }
+                // some kind of unix Archive https://en.wikipedia.org/wiki/Ar_(Unix)
+                if (buffer[0] == 0x21 && buffer[1] == 0x3c && buffer[2] == 0x61 && buffer[3] == 0x72 && buffer[4] == 0x63 && buffer[5] == 0x68 && buffer[6] == 0x3e)
+                {
+                    // .deb https://manpages.debian.org/unstable/dpkg-dev/deb.5.en.html
+                    // Probe into a dedicated buffer so the signature bytes read above stay intact
+                    // for the checks that follow.
+                    byte[] debVersion = new byte[4];
+                    if (ReadAt(content, 68, debVersion, 4, initialPosition) &&
+                        Encoding.ASCII.GetString(debVersion) == "2.0\n")
+                    {
+                        return ArchiveFileType.DEB;
+                    }
+
+                    // Created by GNU ar https://en.wikipedia.org/wiki/Ar_(Unix)#System_V_(or_GNU)_variant
+                    byte[] headerBuffer = new byte[60];
+                    if (ReadAt(content, 8, headerBuffer, 60, initialPosition) &&
+                        // The size field is untrusted file content: a truncated or malformed header
+                        // must not throw, it just means this is not a recognizable ar archive.
+                        long.TryParse(Encoding.ASCII.GetString(headerBuffer, 48, 10), out long size) && // header size in bytes
+                        size > 0 &&
+                        // Defined ending characters for a header
+                        headerBuffer[58] == '`' && headerBuffer[59] == '\n')
+                    {
+                        return ArchiveFileType.AR;
+                    }
+                }
+                // https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-VHDX/%5bMS-VHDX%5d.pdf
+                if (Encoding.UTF8.GetString(buffer, 0, 8).Equals("vhdxfile"))
+                {
+                    return ArchiveFileType.VHDX;
+                }
+            }
+
+            if (content.Length >= 262 && ReadAt(content, 257, buffer, 5, initialPosition))
+            {
+                if (buffer[0] == 0x75 && buffer[1] == 0x73 && buffer[2] == 0x74 && buffer[3] == 0x61 && buffer[4] == 0x72)
+                {
+                    return ArchiveFileType.TAR;
+                }
+            }
+
+            // ISO Format https://en.wikipedia.org/wiki/ISO_9660#Overall_structure Reserved space + 1 header
+            if (content.Length > 32768 + 2048 && ReadAt(content, 32769, buffer, 5, initialPosition))
+            {
+                if (buffer[0] == 'C' && buffer[1] == 'D' && buffer[2] == '0' && buffer[3] == '0' && buffer[4] == '1')
+                {
+                    return ArchiveFileType.ISO_9660;
+                }
+            }
+
+            //https://www.microsoft.com/en-us/download/details.aspx?id=23850 - 'Hard Disk Footer Format'
+            // Unlike other formats the magic string is stored in the footer, which is either the last 511 or 512 bytes
+            // The magic string is Magic string "conectix" (63 6F 6E 65 63 74 69 78)
+            if (content.Length > 512)
+            {
+                byte[] vhdFooterCookie = new byte[] { 0x63, 0x6F, 0x6E, 0x65, 0x63, 0x74, 0x69, 0x78 };
+
+                // Footer position
+                if (ReadAt(content, content.Length - 0x200, buffer, 8, initialPosition) &&
+                    vhdFooterCookie.SequenceEqual(buffer.Take(8)))
+                {
+                    return ArchiveFileType.VHD;
+                }
+
+                // If created on legacy platform footer is 511 bytes instead
+                if (ReadAt(content, content.Length - 0x1FF, buffer, 8, initialPosition) &&
+                    vhdFooterCookie.SequenceEqual(buffer.Take(8)))
+                {
+                    return ArchiveFileType.VHD;
+                }
+            }
+
+            // Fall back to file extensions
+            string fileExtension = Path.GetExtension(fileEntry.Name.ToUpperInvariant());
+
+            if (fileExtension.StartsWith(".", StringComparison.Ordinal))
+            {
+                fileExtension = fileExtension.Substring(1);
+            }
+            if (!FileExtensionMap.TryGetValue(fileExtension, out ArchiveFileType fileType))
+            {
+                fileType = ArchiveFileType.UNKNOWN;
+            }
+            return fileType;
+        }
+
+        /// <summary>
+        ///     Reads <paramref name="count" /> bytes starting at <paramref name="offset" /> into the
+        ///     start of <paramref name="target" />, then moves the stream to
+        ///     <paramref name="restorePosition" />.
+        /// </summary>
+        /// <returns> True when all requested bytes were read; false when the stream ended first. </returns>
+        private static bool ReadAt(Stream stream, long offset, byte[] target, int count, long restorePosition)
+        {
+            var total = 0;
+            try
+            {
+                stream.Position = offset;
+                // A single Read call may return fewer bytes than requested, so keep reading.
+                while (total < count)
+                {
+                    var got = stream.Read(target, total, count - total);
+                    if (got <= 0)
+                    {
+                        break;
+                    }
+                    total += got;
+                }
+            }
+            finally
+            {
+                stream.Position = restorePosition;
+            }
+            return total == count;
+        }
+    }
+}
--- a/RecursiveExtractor/Program.cs
+++ b/RecursiveExtractor/Program.cs
@ -0,0 +1,12 @@
+using System;
+
+namespace RecursiveExtractor
+{
+    /// <summary>
+    ///     Placeholder console entry point.
+    /// </summary>
+    internal class Program
+    {
+        /// <summary>
+        ///     Writes a greeting to standard output.
+        /// </summary>
+        /// <param name="args"> Command line arguments; not used. </param>
+        private static void Main(string[] args) => Console.WriteLine("Hello World!");
+    }
+}
--- a/RecursiveExtractor/README.md
+++ b/RecursiveExtractor/README.md
@ -0,0 +1,57 @@
+## RecursiveExtractor
+
+RecursiveExtractor is a general-purpose file extractor.
+
+### Format Support
+
+RecursiveExtractor supports extracting the following types of archives:
+
+* GNU AR
+* BZip2
+* [deb](https://en.wikipedia.org/wiki/Deb_(file_format))
+* ISO
+* tar
+* VHD
+* VHDX
+* VMDK
+* WIM
+* XZ
+* zip
+
+## Using RecursiveExtractor
+
+To use RecursiveExtractor, just instantiate an `Extractor` object and call the `ExtractFile`
+method with either a filename or a byte array. This method will return an IEnumerable
+of FileEntry objects, each one of which will contain the name of the file and its 
+contents, plus some additional metadata. 
+
+```csharp
+using Microsoft.CST.OpenSource.RecursiveExtractor;
+
+...
+
+// Initialize the RecursiveExtractor extractor
+var extractor = new Extractor();
+
+// Extract from an existing file
+foreach (var fileEntry in extractor.ExtractFile("test.zip"))
+{
+    Console.WriteLine(fileEntry.FullPath);
+}
+
+// Extract from a byte array
+byte[] bytes = ...;
+// The "nonexistent.zip" name doesn't really matter, but is used as part of the
+// FileEntry.FullPath string.
+foreach (var fileEntry in extractor.ExtractFile("nonexistent.zip", bytes))
+{
+    Console.WriteLine(fileEntry.FullPath);
+}
+```
+
+## Issues
+
+If you find any issues with RecursiveExtractor, please [open an issue](https://github.com/Microsoft/OSSGadget/issues/new)
+in the [Microsoft/OSSGadget](https://github.com/Microsoft/OSSGadget) repository.
+
+
--- a/RecursiveExtractor/Range.cs
+++ b/RecursiveExtractor/Range.cs
@ -0,0 +1,276 @@
+// https://github.com/dotnet/corefx/blob/1597b894a2e9cac668ce6e484506eca778a85197/src/Common/src/CoreLib/System/Index.cs
+// https://github.com/dotnet/corefx/blob/1597b894a2e9cac668ce6e484506eca778a85197/src/Common/src/CoreLib/System/Range.cs
+
+#if NETSTANDARD2_0
+using System.Runtime.CompilerServices;
+
+namespace System
+{
+    /// <summary>Represent a type can be used to index a collection either from the start or the end.</summary>
+    /// <remarks>
+    /// Index is used by the C# compiler to support the new index syntax
+    /// <code>
+    /// int[] someArray = new int[5] { 1, 2, 3, 4, 5 } ;
+    /// int lastElement = someArray[^1]; // lastElement = 5
+    /// </code>
+    /// </remarks>
+    internal readonly struct Index : IEquatable<Index>
+    {
+        // Non-negative: offset counted from the start.
+        // Negative: bitwise complement of an offset counted from the end.
+        private readonly int _value;
+
+        /// <summary>Creates an Index from a value and a flag telling whether it counts from the start or from the end.</summary>
+        /// <param name="value">The index value; must be zero or positive.</param>
+        /// <param name="fromEnd">True when the index counts from the end.</param>
+        /// <remarks>
+        /// For an index counted from the end, 1 refers to the last element and 0 refers to the position just past the last element.
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public Index(int value, bool fromEnd = false)
+        {
+            if (value < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(value), "value must be non-negative");
+            }
+
+            _value = fromEnd ? ~value : value;
+        }
+
+        // Stores an already-encoded value as is; used internally to bypass the argument check.
+        private Index(int value)
+        {
+            _value = value;
+        }
+
+        /// <summary>An Index referring to the first element.</summary>
+        public static Index Start => new Index(0);
+
+        /// <summary>An Index referring to the position just past the last element.</summary>
+        public static Index End => new Index(~0);
+
+        /// <summary>Creates an Index counted from the start.</summary>
+        /// <param name="value">The offset from the start; must be zero or positive.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Index FromStart(int value)
+        {
+            if (value >= 0)
+            {
+                return new Index(value);
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(value), "value must be non-negative");
+        }
+
+        /// <summary>Creates an Index counted from the end.</summary>
+        /// <param name="value">The offset from the end; must be zero or positive.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Index FromEnd(int value)
+        {
+            if (value >= 0)
+            {
+                return new Index(~value);
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(value), "value must be non-negative");
+        }
+
+        /// <summary>The index value, without the from-end marker.</summary>
+        public int Value => _value < 0 ? ~_value : _value;
+
+        /// <summary>True when the index counts from the end, false when it counts from the start.</summary>
+        public bool IsFromEnd => _value < 0;
+
+        /// <summary>Computes the offset from the start for a collection of the given length.</summary>
+        /// <param name="length">Length of the collection the Index is applied to.</param>
+        /// <remarks>
+        /// Neither the length nor the resulting offset is validated here: the result may be negative or
+        /// larger than the length.
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public int GetOffset(int length)
+        {
+            if (!IsFromEnd)
+            {
+                return _value;
+            }
+
+            // _value holds ~n for "n from the end", and ~n == -n - 1, so adding (length + 1) gives length - n.
+            return _value + (length + 1);
+        }
+
+        /// <summary>Indicates whether the given object is an Index equal to this one.</summary>
+        /// <param name="value">An object to compare with this object</param>
+        public override bool Equals(object? value) => value is Index other && other._value == _value;
+
+        /// <summary>Indicates whether the given Index is equal to this one.</summary>
+        /// <param name="other">An Index to compare with this object</param>
+        public bool Equals(Index other) => other._value == _value;
+
+        /// <summary>Returns the hash code for this instance.</summary>
+        public override int GetHashCode() => _value;
+
+        /// <summary>Converts an integer to an Index counted from the start.</summary>
+        public static implicit operator Index(int value) => FromStart(value);
+
+        /// <summary>Returns the index value as text, prefixed with "^" when it counts from the end.</summary>
+        public override string ToString()
+        {
+            var magnitude = ((uint)Value).ToString();
+            return IsFromEnd ? "^" + magnitude : magnitude;
+        }
+    }
+
+    /// <summary>Describes a slice of a collection by a pair of indexes: where it starts and where it stops.</summary>
+    /// <remarks>
+    /// The C# compiler relies on Range to support the range syntax.
+    /// <code>
+    /// int[] someArray = new int[5] { 1, 2, 3, 4, 5 };
+    /// int[] subArray1 = someArray[0..2]; // { 1, 2 }
+    /// int[] subArray2 = someArray[1..^0]; // { 2, 3, 4, 5 }
+    /// </code>
+    /// </remarks>
+    internal readonly struct Range : IEquatable<Range>
+    {
+        /// <summary>Gets the index of the first element of the range (inclusive).</summary>
+        public Index Start { get; }
+
+        /// <summary>Gets the index at which the range stops (exclusive).</summary>
+        public Index End { get; }
+
+        /// <summary>Creates a Range from its two boundary indexes.</summary>
+        /// <param name="start">The inclusive start index of the range.</param>
+        /// <param name="end">The exclusive end index of the range.</param>
+        public Range(Index start, Index end)
+        {
+            Start = start;
+            End = end;
+        }
+
+        /// <summary>Determines whether the supplied object is a Range with the same start and end indexes.</summary>
+        /// <param name="value">The object to compare against this instance.</param>
+        /// <returns>true when <paramref name="value"/> is a Range whose Start and End both equal this instance's; otherwise false.</returns>
+        public override bool Equals(object? value)
+        {
+            return value is Range other && Equals(other);
+        }
+
+        /// <summary>Determines whether another Range has the same start and end indexes.</summary>
+        /// <param name="other">The Range to compare against this instance.</param>
+        /// <returns>true when both Start and End are equal; otherwise false.</returns>
+        public bool Equals(Range other)
+        {
+            return other.Start.Equals(Start) && other.End.Equals(End);
+        }
+
+        /// <summary>Gets a hash code that combines the hash codes of Start and End.</summary>
+        public override int GetHashCode() => Start.GetHashCode() * 31 + End.GetHashCode();
+
+        /// <summary>Builds the string form of this Range: the start index, "..", then the end index.</summary>
+        public override string ToString() => string.Concat(Start.ToString(), "..", End.ToString());
+
+        /// <summary>Creates a Range that runs from the given start index to Index.End.</summary>
+        /// <param name="start">The inclusive start index of the range.</param>
+        public static Range StartAt(Index start)
+        {
+            return new Range(start, Index.End);
+        }
+
+        /// <summary>Creates a Range that runs from Index.Start to the given end index.</summary>
+        /// <param name="end">The exclusive end index of the range.</param>
+        public static Range EndAt(Index end)
+        {
+            return new Range(Index.Start, end);
+        }
+
+        /// <summary>Gets a Range that runs from Index.Start to Index.End.</summary>
+        public static Range All
+        {
+            get { return new Range(Index.Start, Index.End); }
+        }
+
+        /// <summary>Resolves this range against a collection length into a start offset and an element count.</summary>
+        /// <param name="length">The length of the collection the range is applied to. It is expected to be non-negative.</param>
+        /// <returns>A tuple of the resolved start offset and the number of elements in the range.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">The resolved range does not fit inside <paramref name="length"/>, or its start lies after its end.</exception>
+        /// <remarks>
+        /// For performance reasons <paramref name="length"/> is not checked for negative values;
+        /// Range is expected to be used with collections whose length/count is never negative.
+        /// The resolved range itself is still validated against the length.
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public (int Offset, int Length) GetOffsetAndLength(int length)
+        {
+            // From-end indexes are measured back from length; from-start indexes are used as-is.
+            var lower = Start;
+            var first = lower.IsFromEnd ? length - lower.Value : lower.Value;
+
+            var upper = End;
+            var last = upper.IsFromEnd ? length - upper.Value : upper.Value;
+
+            // Comparing as uint makes a negative value look huge, so one comparison per bound
+            // rejects both "negative" and "too large".
+            if (!((uint)last <= (uint)length && (uint)first <= (uint)last))
+            {
+                throw new ArgumentOutOfRangeException(nameof(length));
+            }
+
+            return (first, last - first);
+        }
+    }
+}
+
+namespace System.Runtime.CompilerServices
+{
+    internal static class RuntimeHelpers
+    {
+        /// <summary>
+        /// Copies the elements selected by a range out of an array into a new array.
+        /// </summary>
+        /// <param name="array">The array to slice.</param>
+        /// <param name="range">The range of elements to copy, resolved against the array's length.</param>
+        /// <returns>An array holding the selected elements; when the array is exactly a T[] and the range is empty, the shared empty array.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
+        public static T[] GetSubArray<T>(T[] array, Range range)
+        {
+            if (array == null)
+            {
+                throw new ArgumentNullException(nameof(array));
+            }
+
+            var (start, count) = range.GetOffsetAndLength(array.Length);
+
+            if (!(default(T) != null || typeof(T[]) == array.GetType()))
+            {
+                // The array is actually a U[] where U:T, so the copy is created with the
+                // array's own runtime element type.
+                var derivedCopy = (T[])Array.CreateInstance(array.GetType().GetElementType(), count);
+                Array.Copy(array, start, derivedCopy, 0, count);
+                return derivedCopy;
+            }
+
+            // From here on the array is known to be exactly a T[].
+            if (count == 0)
+            {
+                return Array.Empty<T>();
+            }
+
+            var copy = new T[count];
+            Array.Copy(array, start, copy, 0, count);
+            return copy;
+        }
+    }
+}
+#endif
--- a/RecursiveExtractor/RecursiveExtractor.csproj
+++ b/RecursiveExtractor/RecursiveExtractor.csproj
@ -0,0 +1,53 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <!-- Build settings and NuGet package metadata for the RecursiveExtractor library. -->
+  <PropertyGroup>
+    <TargetFrameworks>netstandard2.0;netstandard2.1</TargetFrameworks>
+    <RootNamespace>Microsoft.CST.OpenSource</RootNamespace>
+    <Version>0.0.0-placeholder</Version>
+    <Company>Microsoft</Company>
+    <Authors>Microsoft</Authors>
+    <Copyright>© Microsoft Corporation. All rights reserved.</Copyright>
+    <RepositoryType>git</RepositoryType>
+    <RepositoryUrl>https://github.com/Microsoft/OSSGadget</RepositoryUrl>
+    <Configurations>Debug;Release</Configurations>
+    <LangVersion>8.0</LangVersion>
+    <Nullable>enable</Nullable>
+    <GeneratePackageOnBuild>false</GeneratePackageOnBuild>
+    <Description>RecursiveExtractor is able to process the following formats: ar, bzip2, deb, gzip, iso, tar, vhd, vhdx, vmdk, wim, xzip, and zip. RecursiveExtractor automatically detects the archive type and fails gracefully when attempting to process malformed content.</Description>
+    <PackageId>Microsoft.CST.RecursiveExtractor</PackageId>
+    <PackageTags>unzip extract extractor</PackageTags>
+    <PackageVersion>0.0.0-placeholder</PackageVersion>
+    <PackageProjectUrl>https://github.com/microsoft/OSSGadget</PackageProjectUrl>
+    <PackageIcon>icon-128.png</PackageIcon>
+    <PackageLicenseFile>LICENSE.txt</PackageLicenseFile>
+    <IncludeSymbols>true</IncludeSymbols>
+    <SymbolPackageFormat>snupkg</SymbolPackageFormat>
+  </PropertyGroup>
+  <!-- NuGet package dependencies. -->
+  <ItemGroup>
+    <PackageReference Include="DiscUtils.Btrfs" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.Core" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.Ext" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.Fat" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.HfsPlus" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.Iso9660" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.Ntfs" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.Vhd" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.Vhdx" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.Vmdk" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.Wim" Version="0.15.1-ci0002" />
+    <PackageReference Include="DiscUtils.Xfs" Version="0.15.1-ci0002" />
+    <PackageReference Include="NLog" Version="4.7.2" />
+    <PackageReference Include="SharpCompress" Version="0.25.1" />
+    <PackageReference Include="SharpZipLib" Version="1.2.0" />
+  </ItemGroup>
+  <!-- NOTE(review): Update only modifies a reference included elsewhere; confirm where Nerdbank.GitVersioning is added. -->
+  <ItemGroup>
+    <PackageReference Update="Nerdbank.GitVersioning" Version="3.1.91" />
+  </ItemGroup>
+  <!-- Files packed into the root of the NuGet package; they back PackageLicenseFile and PackageIcon above. -->
+  <ItemGroup>
+    <None Include="..\..\..\LICENSE.txt" Pack="true" PackagePath="" />
+    <None Include="..\..\..\icon-128.png" Pack="true" PackagePath="" />
+  </ItemGroup>
+</Project>