This commit is contained in:
Gabe Stocco 2020-07-31 08:49:16 -07:00
Родитель b76ef532ee
Коммит a2acd2635b
46 изменённых файлов: 12098 добавлений и 21 удалений

5
.gitignore поставляемый Normal file
Просмотреть файл

@ -0,0 +1,5 @@
RecursiveExtractor/bin
RecursiveExtractor/obj
RecursiveExtractor.Tests/bin
RecursiveExtractor.Tests/obj
.vs

21
LICENSE
Просмотреть файл

@ -1,21 +0,0 @@
MIT License
Copyright (c) Microsoft Corporation.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE

8
PRIVACY.md Normal file
Просмотреть файл

@ -0,0 +1,8 @@
# Privacy and Telemetry Notice
## Data Collection
The Recursive Extractor software itself does *not* collect information about your
use of the software, and therefore does not send such information to any source,
including to Microsoft. Nevertheless, our privacy statement is located at
https://go.microsoft.com/fwlink/?LinkID=824704.

Просмотреть файл

@ -0,0 +1,203 @@
// Copyright (c) Microsoft Corporation. Licensed under the MIT License.
using Microsoft.CST.OpenSource.RecursiveExtractor;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using NLog;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace Microsoft.CST.OpenSource.Tests
{
[TestClass]
public class ExtractorTests
{
/// <summary>
/// Extracts every sample archive from disk and checks how many entries come out.
/// File names must match the files under TestData exactly (including case) so the
/// test also passes on case-sensitive file systems.
/// </summary>
/// <param name="fileName">Name of the sample file inside the TestData directory.</param>
/// <param name="parallel">Value forwarded to the extractor's parallel switch.</param>
/// <param name="expectedNumFiles">Number of entries the extraction must produce.</param>
[DataTestMethod]
[DataRow("Shared.zip", false)]
[DataRow("Shared.zip", true)]
[DataRow("Shared.7z", false)]
[DataRow("Shared.7z", true)]
[DataRow("Shared.tar", false)]
[DataRow("Shared.tar", true)]
[DataRow("Shared.rar", false)]
[DataRow("Shared.rar", true)]
[DataRow("Shared.rar4", false)]
[DataRow("Shared.rar4", true)]
[DataRow("Shared.tar.bz2", false)]
[DataRow("Shared.tar.bz2", true)]
[DataRow("Shared.tar.gz", false)]
[DataRow("Shared.tar.gz", true)]
[DataRow("Shared.tar.xz", false)]
[DataRow("Shared.tar.xz", true)]
[DataRow("sysvbanner_1.0-17fakesync1_amd64.deb", true, 6)]
[DataRow("sysvbanner_1.0-17fakesync1_amd64.deb", false, 6)]
[DataRow("Shared.a", false, 1)]
[DataRow("Shared.a", true, 1)]
[DataRow("Shared.deb", false)]
[DataRow("Shared.deb", true)]
[DataRow("Shared.ar", false)]
[DataRow("Shared.ar", true)]
[DataRow("Shared.iso", false)]
[DataRow("Shared.iso", true)]
[DataRow("Shared.vhd", false, 29)] // 26 + Some invisible system files
[DataRow("Shared.vhd", true, 29)]
[DataRow("Shared.vhdx", false)]
[DataRow("Shared.vhdx", true)]
[DataRow("Shared.wim", false)]
[DataRow("Shared.wim", true)]
[DataRow("Empty.vmdk", false, 0)]
[DataRow("Empty.vmdk", true, 0)]
[DataRow("TextFile.md", false, 1)]
[DataRow("TextFile.md", true, 1)]
public void ExtractArchive(string fileName, bool parallel, int expectedNumFiles = 26)
{
    var extractor = new Extractor();
    var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", fileName);

    var results = extractor.ExtractFile(path, parallel).ToList();

    // AreEqual reports both numbers on failure, unlike IsTrue(a == b).
    Assert.AreEqual(expectedNumFiles, results.Count, $"Unexpected number of entries extracted from {fileName}.");
}
/// <summary>
/// Same matrix as the file based extraction test, but the archive is handed to the
/// extractor as an already opened stream.
/// </summary>
/// <param name="fileName">Name of the sample file inside the TestData directory.</param>
/// <param name="parallel">Value forwarded to the extractor's parallel switch.</param>
/// <param name="expectedNumFiles">Number of entries the extraction must produce.</param>
[DataTestMethod]
[DataRow("Shared.zip", false)]
[DataRow("Shared.zip", true)]
[DataRow("Shared.7z", false)]
[DataRow("Shared.7z", true)]
[DataRow("Shared.tar", false)]
[DataRow("Shared.tar", true)]
[DataRow("Shared.rar", false)]
[DataRow("Shared.rar", true)]
[DataRow("Shared.rar4", false)]
[DataRow("Shared.rar4", true)]
[DataRow("Shared.tar.bz2", false)]
[DataRow("Shared.tar.bz2", true)]
[DataRow("Shared.tar.gz", false)]
[DataRow("Shared.tar.gz", true)]
[DataRow("Shared.tar.xz", false)]
[DataRow("Shared.tar.xz", true)]
[DataRow("sysvbanner_1.0-17fakesync1_amd64.deb", true, 6)]
[DataRow("sysvbanner_1.0-17fakesync1_amd64.deb", false, 6)]
[DataRow("Shared.a", false, 1)]
[DataRow("Shared.a", true, 1)]
[DataRow("Shared.deb", false)]
[DataRow("Shared.deb", true)]
[DataRow("Shared.ar", false)]
[DataRow("Shared.ar", true)]
[DataRow("Shared.iso", false)]
[DataRow("Shared.iso", true)]
[DataRow("Shared.vhd", false, 29)] // 26 + Some invisible system files
[DataRow("Shared.vhd", true, 29)]
[DataRow("Shared.vhdx", false)]
[DataRow("Shared.vhdx", true)]
[DataRow("Shared.wim", false)]
[DataRow("Shared.wim", true)]
[DataRow("Empty.vmdk", false, 0)]
[DataRow("Empty.vmdk", true, 0)]
[DataRow("TextFile.md", false, 1)]
[DataRow("TextFile.md", true, 1)]
public void ExtractArchiveFromStream(string fileName, bool parallel, int expectedNumFiles = 26)
{
    var extractor = new Extractor();
    var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", fileName);

    // Open read-only: the test never writes, and read/write access fails on
    // read-only test data. The using declaration closes the stream.
    using var stream = File.OpenRead(path);
    var results = extractor.ExtractStream(path, stream, parallel).ToList();

    Assert.AreEqual(expectedNumFiles, results.Count, $"Unexpected number of entries extracted from {fileName}.");
}
/// <summary>
/// Extracts an archive that itself contains archives and checks the total number of
/// entries found across all nesting levels.
/// </summary>
/// <param name="fileName">Name of the sample file inside the TestData directory.</param>
/// <param name="parallel">Value forwarded to the extractor's parallel switch.</param>
/// <param name="expectedNumFiles">Number of entries the extraction must produce.</param>
[DataTestMethod]
[DataRow("Nested.zip", false, 26 * 8)]
[DataRow("Nested.zip", true, 26 * 8)]
public void ExtractNestedArchive(string fileName, bool parallel, int expectedNumFiles)
{
    var extractor = new Extractor();
    var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", fileName);

    // Materialize once so the sequence is enumerated exactly one time.
    var results = extractor.ExtractFile(path, parallel).ToList();

    Assert.AreEqual(expectedNumFiles, results.Count, $"Unexpected number of entries extracted from {fileName}.");
}
/// <summary>
/// Checks file type detection three ways: from content at position 0, from content
/// when the stream is positioned elsewhere (the position must be left untouched), and
/// from the file name alone when no content is available.
/// </summary>
/// <param name="fileName">Name of the sample file inside the TestData directory.</param>
/// <param name="expectedArchiveFileType">Type the detector must report.</param>
[DataTestMethod]
[DataRow("Shared.zip", ArchiveFileType.ZIP)]
[DataRow("Shared.7z", ArchiveFileType.P7ZIP)]
[DataRow("Shared.tar", ArchiveFileType.TAR)]
[DataRow("Shared.rar", ArchiveFileType.RAR)]
[DataRow("Shared.rar4", ArchiveFileType.RAR)]
[DataRow("Shared.tar.bz2", ArchiveFileType.BZIP2)]
[DataRow("Shared.tar.gz", ArchiveFileType.GZIP)]
[DataRow("Shared.tar.xz", ArchiveFileType.XZ)]
[DataRow("sysvbanner_1.0-17fakesync1_amd64.deb", ArchiveFileType.DEB)]
[DataRow("Shared.a", ArchiveFileType.UNKNOWN)]
[DataRow("Shared.deb", ArchiveFileType.DEB)]
[DataRow("Shared.ar", ArchiveFileType.AR)]
[DataRow("Shared.iso", ArchiveFileType.ISO_9660)]
[DataRow("Shared.vhd", ArchiveFileType.VHD)]
[DataRow("Shared.vhdx", ArchiveFileType.VHDX)]
[DataRow("Shared.wim", ArchiveFileType.WIM)]
[DataRow("Empty.vmdk", ArchiveFileType.VMDK)]
[DataRow("TextFile.md", ArchiveFileType.UNKNOWN)]
public void TestMiniMagic(string fileName, ArchiveFileType expectedArchiveFileType)
{
    var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", fileName);
    // Read-only access is all the test needs.
    using FileStream fs = File.OpenRead(path);

    // Test just based on the content
    var fileEntry = new FileEntry("NoName", fs);
    Assert.AreEqual(expectedArchiveFileType, MiniMagic.DetectFileType(fileEntry));
    Assert.AreEqual(0L, fileEntry.Content.Position);

    // Should also work if the stream doesn't start at 0
    fileEntry.Content.Position = 10;
    Assert.AreEqual(expectedArchiveFileType, MiniMagic.DetectFileType(fileEntry));
    Assert.AreEqual(10L, fileEntry.Content.Position);

    // We should also detect just on file names if the content doesn't match
    var nameOnlyEntry = new FileEntry(fileName, new MemoryStream());
    Assert.AreEqual(expectedArchiveFileType, MiniMagic.DetectFileType(nameOnlyEntry));
}
/// <summary>
/// Feeds known quines and decompression bombs to the extractor. The test passes only
/// when extraction is aborted with an <see cref="OverflowException"/>; finishing
/// normally or failing with any other exception is reported as a failure.
/// </summary>
/// <param name="fileName">Name of the sample file inside the TestData directory.</param>
/// <param name="parallel">Value forwarded to the extractor's parallel switch.</param>
[DataTestMethod]
[DataRow("droste.zip", false)]
[DataRow("droste.zip", true)]
[DataRow("10GB.7z.bz2", false)]
[DataRow("10GB.7z.bz2", true)]
[DataRow("10GB.gz.bz2", false)]
[DataRow("10GB.gz.bz2", true)]
[DataRow("10GB.rar.bz2", false)]
[DataRow("10GB.rar.bz2", true)]
[DataRow("10GB.xz.bz2", false)]
[DataRow("10GB.xz.bz2", true)]
[DataRow("10GB.zip.bz2", false)]
[DataRow("10GB.zip.bz2", true)]
[DataRow("zblg.zip", false)]
[DataRow("zblg.zip", true)]
[DataRow("zbsm.zip", false)]
[DataRow("zbsm.zip", true)]
[DataRow("zbxl.zip", false)]
[DataRow("zbxl.zip", true)]
public void TestQuineBombs(string fileName, bool parallel)
{
    var extractor = new Extractor();
    var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", fileName);
    try
    {
        // ToList forces the whole sequence to be produced; the entries themselves are not needed.
        _ = extractor.ExtractFile(path, parallel).ToList();
    }
    // We should throw an overflow exception when we detect a quine or bomb
    catch (OverflowException)
    {
        return;
    }
    catch (Exception e)
    {
        Logger.Debug(e, "Shouldn't hit other exceptions in this test.");
        Assert.Fail($"Expected an OverflowException for {fileName} but got {e.GetType().Name}: {e.Message}");
    }
    // Getting here means we didnt catch the bomb
    Assert.Fail($"Extraction of {fileName} completed without detecting the quine or bomb.");
}

// Logger shared by the tests in this class.
protected static readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();
}
}

Просмотреть файл

@ -0,0 +1,124 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
<IsPackable>false</IsPackable>
</PropertyGroup>
<ItemGroup>
  <!-- Each package is referenced exactly once. The test SDK and MSTest packages were
       previously listed twice with different versions; only the newer entries are kept. -->
  <PackageReference Include="DiscUtils.Btrfs" Version="0.15.1-ci0002" />
  <PackageReference Include="DiscUtils.HfsPlus" Version="0.15.1-ci0002" />
  <PackageReference Include="DiscUtils.SquashFs" Version="0.15.1-ci0002" />
  <PackageReference Include="DiscUtils.Xfs" Version="0.15.1-ci0002" />
  <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.7.0-preview-20200428-01" />
  <PackageReference Include="MSTest.TestAdapter" Version="2.1.1" />
  <PackageReference Include="MSTest.TestFramework" Version="2.1.1" />
  <PackageReference Include="coverlet.collector" Version="1.2.1">
    <PrivateAssets>all</PrivateAssets>
    <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
  </PackageReference>
  <PackageReference Include="Sarif.Sdk" Version="2.2.5" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\RecursiveExtractor\RecursiveExtractor.csproj" />
</ItemGroup>
<ItemGroup>
<None Update="TestData\10GB.7z.bz2">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\10GB.bz2">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\10GB.gz.bz2">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\10GB.rar.bz2">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\10GB.xz.bz2">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\10GB.zip.bz2">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\droste.zip">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Empty.vmdk">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Nested.zip">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\TextFile.md">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.7z">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.a">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.ar">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.deb">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.iso">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.rar">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.rar4">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.tar">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.tar.bz2">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.tar.gz">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.tar.xz">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.vhd">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.vhdx">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.wim">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.zip">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\sysvbanner_1.0-17fakesync1_amd64.deb">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\zblg.zip">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\Shared.bsd.ar">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\zbsm.zip">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\zbxl.zip">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<PackageReference Update="Nerdbank.GitVersioning" Version="3.1.91" />
</ItemGroup>
</Project>

Двоичные данные
RecursiveExtractor.Tests/TestData/10GB.7z.bz2 Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/10GB.bz2 Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/10GB.gz.bz2 Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/10GB.rar.bz2 Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/10GB.xz.bz2 Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/10GB.zip.bz2 Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Empty.vmdk Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Nested.zip Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.7z Normal file

Двоичный файл не отображается.

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.bsd.ar Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.deb Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.iso Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.rar Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.rar4 Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.tar Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.tar.bz2 Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.tar.gz Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.tar.xz Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.vhd Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.vhdx Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.wim Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/Shared.zip Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1,57 @@
## RecursiveExtractor
RecursiveExtractor is a general-purpose file extractor.
### Format Support
RecursiveExtractor supports extracting the following types of archives:
* GNU AR
* BZip2
* [deb](https://en.wikipedia.org/wiki/Deb_(file_format))
* ISO
* tar
* VHD
* VHDX
* VMDK
* WIM
* XZ
* zip
## Using RecursiveExtractor
To use RecursiveExtractor, just instantiate an `Extractor` object and call the `ExtractFile`
method with either a filename or a byte array. This method will return an IEnumerable
of FileEntry objects, each one of which will contain the name of the file and its
contents, plus some additional metadata.
```
using Microsoft.CST.OpenSource.RecursiveExtractor;
...
// Initialize the RecursiveExtractor extractor
var extractor = new Extractor();
// Extract from an existing file
foreach (var fileEntry in extractor.ExtractFile("test.zip"))
{
Console.WriteLine(fileEntry.FullPath);
}
// Extract from a byte array
byte[] bytes = ...;
// The "nonexistent.zip" name doesn't really matter, but is used as part of the
// FileEntry.FullPath string.
foreach (var fileEntry in extractor.ExtractFile("nonexistent.zip", bytes))
{
Console.WriteLine(fileEntry.FullPath);
}
```
## Issues
If you find any issues with RecursiveExtractor, please [open an issue](https://github.com/Microsoft/OSSGadget/issues/new)
in the [Microsoft/OSSGadget](https://github.com/Microsoft/OSSGadget) repository.

Двоичные данные
RecursiveExtractor.Tests/TestData/droste.zip Normal file

Двоичный файл не отображается.

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/zblg.zip Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/zbsm.zip Normal file

Двоичный файл не отображается.

Двоичные данные
RecursiveExtractor.Tests/TestData/zbxl.zip Normal file

Двоичный файл не отображается.

31
RecursiveExtractor.sln Normal file
Просмотреть файл

@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.30309.148
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "RecursiveExtractor", "RecursiveExtractor\RecursiveExtractor.csproj", "{A7F7492B-60E0-468C-B267-BA60EC131E86}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "RecursiveExtractor.Tests", "RecursiveExtractor.Tests\RecursiveExtractor.Tests.csproj", "{BB4A44C9-47E4-4BF5-A04A-D3A65E46D115}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{A7F7492B-60E0-468C-B267-BA60EC131E86}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A7F7492B-60E0-468C-B267-BA60EC131E86}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A7F7492B-60E0-468C-B267-BA60EC131E86}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A7F7492B-60E0-468C-B267-BA60EC131E86}.Release|Any CPU.Build.0 = Release|Any CPU
{BB4A44C9-47E4-4BF5-A04A-D3A65E46D115}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{BB4A44C9-47E4-4BF5-A04A-D3A65E46D115}.Debug|Any CPU.Build.0 = Debug|Any CPU
{BB4A44C9-47E4-4BF5-A04A-D3A65E46D115}.Release|Any CPU.ActiveCfg = Release|Any CPU
{BB4A44C9-47E4-4BF5-A04A-D3A65E46D115}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {38234B7F-8828-462C-8C2A-747A4A195D7F}
EndGlobalSection
EndGlobal

Просмотреть файл

@ -0,0 +1,302 @@
// Copyright (c) Microsoft Corporation. Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace Microsoft.CST.OpenSource.RecursiveExtractor
{
/**
* Gnu Ar file parser. Supports SystemV style lookup tables in both 32 and 64 bit mode as well as BSD and GNU formatted .ars.
*/
public static class ArFile
{
/// <summary>
/// Lazily enumerates the members of the ar archive held in <paramref name="fileEntry"/>.
/// Member headers are read one after another from the start of the archive:
/// a "//" member is treated as the long file name table, "/N" names are resolved
/// through that table, "#1/N" members carry an N byte name in front of their data, and
/// symbol lookup tables ("/" and "/SYM64/") are skipped.
/// Enumeration stops at the first header that cannot be read or parsed.
/// </summary>
/// <param name="fileEntry">Entry whose Content is the archive. Null yields no entries.</param>
/// <returns>
/// One <see cref="FileEntry"/> per member, each backed by a delete-on-close temporary
/// file and parented to <paramref name="fileEntry"/>.
/// </returns>
public static IEnumerable<FileEntry> GetFileEntries(FileEntry fileEntry)
{
    if (fileEntry == null)
    {
        yield break;
    }
    var content = fileEntry.Content;
    // First, cut out the file signature (8 bytes)
    content.Position = 8;
    var filenameLookup = new Dictionary<int, string>();
    var headerBuffer = new byte[MemberHeaderLength];
    while (content.Length - content.Position >= MemberHeaderLength)
    {
        if (!TryReadExactly(content, headerBuffer, MemberHeaderLength))
        {
            yield break;
        }
        // Bytes 48..57 of the header hold the member size as decimal text.
        var sizeText = Encoding.ASCII.GetString(headerBuffer, 48, 10);
        if (!long.TryParse(sizeText, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out long size) || size < 0)
        {
            // Not a valid header, we couldn't parse the file size.
            yield break;
        }
        var filename = Encoding.ASCII.GetString(headerBuffer, 0, 16).Trim();
        if (filename.StartsWith("//", StringComparison.Ordinal))
        {
            // Long file name table. It is loaded into memory, so refuse sizes that
            // cannot be valid for this archive instead of allocating blindly.
            if (size > int.MaxValue || size > content.Length - content.Position)
            {
                yield break;
            }
            var nameTable = new byte[size];
            if (!TryReadExactly(content, nameTable, (int)size))
            {
                yield break;
            }
            AddLongNames(nameTable, filenameLookup);
        }
        else if (filename.StartsWith("#1/", StringComparison.Ordinal)
            && int.TryParse(filename.Substring(3), System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out int nameLength)
            && nameLength >= 0
            && nameLength <= size)
        {
            // The real name sits directly after the header and is counted in the size.
            var nameBytes = new byte[nameLength];
            if (!TryReadExactly(content, nameBytes, nameLength))
            {
                yield break;
            }
            // NOTE(review): writers may pad the stored name with NUL bytes; they are
            // not part of the name, so they are trimmed here.
            var embeddedName = Encoding.ASCII.GetString(nameBytes).TrimEnd('\0');
            yield return CopyMember(fileEntry, embeddedName, size - nameLength);
        }
        else if (filename == "/" || filename == "/SYM64/")
        {
            // Symbol lookup table (32 or 64 bit): an index of symbol names and member
            // offsets, not a file. It is skipped because every member is reached by
            // reading the headers in order anyway, while going through the table would
            // repeat members that define several symbols, miss members that define
            // none, and run before the "//" name table has been read.
            content.Position = Math.Min(content.Length, content.Position + size);
        }
        else
        {
            if (filename.StartsWith("/", StringComparison.Ordinal)
                && int.TryParse(filename.Substring(1), System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out int nameIndex))
            {
                // "/N" refers to the name that starts at byte N of the "//" table.
                if (filenameLookup.TryGetValue(nameIndex, out var longName))
                {
                    filename = longName;
                }
                else
                {
                    Logger.Debug("Expected to find a filename at index {0}", nameIndex);
                }
            }
            yield return CopyMember(fileEntry, filename, size);
        }
        // Entries are padded on even byte boundaries https://docs.oracle.com/cd/E36784_01/html/E36873/ar.h-3head.html
        if (content.Position % 2 == 1)
        {
            content.Position += 1;
        }
    }
}

// Every member is preceded by a fixed 60 byte header.
private const int MemberHeaderLength = 60;

// Fills the first count bytes of buffer from stream; returns false if the stream ends early.
private static bool TryReadExactly(Stream stream, byte[] buffer, int count)
{
    var total = 0;
    while (total < count)
    {
        var read = stream.Read(buffer, total, count - total);
        if (read <= 0)
        {
            return false;
        }
        total += read;
    }
    return true;
}

// Records each newline terminated name of a "//" table under the offset at which it starts.
private static void AddLongNames(byte[] nameTable, Dictionary<int, string> lookup)
{
    var name = new StringBuilder();
    var start = 0;
    for (var i = 0; i <= nameTable.Length; i++)
    {
        if (i < nameTable.Length && nameTable[i] != '\n')
        {
            name.Append((char)nameTable[i]);
            continue;
        }
        // End of a line (or of the table): drop the '/' that terminates the name.
        if (name.Length > 0 && name[name.Length - 1] == '/')
        {
            name.Length--;
        }
        if (name.Length > 0)
        {
            // Indexer instead of Add: a repeated offset must not throw.
            lookup[start] = name.ToString();
        }
        name.Clear();
        // The next filename would start on the next line
        start = i + 1;
    }
}

// Copies the next length bytes of the archive into a delete-on-close temp file and wraps it as a child entry.
private static FileEntry CopyMember(FileEntry archive, string name, long length)
{
    var entryStream = new FileStream(Path.GetTempFileName(), FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite, 4096, FileOptions.DeleteOnClose);
    CopyStreamBytes(archive.Content, entryStream, length);
    // Passthrough: the new entry uses the temp stream directly instead of copying it again.
    return new FileEntry(name, entryStream, archive, true);
}
/// <summary>
/// Interprets an 8 byte array as a big-endian signed 64 bit integer.
/// The array passed in is left unmodified.
/// </summary>
/// <param name="value">The bytes to convert, most significant byte first.</param>
/// <returns>The decoded value, or -1 when <paramref name="value"/> is not exactly 8 bytes long.</returns>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
public static long Int64FromBigEndianBytes(byte[] value)
{
    if (value == null)
    {
        throw new ArgumentNullException(nameof(value));
    }
    if (value.Length != 8)
    {
        return -1;
    }
    // Accumulate from the most significant byte down; this needs no endianness
    // check and does not reverse the caller's array.
    long result = 0;
    foreach (var b in value)
    {
        result = (result << 8) | b;
    }
    return result;
}
/// <summary>
/// Interprets a 4 byte array as a big-endian signed 32 bit integer.
/// The array passed in is left unmodified.
/// </summary>
/// <param name="value">The bytes to convert, most significant byte first.</param>
/// <returns>The decoded value, or -1 when <paramref name="value"/> is not exactly 4 bytes long.</returns>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
public static int IntFromBigEndianBytes(byte[] value)
{
    if (value == null)
    {
        throw new ArgumentNullException(nameof(value));
    }
    if (value.Length != 4)
    {
        return -1;
    }
    // Accumulate from the most significant byte down; this needs no endianness
    // check and does not reverse the caller's array.
    int result = 0;
    foreach (var b in value)
    {
        result = (result << 8) | b;
    }
    return result;
}
/// <summary>
/// Copies up to <paramref name="bytes"/> bytes from the current position of
/// <paramref name="input"/> to <paramref name="output"/>, in chunks of at most 32768
/// bytes. Copying ends early when the input has no more data.
/// </summary>
/// <param name="input">Stream to read from.</param>
/// <param name="output">Stream to write to.</param>
/// <param name="bytes">Maximum number of bytes to copy; zero or less copies nothing.</param>
internal static void CopyStreamBytes(Stream input, Stream output, long bytes)
{
    var chunk = new byte[32768];
    var remaining = bytes;
    while (remaining > 0)
    {
        // Never ask for more than is still wanted, nor more than the chunk holds.
        var wanted = (int)Math.Min(chunk.Length, remaining);
        var got = input.Read(chunk, 0, wanted);
        if (got <= 0)
        {
            // Input exhausted before the requested amount was copied.
            break;
        }
        output.Write(chunk, 0, got);
        remaining -= got;
    }
}
// Class-wide logger; readonly because it is assigned once and never replaced.
private static readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();
}
}

Просмотреть файл

@ -0,0 +1,48 @@
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace Microsoft.CST.OpenSource.RecursiveExtractor
{
/**
 * Very simple implementation of a .deb format parser, needed for Debian .deb archives.
 * See: https://en.wikipedia.org/wiki/Deb_(file_format)#/media/File:Deb_File_Structure.svg
 */
public static class DebArchiveFile
{
    /// <summary>
    /// Lazily enumerates the members that follow the first 72 bytes of a .deb file.
    /// Each member consists of a 60 byte header (name in bytes 0..15, decimal size in
    /// bytes 48..57) followed by its data. Enumeration stops at the first header that
    /// cannot be read or whose size field is not a usable number.
    /// </summary>
    /// <param name="fileEntry">Entry whose Content is the .deb file. Null yields no entries.</param>
    /// <returns>One <see cref="FileEntry"/> per member, parented to <paramref name="fileEntry"/>.</returns>
    public static IEnumerable<FileEntry> GetFileEntries(FileEntry fileEntry)
    {
        if (fileEntry == null)
        {
            yield break;
        }
        var content = fileEntry.Content;
        // First, cut out the file signature (8 bytes) and global header (64 bytes)
        content.Position = 72;
        var headerBytes = new byte[60];
        // The header for each file is 60 bytes
        while (content.Length - content.Position >= 60)
        {
            if (!ReadFully(content, headerBytes, 60))
            {
                yield break;
            }
            var filename = Encoding.ASCII.GetString(headerBytes, 0, 16).Trim(); // filename is 16 bytes
            var fileSizeText = Encoding.ASCII.GetString(headerBytes, 48, 10).Trim(); // File size is decimal-encoded, 10 bytes long
            if (!int.TryParse(fileSizeText, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out int fileSize))
            {
                yield break;
            }
            // The data is loaded into memory, so reject sizes that cannot be valid
            // for this file rather than allocating whatever the header claims.
            if (fileSize < 0 || fileSize > content.Length - content.Position)
            {
                yield break;
            }
            var entryContent = new byte[fileSize];
            if (!ReadFully(content, entryContent, fileSize))
            {
                yield break;
            }
            // FileEntry copies the data when it is not asked to pass the stream
            // through, so the MemoryStream can be released once the caller resumes.
            using (var stream = new MemoryStream(entryContent))
            {
                yield return new FileEntry(filename, stream, fileEntry);
            }
            // A .deb is an ar archive, and ar pads member data to an even offset;
            // without this skip the header after an odd-sized member is misread.
            if (content.Position % 2 == 1)
            {
                content.Position += 1;
            }
        }
    }

    // Fills the first count bytes of buffer from stream; returns false if the stream ends early.
    private static bool ReadFully(Stream stream, byte[] buffer, int count)
    {
        var total = 0;
        while (total < count)
        {
            var read = stream.Read(buffer, total, count - total);
            if (read <= 0)
            {
                return false;
            }
            total += read;
        }
        return true;
    }
}
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,40 @@
using System;
namespace Microsoft.CST.OpenSource.RecursiveExtractor
{
/// <summary>
/// Settings that control batching, failure handling and resource limits of an extraction.
/// </summary>
public class ExtractorOptions
{
    /// <summary>
    /// Upper bound on how many items are taken in one go when running in parallel.
    /// Defaults to 50.
    /// </summary>
    public int BatchSize { get; set; } = 50;

    /// <summary>
    /// Turns the processing time limit (see <see cref="Timeout"/>) on or off.
    /// Defaults to false.
    /// </summary>
    public bool EnableTiming { get; set; }

    /// <summary>
    /// When an archive cannot be extracted, return one file entry representing the
    /// archive itself. Defaults to true.
    /// </summary>
    public bool ExtractSelfOnFail { get; set; } = true;

    /// <summary>
    /// Cap on the number of bytes extracted from the archive and all embedded
    /// archives. 0 removes the cap. <see cref="MaxExtractedBytesRatio"/> may apply as
    /// well. Defaults to 0.
    /// </summary>
    public long MaxExtractedBytes { get; set; }

    /// <summary>
    /// Extraction stops once the total number of bytes seen exceeds this multiple of
    /// the original archive size; a guard against denial of service (zip bombs and
    /// the like). Defaults to 60.0.
    /// </summary>
    public double MaxExtractedBytesRatio { get; set; } = 60.0;

    /// <summary>
    /// With timing enabled, processing stops after this time span; a guard against
    /// denial of service (zip bombs and the like). Defaults to 300 seconds.
    /// </summary>
    public TimeSpan Timeout { get; set; } = TimeSpan.FromSeconds(300);
}
}

Просмотреть файл

@ -0,0 +1,121 @@
// Copyright (c) Microsoft Corporation. Licensed under the MIT License.
using System;
using System.IO;
using System.Threading.Tasks;
namespace Microsoft.CST.OpenSource.RecursiveExtractor
{
/// <summary>
/// A named piece of content, optionally nested inside a parent entry, together with a
/// readable stream over that content.
/// </summary>
public class FileEntry
{
    // One logger for the type: looking it up for every instance is wasted work.
    private static readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();

    /// <summary>
    /// Constructs a FileEntry object from a Stream. If passthroughStream is set to true, and the
    /// stream is seekable, it will directly use inputStream (rewound to position 0). If
    /// passthroughStream is false or the stream is not seekable, it will copy the full contents
    /// of inputStream to a new internal temporary FileStream and attempt to reset the position of
    /// inputStream. A stream that cannot be read results in empty content. The finalizer for this
    /// class Disposes the contained Stream unless passthrough was requested.
    /// </summary>
    /// <param name="name">Name of this entry; the last segment of <see cref="FullPath"/>.</param>
    /// <param name="inputStream">Stream providing the content.</param>
    /// <param name="parent">Entry this one was extracted from, or null for a top level entry.</param>
    /// <param name="passthroughStream">Use inputStream directly, without copying, when it is seekable.</param>
    /// <exception cref="ArgumentNullException"><paramref name="inputStream"/> is null.</exception>
    public FileEntry(string name, Stream inputStream, FileEntry? parent = null, bool passthroughStream = false)
    {
        if (inputStream == null)
        {
            throw new ArgumentNullException(nameof(inputStream));
        }

        Parent = parent;
        Name = name;
        Passthrough = passthroughStream;

        if (parent == null)
        {
            ParentPath = null;
            FullPath = Name;
        }
        else
        {
            ParentPath = parent.FullPath;
            // NOTE(review): Path.PathSeparator is the character that separates entries
            // of the PATH environment variable (';' or ':'), not a directory separator.
            // Left unchanged because consumers of FullPath are not visible here - confirm
            // whether Path.DirectorySeparatorChar was intended.
            FullPath = $"{ParentPath}{Path.PathSeparator}{Name}";
        }

        if (!inputStream.CanRead)
        {
            // Nothing can be read from the input, so expose empty content.
            Content = new MemoryStream();
            return;
        }

        // We want to be able to seek, so ensure any passthrough stream is Seekable
        if (passthroughStream && inputStream.CanSeek)
        {
            Content = inputStream;
            if (Content.Position != 0)
            {
                Content.Position = 0;
            }
            return;
        }

        // Back with a temporary filestream, this is optimized to be cached in memory when possible
        // automatically by .NET
        Content = new FileStream(Path.GetTempFileName(), FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite, 4096, FileOptions.DeleteOnClose);
        long? initialPosition = null;

        if (inputStream.CanSeek)
        {
            // Copy from the start, but remember where the caller had the stream.
            initialPosition = inputStream.Position;
            if (inputStream.Position != 0)
            {
                inputStream.Position = 0;
            }
        }

        try
        {
            inputStream.CopyTo(Content);
        }
        catch (NotSupportedException)
        {
            // Fall back to the asynchronous copy and wait for it. RunSynchronously
            // cannot be used here: it always throws for a task that is not bound to a
            // delegate, which is what CopyToAsync returns.
            try
            {
                inputStream.CopyToAsync(Content).GetAwaiter().GetResult();
            }
            catch (Exception f)
            {
                Logger.Debug("Failed to copy stream from {0} ({1}:{2})", FullPath, f.GetType(), f.Message);
            }
        }
        catch (Exception e)
        {
            Logger.Debug("Failed to copy stream from {0} ({1}:{2})", FullPath, e.GetType(), e.Message);
        }

        if (inputStream.CanSeek && inputStream.Position != 0)
        {
            inputStream.Position = initialPosition ?? 0;
        }

        Content.Position = 0;
    }

    /// <summary>Stream over the content of this entry, positioned at 0 after construction.</summary>
    public Stream Content { get; }

    /// <summary>The parent's full path joined with <see cref="Name"/>, or just the name for a top level entry.</summary>
    public string FullPath { get; }

    /// <summary>Name given to this entry.</summary>
    public string Name { get; }

    /// <summary>Entry this one was extracted from, or null.</summary>
    public FileEntry? Parent { get; }

    /// <summary>Full path of <see cref="Parent"/>, or null for a top level entry.</summary>
    public string? ParentPath { get; }

    /// <summary>Whether passthrough was requested at construction. When false the finalizer disposes <see cref="Content"/>.</summary>
    public bool Passthrough { get; }

    ~FileEntry()
    {
        // A passthrough stream is not disposed here; otherwise the stream is released.
        if (!Passthrough)
        {
            Content?.Dispose();
        }
    }
}
}

Просмотреть файл

@ -0,0 +1,260 @@
// Copyright (c) Microsoft Corporation. Licensed under the MIT License.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
namespace Microsoft.CST.OpenSource.RecursiveExtractor
{
/// <summary>
/// The kinds of archive files that this module can identify and process.
/// </summary>
public enum ArchiveFileType
{
    /// <summary> Not recognised as any of the known archive types. </summary>
    UNKNOWN = 0,

    /// <summary> Zip archive. </summary>
    ZIP = 1,

    /// <summary> Tar archive. </summary>
    TAR = 2,

    /// <summary> XZ compressed stream. </summary>
    XZ = 3,

    /// <summary> GZip compressed stream. </summary>
    GZIP = 4,

    /// <summary> BZip2 compressed stream. </summary>
    BZIP2 = 5,

    /// <summary> RAR archive. </summary>
    RAR = 6,

    /// <summary> 7-Zip archive. </summary>
    P7ZIP = 7,

    /// <summary> Debian package. </summary>
    DEB = 8,

    /// <summary> Unix ar archive. </summary>
    AR = 9,

    /// <summary> ISO 9660 disc image. </summary>
    ISO_9660 = 10,

    /// <summary> VHDX virtual hard disk. </summary>
    VHDX = 11,

    /// <summary> VHD virtual hard disk. </summary>
    VHD = 12,

    /// <summary> Windows Imaging Format (WIM) image. </summary>
    WIM = 13,

    /// <summary> VMDK virtual disk. </summary>
    VMDK = 14
}
/// <summary>
/// MiniMagic is a tiny implementation of a file type identifier based on binary signatures.
/// </summary>
public static class MiniMagic
{
    /// <summary>
    /// Number of leading bytes inspected for the start-of-file signatures.
    /// </summary>
    private const int HeaderLength = 9;

    /// <summary>
    /// Fallback using file extensions in case the binary signature doesn't match. Keys are
    /// upper-case extensions without the leading dot.
    /// </summary>
    private static readonly Dictionary<string, ArchiveFileType> FileExtensionMap = new Dictionary<string, ArchiveFileType>()
    {
        {"ZIP", ArchiveFileType.ZIP },
        {"APK", ArchiveFileType.ZIP },
        {"IPA", ArchiveFileType.ZIP },
        {"JAR", ArchiveFileType.ZIP },
        {"EAR", ArchiveFileType.ZIP },
        {"WAR", ArchiveFileType.ZIP },
        {"GZ", ArchiveFileType.GZIP },
        {"TGZ", ArchiveFileType.GZIP },
        {"TAR", ArchiveFileType.TAR },
        {"GEM", ArchiveFileType.TAR },
        {"XZ", ArchiveFileType.XZ },
        {"BZ2", ArchiveFileType.BZIP2 },
        {"RAR", ArchiveFileType.RAR },
        {"RAR4", ArchiveFileType.RAR },
        {"7Z", ArchiveFileType.P7ZIP },
        {"DEB", ArchiveFileType.DEB },
        {"AR", ArchiveFileType.AR },
        {"ISO", ArchiveFileType.ISO_9660 },
        {"VHDX", ArchiveFileType.VHDX },
        {"VHD", ArchiveFileType.VHD },
        {"WIM", ArchiveFileType.WIM },
        {"VMDK", ArchiveFileType.VMDK }
    };

    /// <summary>
    /// Detects the type of the file at the given path.
    /// </summary>
    /// <param name="filename"> Path of the file to inspect. </param>
    /// <returns> The detected type, or <see cref="ArchiveFileType.UNKNOWN"/> if nothing matched. </returns>
    public static ArchiveFileType DetectFileType(string filename)
    {
        // Detection only reads, so request read access explicitly; the two-argument FileStream
        // constructor would ask for read/write access and fail on read-only files.
#pragma warning disable SEC0116 // Path Tampering Unvalidated File Path
        using var fs = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read);
#pragma warning restore SEC0116 // Path Tampering Unvalidated File Path
        // If you don't pass passthroughStream: true here it will read the entire file into the stream in
        // FileEntry This way it will only read the bytes minimagic uses
        var fileEntry = new FileEntry(filename, fs, null, passthroughStream: true);
        return DetectFileType(fileEntry);
    }

    /// <summary>
    /// Detects the type of a file by probing well-known signature offsets in its content and, if no
    /// signature matches, by looking up the extension of its name. The position of the content
    /// stream is restored after every probe.
    /// </summary>
    /// <param name="fileEntry"> FileEntry containing the file data. </param>
    /// <returns> The detected type, or <see cref="ArchiveFileType.UNKNOWN"/> if nothing matched or <paramref name="fileEntry"/> is null. </returns>
    public static ArchiveFileType DetectFileType(FileEntry fileEntry)
    {
        if (fileEntry == null)
        {
            return ArchiveFileType.UNKNOWN;
        }

        Stream content = fileEntry.Content;
        long initialPosition = content.Position;
        long length = content.Length;
        byte[] buffer = new byte[HeaderLength];

        if (length >= HeaderLength)
        {
            ReadAt(content, 0, buffer, HeaderLength, initialPosition);

            if (HasPrefix(buffer, 0x50, 0x4B, 0x03, 0x04))
            {
                return ArchiveFileType.ZIP;
            }

            if (HasPrefix(buffer, 0x1F, 0x8B))
            {
                return ArchiveFileType.GZIP;
            }

            if (HasPrefix(buffer, 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00))
            {
                return ArchiveFileType.XZ;
            }

            if (HasPrefix(buffer, 0x42, 0x5A, 0x68))
            {
                return ArchiveFileType.BZIP2;
            }

            if (HasPrefix(buffer, 0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00) ||
                HasPrefix(buffer, 0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00))
            {
                return ArchiveFileType.RAR;
            }

            if (HasPrefix(buffer, 0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C))
            {
                return ArchiveFileType.P7ZIP;
            }

            string leadingEight = Encoding.ASCII.GetString(buffer, 0, 8);
            if (leadingEight == "MSWIM\0\0\0" || leadingEight == "WLPWM\0\0\0")
            {
                return ArchiveFileType.WIM;
            }

            if (Encoding.ASCII.GetString(buffer, 0, 4) == "KDMV")
            {
                // The header alone is not conclusive; also require the descriptor marker at offset 512.
                byte[] secondToken = new byte[21];
                if (ReadAt(content, 512, secondToken, secondToken.Length, initialPosition) == secondToken.Length &&
                    Encoding.ASCII.GetString(secondToken) == "# Disk DescriptorFile")
                {
                    return ArchiveFileType.VMDK;
                }
            }

            // some kind of unix Archive https://en.wikipedia.org/wiki/Ar_(Unix)
            if (HasPrefix(buffer, 0x21, 0x3c, 0x61, 0x72, 0x63, 0x68, 0x3e))
            {
                // .deb https://manpages.debian.org/unstable/dpkg-dev/deb.5.en.html
                // Probe into a separate buffer so the header bytes stay intact for later checks.
                byte[] debVersion = new byte[4];
                if (ReadAt(content, 68, debVersion, debVersion.Length, initialPosition) == debVersion.Length &&
                    Encoding.ASCII.GetString(debVersion) == "2.0\n")
                {
                    return ArchiveFileType.DEB;
                }

                // Created by GNU ar https://en.wikipedia.org/wiki/Ar_(Unix)#System_V_(or_GNU)_variant
                byte[] headerBuffer = new byte[60];
                if (ReadAt(content, 8, headerBuffer, headerBuffer.Length, initialPosition) == headerBuffer.Length)
                {
                    // Bytes 48..57 hold the size field. The content is untrusted, so a field that
                    // is not a number must not throw; it simply means "not an ar archive".
                    string sizeField = Encoding.ASCII.GetString(headerBuffer, 48, 10);
                    bool sizeIsValid = int.TryParse(
                        sizeField,
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out int size);

                    // Defined ending characters for a header
                    if (sizeIsValid && size > 0 && headerBuffer[58] == '`' && headerBuffer[59] == '\n')
                    {
                        return ArchiveFileType.AR;
                    }
                }
            }

            // https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-VHDX/%5bMS-VHDX%5d.pdf
            if (Encoding.UTF8.GetString(buffer, 0, 8).Equals("vhdxfile"))
            {
                return ArchiveFileType.VHDX;
            }
        }

        if (length >= 262)
        {
            ReadAt(content, 257, buffer, 5, initialPosition);
            if (HasPrefix(buffer, 0x75, 0x73, 0x74, 0x61, 0x72))
            {
                return ArchiveFileType.TAR;
            }
        }

        // ISO Format https://en.wikipedia.org/wiki/ISO_9660#Overall_structure Reserved space + 1 header
        if (length > 32768 + 2048)
        {
            ReadAt(content, 32769, buffer, 5, initialPosition);
            if (HasPrefix(buffer, (byte)'C', (byte)'D', (byte)'0', (byte)'0', (byte)'1'))
            {
                return ArchiveFileType.ISO_9660;
            }
        }

        //https://www.microsoft.com/en-us/download/details.aspx?id=23850 - 'Hard Disk Footer Format'
        // Unlike other formats the magic string is stored in the footer, which is either the last 511 or 512 bytes
        // The magic string is Magic string "conectix" (63 6F 6E 65 63 74 69 78)
        if (length > 512)
        {
            byte[] vhdFooterCookie = new byte[] { 0x63, 0x6F, 0x6E, 0x65, 0x63, 0x74, 0x69, 0x78 };

            // 0x200: regular footer; 0x1FF: footer written by legacy platforms (511 bytes).
            foreach (int footerSize in new[] { 0x200, 0x1FF })
            {
                ReadAt(content, length - footerSize, buffer, 8, initialPosition);
                if (HasPrefix(buffer, vhdFooterCookie))
                {
                    return ArchiveFileType.VHD;
                }
            }
        }

        // Fall back to file extensions
        string fileExtension = Path.GetExtension(fileEntry.Name.ToUpperInvariant());
        if (fileExtension.StartsWith(".", StringComparison.Ordinal))
        {
            fileExtension = fileExtension.Substring(1);
        }

        return FileExtensionMap.TryGetValue(fileExtension, out ArchiveFileType fileType)
            ? fileType
            : ArchiveFileType.UNKNOWN;
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> bytes starting at <paramref name="offset"/> into the
    /// beginning of <paramref name="destination"/>, then moves the stream to
    /// <paramref name="restorePosition"/>.
    /// </summary>
    /// <returns> The number of bytes actually read; less than <paramref name="count"/> if the stream ended first. </returns>
    private static int ReadAt(Stream stream, long offset, byte[] destination, int count, long restorePosition)
    {
        stream.Position = offset;
        int total = 0;
        while (total < count)
        {
            // Stream.Read may return fewer bytes than requested; keep going until done or EOF.
            int read = stream.Read(destination, total, count - total);
            if (read <= 0)
            {
                break;
            }
            total += read;
        }
        stream.Position = restorePosition;
        return total;
    }

    /// <summary>
    /// Returns true when <paramref name="data"/> begins with the bytes of <paramref name="signature"/>.
    /// </summary>
    private static bool HasPrefix(byte[] data, params byte[] signature)
    {
        if (data.Length < signature.Length)
        {
            return false;
        }
        for (int i = 0; i < signature.Length; i++)
        {
            if (data[i] != signature[i])
            {
                return false;
            }
        }
        return true;
    }
}
}

Просмотреть файл

@ -0,0 +1,12 @@
using System;
namespace RecursiveExtractor
{
/// <summary>
/// Placeholder console entry point.
/// </summary>
class Program
{
    /// <summary>
    /// Writes a greeting to standard output.
    /// </summary>
    /// <param name="args"> Command-line arguments (unused). </param>
    static void Main(string[] args) => Console.WriteLine("Hello World!");
}
}

Просмотреть файл

@ -0,0 +1,57 @@
## RecursiveExtractor
RecursiveExtractor is a general-purpose file extractor.
### Format Support
RecursiveExtractor supports extracting the following types of archives:
* GNU AR
* BZip2
* [deb](https://en.wikipedia.org/wiki/Deb_(file_format))
* GZip
* ISO
* tar
* VHD
* VHDX
* VMDK
* WIM
* XZ
* zip
## Using RecursiveExtractor
To use RecursiveExtractor, instantiate an `Extractor` object and call its `ExtractFile`
method with either a filename, or a name together with a byte array. The method returns
an `IEnumerable` of `FileEntry` objects, each of which contains the name of the file and
its contents, plus some additional metadata.
```
using Microsoft.CST.OpenSource.RecursiveExtractor;
...
// Initialize the RecursiveExtractor extractor
var extractor = new Extractor();
// Extract from an existing file
foreach (var fileEntry in extractor.ExtractFile("test.zip"))
{
Console.WriteLine(fileEntry.FullPath);
}
// Extract from a byte array
byte[] bytes = ...;
// The "nonexistent.zip" name doesn't really matter, but is used as part of the
// FileEntry.FullPath string.
foreach (var fileEntry in extractor.ExtractFile("nonexistent.zip", bytes))
{
Console.WriteLine(fileEntry.FullPath);
}
```
## Issues
If you find any issues with RecursiveExtractor, please [open an issue](https://github.com/Microsoft/OSSGadget/issues/new)
in the [Microsoft/OSSGadget](https://github.com/Microsoft/OSSGadget) repository.

276
RecursiveExtractor/Range.cs Normal file
Просмотреть файл

@ -0,0 +1,276 @@
// https://github.com/dotnet/corefx/blob/1597b894a2e9cac668ce6e484506eca778a85197/src/Common/src/CoreLib/System/Index.cs
// https://github.com/dotnet/corefx/blob/1597b894a2e9cac668ce6e484506eca778a85197/src/Common/src/CoreLib/System/Range.cs
#if NETSTANDARD2_0
using System.Runtime.CompilerServices;
namespace System
{
/// <summary>A position in a collection, counted either from the start or from the end.</summary>
/// <remarks>
/// The C# compiler uses Index to support the index syntax:
/// <code>
/// int[] someArray = new int[5] { 1, 2, 3, 4, 5 } ;
/// int lastElement = someArray[^1]; // lastElement = 5
/// </code>
/// A from-end index is stored as the bitwise complement of its value, so a negative stored
/// value marks an index that counts from the end.
/// </remarks>
internal readonly struct Index : IEquatable<Index>
{
    private readonly int _value;

    /// <summary>Creates an Index from a value and a flag telling which end it counts from.</summary>
    /// <param name="value">The index value; must be zero or positive.</param>
    /// <param name="fromEnd">True to count from the end, false to count from the start.</param>
    /// <remarks>
    /// Counting from the end, 1 designates the last element and 0 the position just past it.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Index(int value, bool fromEnd = false)
    {
        if (value < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(value), "value must be non-negative");
        }

        _value = fromEnd ? ~value : value;
    }

    // Unchecked constructor taking the already-encoded value; skips validation.
    private Index(int value) => _value = value;

    /// <summary>An Index designating the first element.</summary>
    public static Index Start => new Index(0);

    /// <summary>An Index designating the position just past the last element.</summary>
    public static Index End => new Index(~0);

    /// <summary>Creates an Index counting from the start.</summary>
    /// <param name="value">The index value from the start; must be zero or positive.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Index FromStart(int value) =>
        value >= 0
            ? new Index(value)
            : throw new ArgumentOutOfRangeException(nameof(value), "value must be non-negative");

    /// <summary>Creates an Index counting from the end.</summary>
    /// <param name="value">The index value from the end; must be zero or positive.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Index FromEnd(int value) =>
        value >= 0
            ? new Index(~value)
            : throw new ArgumentOutOfRangeException(nameof(value), "value must be non-negative");

    /// <summary>The index value, regardless of which end it counts from.</summary>
    public int Value => _value < 0 ? ~_value : _value;

    /// <summary>True when the index counts from the end.</summary>
    public bool IsFromEnd => _value < 0;

    /// <summary>Computes the offset from the start for a collection of the given length.</summary>
    /// <param name="length">The length of the collection the Index is applied to.</param>
    /// <remarks>
    /// Neither the length nor the resulting offset is validated here; an out-of-range offset is
    /// left for the indexed collection to reject.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int GetOffset(int length) =>
        // For a from-end index _value == ~value, so _value + length + 1 == length - value.
        IsFromEnd ? _value + (length + 1) : _value;

    /// <summary>Compares this Index with an arbitrary object.</summary>
    /// <param name="value">The object to compare with.</param>
    public override bool Equals(object? value) => value is Index other && _value == other._value;

    /// <summary>Compares this Index with another Index.</summary>
    /// <param name="other">The Index to compare with.</param>
    public bool Equals(Index other) => _value == other._value;

    /// <summary>Returns the hash code, which is the encoded value itself.</summary>
    public override int GetHashCode() => _value;

    /// <summary>Converts an integer to an Index counting from the start.</summary>
    public static implicit operator Index(int value) => FromStart(value);

    /// <summary>Formats the Index as its value, prefixed with '^' when it counts from the end.</summary>
    public override string ToString()
    {
        string magnitude = ((uint)Value).ToString();
        return IsFromEnd ? "^" + magnitude : magnitude;
    }
}
/// <summary>A span of a collection delimited by a start and an end index.</summary>
/// <remarks>
/// The C# compiler uses Range to support the range syntax:
/// <code>
/// int[] someArray = new int[5] { 1, 2, 3, 4, 5 };
/// int[] subArray1 = someArray[0..2]; // { 1, 2 }
/// int[] subArray2 = someArray[1..^0]; // { 2, 3, 4, 5 }
/// </code>
/// </remarks>
internal readonly struct Range : IEquatable<Range>
{
    /// <summary>The inclusive start index of the Range.</summary>
    public Index Start { get; }

    /// <summary>The exclusive end index of the Range.</summary>
    public Index End { get; }

    /// <summary>Creates a Range from its start and end indexes.</summary>
    /// <param name="start">The inclusive start index.</param>
    /// <param name="end">The exclusive end index.</param>
    public Range(Index start, Index end)
    {
        Start = start;
        End = end;
    }

    /// <summary>Compares this Range with an arbitrary object.</summary>
    /// <param name="value">The object to compare with.</param>
    public override bool Equals(object? value) => value is Range other && Equals(other);

    /// <summary>Compares this Range with another Range.</summary>
    /// <param name="other">The Range to compare with.</param>
    public bool Equals(Range other) => other.Start.Equals(Start) && other.End.Equals(End);

    /// <summary>Returns a hash code combining both indexes.</summary>
    public override int GetHashCode() => Start.GetHashCode() * 31 + End.GetHashCode();

    /// <summary>Formats the Range as "start..end".</summary>
    public override string ToString() => string.Concat(Start.ToString(), "..", End.ToString());

    /// <summary>Creates a Range from the given start index to the end of the collection.</summary>
    public static Range StartAt(Index start) => new Range(start, Index.End);

    /// <summary>Creates a Range from the first element up to the given end index.</summary>
    public static Range EndAt(Index end) => new Range(Index.Start, end);

    /// <summary>A Range covering the whole collection.</summary>
    public static Range All => new Range(Index.Start, Index.End);

    /// <summary>Resolves the Range against a collection length into a start offset and a length.</summary>
    /// <param name="length">The length of the collection the Range is applied to.</param>
    /// <remarks>
    /// The length argument itself is not validated, but the resolved range must lie within it;
    /// otherwise an <see cref="ArgumentOutOfRangeException"/> is thrown.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public (int Offset, int Length) GetOffsetAndLength(int length)
    {
        Index first = Start;
        Index last = End;

        int from = first.IsFromEnd ? length - first.Value : first.Value;
        int to = last.IsFromEnd ? length - last.Value : last.Value;

        // Unsigned comparisons also reject negative offsets in a single test.
        bool withinBounds = (uint)to <= (uint)length && (uint)from <= (uint)to;
        if (!withinBounds)
        {
            throw new ArgumentOutOfRangeException(nameof(length));
        }

        return (from, to - from);
    }
}
}
namespace System.Runtime.CompilerServices
{
/// <summary>
/// Helper the C# compiler calls to slice arrays with the range syntax.
/// </summary>
internal static class RuntimeHelpers
{
    /// <summary>
    /// Returns a new array holding the elements of <paramref name="array"/> selected by
    /// <paramref name="range"/>.
    /// </summary>
    /// <param name="array">The array to slice.</param>
    /// <param name="range">The range of elements to copy.</param>
    /// <returns>A copy of the selected elements, with the same runtime element type as the input.</returns>
    public static T[] GetSubArray<T>(T[] array, Range range)
    {
        if (array == null)
        {
            throw new ArgumentNullException(nameof(array));
        }

        var (start, count) = range.GetOffsetAndLength(array.Length);

        // Value-type arrays are never covariant; for reference types compare the runtime type.
        bool isExactlyArrayOfT = default(T) != null || typeof(T[]) == array.GetType();

        if (isExactlyArrayOfT && count == 0)
        {
            return Array.Empty<T>();
        }

        // When the array is really a U[] with U : T, the copy keeps the U[] runtime type.
        T[] slice = isExactlyArrayOfT
            ? new T[count]
            : (T[])Array.CreateInstance(array.GetType().GetElementType(), count);

        Array.Copy(array, start, slice, 0, count);
        return slice;
    }
}
}
#endif

Просмотреть файл

@ -0,0 +1,53 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build and NuGet packaging settings for the RecursiveExtractor library. -->
<PropertyGroup>
<!-- Multi-targeted: Range.cs compiles its Index/Range polyfill only when NETSTANDARD2_0 is defined. -->
<TargetFrameworks>netstandard2.0;netstandard2.1</TargetFrameworks>
<RootNamespace>Microsoft.CST.OpenSource</RootNamespace>
<!-- Placeholder version; presumably replaced at build time by Nerdbank.GitVersioning (TODO confirm). -->
<Version>0.0.0-placeholder</Version>
<Company>Microsoft</Company>
<Authors>Microsoft</Authors>
<copyright>© Microsoft Corporation. All rights reserved.</copyright>
<RepositoryType>GitHub</RepositoryType>
<RepositoryUrl>https://github.com/Microsoft/OSSGadget</RepositoryUrl>
<Configurations>Debug;Release</Configurations>
<!-- C# 8 with nullable reference types enabled for the whole project. -->
<LangVersion>8.0</LangVersion>
<Nullable>Enable</Nullable>
<GeneratePackageOnBuild>false</GeneratePackageOnBuild>
<Description>RecursiveExtractor is able to process the following formats: ar, bzip2, deb, gzip, iso, tar, vhd, vhdx, vmdk, wim, xzip, and zip. RecursiveExtractor automatically detects the archive type and fails gracefully when attempting to process malformed content.</Description>
<PackageId>Microsoft.CST.RecursiveExtractor</PackageId>
<PackageTags>unzip extract extractor</PackageTags>
<PackageVersion>0.0.0-placeholder</PackageVersion>
<PackageProjectUrl>https://github.com/microsoft/OSSGadget</PackageProjectUrl>
<PackageIcon>icon-128.png</PackageIcon>
<PackageLicenseFile>LICENSE.txt</PackageLicenseFile>
<IncludeSymbols>true</IncludeSymbols>
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
</PropertyGroup>
<!-- Runtime dependencies: DiscUtils packages, NLog (logging), SharpCompress and SharpZipLib. -->
<ItemGroup>
<PackageReference Include="DiscUtils.Btrfs" Version="0.15.1-ci0002" />
<PackageReference Include="DiscUtils.Core" Version="0.15.1-ci0002" />
<PackageReference Include="DiscUtils.Ext" Version="0.15.1-ci0002" />
<PackageReference Include="DiscUtils.Fat" Version="0.15.1-ci0002" />
<PackageReference Include="DiscUtils.HfsPlus" Version="0.15.1-ci0002" />
<PackageReference Include="DiscUtils.Iso9660" Version="0.15.1-ci0002" />
<PackageReference Include="DiscUtils.Ntfs" Version="0.15.1-ci0002" />
<PackageReference Include="DiscUtils.Vhd" Version="0.15.1-ci0002" />
<PackageReference Include="DiscUtils.Vhdx" Version="0.15.1-ci0002" />
<PackageReference Include="DiscUtils.Vmdk" Version="0.15.1-ci0002" />
<PackageReference Include="DiscUtils.Wim" Version="0.15.1-ci0002" />
<PackageReference Include="DiscUtils.Xfs" Version="0.15.1-ci0002" />
<PackageReference Include="NLog" Version="4.7.2" />
<PackageReference Include="SharpCompress" Version="0.25.1" />
<PackageReference Include="SharpZipLib" Version="1.2.0" />
</ItemGroup>
<!-- "Update" only adjusts an existing reference; presumably the Nerdbank.GitVersioning reference itself is declared elsewhere, e.g. in a shared props file (TODO confirm). -->
<ItemGroup>
<PackageReference Update="Nerdbank.GitVersioning" Version="3.1.91" />
</ItemGroup>
<!-- Packed at the package root; PackageLicenseFile and PackageIcon above refer to these files by name. -->
<ItemGroup>
<None Include="..\..\..\LICENSE.txt" Pack="true" PackagePath="" />
<None Include="..\..\..\icon-128.png" Pack="true" PackagePath="" />
</ItemGroup>
</Project>