89 строки
3.2 KiB
XML
89 строки
3.2 KiB
XML
<Project>
|
|
<!--
  CompressFile: inline task that converts each input text file into a
  Deflate-compressed text file written to the item's Destination metadata.

  Input format (one entry per non-empty line): "<text> <id>", split at the FIRST
  space. <id> must parse as an integer and must not be lower than the number of
  entry lines already written, so ids have to appear in increasing order.

  Output format (before compression):
    line 0      "Capacity: <n>" where n = 1 + number of '\n' characters in the input
    line id+1   the <text> part of the entry with that id; ids that are skipped
                in the input are emitted as empty lines so that the line position
                always matches the id.

  On a malformed line an error is logged, processing of that file stops and the
  partially written Destination file is deleted.
-->
<UsingTask TaskName="CompressFile"
           TaskFactory="RoslynCodeTaskFactory"
           AssemblyFile="$(MSBuildToolsPath)\Microsoft.Build.Tasks.Core.dll">
  <ParameterGroup>
    <!-- Files to compress. The code reads the FullPath and Destination metadata of every item. -->
    <Files ParameterType="Microsoft.Build.Framework.ITaskItem[]" Required="true" />
  </ParameterGroup>
  <Task>
    <Using Namespace="System.Globalization" />
    <Using Namespace="System.IO" />
    <Using Namespace="System.IO.Compression" />
    <Code Type="Fragment" Language="cs">
      <![CDATA[
      foreach (var file in Files)
      {
          string fileName = file.GetMetadata("FullPath");
          string destination = file.GetMetadata("Destination");

          // Load the source once; the same text is used both for the capacity
          // count and for the line-by-line conversion below.
          string fileContent = File.ReadAllText(fileName);

          // Capacity is one more than the number of '\n' characters in the input.
          int capacity = 1;
          for (int eolIndex = fileContent.IndexOf('\n'); eolIndex >= 0; eolIndex = fileContent.IndexOf('\n', eolIndex + 1))
          {
              capacity++;
          }

          bool isValid = true;

          using (var reader = new StringReader(fileContent))
          using (var destStream = new DeflateStream(File.Create(destination), CompressionLevel.Optimal))
          using (var streamWriter = new StreamWriter(destStream))
          {
              streamWriter.WriteLine($"Capacity: {capacity.ToString(CultureInfo.InvariantCulture)}");

              string line;
              int destLineNumber = 0;

              while ((line = reader.ReadLine()) != null)
              {
                  if (line.Length == 0) { continue; }

                  int index = line.IndexOf(' ');

                  // The id is machine-readable data, so parse it with the invariant
                  // culture instead of whatever culture the build happens to run under.
                  if (index <= 0 ||
                      index == line.Length - 1 ||
                      !int.TryParse(line.Substring(index + 1), NumberStyles.Integer, CultureInfo.InvariantCulture, out int id) ||
                      id < destLineNumber)
                  {
                      Log.LogError($"Invalid format in the file {file.GetMetadata("FullPath")} line {line}");
                      isValid = false;
                      break;
                  }

                  while (destLineNumber < id)
                  {
                      // ensure id always aligns with the line number
                      streamWriter.WriteLine();
                      destLineNumber++;
                  }

                  streamWriter.WriteLine(line.Substring(0, index));
                  destLineNumber++;
              }
          }

          if (!isValid)
          {
              // Do not leave a truncated output behind: it would be newer than the
              // input, so an Inputs/Outputs up-to-date check could accept it on the
              // next build and the incomplete data would be embedded.
              File.Delete(destination);
          }
      }
      ]]>
    </Code>
  </Task>
</UsingTask>
|
|
|
|
<!--
  Runs the CompressFile task over every TokenizerDataEmbeddedResource item and
  adds each produced Destination file to EmbeddedResource. The logical resource
  name is the source item's file name and extension followed by ".deflate".
  Inputs/Outputs make the target incremental: it is skipped when every
  Destination file is up to date with respect to its source item.
-->
<Target Name="CompressTiktokenData"
        DependsOnTargets="_EnsureTokenizerDataEmbeddedResourceDestination"
        BeforeTargets="AssignTargetPaths"
        Inputs="@(TokenizerDataEmbeddedResource)"
        Outputs="@(TokenizerDataEmbeddedResource->'%(Destination)')">

  <CompressFile Files="@(TokenizerDataEmbeddedResource)" />

  <ItemGroup>
    <EmbeddedResource Include="@(TokenizerDataEmbeddedResource->'%(Destination)')">
      <LogicalName>%(FileName)%(Extension).deflate</LogicalName>
    </EmbeddedResource>
  </ItemGroup>
</Target>
|
|
|
|
<!--
  Gives every TokenizerDataEmbeddedResource item that has no Destination metadata
  a default one: "<FileName>.deflate" under $(IntermediateOutputPath).
  Items that already specify a Destination are left unchanged.
-->
<Target Name="_EnsureTokenizerDataEmbeddedResourceDestination">
  <ItemGroup>
    <TokenizerDataEmbeddedResource Condition="'%(TokenizerDataEmbeddedResource.Destination)' == ''">
      <Destination>$(IntermediateOutputPath)%(FileName).deflate</Destination>
    </TokenizerDataEmbeddedResource>
  </ItemGroup>
</Target>
|
|
</Project>
|