Merge branch 'main' into main
This commit is contained in:
Коммит
4b5dab92e0
|
@ -63,3 +63,59 @@ steps:
|
|||
SessionTimeout: 90
|
||||
ServiceEndpointUrl: 'https://api.esrp.microsoft.com/api/v2'
|
||||
MaxConcurrency: 25
|
||||
|
||||
- task: PowerShell@2
  displayName: 'Signature validation for signed file(s)'
  inputs:
    targetType: 'inline'
    # The inline script checks the Authenticode signature of every file under
    # FolderPath that matches one of the comma-separated entries in Pattern.
    # It exits 0 when Pattern is empty or every matched file reports "Valid",
    # and exits 1 as soon as the final summary sees a recorded failure.
    script: |
      Write-Host "FolderPath: ${{ parameters.FolderPath }}"
      Write-Host "Pattern(s): ${{ parameters.Pattern }}"

      # Nothing to validate when no pattern was supplied.
      if ("${{ parameters.Pattern }}" -eq "")
      {
        Write-Host "Pattern is empty."
        exit 0
      }

      $valid_flag=$true
      $normal_sign_status="Valid"

      $patterns="${{ parameters.Pattern }}" -split ','

      foreach($pattern_original in $patterns)
      {
        $pattern=$pattern_original.Trim()

        # Skip empty fragments (e.g. a trailing comma or ", ," in Pattern) so that
        # Get-ChildItem is never invoked with a bare ".\" filter.
        if ($pattern -eq "")
        {
          continue
        }

        Write-Host "Validating pattern:" $pattern

        # FolderPath is quoted so that a path containing spaces stays a single argument.
        $file_names=Get-ChildItem -Path "${{ parameters.FolderPath }}" .\$pattern -Name -Recurse -Force

        foreach($file in $file_names)
        {
          # -Name returns names rather than full paths, so rebuild the path under FolderPath.
          $file_path=Join-Path "${{ parameters.FolderPath }}" -ChildPath $file
          $sign=Get-AuthenticodeSignature -FilePath $file_path
          $sign_status=$sign.Status.ToString()
          Write-Host "File:" $file
          Write-Host "Signature Status:" $sign_status
          if ($sign_status -ne $normal_sign_status)
          {
            Write-Host "File" $file "does not have valid signature."
            Write-Host "Signature status:" $sign.status
            Write-Host "Signature message:" $sign.StatusMessage
            $valid_flag=$false
            # Leaves only the inner file loop; the failure is already recorded in
            # $valid_flag, and the remaining patterns are still processed.
            break
          }
        }
      }

      if ($valid_flag -eq $false)
      {
        Write-Host "Signature validation failed."
        exit 1
      }
      else
      {
        Write-Host "Signature validation passed."
        exit 0
      }
    workingDirectory: ${{ parameters.FolderPath }}
|
|
@ -416,7 +416,53 @@ std::vector<int64_t> KernelBpeTokenizer::SpmTokenize(ustring& input,
|
|||
// Get byte encodings prior to performing BPE
|
||||
std::list<std::pair<uint32_t, uint32_t>> byte_list;
|
||||
|
||||
while (res.size() < max_length && char_pos < ustr.length()) {
|
||||
while (res.size() < max_length && char_pos <= ustr.length()) {
|
||||
bool split_now = false;
|
||||
if (char_pos == ustr.length()) {
|
||||
split_now = true;
|
||||
}
|
||||
|
||||
// Temporary split logic; it will be replaced by regex-based splitting once that is implemented.
|
||||
if (!split_now && byte_list.size() > 10) {
|
||||
auto is_split_char = [](char32_t ch) {
|
||||
return ch == U' ' || ch == U'\n' || ch == U'\r' || ch == U'▁';
|
||||
};
|
||||
if (!is_split_char(ustr[char_pos - 1]) && is_split_char(ustr[char_pos])) {
|
||||
split_now = true;
|
||||
}
|
||||
// Force a split once byte_list exceeds 100 entries: in extreme inputs an overly long byte_list is slow to process.
|
||||
if (!split_now && byte_list.size() > 100) {
|
||||
split_now = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (split_now) {
|
||||
// Perform BPE
|
||||
bbpe_tokenizer_->PerformBPE(byte_list);
|
||||
|
||||
// Add output to result
|
||||
for (auto p : byte_list) {
|
||||
if (res.size() >= max_length) {
|
||||
break;
|
||||
}
|
||||
|
||||
res.push_back(p.first);
|
||||
|
||||
if (compute_offset_mapping) {
|
||||
offset_mapping.emplace_back(std::make_pair(
|
||||
offset,
|
||||
ort_extensions::narrow<size_t>(offset + (size_t)p.second)));
|
||||
offset += ((size_t)p.second);
|
||||
}
|
||||
}
|
||||
|
||||
byte_list.clear();
|
||||
}
|
||||
|
||||
if (char_pos == ustr.length()) {
|
||||
break;
|
||||
}
|
||||
|
||||
auto chr = ustr[char_pos];
|
||||
if (chr == U' ') {
|
||||
chr = 0x2581; // UTF-8 string '\xe2\x96\x81'
|
||||
|
@ -436,26 +482,6 @@ std::vector<int64_t> KernelBpeTokenizer::SpmTokenize(ustring& input,
|
|||
|
||||
char_pos++;
|
||||
}
|
||||
{
|
||||
// Perform BPE
|
||||
bbpe_tokenizer_->PerformBPE(byte_list);
|
||||
|
||||
// Add output to result
|
||||
for (auto p : byte_list) {
|
||||
if (res.size() >= max_length) {
|
||||
break;
|
||||
}
|
||||
|
||||
res.push_back(p.first);
|
||||
|
||||
if (compute_offset_mapping) {
|
||||
offset_mapping.emplace_back(std::make_pair(
|
||||
offset,
|
||||
ort_extensions::narrow<size_t>(offset + (size_t)p.second)));
|
||||
offset += ((size_t)p.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (compute_offset_mapping) {
|
||||
// Add offset mappings for input in this instance to list of offset mappings for all inputs
|
||||
|
@ -463,6 +489,12 @@ std::vector<int64_t> KernelBpeTokenizer::SpmTokenize(ustring& input,
|
|||
}
|
||||
}
|
||||
|
||||
if (res.size() > 0 && res.front() == bos_token_id_) {
|
||||
if (add_bos_token_.has_value() && add_bos_token_.value() == false) {
|
||||
res.erase(res.begin());
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче