Fix the code security issue and 0.5 C++ release preparation. (#274)
* Fix the code security issue and 0.5 C++ release preparation. * more fixes * vswhere
This commit is contained in:
Родитель
584099394c
Коммит
5320af1eea
|
@ -223,6 +223,20 @@ jobs:
|
|||
displayName: Unpack ONNXRuntime package.
|
||||
|
||||
- script: |
|
||||
@echo off
|
||||
set vswherepath="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe"
|
||||
for /f "usebackq delims=" %%i in (`%vswherepath% -latest -property installationPath`) do (
|
||||
if exist "%%i\Common7\Tools\vsdevcmd.bat" (
|
||||
set vsdevcmd="%%i\Common7\Tools\vsdevcmd.bat"
|
||||
)
|
||||
)
|
||||
|
||||
@echo %vsdevcmd% will be used as the VC compiler
|
||||
@echo ##vso[task.setvariable variable=vsdevcmd]%vsdevcmd%
|
||||
displayName: 'locate vsdevcmd via vswhere'
|
||||
|
||||
- script: |
|
||||
call $(vsdevcmd)
|
||||
call .\build.bat -DONNXRUNTIME_LIB_DIR=.\onnxruntime-win-x64-$(ort.version)\lib -DOCOS_ENABLE_CTEST=ON
|
||||
displayName: build the customop library with onnxruntime
|
||||
|
||||
|
|
|
@ -10,8 +10,8 @@ endif()
|
|||
|
||||
set(CPACK_PACKAGE_NAME "onnxruntime_extensions")
|
||||
set(CPACK_PACKAGE_VERSION_MAJOR "0")
|
||||
set(CPACK_PACKAGE_VERSION_MINOR "3")
|
||||
set(CPACK_PACKAGE_VERSION_PATCH "1")
|
||||
set(CPACK_PACKAGE_VERSION_MINOR "5")
|
||||
set(CPACK_PACKAGE_VERSION_PATCH "0")
|
||||
set(VERSION ${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH})
|
||||
|
||||
|
||||
|
@ -244,7 +244,7 @@ if (OCOS_ENABLE_TF_STRING)
|
|||
target_include_directories(ocos_operators PUBLIC
|
||||
${googlere2_SOURCE_DIR}
|
||||
${farmhash_SOURCE_DIR}/src)
|
||||
list(APPEND OCOS_COMPILE_DEFINITIONS ENABLE_TF_STRING NOMINMAX FARMHASH_NO_BUILTIN_EXPECT)
|
||||
list(APPEND OCOS_COMPILE_DEFINITIONS ENABLE_TF_STRING NOMINMAX FARMHASH_NO_BUILTIN_EXPECT FARMHASH_DEBUG=0)
|
||||
list(APPEND ocos_libraries re2)
|
||||
endif()
|
||||
|
||||
|
|
14
build.bat
14
build.bat
|
@ -1,14 +1,8 @@
|
|||
@ECHO OFF
|
||||
ECHO Copy this file to mybuild.bat and make any changes you deem necessary
|
||||
SETLOCAL ENABLEDELAYEDEXPANSION
|
||||
IF DEFINED VSINSTALLDIR GOTO :VSDEV_CMD
|
||||
set VCVARS="NOT/EXISTED"
|
||||
FOR %%I in (Enterprise Professional Community BuildTools^
|
||||
) DO IF EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\%%I\VC\Auxiliary\Build\vcvars64.bat" (
|
||||
SET VCVARS="%ProgramFiles(x86)%\Microsoft Visual Studio\2019\%%I\VC\Auxiliary\Build\vcvars64.bat" )
|
||||
IF NOT DEFINED VCVARS GOTO :NOT_FOUND
|
||||
|
||||
IF NOT EXIST %VCVARS% GOTO :NOT_FOUND
|
||||
ECHO Found %VCVARS%
|
||||
CALL %VCVARS%
|
||||
|
||||
:VSDEV_CMD
|
||||
|
@ -18,15 +12,15 @@ set GENERATOR="Visual Studio 17 2022"
|
|||
|
||||
:START_BUILD
|
||||
mkdir .\out\Windows\ 2>NUL
|
||||
cmake -G %GENERATOR% -A x64 %* -B out\Windows -S .
|
||||
"%VSINSTALLDIR%Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G %GENERATOR% -A x64 %* -B out\Windows -S .
|
||||
IF %ERRORLEVEL% NEQ 0 EXIT /B %ERRORLEVEL%
|
||||
cmake --build out\Windows --config RelWithDebInfo
|
||||
IF %ERRORLEVEL% NEQ 0 EXIT /B %ERRORLEVEL%
|
||||
GOTO :EOF
|
||||
|
||||
:NOT_FOUND
|
||||
ECHO "No Microsoft Visual Studio 2019 installation found!"
|
||||
ECHO " Or not run from Developer Command Prompt for VS 2022"
|
||||
ECHO "No Microsoft Visual Studio installation found!"
|
||||
ECHO " Please run build from Developer Command Prompt"
|
||||
EXIT /B 1
|
||||
|
||||
ENDLOCAL
|
||||
|
|
|
@ -466,8 +466,8 @@ struct MemoryAllocation {
|
|||
~MemoryAllocation();
|
||||
MemoryAllocation(const MemoryAllocation&) = delete;
|
||||
MemoryAllocation& operator=(const MemoryAllocation&) = delete;
|
||||
MemoryAllocation(MemoryAllocation&&);
|
||||
MemoryAllocation& operator=(MemoryAllocation&&);
|
||||
MemoryAllocation(MemoryAllocation&&) noexcept;
|
||||
MemoryAllocation& operator=(MemoryAllocation&&) noexcept;
|
||||
|
||||
void* get() { return p_; }
|
||||
size_t size() const { return size_; }
|
||||
|
|
|
@ -64,11 +64,11 @@ inline MemoryAllocation::~MemoryAllocation() {
|
|||
}
|
||||
}
|
||||
|
||||
inline MemoryAllocation::MemoryAllocation(MemoryAllocation&& o) : allocator_(nullptr), p_(nullptr), size_(0) {
|
||||
inline MemoryAllocation::MemoryAllocation(MemoryAllocation&& o) noexcept : allocator_(nullptr), p_(nullptr), size_(0) {
|
||||
*this = std::move(o);
|
||||
}
|
||||
|
||||
inline MemoryAllocation& MemoryAllocation::operator=(MemoryAllocation&& o) {
|
||||
inline MemoryAllocation& MemoryAllocation::operator=(MemoryAllocation&& o) noexcept {
|
||||
OrtAllocator* alloc = nullptr;
|
||||
void* p = nullptr;
|
||||
size_t sz = 0;
|
||||
|
|
|
@ -3,4 +3,4 @@
|
|||
# license information.
|
||||
###############################################################################
|
||||
|
||||
__version__ = "0.4.2"
|
||||
__version__ = "0.5.0"
|
||||
|
|
|
@ -174,7 +174,7 @@ uint64_t Hash64(const char* data, size_t n, uint64_t seed) {
|
|||
h ^= ByteAs64(data[2]) << 16;
|
||||
case 2:
|
||||
h ^= ByteAs64(data[1]) << 8;
|
||||
case 1:
|
||||
default: // case 1: make some code analyzer be happier.
|
||||
h ^= ByteAs64(data[0]);
|
||||
h *= m;
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ class BroadcastIteratorRight {
|
|||
}
|
||||
|
||||
struct BroadcastIteratorRightState {
|
||||
const BroadcastIteratorRight<T1, T2, T3>* parent;
|
||||
const BroadcastIteratorRight<T1, T2, T3>* parent = nullptr;
|
||||
std::vector<int64_t> index1;
|
||||
const T1* p1;
|
||||
const T1* end_;
|
||||
|
|
|
@ -30,7 +30,7 @@ void ECMARegexSplitImpl(const std::string& input, const std::regex& pattern,
|
|||
std::vector<std::string_view>& tokens,
|
||||
std::vector<T>& begin_offsets,
|
||||
std::vector<T>& end_offsets) {
|
||||
int prev_pos = 0;
|
||||
size_t prev_pos = 0;
|
||||
for (auto it = std::sregex_iterator(input.begin(), input.end(), pattern); it != std::sregex_iterator(); it++) {
|
||||
int cur_pos = it->position();
|
||||
int matched_length = it->length();
|
||||
|
|
|
@ -17,7 +17,7 @@ void KernelStringLower::Compute(OrtKernelContext* context) {
|
|||
GetTensorMutableDataString(api_, ort_, context, input_X, X);
|
||||
|
||||
for (int64_t i = 0; i < (int64_t)X.size(); ++i) {
|
||||
std::transform(X[i].begin(), X[i].end(), X[i].begin(), ToLower);
|
||||
std::transform(X[i].begin(), X[i].end(), X[i].begin(), [](char c) {return static_cast<char>(ToLower(c));});
|
||||
}
|
||||
|
||||
OrtTensorDimensions dimensions(ort_, input_X);
|
||||
|
|
|
@ -97,8 +97,8 @@ void WordpieceTokenizer::GreedySearch(const ustring& token, std::vector<ustring>
|
|||
return;
|
||||
}
|
||||
|
||||
int start = 0;
|
||||
int end = -1;
|
||||
size_t start = 0;
|
||||
size_t end = 0;
|
||||
ustring substr;
|
||||
for (; start < token.size();) {
|
||||
end = token.size();
|
||||
|
@ -146,12 +146,12 @@ void TruncateStrategy::Truncate(std::vector<int64_t>& ids1, std::vector<int64_t>
|
|||
case TruncateStrategyType::LONGEST_FROM_BACK:
|
||||
|
||||
if ((ids1_keep_len > half_max_len) && (ids2_keep_len > half_max_len)) {
|
||||
ids1_keep_len = max_len - half_max_len;
|
||||
ids1_keep_len = static_cast<size_t>(max_len) - half_max_len;
|
||||
ids2_keep_len = half_max_len;
|
||||
} else if (ids2_keep_len > ids1_keep_len) {
|
||||
ids2_keep_len = max_len - ids1_keep_len;
|
||||
ids2_keep_len = static_cast<size_t>(max_len) - ids1_keep_len;
|
||||
} else {
|
||||
ids1_keep_len = max_len - ids2_keep_len;
|
||||
ids1_keep_len = static_cast<size_t>(max_len) - ids2_keep_len;
|
||||
}
|
||||
|
||||
if (strategy_ == TruncateStrategyType::LONGEST_FIRST) {
|
||||
|
@ -179,6 +179,7 @@ BertTokenizer::BertTokenizer(
|
|||
const std::string& truncation_strategy)
|
||||
: do_basic_tokenize_(do_basic_tokenize), max_length_(max_len)
|
||||
, truncate_(std::make_unique<TruncateStrategy>(truncation_strategy)) {
|
||||
|
||||
vocab_ = std::make_shared<BertTokenizerVocab>(vocab);
|
||||
|
||||
if (do_basic_tokenize) {
|
||||
|
|
|
@ -19,11 +19,11 @@ class BertTokenizerDecoder {
|
|||
|
||||
private:
|
||||
std::string unk_token_;
|
||||
int32_t unk_token_id_;
|
||||
int32_t sep_token_id_;
|
||||
int32_t pad_token_id_;
|
||||
int32_t cls_token_id_;
|
||||
int32_t mask_token_id_;
|
||||
int32_t unk_token_id_ = -1;
|
||||
int32_t sep_token_id_ = -1;
|
||||
int32_t pad_token_id_ = -1;
|
||||
int32_t cls_token_id_ = -1;
|
||||
int32_t mask_token_id_ = -1;
|
||||
std::string suffix_indicator_;
|
||||
std::vector<std::string_view> vocab_;
|
||||
std::string raw_vocab_;
|
||||
|
|
|
@ -535,7 +535,7 @@ std::vector<int64_t> KernelBpeTokenizer::Tokenize(const ustring& input, int64_t
|
|||
}
|
||||
}
|
||||
|
||||
return std::move(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
void KernelBpeTokenizer::Compute(OrtKernelContext* context) {
|
||||
|
|
|
@ -28,8 +28,8 @@ void KernelWordpieceTokenizer_Split(const std::u32string& suffix_indicator,
|
|||
const std::u32string& text,
|
||||
std::vector<std::u32string>& words) {
|
||||
ustring space(" ");
|
||||
int pos = 0;
|
||||
int last = 0;
|
||||
size_t pos = 0;
|
||||
size_t last = 0;
|
||||
words.clear();
|
||||
for (; pos < text.size(); ++pos) {
|
||||
if (text[pos] == space[0]) {
|
||||
|
@ -57,7 +57,7 @@ void KernelWordpieceTokenizer_Tokenizer(const std::unordered_map<std::u32string,
|
|||
std::vector<std::u32string> words;
|
||||
bool is_bad;
|
||||
bool no_existing_rows = n_existing_rows == 0;
|
||||
int start, end;
|
||||
size_t start = 0, end = 0;
|
||||
std::u32string substr;
|
||||
int64_t cur_substr;
|
||||
tokens.clear();
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
struct PyCustomOpDef {
|
||||
std::string op_type;
|
||||
uint64_t obj_id;
|
||||
uint64_t obj_id = 0;
|
||||
std::vector<int> input_types;
|
||||
std::vector<int> output_types;
|
||||
std::vector<std::string> attrs;
|
||||
|
@ -88,7 +88,7 @@ struct PyCustomOpFactory : Ort::CustomOpBase<PyCustomOpFactory, PyCustomOpKernel
|
|||
return static_cast<ONNXTensorElementDataType>(opdef_->output_types[idx]);
|
||||
}
|
||||
|
||||
const PyCustomOpDef* opdef_;
|
||||
const PyCustomOpDef* opdef_ = nullptr;
|
||||
std::string op_type_;
|
||||
std::string op_domain_;
|
||||
};
|
||||
|
|
Загрузка…
Ссылка в новой задаче