diff --git a/.az/mshost.yaml b/.az/mshost.yaml index a5daa9fc..1bbe6ff2 100644 --- a/.az/mshost.yaml +++ b/.az/mshost.yaml @@ -223,6 +223,20 @@ jobs: displayName: Unpack ONNXRuntime package. - script: | + @echo off + set vswherepath="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" + for /f "usebackq delims=" %%i in (`%vswherepath% -latest -property installationPath`) do ( + if exist "%%i\Common7\Tools\vsdevcmd.bat" ( + set vsdevcmd="%%i\Common7\Tools\vsdevcmd.bat" + ) + ) + + @echo %vsdevcmd% will be used as the VC compiler + @echo ##vso[task.setvariable variable=vsdevcmd]%vsdevcmd% + displayName: 'locate vsdevcmd via vswhere' + + - script: | + call $(vsdevcmd) call .\build.bat -DONNXRUNTIME_LIB_DIR=.\onnxruntime-win-x64-$(ort.version)\lib -DOCOS_ENABLE_CTEST=ON displayName: build the customop library with onnxruntime diff --git a/CMakeLists.txt b/CMakeLists.txt index 41b07522..500b9de0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,8 +10,8 @@ endif() set(CPACK_PACKAGE_NAME "onnxruntime_extensions") set(CPACK_PACKAGE_VERSION_MAJOR "0") -set(CPACK_PACKAGE_VERSION_MINOR "3") -set(CPACK_PACKAGE_VERSION_PATCH "1") +set(CPACK_PACKAGE_VERSION_MINOR "5") +set(CPACK_PACKAGE_VERSION_PATCH "0") set(VERSION ${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}) @@ -244,7 +244,7 @@ if (OCOS_ENABLE_TF_STRING) target_include_directories(ocos_operators PUBLIC ${googlere2_SOURCE_DIR} ${farmhash_SOURCE_DIR}/src) - list(APPEND OCOS_COMPILE_DEFINITIONS ENABLE_TF_STRING NOMINMAX FARMHASH_NO_BUILTIN_EXPECT) + list(APPEND OCOS_COMPILE_DEFINITIONS ENABLE_TF_STRING NOMINMAX FARMHASH_NO_BUILTIN_EXPECT FARMHASH_DEBUG=0) list(APPEND ocos_libraries re2) endif() diff --git a/build.bat b/build.bat index c8dfd3ef..15f86ae2 100644 --- a/build.bat +++ b/build.bat @@ -1,14 +1,8 @@ @ECHO OFF -ECHO Copy this file to mybuild.bat and make any changes you deem necessary SETLOCAL ENABLEDELAYEDEXPANSION IF DEFINED VSINSTALLDIR GOTO :VSDEV_CMD -set VCVARS="NOT/EXISTED" -FOR %%I in (Enterprise Professional Community BuildTools^ - ) DO IF EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\%%I\VC\Auxiliary\Build\vcvars64.bat" ( - SET VCVARS="%ProgramFiles(x86)%\Microsoft Visual Studio\2019\%%I\VC\Auxiliary\Build\vcvars64.bat" ) +IF NOT DEFINED VCVARS GOTO :NOT_FOUND -IF NOT EXIST %VCVARS% GOTO :NOT_FOUND -ECHO Found %VCVARS% CALL %VCVARS% :VSDEV_CMD @@ -18,15 +12,15 @@ set GENERATOR="Visual Studio 17 2022" :START_BUILD mkdir .\out\Windows\ 2>NUL -cmake -G %GENERATOR% -A x64 %* -B out\Windows -S . +"%VSINSTALLDIR%Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G %GENERATOR% -A x64 %* -B out\Windows -S . IF %ERRORLEVEL% NEQ 0 EXIT /B %ERRORLEVEL% cmake --build out\Windows --config RelWithDebInfo IF %ERRORLEVEL% NEQ 0 EXIT /B %ERRORLEVEL% GOTO :EOF :NOT_FOUND -ECHO "No Microsoft Visual Studio 2019 installation found!" -ECHO " Or not run from Developer Command Prompt for VS 2022" +ECHO "No Microsoft Visual Studio installation found!" +ECHO " Please run build from Developer Command Prompt" EXIT /B 1 ENDLOCAL diff --git a/includes/onnxruntime/onnxruntime_cxx_api.h b/includes/onnxruntime/onnxruntime_cxx_api.h index 9907c56a..0f7ae0ac 100644 --- a/includes/onnxruntime/onnxruntime_cxx_api.h +++ b/includes/onnxruntime/onnxruntime_cxx_api.h @@ -466,8 +466,8 @@ struct MemoryAllocation { ~MemoryAllocation(); MemoryAllocation(const MemoryAllocation&) = delete; MemoryAllocation& operator=(const MemoryAllocation&) = delete; - MemoryAllocation(MemoryAllocation&&); - MemoryAllocation& operator=(MemoryAllocation&&); + MemoryAllocation(MemoryAllocation&&) noexcept; + MemoryAllocation& operator=(MemoryAllocation&&) noexcept; void* get() { return p_; } size_t size() const { return size_; } diff --git a/includes/onnxruntime/onnxruntime_cxx_inline.h b/includes/onnxruntime/onnxruntime_cxx_inline.h index b4a733b3..b33daff6 100644 --- a/includes/onnxruntime/onnxruntime_cxx_inline.h +++ b/includes/onnxruntime/onnxruntime_cxx_inline.h @@ -64,11 +64,11 @@ inline MemoryAllocation::~MemoryAllocation() { } } -inline MemoryAllocation::MemoryAllocation(MemoryAllocation&& o) : allocator_(nullptr), p_(nullptr), size_(0) { +inline MemoryAllocation::MemoryAllocation(MemoryAllocation&& o) noexcept : allocator_(nullptr), p_(nullptr), size_(0) { *this = std::move(o); } -inline MemoryAllocation& MemoryAllocation::operator=(MemoryAllocation&& o) { +inline MemoryAllocation& MemoryAllocation::operator=(MemoryAllocation&& o) noexcept { OrtAllocator* alloc = nullptr; void* p = nullptr; size_t sz = 0; diff --git a/onnxruntime_extensions/_version.py b/onnxruntime_extensions/_version.py index 038a19f7..60139ccd 100644 --- a/onnxruntime_extensions/_version.py +++ b/onnxruntime_extensions/_version.py @@ -3,4 +3,4 @@ # license information. ############################################################################### -__version__ = "0.4.2" +__version__ = "0.5.0" diff --git a/operators/string_utils.cc b/operators/string_utils.cc index 47eed78e..ecb67132 100644 --- a/operators/string_utils.cc +++ b/operators/string_utils.cc @@ -174,7 +174,7 @@ uint64_t Hash64(const char* data, size_t n, uint64_t seed) { h ^= ByteAs64(data[2]) << 16; case 2: h ^= ByteAs64(data[1]) << 8; - case 1: + default: // case 1: make some code analyzer be happier. h ^= ByteAs64(data[0]); h *= m; } diff --git a/operators/text/op_equal_impl.hpp b/operators/text/op_equal_impl.hpp index 9186cf46..b29ba546 100644 --- a/operators/text/op_equal_impl.hpp +++ b/operators/text/op_equal_impl.hpp @@ -37,7 +37,7 @@ class BroadcastIteratorRight { } struct BroadcastIteratorRightState { - const BroadcastIteratorRight* parent; + const BroadcastIteratorRight* parent = nullptr; std::vector index1; const T1* p1; const T1* end_; diff --git a/operators/text/string_ecmaregex_split.hpp b/operators/text/string_ecmaregex_split.hpp index e1ad0968..0fa142e6 100644 --- a/operators/text/string_ecmaregex_split.hpp +++ b/operators/text/string_ecmaregex_split.hpp @@ -30,7 +30,7 @@ void ECMARegexSplitImpl(const std::string& input, const std::regex& pattern, std::vector& tokens, std::vector& begin_offsets, std::vector& end_offsets) { - int prev_pos = 0; + size_t prev_pos = 0; for (auto it = std::sregex_iterator(input.begin(), input.end(), pattern); it != std::sregex_iterator(); it++) { int cur_pos = it->position(); int matched_length = it->length(); diff --git a/operators/text/string_lower.cc b/operators/text/string_lower.cc index de35a030..b5dc21ed 100644 --- a/operators/text/string_lower.cc +++ b/operators/text/string_lower.cc @@ -17,7 +17,7 @@ void KernelStringLower::Compute(OrtKernelContext* context) { GetTensorMutableDataString(api_, ort_, context, input_X, X); for (int64_t i = 0; i < (int64_t)X.size(); ++i) { - std::transform(X[i].begin(), X[i].end(), X[i].begin(), ToLower); + std::transform(X[i].begin(), X[i].end(), X[i].begin(), [](char c) {return static_cast(ToLower(c));}); } OrtTensorDimensions dimensions(ort_, input_X); diff --git a/operators/tokenizer/bert_tokenizer.cc b/operators/tokenizer/bert_tokenizer.cc index 5e600a95..0ff031b7 100644 --- a/operators/tokenizer/bert_tokenizer.cc +++ b/operators/tokenizer/bert_tokenizer.cc @@ -97,8 +97,8 @@ void WordpieceTokenizer::GreedySearch(const ustring& token, std::vector return; } - int start = 0; - int end = -1; + size_t start = 0; + size_t end = 0; ustring substr; for (; start < token.size();) { end = token.size(); @@ -146,12 +146,12 @@ void TruncateStrategy::Truncate(std::vector& ids1, std::vector case TruncateStrategyType::LONGEST_FROM_BACK: if ((ids1_keep_len > half_max_len) && (ids2_keep_len > half_max_len)) { - ids1_keep_len = max_len - half_max_len; + ids1_keep_len = static_cast(max_len) - half_max_len; ids2_keep_len = half_max_len; } else if (ids2_keep_len > ids1_keep_len) { - ids2_keep_len = max_len - ids1_keep_len; + ids2_keep_len = static_cast(max_len) - ids1_keep_len; } else { - ids1_keep_len = max_len - ids2_keep_len; + ids1_keep_len = static_cast(max_len) - ids2_keep_len; } if (strategy_ == TruncateStrategyType::LONGEST_FIRST) { @@ -179,6 +179,7 @@ BertTokenizer::BertTokenizer( const std::string& truncation_strategy) : do_basic_tokenize_(do_basic_tokenize), max_length_(max_len) , truncate_(std::make_unique(truncation_strategy)) { + vocab_ = std::make_shared(vocab); if (do_basic_tokenize) { diff --git a/operators/tokenizer/bert_tokenizer_decoder.hpp b/operators/tokenizer/bert_tokenizer_decoder.hpp index 353a398b..7533758a 100644 --- a/operators/tokenizer/bert_tokenizer_decoder.hpp +++ b/operators/tokenizer/bert_tokenizer_decoder.hpp @@ -19,11 +19,11 @@ class BertTokenizerDecoder { private: std::string unk_token_; - int32_t unk_token_id_; - int32_t sep_token_id_; - int32_t pad_token_id_; - int32_t cls_token_id_; - int32_t mask_token_id_; + int32_t unk_token_id_ = -1; + int32_t sep_token_id_ = -1; + int32_t pad_token_id_ = -1; + int32_t cls_token_id_ = -1; + int32_t mask_token_id_ = -1; std::string suffix_indicator_; std::vector vocab_; std::string raw_vocab_; diff --git a/operators/tokenizer/gpt2_tokenizer.cc b/operators/tokenizer/gpt2_tokenizer.cc index a9d70a5b..2151581e 100644 --- a/operators/tokenizer/gpt2_tokenizer.cc +++ b/operators/tokenizer/gpt2_tokenizer.cc @@ -535,7 +535,7 @@ std::vector KernelBpeTokenizer::Tokenize(const ustring& input, int64_t } } - return std::move(res); + return res; } void KernelBpeTokenizer::Compute(OrtKernelContext* context) { diff --git a/operators/tokenizer/wordpiece_tokenizer.cc b/operators/tokenizer/wordpiece_tokenizer.cc index 4822535b..9575ae7b 100644 --- a/operators/tokenizer/wordpiece_tokenizer.cc +++ b/operators/tokenizer/wordpiece_tokenizer.cc @@ -28,8 +28,8 @@ void KernelWordpieceTokenizer_Split(const std::u32string& suffix_indicator, const std::u32string& text, std::vector& words) { ustring space(" "); - int pos = 0; - int last = 0; + size_t pos = 0; + size_t last = 0; words.clear(); for (; pos < text.size(); ++pos) { if (text[pos] == space[0]) { @@ -57,7 +57,7 @@ void KernelWordpieceTokenizer_Tokenizer(const std::unordered_map words; bool is_bad; bool no_existing_rows = n_existing_rows == 0; - int start, end; + size_t start = 0, end = 0; std::u32string substr; int64_t cur_substr; tokens.clear(); diff --git a/pyop/pykernel.h b/pyop/pykernel.h index e524a0af..30d42b05 100644 --- a/pyop/pykernel.h +++ b/pyop/pykernel.h @@ -9,7 +9,7 @@ struct PyCustomOpDef { std::string op_type; - uint64_t obj_id; + uint64_t obj_id = 0; std::vector input_types; std::vector output_types; std::vector attrs; @@ -88,7 +88,7 @@ struct PyCustomOpFactory : Ort::CustomOpBase(opdef_->output_types[idx]); } - const PyCustomOpDef* opdef_; + const PyCustomOpDef* opdef_ = nullptr; std::string op_type_; std::string op_domain_; };