Fix the code security issue and prepare the 0.5 C++ release. (#274)

* Fix the code security issue and prepare the 0.5 C++ release.

* more fixes

* locate vsdevcmd via vswhere
This commit is contained in:
Wenbing Li 2022-08-02 10:09:35 -07:00 коммит произвёл GitHub
Родитель 584099394c
Коммит 5320af1eea
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
15 изменённых файлов: 47 добавлений и 38 удалений

Просмотреть файл

@ -223,6 +223,20 @@ jobs:
displayName: Unpack ONNXRuntime package.
- script: |
@echo off
set vswherepath="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe"
for /f "usebackq delims=" %%i in (`%vswherepath% -latest -property installationPath`) do (
if exist "%%i\Common7\Tools\vsdevcmd.bat" (
set vsdevcmd="%%i\Common7\Tools\vsdevcmd.bat"
)
)
@echo %vsdevcmd% will be used as the VC compiler
@echo ##vso[task.setvariable variable=vsdevcmd]%vsdevcmd%
displayName: 'locate vsdevcmd via vswhere'
- script: |
call $(vsdevcmd)
call .\build.bat -DONNXRUNTIME_LIB_DIR=.\onnxruntime-win-x64-$(ort.version)\lib -DOCOS_ENABLE_CTEST=ON
displayName: build the customop library with onnxruntime

Просмотреть файл

@ -10,8 +10,8 @@ endif()
set(CPACK_PACKAGE_NAME "onnxruntime_extensions")
set(CPACK_PACKAGE_VERSION_MAJOR "0")
set(CPACK_PACKAGE_VERSION_MINOR "3")
set(CPACK_PACKAGE_VERSION_PATCH "1")
set(CPACK_PACKAGE_VERSION_MINOR "5")
set(CPACK_PACKAGE_VERSION_PATCH "0")
set(VERSION ${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH})
@ -244,7 +244,7 @@ if (OCOS_ENABLE_TF_STRING)
target_include_directories(ocos_operators PUBLIC
${googlere2_SOURCE_DIR}
${farmhash_SOURCE_DIR}/src)
list(APPEND OCOS_COMPILE_DEFINITIONS ENABLE_TF_STRING NOMINMAX FARMHASH_NO_BUILTIN_EXPECT)
list(APPEND OCOS_COMPILE_DEFINITIONS ENABLE_TF_STRING NOMINMAX FARMHASH_NO_BUILTIN_EXPECT FARMHASH_DEBUG=0)
list(APPEND ocos_libraries re2)
endif()

Просмотреть файл

@ -1,14 +1,8 @@
@ECHO OFF
ECHO Copy this file to mybuild.bat and make any changes you deem necessary
SETLOCAL ENABLEDELAYEDEXPANSION
IF DEFINED VSINSTALLDIR GOTO :VSDEV_CMD
set VCVARS="NOT/EXISTED"
FOR %%I in (Enterprise Professional Community BuildTools^
) DO IF EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\%%I\VC\Auxiliary\Build\vcvars64.bat" (
SET VCVARS="%ProgramFiles(x86)%\Microsoft Visual Studio\2019\%%I\VC\Auxiliary\Build\vcvars64.bat" )
IF NOT DEFINED VCVARS GOTO :NOT_FOUND
IF NOT EXIST %VCVARS% GOTO :NOT_FOUND
ECHO Found %VCVARS%
CALL %VCVARS%
:VSDEV_CMD
@ -18,15 +12,15 @@ set GENERATOR="Visual Studio 17 2022"
:START_BUILD
mkdir .\out\Windows\ 2>NUL
cmake -G %GENERATOR% -A x64 %* -B out\Windows -S .
"%VSINSTALLDIR%Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe" -G %GENERATOR% -A x64 %* -B out\Windows -S .
IF %ERRORLEVEL% NEQ 0 EXIT /B %ERRORLEVEL%
cmake --build out\Windows --config RelWithDebInfo
IF %ERRORLEVEL% NEQ 0 EXIT /B %ERRORLEVEL%
GOTO :EOF
:NOT_FOUND
ECHO "No Microsoft Visual Studio 2019 installation found!"
ECHO " Or not run from Developer Command Prompt for VS 2022"
ECHO "No Microsoft Visual Studio installation found!"
ECHO " Please run build from Developer Command Prompt"
EXIT /B 1
ENDLOCAL

Просмотреть файл

@ -466,8 +466,8 @@ struct MemoryAllocation {
~MemoryAllocation();
MemoryAllocation(const MemoryAllocation&) = delete;
MemoryAllocation& operator=(const MemoryAllocation&) = delete;
MemoryAllocation(MemoryAllocation&&);
MemoryAllocation& operator=(MemoryAllocation&&);
MemoryAllocation(MemoryAllocation&&) noexcept;
MemoryAllocation& operator=(MemoryAllocation&&) noexcept;
void* get() { return p_; }
size_t size() const { return size_; }

Просмотреть файл

@ -64,11 +64,11 @@ inline MemoryAllocation::~MemoryAllocation() {
}
}
inline MemoryAllocation::MemoryAllocation(MemoryAllocation&& o) : allocator_(nullptr), p_(nullptr), size_(0) {
inline MemoryAllocation::MemoryAllocation(MemoryAllocation&& o) noexcept : allocator_(nullptr), p_(nullptr), size_(0) {
*this = std::move(o);
}
inline MemoryAllocation& MemoryAllocation::operator=(MemoryAllocation&& o) {
inline MemoryAllocation& MemoryAllocation::operator=(MemoryAllocation&& o) noexcept {
OrtAllocator* alloc = nullptr;
void* p = nullptr;
size_t sz = 0;

Просмотреть файл

@ -3,4 +3,4 @@
# license information.
###############################################################################
__version__ = "0.4.2"
__version__ = "0.5.0"

Просмотреть файл

@ -174,7 +174,7 @@ uint64_t Hash64(const char* data, size_t n, uint64_t seed) {
h ^= ByteAs64(data[2]) << 16;
case 2:
h ^= ByteAs64(data[1]) << 8;
case 1:
default: // case 1: make some code analyzer be happier.
h ^= ByteAs64(data[0]);
h *= m;
}

Просмотреть файл

@ -37,7 +37,7 @@ class BroadcastIteratorRight {
}
struct BroadcastIteratorRightState {
const BroadcastIteratorRight<T1, T2, T3>* parent;
const BroadcastIteratorRight<T1, T2, T3>* parent = nullptr;
std::vector<int64_t> index1;
const T1* p1;
const T1* end_;

Просмотреть файл

@ -30,7 +30,7 @@ void ECMARegexSplitImpl(const std::string& input, const std::regex& pattern,
std::vector<std::string_view>& tokens,
std::vector<T>& begin_offsets,
std::vector<T>& end_offsets) {
int prev_pos = 0;
size_t prev_pos = 0;
for (auto it = std::sregex_iterator(input.begin(), input.end(), pattern); it != std::sregex_iterator(); it++) {
int cur_pos = it->position();
int matched_length = it->length();

Просмотреть файл

@ -17,7 +17,7 @@ void KernelStringLower::Compute(OrtKernelContext* context) {
GetTensorMutableDataString(api_, ort_, context, input_X, X);
for (int64_t i = 0; i < (int64_t)X.size(); ++i) {
std::transform(X[i].begin(), X[i].end(), X[i].begin(), ToLower);
std::transform(X[i].begin(), X[i].end(), X[i].begin(), [](char c) {return static_cast<char>(ToLower(c));});
}
OrtTensorDimensions dimensions(ort_, input_X);

Просмотреть файл

@ -97,8 +97,8 @@ void WordpieceTokenizer::GreedySearch(const ustring& token, std::vector<ustring>
return;
}
int start = 0;
int end = -1;
size_t start = 0;
size_t end = 0;
ustring substr;
for (; start < token.size();) {
end = token.size();
@ -146,12 +146,12 @@ void TruncateStrategy::Truncate(std::vector<int64_t>& ids1, std::vector<int64_t>
case TruncateStrategyType::LONGEST_FROM_BACK:
if ((ids1_keep_len > half_max_len) && (ids2_keep_len > half_max_len)) {
ids1_keep_len = max_len - half_max_len;
ids1_keep_len = static_cast<size_t>(max_len) - half_max_len;
ids2_keep_len = half_max_len;
} else if (ids2_keep_len > ids1_keep_len) {
ids2_keep_len = max_len - ids1_keep_len;
ids2_keep_len = static_cast<size_t>(max_len) - ids1_keep_len;
} else {
ids1_keep_len = max_len - ids2_keep_len;
ids1_keep_len = static_cast<size_t>(max_len) - ids2_keep_len;
}
if (strategy_ == TruncateStrategyType::LONGEST_FIRST) {
@ -179,6 +179,7 @@ BertTokenizer::BertTokenizer(
const std::string& truncation_strategy)
: do_basic_tokenize_(do_basic_tokenize), max_length_(max_len)
, truncate_(std::make_unique<TruncateStrategy>(truncation_strategy)) {
vocab_ = std::make_shared<BertTokenizerVocab>(vocab);
if (do_basic_tokenize) {

Просмотреть файл

@ -19,11 +19,11 @@ class BertTokenizerDecoder {
private:
std::string unk_token_;
int32_t unk_token_id_;
int32_t sep_token_id_;
int32_t pad_token_id_;
int32_t cls_token_id_;
int32_t mask_token_id_;
int32_t unk_token_id_ = -1;
int32_t sep_token_id_ = -1;
int32_t pad_token_id_ = -1;
int32_t cls_token_id_ = -1;
int32_t mask_token_id_ = -1;
std::string suffix_indicator_;
std::vector<std::string_view> vocab_;
std::string raw_vocab_;

Просмотреть файл

@ -535,7 +535,7 @@ std::vector<int64_t> KernelBpeTokenizer::Tokenize(const ustring& input, int64_t
}
}
return std::move(res);
return res;
}
void KernelBpeTokenizer::Compute(OrtKernelContext* context) {

Просмотреть файл

@ -28,8 +28,8 @@ void KernelWordpieceTokenizer_Split(const std::u32string& suffix_indicator,
const std::u32string& text,
std::vector<std::u32string>& words) {
ustring space(" ");
int pos = 0;
int last = 0;
size_t pos = 0;
size_t last = 0;
words.clear();
for (; pos < text.size(); ++pos) {
if (text[pos] == space[0]) {
@ -57,7 +57,7 @@ void KernelWordpieceTokenizer_Tokenizer(const std::unordered_map<std::u32string,
std::vector<std::u32string> words;
bool is_bad;
bool no_existing_rows = n_existing_rows == 0;
int start, end;
size_t start = 0, end = 0;
std::u32string substr;
int64_t cur_substr;
tokens.clear();

Просмотреть файл

@ -9,7 +9,7 @@
struct PyCustomOpDef {
std::string op_type;
uint64_t obj_id;
uint64_t obj_id = 0;
std::vector<int> input_types;
std::vector<int> output_types;
std::vector<std::string> attrs;
@ -88,7 +88,7 @@ struct PyCustomOpFactory : Ort::CustomOpBase<PyCustomOpFactory, PyCustomOpKernel
return static_cast<ONNXTensorElementDataType>(opdef_->output_types[idx]);
}
const PyCustomOpDef* opdef_;
const PyCustomOpDef* opdef_ = nullptr;
std::string op_type_;
std::string op_domain_;
};