From 4e9e01cb3c008335a2471c27dbdf7dd5d12e4224 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Sun, 19 Dec 2021 20:54:29 -0800 Subject: [PATCH] Fix SDL warnings in CPU EP (#9975) --- cmake/CMakeLists.txt | 9 +- cmake/EnableVisualStudioCodeAnalysis.props | 12 + cmake/Sdl.ruleset | 268 ++++++++++++++++++ cmake/onnxruntime_common.cmake | 5 - cmake/onnxruntime_graph.cmake | 5 +- cmake/onnxruntime_mlas.cmake | 3 + cmake/onnxruntime_providers.cmake | 4 + cmake/onnxruntime_unittests.cmake | 29 +- include/onnxruntime/core/common/common.h | 8 +- .../onnxruntime/core/common/gsl_suppress.h | 15 + include/onnxruntime/core/common/status.h | 36 +-- .../core/framework/kernel_def_builder.h | 6 +- .../core/framework/kernel_registry.h | 2 +- .../onnxruntime/core/framework/op_kernel.h | 18 +- .../core/framework/op_kernel_info.h | 4 - include/onnxruntime/core/graph/basic_types.h | 5 +- include/onnxruntime/core/graph/graph.h | 30 +- include/onnxruntime/core/graph/node_arg.h | 4 +- .../platform/EigenNonBlockingThreadPool.h | 2 +- .../onnxruntime/core/session/environment.h | 12 +- .../core/session/onnxruntime_c_api.h | 1 + .../core/session/onnxruntime_cxx_api.h | 8 +- onnxruntime/contrib_ops/cpu/activations.h | 2 +- .../cpu/attnlstm/bahdanau_attention.cc | 3 +- .../cpu/attnlstm/deep_cpu_attn_lstm.cc | 1 + .../cpu/attnlstm/uni_dir_attn_lstm.cc | 1 + .../contrib_ops/cpu/bert/attention_cpu_base.h | 1 + .../contrib_ops/cpu/crop_and_resize.cc | 1 + onnxruntime/contrib_ops/cpu/nchwc_ops.cc | 6 +- .../cpu/quantization/qlinear_pool.cc | 12 +- onnxruntime/contrib_ops/cpu/tokenizer.cc | 4 +- .../cpu/transformers/beam_search.cc | 8 +- .../cpu/transformers/beam_search_scorer.cc | 7 +- .../cpu/transformers/gpt_subgraph.cc | 5 +- .../cpu/transformers/logits_processor.cc | 5 +- .../contrib_ops/cpu/transformers/sequences.cc | 5 +- onnxruntime/core/common/threadpool.cc | 19 +- onnxruntime/core/framework/allocator.cc | 12 +- onnxruntime/core/framework/bfc_arena.cc | 13 +- onnxruntime/core/framework/bfc_arena.h | 6 +- onnxruntime/core/framework/callback.cc | 4 +- onnxruntime/core/framework/data_types.cc | 5 +- onnxruntime/core/framework/error_code.cc | 5 +- onnxruntime/core/framework/func_kernel.h | 32 ++- .../core/framework/fuse_nodes_funcs.cc | 4 +- onnxruntime/core/framework/fuse_nodes_funcs.h | 8 +- .../core/framework/graph_partitioner.cc | 22 +- onnxruntime/core/framework/kernel_registry.cc | 6 +- .../core/framework/kernel_registry_manager.cc | 13 +- .../core/framework/kernel_registry_manager.h | 8 +- .../framework/onnxruntime_map_type_info.cc | 4 +- .../onnxruntime_sequence_type_info.cc | 18 +- .../core/framework/onnxruntime_typeinfo.cc | 4 +- .../core/framework/onnxruntime_typeinfo.h | 1 + onnxruntime/core/framework/op_kernel_info.cc | 7 +- onnxruntime/core/framework/run_options.cc | 4 +- onnxruntime/core/framework/session_options.h | 1 + onnxruntime/core/framework/session_state.cc | 6 +- onnxruntime/core/framework/session_state.h | 45 ++- onnxruntime/core/framework/sparse_tensor.cc | 4 +- .../core/framework/tensor_type_and_shape.cc | 12 +- .../core/framework/tensorprotoutils.cc | 12 +- onnxruntime/core/framework/utils.cc | 6 +- onnxruntime/core/graph/graph.cc | 31 +- onnxruntime/core/graph/model.cc | 6 +- onnxruntime/core/graph/model.h | 2 +- onnxruntime/core/mlas/lib/convolve.cpp | 1 + onnxruntime/core/mlas/lib/dgemm.cpp | 1 + .../core/mlas/lib/qgemm_kernel_default.cpp | 4 +- onnxruntime/core/mlas/lib/sgemm.cpp | 1 + .../optimizer/optimizer_execution_frame.cc | 5 +- .../qdq_selector_action_transformer.cc | 24 +- 
onnxruntime/core/platform/windows/env.cc | 71 ++++- .../core/platform/windows/stacktrace.cc | 2 +- onnxruntime/core/providers/common.h | 6 +- .../providers/cpu/activation/activations.h | 7 +- .../core/providers/cpu/controlflow/loop.cc | 30 +- .../core/providers/cpu/controlflow/scan_8.cc | 6 +- .../core/providers/cpu/controlflow/scan_9.cc | 6 +- .../providers/cpu/controlflow/scan_utils.cc | 4 +- .../providers/cpu/controlflow/scan_utils.h | 12 +- .../providers/cpu/cpu_provider_factory.cc | 4 +- .../cpu/element_wise_ranged_transform.h | 2 + .../core/providers/cpu/generator/range.cc | 5 +- onnxruntime/core/providers/cpu/math/clip.h | 4 +- onnxruntime/core/providers/cpu/math/det.cc | 5 + .../einsum_compute_preprocessor.cc | 2 +- .../providers/cpu/math/element_wise_ops.cc | 4 +- .../providers/cpu/math/element_wise_ops.h | 10 +- onnxruntime/core/providers/cpu/ml/ml_common.h | 2 +- .../core/providers/cpu/ml/svmclassifier.cc | 5 + .../core/providers/cpu/nn/batch_norm.h | 8 +- .../core/providers/cpu/nn/batch_norm_helper.h | 10 +- .../core/providers/cpu/nn/conv_attributes.h | 4 +- .../core/providers/cpu/nn/conv_transpose.cc | 2 +- .../cpu/nn/conv_transpose_attributes.h | 2 +- onnxruntime/core/providers/cpu/nn/lrn.cc | 8 +- .../core/providers/cpu/nn/pool_attributes.h | 9 +- onnxruntime/core/providers/cpu/nn/pool_base.h | 6 +- onnxruntime/core/providers/cpu/nn/shrink.cc | 5 +- .../core/providers/cpu/nn/tfidfvectorizer.cc | 6 +- .../object_detection/non_max_suppression.cc | 2 +- .../cpu/object_detection/roialign.cc | 10 +- .../providers/cpu/quantization/qlinearconv.cc | 2 +- .../providers/cpu/reduction/reduction_ops.cc | 15 +- .../providers/cpu/reduction/reduction_ops.h | 11 +- .../core/providers/cpu/rnn/deep_cpu_gru.cc | 12 +- .../core/providers/cpu/rnn/deep_cpu_lstm.cc | 2 +- .../core/providers/cpu/rnn/lstm_base.cc | 5 +- onnxruntime/core/providers/cpu/rnn/rnn.cc | 6 +- .../core/providers/cpu/rnn/rnn_helpers.cc | 32 ++- .../core/providers/cpu/rnn/rnn_helpers.h | 2 +- .../providers/cpu/rnn/uni_directional_lstm.cc | 8 +- .../providers/cpu/sequence/sequence_ops.cc | 4 +- .../core/providers/cpu/tensor/expand.cc | 6 +- .../core/providers/cpu/tensor/gather_nd.cc | 2 +- .../core/providers/cpu/tensor/nonzero_op.cc | 2 +- .../core/providers/cpu/tensor/onehot.cc | 2 +- .../providers/cpu/tensor/reverse_sequence.cc | 8 +- .../core/providers/cpu/tensor/scatter_nd.cc | 2 +- .../providers/cpu/tensor/space_depth_ops.cc | 2 +- .../core/providers/cpu/tensor/split.cc | 2 +- .../core/providers/cpu/tensor/transpose.cc | 2 +- .../core/providers/cpu/tensor/upsample.h | 13 +- onnxruntime/core/providers/cuda/cuda_kernel.h | 2 +- .../src/AbiCustomRegistry.cpp | 5 +- .../src/GraphPartitioner.cpp | 5 +- onnxruntime/core/session/IOBinding.h | 42 +-- .../core/session/abi_session_options.cc | 1 + .../core/session/allocator_adapters.cc | 4 +- onnxruntime/core/session/custom_ops.cc | 5 +- onnxruntime/core/session/environment.cc | 2 +- onnxruntime/core/session/inference_session.cc | 3 +- onnxruntime/core/session/onnxruntime_c_api.cc | 2 + onnxruntime/core/session/ort_env.cc | 31 +- onnxruntime/core/session/ort_env.h | 13 +- .../core/session/provider_bridge_ort.cc | 4 +- onnxruntime/core/util/qmath.h | 8 +- onnxruntime/core/util/thread_utils.cc | 11 +- onnxruntime/python/numpy_helper.h | 12 + .../python/onnxruntime_pybind_mlvalue.cc | 8 +- .../python/onnxruntime_pybind_mlvalue.h | 2 - .../python/onnxruntime_pybind_ortvalue.cc | 1 + .../onnxruntime_pybind_sparse_tensor.cc | 1 + .../python/onnxruntime_pybind_state.cc | 10 +- 
.../python/onnxruntime_pybind_state_common.h | 15 +- .../test/contrib_ops/tensor_op_test.cc | 18 +- onnxruntime/test/eager/ort_invoker_test.cc | 5 +- .../test/framework/allocation_planner_test.cc | 6 +- onnxruntime/test/framework/float_16_test.cc | 2 +- .../test/framework/kernel_registry_test.cc | 5 +- .../framework/local_kernel_registry_test.cc | 10 +- .../test/framework/opaque_kernels_test.cc | 4 +- .../test/framework/parallel_executor_test.cc | 6 +- .../test/framework/session_state_test.cc | 8 +- .../test/framework/sparse_kernels_test.cc | 30 +- onnxruntime/test/mlas/bench/bench_qgemm.cpp | 6 +- .../test/mlas/unittest/test_conv2d_nchwc.h | 8 +- onnxruntime/test/onnx/TestCase.cc | 24 +- onnxruntime/test/onnx/dataitem_request.cc | 2 +- onnxruntime/test/onnx/dataitem_request.h | 5 +- .../test/onnx/microbenchmark/activation.cc | 2 +- onnxruntime/test/onnx/tensorprotoutils.cc | 6 +- onnxruntime/test/onnx/testcase_request.cc | 2 +- onnxruntime/test/onnx/testcase_request.h | 6 +- .../test/optimizer/graph_transform_test.cc | 16 +- onnxruntime/test/perftest/TFModelInfo.cc | 5 +- onnxruntime/test/perftest/TFModelInfo.h | 2 +- .../test/perftest/performance_runner.cc | 5 +- .../cpu/activation/activation_op_test.cc | 30 +- .../providers/cpu/generator/random_test.cc | 64 ++--- .../cpu/math/element_wise_ops_test.cc | 3 +- .../cpu/math/quantize_linear_matmul_test.cc | 6 +- .../cpu/reduction/reduction_ops_test.cc | 18 +- .../test/providers/cpu/tensor/copy_test.cc | 20 +- .../providers/cpu/tensor/split_op_test.cc | 46 +-- .../providers/cpu/tensor/tensor_op_test.cc | 4 +- .../providers/cpu/tensor/unique_op_test.cc | 16 +- .../internal_testing_partitioning_tests.cc | 12 +- .../internal_testing_tests.cc | 14 +- .../my_allocator.cc | 1 + onnxruntime/test/util/test_allocator.cc | 1 + tools/ci_build/build.py | 3 +- .../azure-pipelines/nuget/templates/gpu.yml | 8 +- .../{ => nuget}/templates/win-ci-2019.yml | 10 +- .../azure-pipelines/templates/win-cpu-ci.yml | 41 +-- .../azure-pipelines/templates/win-gpu-ci.yml | 57 ++-- .../azure-pipelines/win-gpu-ci-pipeline.yml | 255 +++-------------- 188 files changed, 1328 insertions(+), 953 deletions(-) create mode 100644 cmake/EnableVisualStudioCodeAnalysis.props create mode 100644 cmake/Sdl.ruleset create mode 100644 include/onnxruntime/core/common/gsl_suppress.h create mode 100644 onnxruntime/python/numpy_helper.h rename tools/ci_build/github/azure-pipelines/{ => nuget}/templates/win-ci-2019.yml (97%) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 59a9bb9688..adfaa4c11f 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1114,7 +1114,6 @@ if (onnxruntime_USE_MIGRAPHX) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_MIGRAPHX=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES migraphx) endif() - if (onnxruntime_USE_ARMNN) list(APPEND ORT_PROVIDER_FLAGS -DUSE_ARMNN=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_ARMNN=1) @@ -1142,8 +1141,8 @@ function(onnxruntime_set_compile_flags target_name) set_target_properties(${target_name} PROPERTIES VS_GLOBAL_CAExcludePath "${ORT_BINARY_DIR};${ORT_SOURCE_DIR}") if (onnxruntime_ENABLE_STATIC_ANALYSIS) - target_compile_options(${target_name} PRIVATE "$<$>:/analyze:stacksize 131072>") - target_compile_options(${target_name} PRIVATE "$<$>:/analyze:external->") + target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /analyze>" "$<$>:/analyze>") + target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /analyze:external->" "$<$>:/analyze:external->") endif() 
else() # Enable warning @@ -1190,6 +1189,10 @@ function(onnxruntime_configure_target target_name) target_link_directories(${target_name} PRIVATE ${onnxruntime_LINK_DIRS}) onnxruntime_set_compile_flags(${target_name}) onnxruntime_set_source_file_properties(${target_name}) + #Uncomment the following three lines to reproduce static analysis errors locally + #if(WIN32 AND onnxruntime_ENABLE_STATIC_ANALYSIS) + # set_target_properties(${target_name} PROPERTIES VS_USER_PROPS ${PROJECT_SOURCE_DIR}/EnableVisualStudioCodeAnalysis.props) + #endif() target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT}) if (onnxruntime_ENABLE_LTO) set_target_properties(${target_name} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE) diff --git a/cmake/EnableVisualStudioCodeAnalysis.props b/cmake/EnableVisualStudioCodeAnalysis.props new file mode 100644 index 0000000000..44f93c37bf --- /dev/null +++ b/cmake/EnableVisualStudioCodeAnalysis.props @@ -0,0 +1,12 @@ + + + + $(MSBuildThisFileDirectory)Sdl.ruleset + + $(SolutionDir);$(MSBuildThisFileDirectory) + true + + diff --git a/cmake/Sdl.ruleset b/cmake/Sdl.ruleset new file mode 100644 index 0000000000..2909df90a1 --- /dev/null +++ b/cmake/Sdl.ruleset @@ -0,0 +1,268 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake index 3ee4a14e4c..9b8b5bf818 100644 --- a/cmake/onnxruntime_common.cmake +++ b/cmake/onnxruntime_common.cmake @@ -127,11 +127,6 @@ install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/common DEST set_target_properties(onnxruntime_common PROPERTIES LINKER_LANGUAGE CXX) set_target_properties(onnxruntime_common PROPERTIES FOLDER "ONNXRuntime") -if(WIN32) - # Add Code Analysis properties to enable C++ Core checks. Have to do it via a props file include. - set_target_properties(onnxruntime_common PROPERTIES VS_USER_PROPS ${PROJECT_SOURCE_DIR}/EnableVisualStudioCodeAnalysis.props) -endif() - # check if we need to link against librt on Linux include(CheckLibraryExists) include(CheckFunctionExists) diff --git a/cmake/onnxruntime_graph.cmake b/cmake/onnxruntime_graph.cmake index dde1fbd138..db09ae7a36 100644 --- a/cmake/onnxruntime_graph.cmake +++ b/cmake/onnxruntime_graph.cmake @@ -120,8 +120,5 @@ if (WIN32) target_compile_options(onnxruntime_graph PRIVATE /EHsc # exception handling - C++ may throw, extern "C" will not ) - endif() - - # Add Code Analysis properties to enable C++ Core checks. Have to do it via a props file include. 
-  set_target_properties(onnxruntime_graph PROPERTIES VS_USER_PROPS ${PROJECT_SOURCE_DIR}/EnableVisualStudioCodeAnalysis.props)
+  endif()
 endif()
diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake
index 263f580977..d1ddc06301 100644
--- a/cmake/onnxruntime_mlas.cmake
+++ b/cmake/onnxruntime_mlas.cmake
@@ -475,4 +475,7 @@ endforeach()
 set_target_properties(onnxruntime_mlas PROPERTIES FOLDER "ONNXRuntime")
 if (WIN32)
   target_compile_options(onnxruntime_mlas PRIVATE "/wd6385" "/wd4127")
+  if (onnxruntime_ENABLE_STATIC_ANALYSIS)
+    target_compile_options(onnxruntime_mlas PRIVATE "/analyze:stacksize 131072")
+  endif()
 endif()
diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake
index a2f6d6bc22..90240c5f68 100644
--- a/cmake/onnxruntime_providers.cmake
+++ b/cmake/onnxruntime_providers.cmake
@@ -454,6 +454,10 @@ if (onnxruntime_USE_DNNL)
   source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_dnnl_cc_srcs})
   onnxruntime_add_shared_library_module(onnxruntime_providers_dnnl ${onnxruntime_providers_dnnl_cc_srcs})
   target_link_directories(onnxruntime_providers_dnnl PRIVATE ${DNNL_LIB_DIR})
+  if (MSVC AND onnxruntime_ENABLE_STATIC_ANALYSIS)
+    # dnnl_convgrad.cc(47,0): Warning C6262: Function uses '38816' bytes of stack: exceeds /analyze:stacksize '16384'. Consider moving some data to heap.
+    target_compile_options(onnxruntime_providers_dnnl PRIVATE "/analyze:stacksize 131072")
+  endif()
   add_dependencies(onnxruntime_providers_dnnl onnxruntime_providers_shared project_dnnl ${onnxruntime_EXTERNAL_DEPENDENCIES})
   target_include_directories(onnxruntime_providers_dnnl PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${DNNL_INCLUDE_DIR} ${DNNL_OCL_INCLUDE_DIR})
diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
index 2a9eeca37e..ee3c33dea2 100644
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@@ -81,6 +81,9 @@ function(AddTest)
       # Lot of such things came from gtest
       target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd6326>"
                    "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd6326>")
+      # Raw new and delete. A lot of such things came from googletest.
+      target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26409>"
+                   "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
     endif()
     target_compile_options(${_UT_TARGET} PRIVATE ${disabled_warnings})
   else()
@@ -681,7 +684,12 @@ AddTest(
     onnx_test_data_proto nlohmann_json::nlohmann_json
   DEPENDS ${all_dependencies}
 )
-if(NOT MSVC)
+if (MSVC)
+  # C26451 means the two integral operands of a binary operator are narrower than the result type they feed.
+  # Widening the two operands first would make the expression more tolerant of integer overflow.
+  # However, this is test code, so we are less concerned.
+  target_compile_options(onnxruntime_test_all PRIVATE "/wd26451")
+else()
   target_compile_options(onnxruntime_test_all PRIVATE "-Wno-parentheses")
 endif()
 # the default logger tests conflict with the need to have an overall default logger
@@ -846,6 +854,15 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     if(WIN32)
       target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd4141>"
                    "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4141>")
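For context on the /analyze:stacksize bumps above: warning C6262 fires when a function's locals exceed the analyzer's stack budget (16 KB by default), and moving the data to the heap is the usual fix. A minimal illustration of both sides, not taken from the ORT sources:

#include <memory>

void StackHeavy() {
  unsigned char buffer[38816] = {};  // ~38 KB of locals: draws C6262 under /analyze
  buffer[0] = 1;
}

void HeapBacked() {
  auto buffer = std::make_unique<unsigned char[]>(38816);  // heap-backed: no C6262
  buffer[0] = 1;
}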
+      # Avoid using new and delete. This is a benchmark program; it is acceptable if it leaks.
+      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26409>"
+                   "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
+      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26400>"
+                   "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26400>")
+      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26814>"
+                   "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26814>")
+      target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26497>"
+                   "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26497>")
       target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
                    "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
     endif()
@@ -858,7 +875,11 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     onnxruntime_add_executable(onnxruntime_mlas_benchmark ${MLAS_BENCH_SOURCE_FILES})
     target_include_directories(onnxruntime_mlas_benchmark PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc)
     target_link_libraries(onnxruntime_mlas_benchmark PRIVATE benchmark::benchmark onnxruntime_util onnxruntime_framework ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common ${CMAKE_DL_LIBS})
-    if(NOT WIN32)
+    if(WIN32)
+      target_link_libraries(onnxruntime_mlas_benchmark PRIVATE debug Dbghelp)
+      # Avoid using new and delete. This is a benchmark program; it is acceptable if it leaks.
+      target_compile_options(onnxruntime_mlas_benchmark PRIVATE /wd26409)
+    else()
       target_link_libraries(onnxruntime_mlas_benchmark PRIVATE nsync_cpp ${CMAKE_DL_LIBS})
     endif()
     set_target_properties(onnxruntime_mlas_benchmark PROPERTIES FOLDER "ONNXRuntimeTest")
@@ -1100,6 +1121,8 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
   )
   onnxruntime_add_executable(onnxruntime_mlas_test ${onnxruntime_mlas_test_src})
   if(MSVC)
+    target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26409>"
+                 "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
     target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
                  "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
     target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd6326>"
@@ -1147,6 +1170,8 @@ if(UNIX)
   endif()
 else()
   set(ONNXRUNTIME_CUSTOM_OP_LIB_LINK_FLAG "-DEF:${TEST_SRC_DIR}/testdata/custom_op_library/custom_op_library.def")
+  target_compile_options(custom_op_library PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26409>"
+               "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
 endif()
 set_property(TARGET custom_op_library APPEND_STRING PROPERTY LINK_FLAGS ${ONNXRUNTIME_CUSTOM_OP_LIB_LINK_FLAG})
diff --git a/include/onnxruntime/core/common/common.h b/include/onnxruntime/core/common/common.h
index 233311596d..6860bd169c 100644
--- a/include/onnxruntime/core/common/common.h
+++ b/include/onnxruntime/core/common/common.h
@@ -35,6 +35,7 @@
 #include "core/common/exceptions.h"
 #include "core/common/make_string.h"
 #include "core/common/status.h"
+#include "core/common/gsl_suppress.h"
 
 namespace onnxruntime {
 
@@ -253,13 +254,6 @@ void LogRuntimeError(uint32_t session_id, const common::Status& status, const ch
   }                                                            \
   } while (0)
 
-// C++ Core Guideline check suppression.
-#if defined(_MSC_VER) && !defined(__NVCC__) && !defined(__clang__)
-#define GSL_SUPPRESS(tag) [[gsl::suppress(tag)]]
-#else
-#define GSL_SUPPRESS(tag)
-#endif
-
 inline long long TimeDiffMicroSeconds(TimePoint start_time) {
   auto end_time = std::chrono::high_resolution_clock::now();
   return std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count();
diff --git a/include/onnxruntime/core/common/gsl_suppress.h b/include/onnxruntime/core/common/gsl_suppress.h
new file mode 100644
index 0000000000..66702d0424
--- /dev/null
+++ b/include/onnxruntime/core/common/gsl_suppress.h
@@ -0,0 +1,15 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+#pragma once + +#ifndef GSL_SUPPRESS +#if defined(__clang__) && !defined(__NVCC__) +#define GSL_SUPPRESS(x) [[gsl::suppress("x")]] +#else +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__NVCC__) +#define GSL_SUPPRESS(x) [[gsl::suppress(x)]] +#else +#define GSL_SUPPRESS(x) +#endif // _MSC_VER +#endif // __clang__ +#endif \ No newline at end of file diff --git a/include/onnxruntime/core/common/status.h b/include/onnxruntime/core/common/status.h index f348e7e653..b7c36df2b2 100644 --- a/include/onnxruntime/core/common/status.h +++ b/include/onnxruntime/core/common/status.h @@ -19,7 +19,7 @@ limitations under the License. #ifdef _WIN32 #include #endif - +#include "core/common/gsl_suppress.h" namespace onnxruntime { namespace common { @@ -80,35 +80,34 @@ constexpr const char* StatusCodeToString(StatusCode status) noexcept { #ifdef _WIN32 constexpr HRESULT StatusCodeToHRESULT(StatusCode status) noexcept { - switch (status) - { + switch (status) { case StatusCode::OK: - return S_OK; + return S_OK; case StatusCode::FAIL: - return E_FAIL; + return E_FAIL; case StatusCode::INVALID_ARGUMENT: - return E_INVALIDARG; + return E_INVALIDARG; case StatusCode::NO_SUCHFILE: - return HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND); + return HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND); case StatusCode::NO_MODEL: - return HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND); + return HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND); case StatusCode::ENGINE_ERROR: - return E_FAIL; + return E_FAIL; case StatusCode::RUNTIME_EXCEPTION: - return E_FAIL; + return E_FAIL; case StatusCode::INVALID_PROTOBUF: - return HRESULT_FROM_WIN32(ERROR_FILE_CORRUPT); + return HRESULT_FROM_WIN32(ERROR_FILE_CORRUPT); case StatusCode::MODEL_LOADED: - return HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR); + return HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR); case StatusCode::NOT_IMPLEMENTED: - return E_NOTIMPL; + return E_NOTIMPL; case StatusCode::INVALID_GRAPH: - return HRESULT_FROM_WIN32(ERROR_FILE_CORRUPT); + return HRESULT_FROM_WIN32(ERROR_FILE_CORRUPT); case StatusCode::EP_FAIL: - return HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR); + return HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR); default: - return E_FAIL; - } + return E_FAIL; + } } #endif @@ -122,9 +121,10 @@ class [[nodiscard]] Status { Status(StatusCategory category, int code); + GSL_SUPPRESS(r.11) Status(const Status& other) : state_((other.state_ == nullptr) ? nullptr : new State(*other.state_)) {} - + GSL_SUPPRESS(r.11) Status& operator=(const Status& other) { if (state_ != other.state_) { if (other.state_ == nullptr) { diff --git a/include/onnxruntime/core/framework/kernel_def_builder.h b/include/onnxruntime/core/framework/kernel_def_builder.h index a25599be6f..2d35c49ee9 100644 --- a/include/onnxruntime/core/framework/kernel_def_builder.h +++ b/include/onnxruntime/core/framework/kernel_def_builder.h @@ -183,7 +183,7 @@ class KernelDefBuilder { static std::unique_ptr Create() { return std::make_unique(); } explicit KernelDefBuilder() - : kernel_def_(new KernelDef()) {} + : kernel_def_(std::make_unique()) {} KernelDefBuilder& SetName(const std::string& op_name); KernelDefBuilder& SetName(const char* op_name); @@ -274,7 +274,7 @@ class KernelDefBuilder { KernelDefBuilder& Alias(int input_index, int output_index); /** - Apply variadic number of alias mapping from inputs to outputs. + Apply variadic number of alias mapping from inputs to outputs. 
This is effectively applying Alias(i + input_offset, i + output_offset) for i >= 0 */ KernelDefBuilder& VariadicAlias(int input_offset, int output_offset); @@ -290,7 +290,7 @@ class KernelDefBuilder { } /** - Specify that this kernel's output buffers are passed from external, + Specify that this kernel's output buffers are passed from external, i.e. not created or managed by ORT's memory allocator. */ KernelDefBuilder& ExternalOutputs() { diff --git a/include/onnxruntime/core/framework/kernel_registry.h b/include/onnxruntime/core/framework/kernel_registry.h index a9875df58d..1d068e7625 100644 --- a/include/onnxruntime/core/framework/kernel_registry.h +++ b/include/onnxruntime/core/framework/kernel_registry.h @@ -37,7 +37,7 @@ class KernelRegistry { // TODO(Task:132) Make usage of unique_ptr/shared_ptr as out param consistent Status TryCreateKernel(const Node& node, const IExecutionProvider& execution_provider, const std::unordered_map& constant_initialized_tensors, - const OrtValueNameIdxMap& mlvalue_name_idx_map, const FuncManager& funcs_mgr, + const OrtValueNameIdxMap& mlvalue_name_idx_map, FuncManager& funcs_mgr, const DataTransferManager& data_transfer_mgr, std::unique_ptr& op_kernel) const; diff --git a/include/onnxruntime/core/framework/op_kernel.h b/include/onnxruntime/core/framework/op_kernel.h index 434119986e..59cbef7c66 100644 --- a/include/onnxruntime/core/framework/op_kernel.h +++ b/include/onnxruntime/core/framework/op_kernel.h @@ -125,9 +125,9 @@ class OpKernel { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OpKernel); std::unique_ptr op_kernel_info_; }; - -using KernelCreateFn = std::function; -using KernelCreatePtrFn = std::add_pointer::type; +class FuncManager; +using KernelCreateFn = std::function& out)>; +using KernelCreatePtrFn = std::add_pointer& out)>::type; struct KernelCreateInfo { std::unique_ptr kernel_def; // Owned and stored in the global kernel registry. 
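The kernel-registration macros that follow all track the KernelCreateFn change above: kernel factories now report failure through a Status and hand ownership back through a std::unique_ptr out-parameter instead of returning a raw OpKernel*. A reduced sketch of the before/after shapes, using simplified stand-in types rather than the real ORT declarations:

#include <functional>
#include <memory>

struct Status { static Status OK() { return {}; } };
struct FuncManager {};
struct OpKernelInfo {};
struct OpKernel {};

// Old shape: failure could only surface as an exception or a null pointer.
using OldKernelCreateFn = std::function<OpKernel*(const OpKernelInfo&)>;

// New shape: failure propagates as a Status, ownership is explicit, and the
// FuncManager supplies what OpKernelInfo used to carry for fused kernels.
using NewKernelCreateFn =
    std::function<Status(FuncManager&, const OpKernelInfo&, std::unique_ptr<OpKernel>&)>;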
@@ -197,7 +197,7 @@ using BuildKernelCreateInfoFn = KernelCreateInfo (*)(); .SinceVersion(ver) \ .Provider(provider) \ .Build(), \ - static_cast([](const OpKernelInfo& info) -> OpKernel* { return new __VA_ARGS__(info); })); \ + static_cast([](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique<__VA_ARGS__>(info); return Status::OK(); })); \ } #define ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(provider, domain, startver, endver, name) \ @@ -220,7 +220,7 @@ using BuildKernelCreateInfoFn = KernelCreateInfo (*)(); .SinceVersion(startver, endver) \ .Provider(provider) \ .Build(), \ - static_cast([](const OpKernelInfo& info) -> OpKernel* { return new __VA_ARGS__(info); })); \ + static_cast([](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique<__VA_ARGS__>(info); return Status::OK(); })); \ } #define ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(provider, domain, ver, type, name) \ @@ -246,7 +246,7 @@ using BuildKernelCreateInfoFn = KernelCreateInfo (*)(); .SinceVersion(ver) \ .Provider(provider) \ .Build(), \ - static_cast([](const OpKernelInfo& info) -> OpKernel* { return new __VA_ARGS__(info); })); \ + static_cast([](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique<__VA_ARGS__>(info); return Status::OK(); })); \ } #define ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(provider, domain, ver, type1, type2, name) \ @@ -263,7 +263,7 @@ using BuildKernelCreateInfoFn = KernelCreateInfo (*)(); .SinceVersion(ver) \ .Provider(provider) \ .Build(), \ - static_cast([](const OpKernelInfo& info) -> OpKernel* { return new __VA_ARGS__(info); })); \ + static_cast([](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique<__VA_ARGS__>(info); return Status::OK(); })); \ } #define ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(provider, domain, startver, endver, type, name) \ @@ -293,7 +293,7 @@ using BuildKernelCreateInfoFn = KernelCreateInfo (*)(); .SinceVersion(startver, endver) \ .Provider(provider) \ .Build(), \ - static_cast([](const OpKernelInfo& info) -> OpKernel* { return new __VA_ARGS__(info); })); \ + static_cast([](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique<__VA_ARGS__>(info); return Status::OK(); })); \ } #define ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(provider, domain, startver, endver, type1, type2, name) \ @@ -312,7 +312,7 @@ using BuildKernelCreateInfoFn = KernelCreateInfo (*)(); .SinceVersion(startver, endver) \ .Provider(provider) \ .Build(), \ - static_cast([](const OpKernelInfo& info) -> OpKernel* { return new __VA_ARGS__(info); })); \ + static_cast([](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique<__VA_ARGS__>(info); return Status::OK(); })); \ } template diff --git a/include/onnxruntime/core/framework/op_kernel_info.h b/include/onnxruntime/core/framework/op_kernel_info.h index 0259e7e68c..dca4df8192 100644 --- a/include/onnxruntime/core/framework/op_kernel_info.h +++ b/include/onnxruntime/core/framework/op_kernel_info.h @@ -26,7 +26,6 @@ class OpKernelInfo : public OpNodeProtoHelper { const IExecutionProvider& execution_provider, const std::unordered_map& constant_initialized_tensors, const OrtValueNameIdxMap& mlvalue_name_idx_map, - const FuncManager& funcs_mgr, const DataTransferManager& data_transfer_mgr); OpKernelInfo(const OpKernelInfo& other); @@ -45,8 +44,6 @@ class OpKernelInfo : public OpNodeProtoHelper 
{ bool TryGetConstantInput(int input_index, const Tensor** constant_input_value) const; - common::Status GetFusedFuncs(NodeComputeInfo*& compute_info) const; - private: ORT_DISALLOW_MOVE(OpKernelInfo); ORT_DISALLOW_ASSIGNMENT(OpKernelInfo); @@ -58,7 +55,6 @@ class OpKernelInfo : public OpNodeProtoHelper { gsl::not_null execution_provider_; const std::unordered_map& constant_initialized_tensors_; const OrtValueNameIdxMap& ort_value_name_idx_map_; - const FuncManager& funcs_mgr_; const DataTransferManager& data_transfer_mgr_; ProtoHelperNodeContext proto_helper_context_; }; diff --git a/include/onnxruntime/core/graph/basic_types.h b/include/onnxruntime/core/graph/basic_types.h index a79479b488..36984d0405 100644 --- a/include/onnxruntime/core/graph/basic_types.h +++ b/include/onnxruntime/core/graph/basic_types.h @@ -11,6 +11,7 @@ #include #include "core/common/basic_types.h" +#include "core/common/status.h" namespace ONNX_NAMESPACE { class ValueInfoProto; @@ -44,6 +45,6 @@ using IOnnxRuntimeOpSchemaCollectionPtr = std::shared_ptr; +class FuncManager; +using KernelCreateFn = std::function& out)>; } // namespace onnxruntime diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h index d54f145d40..3fe38fa7ef 100644 --- a/include/onnxruntime/core/graph/graph.h +++ b/include/onnxruntime/core/graph/graph.h @@ -401,8 +401,8 @@ class Node { } /** Sets initialized function body for node. This is called right after function body initialization for a node. - * or during function inlining when a nested function is encountered. - */ + * or during function inlining when a nested function is encountered. + */ void SetFunctionBody(Function& func); /** Call the provided function for all explicit inputs, implicit inputs, and outputs of this Node. @@ -500,15 +500,14 @@ class Node { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Relationships); }; - private: - ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Node); - // NOTE: This friendship relationship should ONLY be used for calling methods of the Node class and not accessing // the data members directly, so that the Node can maintain its internal invariants. friend class Graph; - Node(NodeIndex index, Graph& graph) : index_(index), graph_(&graph) {} + private: + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Node); + #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) void Init(const std::string& name, const std::string& op_type, @@ -648,7 +647,7 @@ class Graph { /** Check if a given name is a sparse initializer's name in the model * we currently convert sparse_initializer field in the model into dense Tensor instances. * However, we sometimes want to check if this initializer was stored as sparse in the model. - */ + */ bool IsSparseInitializer(const std::string& name) const; #endif @@ -978,7 +977,7 @@ class Graph { @remarks As a new Graph instance for the fused nodes is not created, a GraphViewer can be constructed with the IndexedSubGraph information to provide a view of the subgraph. The original nodes are left in place while this is in use. - Call FinalizeFuseSubGraph to remove them once the fused replacement node is fully created. + Call FinalizeFuseSubGraph to remove them once the fused replacement node is fully created. 
*/ Node& BeginFuseSubGraph(const IndexedSubGraph& sub_graph, const std::string& fused_node_name); @@ -1200,9 +1199,6 @@ class Graph { } #endif - private: - ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Graph); - // This friendship relationship should only be used to call Graph::Graph and // Graph::LoadGraph All other access should be via the public API. friend class Model; @@ -1243,6 +1239,8 @@ class Graph { const std::vector& model_functions, const logging::Logger& logger); + private: + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Graph); void InitializeStateFromModelFileGraphProto(); // Add node with specified . @@ -1530,16 +1528,16 @@ std::ostream& operator<<(std::ostream& out, const NodeArg& node_arg); // Print Node as, // (operator's name, operator's type, domain, version) : (input0, input1, ...) -> (output0, output1, ...) // For example, -// ("Add_14", Add, "", 7) : ("110": tensor(float),"109": tensor(float),) -> ("111": tensor(float),) +// ("Add_14", Add, "", 7) : ("110": tensor(float),"109": tensor(float),) -> ("111": tensor(float),) std::ostream& operator<<(std::ostream& out, const Node& node); // Print Graph as, for example, // Inputs: // "Input": tensor(float) // Nodes: -// ("add0", Add, "", 7) : ("Input": tensor(float),"Bias": tensor(float),) -> ("add0_out": tensor(float),) -// ("matmul", MatMul, "", 9) : ("add0_out": tensor(float),"matmul_weight": tensor(float),) -> ("matmul_out": tensor(float),) -// ("add1", Add, "", 7) : ("matmul_out": tensor(float),"add_weight": tensor(float),) -> ("add1_out": tensor(float),) -// ("reshape", Reshape, "", 5) : ("add1_out": tensor(float),"concat_out": tensor(int64),) -> ("Result": tensor(float),) +// ("add0", Add, "", 7) : ("Input": tensor(float),"Bias": tensor(float),) -> ("add0_out": tensor(float),) +// ("matmul", MatMul, "", 9) : ("add0_out": tensor(float),"matmul_weight": tensor(float),) -> ("matmul_out": tensor(float),) +// ("add1", Add, "", 7) : ("matmul_out": tensor(float),"add_weight": tensor(float),) -> ("add1_out": tensor(float),) +// ("reshape", Reshape, "", 5) : ("add1_out": tensor(float),"concat_out": tensor(int64),) -> ("Result": tensor(float),) // Outputs: // "Result": tensor(float) // Inputs' and outputs' format is described in document of NodeArg's operator<< above. diff --git a/include/onnxruntime/core/graph/node_arg.h b/include/onnxruntime/core/graph/node_arg.h index ecb5bdc131..80f5dda2dc 100644 --- a/include/onnxruntime/core/graph/node_arg.h +++ b/include/onnxruntime/core/graph/node_arg.h @@ -106,12 +106,12 @@ class NodeArg { Optional inputs are allowed in ONNX and an empty #Name represents a non-existent input argument. */ bool Exists() const noexcept; - private: - ORT_DISALLOW_COPY_AND_ASSIGNMENT(NodeArg); friend class Graph; NodeArg(NodeArgInfo&& node_arg_info); + private: + ORT_DISALLOW_COPY_AND_ASSIGNMENT(NodeArg); #if !defined(ORT_MINIMAL_BUILD) void SetType(const std::string* p_type); void SetType(const ONNX_NAMESPACE::TypeProto& type_proto); diff --git a/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h b/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h index 0842400c2c..95bab63ba0 100644 --- a/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h +++ b/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h @@ -786,7 +786,7 @@ class ThreadPoolTempl : public onnxruntime::concurrency::ExtendedThreadPoolInter // unique_ptr. 
The explicit deleter avoids the Eigen-specific
 // definition of ThreadPoolParallelSection needing to be available in
 // threadpool.h where the user-facing parallel section API is defined.
-
+GSL_SUPPRESS(r.11)
 std::unique_ptr<ThreadPoolParallelSection, void (*)(ThreadPoolParallelSection*)>
 AllocateParallelSection() override {
   return std::unique_ptr<ThreadPoolParallelSection, void (*)(ThreadPoolParallelSection*)>
     (new ThreadPoolParallelSection,
diff --git a/include/onnxruntime/core/session/environment.h b/include/onnxruntime/core/session/environment.h
index f7a4638371..cc511ccad5 100644
--- a/include/onnxruntime/core/session/environment.h
+++ b/include/onnxruntime/core/session/environment.h
@@ -58,31 +58,31 @@ class Environment {
   /**
    * Registers an allocator for sharing between multiple sessions.
    * Return an error if an allocator with the same OrtMemoryInfo is already registered.
-   */
+   */
   Status RegisterAllocator(AllocatorPtr allocator);
 
   /**
    * Creates and registers an allocator for sharing between multiple sessions.
    * Return an error if an allocator with the same OrtMemoryInfo is already registered.
-   */
+   */
   Status CreateAndRegisterAllocator(const OrtMemoryInfo& mem_info, const OrtArenaCfg* arena_cfg = nullptr);
 
   /**
    * Returns the list of registered allocators in this env.
-   */
+   */
   const std::vector<AllocatorPtr>& GetRegisteredSharedAllocators() const {
     return shared_allocators_;
   }
 
   /**
    * Removes registered allocator that was previously registered for sharing between multiple sessions.
-   */
+   */
   Status UnregisterAllocator(const OrtMemoryInfo& mem_info);
 
+  Environment() = default;
+
  private:
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Environment);
-
-  Environment() = default;
   Status Initialize(std::unique_ptr<logging::LoggingManager> logging_manager,
                     const OrtThreadingOptions* tp_options = nullptr,
                     bool create_global_thread_pools = false);
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 9384ad8700..8c32416061 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -129,6 +129,7 @@ extern "C" {
 // Used in *.cc files. Almost the same as ORT_API_STATUS, except without ORT_MUST_USE_RESULT and ORT_EXPORT
 #define ORT_API_STATUS_IMPL(NAME, ...)
\ + GSL_SUPPRESS(r .11) \ _Success_(return == 0) _Check_return_ _Ret_maybenull_ OrtStatusPtr ORT_API_CALL NAME(__VA_ARGS__) NO_EXCEPTION #define ORT_CLASS_RELEASE(X) void(ORT_API_CALL * Release##X)(_Frees_ptr_opt_ Ort##X * input) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 048421099b..1358d13072 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -950,8 +950,14 @@ struct CustomOpBase : OrtCustomOp { OrtCustomOp::GetOutputType = [](const OrtCustomOp* this_, size_t index) { return static_cast(this_)->GetOutputType(index); }; OrtCustomOp::KernelCompute = [](void* op_kernel, OrtKernelContext* context) { static_cast(op_kernel)->Compute(context); }; +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(push) +#pragma warning(disable : 26409) +#endif OrtCustomOp::KernelDestroy = [](void* op_kernel) { delete static_cast(op_kernel); }; - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(pop) +#endif OrtCustomOp::GetInputCharacteristic = [](const OrtCustomOp* this_, size_t index) { return static_cast(this_)->GetInputCharacteristic(index); }; OrtCustomOp::GetOutputCharacteristic = [](const OrtCustomOp* this_, size_t index) { return static_cast(this_)->GetOutputCharacteristic(index); }; } diff --git a/onnxruntime/contrib_ops/cpu/activations.h b/onnxruntime/contrib_ops/cpu/activations.h index a156af1405..3bd07bb27b 100644 --- a/onnxruntime/contrib_ops/cpu/activations.h +++ b/onnxruntime/contrib_ops/cpu/activations.h @@ -67,7 +67,7 @@ class Gelu : public OpKernel { concurrency::ThreadPool* tp = context->GetOperatorThreadPool(); int64_t elem_count = input->Shape().Size(); - static const int64_t length_per_task = 4096; // this number comes from FastGelu. + constexpr int64_t length_per_task = 4096; // this number comes from FastGelu. 
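Two localized-suppression idioms recur in the hunks above: the GSL_SUPPRESS attribute on a single declaration, and a warning push/disable/pop scope around a single statement. A compressed sketch of both (illustrative, assuming the new core/common/gsl_suppress.h header is on the include path):

#include "core/common/gsl_suppress.h"

GSL_SUPPRESS(r.11)  // r.11: avoid raw new/delete; ownership intentionally passes to the caller
int* MakeState() { return new int(0); }

#if defined(_MSC_VER) && !defined(__clang__)
#pragma warning(push)
#pragma warning(disable : 26409)  // raw delete is unavoidable at a C callback boundary
#endif
void DestroyState(void* p) { delete static_cast<int*>(p); }
#if defined(_MSC_VER) && !defined(__clang__)
#pragma warning(pop)
#endif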
int64_t task_count = (elem_count + length_per_task - 1) / length_per_task; concurrency::ThreadPool::TryBatchParallelFor( tp, static_cast(task_count), diff --git a/onnxruntime/contrib_ops/cpu/attnlstm/bahdanau_attention.cc b/onnxruntime/contrib_ops/cpu/attnlstm/bahdanau_attention.cc index fed268ed9c..6398ff2852 100644 --- a/onnxruntime/contrib_ops/cpu/attnlstm/bahdanau_attention.cc +++ b/onnxruntime/contrib_ops/cpu/attnlstm/bahdanau_attention.cc @@ -8,8 +8,9 @@ #include using onnxruntime::rnn::detail::Allocate; -//TODO: fix the warnings +// TODO: fix the warnings #if defined(_MSC_VER) && !defined(__clang__) +// Chance of arithmetic overflow could be reduced #pragma warning(disable : 26451) #endif namespace onnxruntime { diff --git a/onnxruntime/contrib_ops/cpu/attnlstm/deep_cpu_attn_lstm.cc b/onnxruntime/contrib_ops/cpu/attnlstm/deep_cpu_attn_lstm.cc index 0a13083d9a..02a09e88db 100644 --- a/onnxruntime/contrib_ops/cpu/attnlstm/deep_cpu_attn_lstm.cc +++ b/onnxruntime/contrib_ops/cpu/attnlstm/deep_cpu_attn_lstm.cc @@ -12,6 +12,7 @@ #include "core/framework/allocator.h" //TODO: fix the warnings #if defined(_MSC_VER) && !defined(__clang__) +// Chance of arithmetic overflow could be reduced #pragma warning(disable : 26451) #endif namespace onnxruntime { diff --git a/onnxruntime/contrib_ops/cpu/attnlstm/uni_dir_attn_lstm.cc b/onnxruntime/contrib_ops/cpu/attnlstm/uni_dir_attn_lstm.cc index 9c0246347a..f084e72dcd 100644 --- a/onnxruntime/contrib_ops/cpu/attnlstm/uni_dir_attn_lstm.cc +++ b/onnxruntime/contrib_ops/cpu/attnlstm/uni_dir_attn_lstm.cc @@ -16,6 +16,7 @@ #endif //TODO: fix the warnings #if defined(_MSC_VER) && !defined(__clang__) +// Chance of arithmetic overflow could be reduced #pragma warning(disable : 26451) #endif using namespace onnxruntime::rnn::detail; diff --git a/onnxruntime/contrib_ops/cpu/bert/attention_cpu_base.h b/onnxruntime/contrib_ops/cpu/bert/attention_cpu_base.h index 5ebb515575..db2c93e377 100644 --- a/onnxruntime/contrib_ops/cpu/bert/attention_cpu_base.h +++ b/onnxruntime/contrib_ops/cpu/bert/attention_cpu_base.h @@ -12,6 +12,7 @@ //TODO: fix the warnings #if defined(_MSC_VER) && !defined(__clang__) #pragma warning(push) +// Chance of arithmetic overflow could be reduced #pragma warning(disable : 26451) #endif namespace onnxruntime { diff --git a/onnxruntime/contrib_ops/cpu/crop_and_resize.cc b/onnxruntime/contrib_ops/cpu/crop_and_resize.cc index f0548f66de..2ed4d32bf2 100644 --- a/onnxruntime/contrib_ops/cpu/crop_and_resize.cc +++ b/onnxruntime/contrib_ops/cpu/crop_and_resize.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "core/providers/cpu/object_detection/roialign.h" //TODO: fix the warnings #if defined(_MSC_VER) && !defined(__clang__) +// Chance of arithmetic overflow could be reduced #pragma warning(disable : 26451) #endif using namespace onnxruntime::concurrency; diff --git a/onnxruntime/contrib_ops/cpu/nchwc_ops.cc b/onnxruntime/contrib_ops/cpu/nchwc_ops.cc index 6439b2c8f4..c88ffd2dce 100644 --- a/onnxruntime/contrib_ops/cpu/nchwc_ops.cc +++ b/onnxruntime/contrib_ops/cpu/nchwc_ops.cc @@ -68,7 +68,7 @@ Status ReorderInput::Compute(OpKernelContext* context) const { if (channels_last_) { int64_t work_index = static_cast(work.start); - int64_t work_remaining = static_cast(work.end - work.start); + int64_t work_remaining = static_cast(work.end) - work.start; while (work_remaining > 0) { const int64_t batch_index = work_index / spatial_size; @@ -87,7 +87,7 @@ Status ReorderInput::Compute(OpKernelContext* context) const { } } else { int64_t work_index = static_cast(work.start) * nchwc_block_size; - int64_t work_remaining = static_cast(work.end - work.start) * nchwc_block_size; + int64_t work_remaining = (static_cast(work.end) - work.start) * nchwc_block_size; while (work_remaining > 0) { const int64_t batch_index = work_index / nchwc_channels; @@ -331,7 +331,7 @@ Status NchwcUpsample::Compute(OpKernelContext* context) const { auto upsample_worker = [&](ptrdiff_t batch) { auto work = concurrency::ThreadPool::PartitionWork(batch, worker_count, total_work); int64_t work_index = static_cast(work.start); - int64_t work_remaining = static_cast(work.end - work.start); + int64_t work_remaining = static_cast(work.end) - work.start; while (work_remaining > 0) { // Limit the current loop iteration to the same source image. diff --git a/onnxruntime/contrib_ops/cpu/quantization/qlinear_pool.cc b/onnxruntime/contrib_ops/cpu/quantization/qlinear_pool.cc index e97b654994..f06a80512a 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/qlinear_pool.cc +++ b/onnxruntime/contrib_ops/cpu/quantization/qlinear_pool.cc @@ -119,7 +119,7 @@ struct QLinearPoolNhwc1DTask final { int64_t batch = begin / y_image_size; int64_t offset = begin % y_image_size; - for (int64_t remains = end - begin; remains > 0; offset = 0, batch++) { + for (std::ptrdiff_t remains = end - begin; remains > 0; offset = 0, batch++) { if (offset + remains <= y_image_size) { operator()(std::ptrdiff_t(batch), std::ptrdiff_t(offset), std::ptrdiff_t(offset + remains)); remains = 0; @@ -247,7 +247,7 @@ struct QLinearPoolNhwc2DTask final { int64_t batch = begin / y_image_size; int64_t offset = begin % y_image_size; - for (int64_t remains = end - begin; remains > 0; offset = 0, batch++) { + for (int64_t remains = static_cast(end) - begin; remains > 0; offset = 0, batch++) { if (offset + remains <= y_image_size) { operator()(std::ptrdiff_t(batch), std::ptrdiff_t(offset), std::ptrdiff_t(offset + remains)); remains = 0; @@ -268,7 +268,7 @@ struct QLinearPoolNhwc2DTask final { start_pw -= (start_ph * pooled_width); int64_t pool_index = channels * begin; - int64_t remains = end - begin; + std::ptrdiff_t remains = end - begin; std::vector Yh(channels); for (int64_t ph = start_ph; remains > 0 && ph < pooled_height; ++ph) { @@ -281,7 +281,7 @@ struct QLinearPoolNhwc2DTask final { wstart = std::max(wstart, static_cast(0)); // do the pooling here - float pool_init_value = PoolType::Initialize(); + constexpr float pool_init_value = PoolType::Initialize(); std::fill(Yh.data(), Yh.data() + channels, pool_init_value); for (int64_t h = hstart; h < hend; ++h) { int64_t 
input_index = channels * (h * width + wstart); @@ -415,7 +415,7 @@ struct QLinearPoolNhwc3DTask final { int64_t batch = begin / y_image_size; int64_t offset = begin % y_image_size; - for (int64_t remains = end - begin; remains > 0; offset = 0, batch++) { + for (int64_t remains = static_cast(end) - begin; remains > 0; offset = 0, batch++) { if (offset + remains <= y_image_size) { operator()(std::ptrdiff_t(batch), std::ptrdiff_t(offset), std::ptrdiff_t(offset + remains)); remains = 0; @@ -437,7 +437,7 @@ struct QLinearPoolNhwc3DTask final { int64_t start_pw = start_pd / pooled_depth; start_pd = start_pd - start_pw * pooled_depth; int64_t pool_index = channels * begin; - int64_t remains = end - begin; + int64_t remains = static_cast(end) - begin; std::vector Yh(channels); diff --git a/onnxruntime/contrib_ops/cpu/tokenizer.cc b/onnxruntime/contrib_ops/cpu/tokenizer.cc index d972692950..b26fa11fe2 100644 --- a/onnxruntime/contrib_ops/cpu/tokenizer.cc +++ b/onnxruntime/contrib_ops/cpu/tokenizer.cc @@ -93,7 +93,7 @@ Tokenizer::Tokenizer(const OpKernelInfo& info) : OpKernel(info) { re2::RE2::Options options; options.set_longest_match(true); for (const auto& sep : separators) { - std::unique_ptr regex(new re2::RE2(sep, options)); + std::unique_ptr regex = std::make_unique(sep, options); if (!regex->ok()) { ORT_THROW("Can not digest separators: ", sep, " ", regex->error()); } @@ -104,7 +104,7 @@ Tokenizer::Tokenizer(const OpKernelInfo& info) : OpKernel(info) { assert(!tokenexp.empty()); re2::RE2::Options options; options.set_longest_match(true); - std::unique_ptr regex(new re2::RE2(tokenexp, options)); + std::unique_ptr regex = std::make_unique(tokenexp, options); if (!regex->ok()) { ORT_THROW("Can not digest tokenexp: ", regex->error()); } diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search.cc b/onnxruntime/contrib_ops/cpu/transformers/beam_search.cc index 1225442192..9925501b1d 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/beam_search.cc +++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search.cc @@ -30,6 +30,8 @@ #ifdef _MSC_VER #pragma warning(pop) +// Could reduce the chance of arithmetic overflow. TODO: fix it +#pragma warning(disable : 26451) #endif using namespace ONNX_NAMESPACE; @@ -431,10 +433,10 @@ Status BeamSearchImpl::ProcessLogits( Tensor::InitOrtValue(element_type, next_token_scores_shape, next_token_scores.data(), allocator->Info(), next_token_scores_value); const Tensor& input = next_token_scores_value.Get(); - const int axis = 1; + constexpr int axis = 1; const unsigned top_k = static_cast(2 * parameters_->num_beams); - const bool largest = true; - const bool sorted = true; // results returned in sorted order. + constexpr bool largest = true; + constexpr bool sorted = true; // results returned in sorted order. std::unique_ptr topk_scores; std::unique_ptr topk_indices; diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search_scorer.cc b/onnxruntime/contrib_ops/cpu/transformers/beam_search_scorer.cc index bb7aeb989e..d2bb36e0b1 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_scorer.cc +++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_scorer.cc @@ -10,7 +10,10 @@ #include "core/providers/cpu/tensor/utils.h" #include "core/providers/cpu/rnn/rnn_helpers.h" #include "beam_search_scorer.h" - +#ifdef _MSC_VER +// Could reduce the chance of arithmetic overflow. 
TODO: fix it +#pragma warning(disable : 26451) +#endif namespace onnxruntime { namespace contrib { namespace transformers { @@ -126,7 +129,7 @@ void BeamSearchScorer::Initialize(AllocatorPtr& allocator, int sequence_lengt ORT_ENFORCE(next_beam_scores_.empty()); // Make sure this is called only once. size_t batch_beam_size = static_cast(batch_size_ * num_beams_); - const bool no_fill = false; // do not fill values after allocation + constexpr bool no_fill = false; // do not fill values after allocation next_beam_scores_ = Allocate(allocator, batch_beam_size, next_beam_scores_ptr_, no_fill); next_beam_tokens_ = Allocate(allocator, batch_beam_size, next_beam_tokens_ptr_, no_fill); next_beam_indices_ = Allocate(allocator, batch_beam_size, next_beam_indices_ptr_, no_fill); diff --git a/onnxruntime/contrib_ops/cpu/transformers/gpt_subgraph.cc b/onnxruntime/contrib_ops/cpu/transformers/gpt_subgraph.cc index 80825c7e26..db24804232 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/gpt_subgraph.cc +++ b/onnxruntime/contrib_ops/cpu/transformers/gpt_subgraph.cc @@ -21,7 +21,10 @@ #ifdef _MSC_VER #pragma warning(pop) #endif - +#ifdef _MSC_VER +// Could reduce the chance of arithmetic overflow. TODO: fix it +#pragma warning(disable : 26451) +#endif using namespace ONNX_NAMESPACE; using namespace onnxruntime::common; diff --git a/onnxruntime/contrib_ops/cpu/transformers/logits_processor.cc b/onnxruntime/contrib_ops/cpu/transformers/logits_processor.cc index 10ef8b6f69..ca8e722bf8 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/logits_processor.cc +++ b/onnxruntime/contrib_ops/cpu/transformers/logits_processor.cc @@ -1,7 +1,10 @@ #include #include "logits_processor.h" #include "dump_tensor.h" - +#ifdef _MSC_VER +// Could reduce the chance of arithmetic overflow. TODO: fix it +#pragma warning(disable : 26451) +#endif namespace onnxruntime { namespace contrib { namespace transformers { diff --git a/onnxruntime/contrib_ops/cpu/transformers/sequences.cc b/onnxruntime/contrib_ops/cpu/transformers/sequences.cc index a9c70ef410..87e6aabe95 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/sequences.cc +++ b/onnxruntime/contrib_ops/cpu/transformers/sequences.cc @@ -1,5 +1,8 @@ #include "sequences.h" - +#ifdef _MSC_VER +// Could reduce the chance of arithmetic overflow. TODO: fix it +#pragma warning(disable : 26451) +#endif namespace onnxruntime { namespace contrib { namespace transformers { diff --git a/onnxruntime/core/common/threadpool.cc b/onnxruntime/core/common/threadpool.cc index eb709dcadc..8357388884 100644 --- a/onnxruntime/core/common/threadpool.cc +++ b/onnxruntime/core/common/threadpool.cc @@ -36,7 +36,10 @@ limitations under the License. #include #endif #endif - +#if defined(_MSC_VER) && !defined(__clang__) +// Chance of arithmetic overflow could be reduced +#pragma warning(disable : 26451) +#endif namespace onnxruntime { namespace concurrency { @@ -66,7 +69,7 @@ void ThreadPoolProfiler::Start() { ThreadPoolProfiler::MainThreadStat& ThreadPoolProfiler::GetMainThreadStat() { static thread_local std::unique_ptr stat; if (!stat) { - stat.reset(new MainThreadStat()); + stat = std::make_unique(); } return *stat; } @@ -336,8 +339,8 @@ class alignas(CACHE_LINE_BYTES) LoopCounter { // Hence, at low thread counts, each of N threads will get its own // shard representing 1/N of the work. 
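The constexpr conversions in the hunks above (axis, largest, sorted, no_fill, length_per_task, and so on) share one rationale: a value known at compile time should be declared constexpr, which guarantees compile-time evaluation and drops the static storage a function-local `static const` implies. A one-function illustration with made-up names:

#include <cstdint>

int64_t TaskCount(int64_t elem_count) {
  constexpr int64_t kLengthPerTask = 4096;  // was: static const int64_t length_per_task = 4096;
  return (elem_count + kLengthPerTask - 1) / kLengthPerTask;
}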
   constexpr static unsigned GetNumShards(uint64_t num_iterations,
-                                         uint64_t d_of_p,
-                                         uint64_t block_size) {
+                                                uint64_t d_of_p,
+                                                uint64_t block_size) {
     unsigned num_shards = 0;
     auto num_blocks = num_iterations / block_size;
     if (num_blocks == 0) {
@@ -376,10 +379,10 @@ ThreadPool::ThreadPool(Env* env,
     int threads_to_create = degree_of_parallelism - 1;
     extended_eigen_threadpool_ = std::make_unique<ThreadPoolTempl<Env> >(name,
-                                                                        threads_to_create,
-                                                                        low_latency_hint,
-                                                                        *env,
-                                                                        thread_options_);
+                                                                         threads_to_create,
+                                                                         low_latency_hint,
+                                                                         *env,
+                                                                         thread_options_);
     underlying_threadpool_ = extended_eigen_threadpool_.get();
   }
 }
diff --git a/onnxruntime/core/framework/allocator.cc b/onnxruntime/core/framework/allocator.cc
index 98a8bf4d98..c1d3366fdd 100644
--- a/onnxruntime/core/framework/allocator.cc
+++ b/onnxruntime/core/framework/allocator.cc
@@ -110,7 +110,10 @@ void CPUAllocator::Free(void* p) {
 }  // namespace onnxruntime
 
 std::ostream& operator<<(std::ostream& out, const OrtMemoryInfo& info) { return (out << info.ToString()); }
-
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma warning(push)
+#pragma warning(disable : 26409)
+#endif
 ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo, _In_ const char* name1, enum OrtAllocatorType type, int id1,
                     enum OrtMemType mem_type1, _Outptr_ OrtMemoryInfo** out) {
   if (strcmp(name1, onnxruntime::CPU) == 0) {
@@ -126,7 +129,7 @@ ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo, _In_ const char* name1, enum OrtA
   } else if (strcmp(name1, onnxruntime::OpenVINO_GPU) == 0) {
     *out = new OrtMemoryInfo(
         onnxruntime::OpenVINO_GPU, type, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, static_cast<OrtDevice::DeviceId>(id1)),
-       id1, mem_type1);
+        id1, mem_type1);
   } else if (strcmp(name1, onnxruntime::DML) == 0) {
     *out = new OrtMemoryInfo(
         onnxruntime::DML, type, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, static_cast<OrtDevice::DeviceId>(id1)),
@@ -138,7 +141,9 @@ ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo, _In_ const char* name1, enum OrtA
 }
 
 ORT_API(void, OrtApis::ReleaseMemoryInfo, _Frees_ptr_opt_ OrtMemoryInfo* p) { delete p; }
-
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma warning(pop)
+#endif
 ORT_API_STATUS_IMPL(OrtApis::MemoryInfoGetName, _In_ const OrtMemoryInfo* ptr, _Out_ const char** out) {
   *out = ptr->name;
   return nullptr;
@@ -164,4 +169,3 @@ ORT_API_STATUS_IMPL(OrtApis::CompareMemoryInfo, _In_ const OrtMemoryInfo* info1,
   *out = (*info1 == *info2) ? 0 : -1;
   return nullptr;
 }
-
diff --git a/onnxruntime/core/framework/bfc_arena.cc b/onnxruntime/core/framework/bfc_arena.cc
index 57e4fea00b..9bbb2b419f 100644
--- a/onnxruntime/core/framework/bfc_arena.cc
+++ b/onnxruntime/core/framework/bfc_arena.cc
@@ -164,8 +164,19 @@ Status BFCArena::Extend(size_t rounded_bytes) {
   static constexpr float kBackpedalFactor = 0.9f;
 
   // Try allocating less memory.
   while (mem_addr == nullptr) {
+    // kBackpedalFactor is float and bytes is size_t, so bytes * kBackpedalFactor is a float. Casting it back to
+    // size_t, a narrower type, can lose data; that is what C4244 complains about, and the static_cast here is
+    // what suppresses it. C26451 suggests changing kBackpedalFactor to double for better accuracy, but doing so
+    // makes the AMD GPU CI build pipeline fail with an out-of-memory error, so the code is kept as-is and the
+    // warning is disabled instead.
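The comment above names the two diagnostics in play; an illustrative pair of functions, not from the ORT sources:

#include <cstddef>
#include <cstdint>

int64_t WidenedMultiply(int a, int b) {
  return static_cast<int64_t>(a) * b;  // a plain a * b multiplies in 32 bits first and draws C26451
}

size_t Backpedal(size_t bytes) {
  constexpr float kFactor = 0.9f;
  return static_cast<size_t>(bytes * kFactor);  // float -> size_t narrows; the cast marks it intentional (C4244)
}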
+#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(push) +#pragma warning(disable : 26451) +#endif bytes = RoundedBytes(static_cast(bytes * kBackpedalFactor)); - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(pop) +#endif // give up if we can't satisfy the requested size, or we're attempting an allocation of less than 8K. // // the latter protects against an infinite loop that occurs when bytes is less than 2560. at that point the 10% diff --git a/onnxruntime/core/framework/bfc_arena.h b/onnxruntime/core/framework/bfc_arena.h index f38d6b64e0..e664ba50d1 100644 --- a/onnxruntime/core/framework/bfc_arena.h +++ b/onnxruntime/core/framework/bfc_arena.h @@ -211,7 +211,7 @@ class BFCArena : public IAllocator { ORT_ENFORCE(0 == memory_size % kMinAllocationSize); const size_t n_handles = (memory_size + kMinAllocationSize - 1) / kMinAllocationSize; - handles_ = new ChunkHandle[n_handles]; + handles_ = std::make_unique(n_handles); for (size_t i = 0; i < n_handles; i++) { handles_[i] = kInvalidChunkHandle; } @@ -219,7 +219,7 @@ class BFCArena : public IAllocator { AllocationRegion() = default; - ~AllocationRegion() { delete[] handles_; } + ~AllocationRegion() = default; AllocationRegion(AllocationRegion&& other) noexcept { Swap(other); } @@ -266,7 +266,7 @@ class BFCArena : public IAllocator { // Array of size "memory_size / kMinAllocationSize". It is // indexed by (p-base) / kMinAllocationSize, contains ChunkHandle // for the memory allocation represented by "p" - ChunkHandle* handles_ = nullptr; + std::unique_ptr handles_; ORT_DISALLOW_ASSIGNMENT(AllocationRegion); }; diff --git a/onnxruntime/core/framework/callback.cc b/onnxruntime/core/framework/callback.cc index deb4d1e277..7e288b1a66 100644 --- a/onnxruntime/core/framework/callback.cc +++ b/onnxruntime/core/framework/callback.cc @@ -2,7 +2,9 @@ // Licensed under the MIT License. #include "core/framework/callback.h" - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif namespace onnxruntime { void OrtRunCallback(OrtCallback* f) noexcept { if (f == nullptr) return; diff --git a/onnxruntime/core/framework/data_types.cc b/onnxruntime/core/framework/data_types.cc index dc66fba500..90afec10e9 100644 --- a/onnxruntime/core/framework/data_types.cc +++ b/onnxruntime/core/framework/data_types.cc @@ -314,7 +314,10 @@ struct TensorTypeBase::Impl : public data_types_internal::TypeProtoImpl { const ONNX_NAMESPACE::TypeProto* TensorTypeBase::GetTypeProto() const { return impl_->GetProto(); } - +//TODO: Fix the warning +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif TensorTypeBase::TensorTypeBase() : DataTypeImpl{DataTypeImpl::GeneralType::kTensor, sizeof(Tensor)}, impl_(new Impl()) {} diff --git a/onnxruntime/core/framework/error_code.cc b/onnxruntime/core/framework/error_code.cc index 0bcad00233..d1d509fbe0 100644 --- a/onnxruntime/core/framework/error_code.cc +++ b/onnxruntime/core/framework/error_code.cc @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+#include "core/common/gsl_suppress.h" #include "core/session/onnxruntime_c_api.h" #include "core/session/ort_apis.h" #include "core/common/status.h" @@ -66,5 +67,7 @@ ORT_API(OrtErrorCode, OrtApis::GetErrorCode, _In_ const OrtStatus* status) { ORT_API(const char*, OrtApis::GetErrorMessage, _In_ const OrtStatus* status) { return status->msg; } - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif ORT_API(void, OrtApis::ReleaseStatus, _Frees_ptr_opt_ OrtStatus* value) { delete[] reinterpret_cast(value); } diff --git a/onnxruntime/core/framework/func_kernel.h b/onnxruntime/core/framework/func_kernel.h index ece2c24304..9bf7a5bff6 100644 --- a/onnxruntime/core/framework/func_kernel.h +++ b/onnxruntime/core/framework/func_kernel.h @@ -13,21 +13,29 @@ void release_helper_func(void* allocator, void* p); //A kernel that wrapper the ComputeFunction call generated by execution provider when fuse the sub-graph class FunctionKernel : public OpKernel { public: + explicit FunctionKernel(const OpKernelInfo& info, const NodeComputeInfo* compute) : OpKernel(info), compute_info_(compute) {} + //The original design is we load the dll, find the entry point and wrapper it. //Here for quick prototype, we keep the entry pointer in the node. - explicit FunctionKernel(const OpKernelInfo& info) : OpKernel(info) { - num_inputs_ = info.node().InputDefs().size(); - num_outputs_ = info.node().OutputDefs().size(); - auto status = info.GetFusedFuncs(compute_info_); - ORT_ENFORCE(status.IsOK(), status.ErrorMessage()); - if (compute_info_->create_state_func) { + static Status Create(FuncManager& func_mgr, const OpKernelInfo& info, std::unique_ptr& out) { + const NodeComputeInfo* compute; + ORT_RETURN_IF_ERROR(func_mgr.GetFuncs(info.node().Name(), compute)); + std::unique_ptr funckernel = std::make_unique(info, compute); + funckernel->num_inputs_ = info.node().InputDefs().size(); + funckernel->num_outputs_ = info.node().OutputDefs().size(); + + if (compute->create_state_func) { //TODO: we are only provide host allocate method in compute context. //Do we need to hold the ref-counting here? - host_allocator_ = info.GetAllocator(0, OrtMemType::OrtMemTypeDefault); - ComputeContext context = {allocate_helper_func, release_helper_func, host_allocator_.get(), + funckernel->host_allocator_ = info.GetAllocator(0, OrtMemType::OrtMemTypeDefault); + ComputeContext context = {allocate_helper_func, release_helper_func, funckernel->host_allocator_.get(), info.node().Name().c_str()}; - ORT_ENFORCE(compute_info_->create_state_func(&context, &func_state_) == 0); + int ret = funckernel->compute_info_->create_state_func(&context, &funckernel->func_state_); + if (ret != 0) + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Create state function failed. 
Return value:", ret); } + out = std::move(funckernel); + return Status::OK(); } ~FunctionKernel() override { @@ -38,12 +46,14 @@ class FunctionKernel : public OpKernel { virtual Status Compute(OpKernelContext* context) const override { auto* context_internal = static_cast(context); - return compute_info_->compute_func(func_state_, OrtGetApiBase()->GetApi(ORT_API_VERSION), + const OrtApi* api = OrtGetApiBase()->GetApi(ORT_API_VERSION); + if (api == nullptr) return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "API VERSION ", ORT_API_VERSION, " is invalid."); + return compute_info_->compute_func(func_state_, api, reinterpret_cast(context_internal)); } private: - NodeComputeInfo* compute_info_{nullptr}; + const NodeComputeInfo* const compute_info_; FunctionState func_state_{nullptr}; size_t num_inputs_; size_t num_outputs_; diff --git a/onnxruntime/core/framework/fuse_nodes_funcs.cc b/onnxruntime/core/framework/fuse_nodes_funcs.cc index f4678afbed..d75b2eba16 100644 --- a/onnxruntime/core/framework/fuse_nodes_funcs.cc +++ b/onnxruntime/core/framework/fuse_nodes_funcs.cc @@ -22,7 +22,7 @@ Status FuncManager::AddFuncInfo(const std::string& name, NodeComputeInfo&& compu return Status::OK(); } -Status FuncManager::GetFuncs(const std::string& name, NodeComputeInfo*& compute_info) const { +Status FuncManager::GetFuncs(const std::string& name, const NodeComputeInfo*& compute_info) { auto it = fused_funcs_->find(name); if (it == fused_funcs_->end()) return Status(common::ONNXRUNTIME, common::FAIL, "func info for node: " + name + " not found."); @@ -30,7 +30,7 @@ Status FuncManager::GetFuncs(const std::string& name, NodeComputeInfo*& compute_ if (!it->second.compute_info.compute_func) { //load from path void* handle = nullptr; - ORT_RETURN_IF_ERROR(lib_loader_->LoadExternalLib(it->second.dso_path, &handle)); + ORT_RETURN_IF_ERROR(lib_loader_.LoadExternalLib(it->second.dso_path, &handle)); void* create_func_symbol_handle = nullptr; ORT_RETURN_IF_ERROR(Env::Default().GetSymbolFromLibrary(handle, kCreateStateFuncSymbol + name, diff --git a/onnxruntime/core/framework/fuse_nodes_funcs.h b/onnxruntime/core/framework/fuse_nodes_funcs.h index 665f7d5d22..628880b066 100644 --- a/onnxruntime/core/framework/fuse_nodes_funcs.h +++ b/onnxruntime/core/framework/fuse_nodes_funcs.h @@ -8,15 +8,15 @@ namespace onnxruntime { class FuncManager { public: FuncManager() - : fused_funcs_(std::make_shared >()), - lib_loader_(std::make_unique()) { + : fused_funcs_(std::make_shared >()) { } Status AddFuncInfo(const std::string& name, const std::string& dll_path); Status AddFuncInfo(const std::string& name, NodeComputeInfo&& compute_info); - Status GetFuncs(const std::string& name, NodeComputeInfo*& compute_info) const; + //Do not call AddFuncInfo after this function is called. 
+ Status GetFuncs(const std::string& name, const NodeComputeInfo*& compute_info); size_t NumFuncs() const { return fused_funcs_->size(); } @@ -38,7 +38,7 @@ class FuncManager { // because it's filled in by the time main graph is traversed, // while subgraph session state is created later std::shared_ptr > fused_funcs_; - std::unique_ptr lib_loader_; + ExLibLoader lib_loader_; ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(FuncManager); }; } // namespace onnxruntime diff --git a/onnxruntime/core/framework/graph_partitioner.cc b/onnxruntime/core/framework/graph_partitioner.cc index eff7992ce4..1ebccacad4 100644 --- a/onnxruntime/core/framework/graph_partitioner.cc +++ b/onnxruntime/core/framework/graph_partitioner.cc @@ -258,10 +258,9 @@ static Status PartitionOnnxFormatModelImpl(Graph& graph, bool export_dll, FuncMa KernelDefBuilder builder; BuildFusedKernelDef(builder, *node); ORT_RETURN_IF_ERROR(fused_kernel_registry.Register(builder, - static_cast( - [](const OpKernelInfo& info) -> OpKernel* { - return new FunctionKernel(info); - }))); + [](FuncManager& func_mgr, const OpKernelInfo& info, std::unique_ptr& out) -> Status { + return FunctionKernel::Create(func_mgr, info, out); + })); } } else { @@ -298,10 +297,9 @@ static Status PartitionOnnxFormatModelImpl(Graph& graph, bool export_dll, FuncMa KernelDefBuilder builder; BuildFusedKernelDef(builder, metadef, type); ORT_RETURN_IF_ERROR(fused_kernel_registry.Register(builder, - static_cast( - [](const OpKernelInfo& info) -> OpKernel* { - return new FunctionKernel(info); - }))); + [](FuncManager& func_mgr, const OpKernelInfo& info, std::unique_ptr& out) -> Status { + return FunctionKernel::Create(func_mgr, info, out); + })); // now that we're done compiling we can remove the original nodes from the Graph and wire in the new one graph.FinalizeFuseSubGraph(indexed_sub_graph, *node); @@ -477,10 +475,10 @@ static Status PartitionOrtFormatModelImpl(Graph& graph, FuncManager& func_mgr, } ORT_RETURN_IF_ERROR(fused_kernel_registry.Register( - KernelCreateInfo(std::move(kernel_def), static_cast( - [](const OpKernelInfo& info) -> OpKernel* { - return new FunctionKernel(info); - })))); + KernelCreateInfo(std::move(kernel_def), + [](FuncManager& func_mgr, const OpKernelInfo& info, std::unique_ptr& out) -> Status { + return FunctionKernel::Create(func_mgr, info, out); + }))); // now that we're done compiling we can remove the original nodes from the Graph and wire in the new one graph.FinalizeFuseSubGraph(indexed_sub_graph, node); diff --git a/onnxruntime/core/framework/kernel_registry.cc b/onnxruntime/core/framework/kernel_registry.cc index 07db21c6b2..de6c47e018 100644 --- a/onnxruntime/core/framework/kernel_registry.cc +++ b/onnxruntime/core/framework/kernel_registry.cc @@ -206,7 +206,7 @@ Status KernelRegistry::TryCreateKernel(const Node& node, const IExecutionProvider& execution_provider, const std::unordered_map& constant_initialized_tensors, const OrtValueNameIdxMap& ort_value_name_idx_map, - const FuncManager& funcs_mgr, + FuncManager& funcs_mgr, const DataTransferManager& data_transfer_mgr, /*out*/ std::unique_ptr& op_kernel) const { const KernelCreateInfo* kernel_create_info = nullptr; @@ -216,10 +216,8 @@ Status KernelRegistry::TryCreateKernel(const Node& node, execution_provider, constant_initialized_tensors, ort_value_name_idx_map, - funcs_mgr, data_transfer_mgr); - op_kernel.reset(kernel_create_info->kernel_create_func(kernel_info)); - return Status::OK(); + return kernel_create_info->kernel_create_func(funcs_mgr, kernel_info, op_kernel); } static 
std::string ToString(const std::vector& error_strs) { diff --git a/onnxruntime/core/framework/kernel_registry_manager.cc b/onnxruntime/core/framework/kernel_registry_manager.cc index e2137f072a..2041b0d7ba 100644 --- a/onnxruntime/core/framework/kernel_registry_manager.cc +++ b/onnxruntime/core/framework/kernel_registry_manager.cc @@ -13,18 +13,17 @@ using namespace ONNX_NAMESPACE; using namespace ::onnxruntime::common; namespace onnxruntime { -std::unique_ptr KernelRegistryManager::CreateKernel(const onnxruntime::Node& node, - const IExecutionProvider& execution_provider, - const SessionState& session_state, - const KernelCreateInfo& kernel_create_info) const { +Status KernelRegistryManager::CreateKernel(const onnxruntime::Node& node, + const IExecutionProvider& execution_provider, + SessionState& session_state, + const KernelCreateInfo& kernel_create_info, + std::unique_ptr& out) const { OpKernelInfo kernel_info(node, *kernel_create_info.kernel_def, execution_provider, session_state.GetConstantInitializedTensors(), session_state.GetOrtValueNameIdxMap(), - session_state.GetFuncMgr(), session_state.GetDataTransferMgr()); - // OpKernel is abstract base class so can't use make_unique - return std::unique_ptr(kernel_create_info.kernel_create_func(kernel_info)); + return kernel_create_info.kernel_create_func(session_state.GetMutableFuncMgr(), kernel_info, out); } Status KernelRegistryManager::RegisterKernels(const ExecutionProviders& execution_providers) { diff --git a/onnxruntime/core/framework/kernel_registry_manager.h b/onnxruntime/core/framework/kernel_registry_manager.h index 6a4fb6defc..4d9da148af 100644 --- a/onnxruntime/core/framework/kernel_registry_manager.h +++ b/onnxruntime/core/framework/kernel_registry_manager.h @@ -77,10 +77,10 @@ class KernelRegistryManager { bool SearchKernelRegistriesByHash(HashValue kernel_def_hash, const KernelCreateInfo** kernel_create_info) const; - std::unique_ptr CreateKernel(const onnxruntime::Node& node, - const IExecutionProvider& execution_provider, - const SessionState& session_state, - const KernelCreateInfo& kernel_create_info) const ORT_MUST_USE_RESULT; + Status CreateKernel(const onnxruntime::Node& node, + const IExecutionProvider& execution_provider, + SessionState& session_state, + const KernelCreateInfo& kernel_create_info, std::unique_ptr& out) const; ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(KernelRegistryManager); diff --git a/onnxruntime/core/framework/onnxruntime_map_type_info.cc b/onnxruntime/core/framework/onnxruntime_map_type_info.cc index e0b5ffc538..9b18ba6703 100644 --- a/onnxruntime/core/framework/onnxruntime_map_type_info.cc +++ b/onnxruntime/core/framework/onnxruntime_map_type_info.cc @@ -32,7 +32,9 @@ ToONNXTensorElementDataType(ONNX_NAMESPACE::TensorProto_DataType data_type) { default: { return ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; } } } - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif OrtStatus* OrtMapTypeInfo::FromTypeProto(const ONNX_NAMESPACE::TypeProto* type_proto, OrtMapTypeInfo** out) { auto value_case = type_proto->value_case(); if (value_case != ONNX_NAMESPACE::TypeProto::kMapType) diff --git a/onnxruntime/core/framework/onnxruntime_sequence_type_info.cc b/onnxruntime/core/framework/onnxruntime_sequence_type_info.cc index 18427fd5fc..acae583c7a 100644 --- a/onnxruntime/core/framework/onnxruntime_sequence_type_info.cc +++ b/onnxruntime/core/framework/onnxruntime_sequence_type_info.cc @@ -6,21 +6,20 @@ #include "core/session/ort_apis.h" #include 
"core/framework/error_code_helper.h" -OrtSequenceTypeInfo::OrtSequenceTypeInfo(OrtTypeInfo* sequence_key_type) noexcept : - sequence_key_type_(sequence_key_type, &OrtApis::ReleaseTypeInfo) { +OrtSequenceTypeInfo::OrtSequenceTypeInfo(OrtTypeInfo* sequence_key_type) noexcept : sequence_key_type_(sequence_key_type, &OrtApis::ReleaseTypeInfo) { } - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif OrtStatus* OrtSequenceTypeInfo::FromTypeProto(const ONNX_NAMESPACE::TypeProto* type_proto, OrtSequenceTypeInfo** out) { auto value_case = type_proto->value_case(); - if (value_case != ONNX_NAMESPACE::TypeProto::kSequenceType) - { - return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "type_proto is not of type sequence!");; + if (value_case != ONNX_NAMESPACE::TypeProto::kSequenceType) { + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "type_proto is not of type sequence!"); } auto type_proto_sequence = type_proto->sequence_type(); OrtTypeInfo* sequence_key_type_info = nullptr; - if (auto status = OrtTypeInfo::FromTypeProto(&type_proto_sequence.elem_type(), &sequence_key_type_info)) - { + if (auto status = OrtTypeInfo::FromTypeProto(&type_proto_sequence.elem_type(), &sequence_key_type_info)) { return status; } @@ -30,8 +29,7 @@ OrtStatus* OrtSequenceTypeInfo::FromTypeProto(const ONNX_NAMESPACE::TypeProto* t OrtStatus* OrtSequenceTypeInfo::Clone(OrtSequenceTypeInfo** out) { OrtTypeInfo* sequence_key_type_copy = nullptr; - if (auto status = sequence_key_type_->Clone(&sequence_key_type_copy)) - { + if (auto status = sequence_key_type_->Clone(&sequence_key_type_copy)) { return status; } *out = new OrtSequenceTypeInfo(sequence_key_type_copy); diff --git a/onnxruntime/core/framework/onnxruntime_typeinfo.cc b/onnxruntime/core/framework/onnxruntime_typeinfo.cc index 5a26fe2066..e3a07f84ef 100644 --- a/onnxruntime/core/framework/onnxruntime_typeinfo.cc +++ b/onnxruntime/core/framework/onnxruntime_typeinfo.cc @@ -27,7 +27,9 @@ using onnxruntime::Tensor; using onnxruntime::TensorShape; namespace on = ONNX_NAMESPACE; - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif OrtTypeInfo::OrtTypeInfo(ONNXType type1) noexcept : type(type1) { } diff --git a/onnxruntime/core/framework/onnxruntime_typeinfo.h b/onnxruntime/core/framework/onnxruntime_typeinfo.h index 5b9145d32e..67e270f5be 100644 --- a/onnxruntime/core/framework/onnxruntime_typeinfo.h +++ b/onnxruntime/core/framework/onnxruntime_typeinfo.h @@ -4,6 +4,7 @@ #pragma once #include #include +#include "core/common/gsl_suppress.h" #include "core/session/onnxruntime_c_api.h" namespace onnxruntime { diff --git a/onnxruntime/core/framework/op_kernel_info.cc b/onnxruntime/core/framework/op_kernel_info.cc index 4be25b5fe7..0ae43026e6 100644 --- a/onnxruntime/core/framework/op_kernel_info.cc +++ b/onnxruntime/core/framework/op_kernel_info.cc @@ -13,7 +13,6 @@ OpKernelInfo::OpKernelInfo(const onnxruntime::Node& node, const IExecutionProvider& execution_provider, const std::unordered_map& constant_initialized_tensors, const OrtValueNameIdxMap& ort_value_name_idx_map, - const FuncManager& funcs_mgr, const DataTransferManager& data_transfer_mgr) : OpNodeProtoHelper(&proto_helper_context_), node_(node), @@ -21,13 +20,12 @@ OpKernelInfo::OpKernelInfo(const onnxruntime::Node& node, execution_provider_(&execution_provider), constant_initialized_tensors_(constant_initialized_tensors), ort_value_name_idx_map_(ort_value_name_idx_map), - funcs_mgr_(funcs_mgr), data_transfer_mgr_(data_transfer_mgr), 
proto_helper_context_(node) {} OpKernelInfo::OpKernelInfo(const OpKernelInfo& other) : OpKernelInfo(other.node_, other.kernel_def_, *other.execution_provider_, other.constant_initialized_tensors_, - other.ort_value_name_idx_map_, other.funcs_mgr_, other.data_transfer_mgr_) {} + other.ort_value_name_idx_map_, other.data_transfer_mgr_) {} const OrtMemoryInfo& OpKernelInfo::GetMemoryInfo(int device_id, OrtMemType mem_type) const { AllocatorPtr alloc = GetAllocator(device_id, mem_type); @@ -79,7 +77,4 @@ bool OpKernelInfo::TryGetConstantInput(int input_index, const Tensor** constant_ return true; } -common::Status OpKernelInfo::GetFusedFuncs(NodeComputeInfo*& compute_info) const { - return funcs_mgr_.GetFuncs(node_.Name(), compute_info); -} } // namespace onnxruntime diff --git a/onnxruntime/core/framework/run_options.cc b/onnxruntime/core/framework/run_options.cc index 0a62265515..95c111009c 100644 --- a/onnxruntime/core/framework/run_options.cc +++ b/onnxruntime/core/framework/run_options.cc @@ -5,7 +5,9 @@ #include "core/session/onnxruntime_c_api.h" #include "core/session/ort_apis.h" #include "core/framework/error_code_helper.h" - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif ORT_API_STATUS_IMPL(OrtApis::CreateRunOptions, _Outptr_ OrtRunOptions** out) { API_IMPL_BEGIN *out = new OrtRunOptions(); diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h index 2f17779238..09e679a657 100644 --- a/onnxruntime/core/framework/session_options.h +++ b/onnxruntime/core/framework/session_options.h @@ -5,6 +5,7 @@ #include #include +#include "core/common/gsl_suppress.h" #include "core/session/onnxruntime_c_api.h" #include "core/optimizer/graph_transformer_level.h" #include "core/util/thread_utils.h" diff --git a/onnxruntime/core/framework/session_state.cc b/onnxruntime/core/framework/session_state.cc index 0414d99553..f93b912618 100644 --- a/onnxruntime/core/framework/session_state.cc +++ b/onnxruntime/core/framework/session_state.cc @@ -164,7 +164,7 @@ Status SessionState::CreateKernels(const KernelRegistryManager& kernel_registry_ max_nodeid = std::max(max_nodeid, node.Index()); } session_kernels_.clear(); - session_kernels_.resize(max_nodeid + 1, nullptr); + session_kernels_.resize(max_nodeid + 1); for (const auto& node : nodes) { // construct and save the kernels const KernelCreateInfo& kci = GetNodeKernelCreateInfo(node.Index()); @@ -173,10 +173,8 @@ Status SessionState::CreateKernels(const KernelRegistryManager& kernel_registry_ onnxruntime::ProviderType exec_provider_name = node.GetExecutionProviderType(); const IExecutionProvider& exec_provider = *execution_providers_.Get(exec_provider_name); - auto op_kernel = kernel_registry_manager.CreateKernel(node, exec_provider, *this, kci); - // assumes vector is already resize()'ed to the number of nodes in the graph - session_kernels_[node.Index()] = op_kernel.release(); + ORT_RETURN_IF_ERROR(kernel_registry_manager.CreateKernel(node, exec_provider, *this, kci, session_kernels_[node.Index()])); } } node_index_info_ = std::make_unique(*graph_viewer_, ort_value_name_idx_map_); diff --git a/onnxruntime/core/framework/session_state.h b/onnxruntime/core/framework/session_state.h index bc3be7b01f..b8ec5cb4f8 100644 --- a/onnxruntime/core/framework/session_state.h +++ b/onnxruntime/core/framework/session_state.h @@ -109,9 +109,6 @@ class SessionState { } ~SessionState() { - for (auto* p : session_kernels_) { - delete p; - } for (auto& kvp : 
deleter_for_initialized_tensors_) { kvp.second.f(kvp.second.param); } @@ -124,11 +121,11 @@ class SessionState { // Get kernel for specified node. // It should be called right before graph execution only. const OpKernel* GetKernel(size_t node_id) const { - return (node_id < session_kernels_.size()) ? session_kernels_[node_id] : nullptr; + return (node_id < session_kernels_.size()) ? session_kernels_[node_id].get() : nullptr; } OpKernel* GetMutableKernel(size_t node_id) { - return (node_id < session_kernels_.size()) ? session_kernels_[node_id] : nullptr; + return (node_id < session_kernels_.size()) ? session_kernels_[node_id].get() : nullptr; } const ExecutionProviders& GetExecutionProviders() const noexcept { return execution_providers_; } @@ -144,27 +141,27 @@ class SessionState { const OrtValueNameIdxMap& GetOrtValueNameIdxMap() const noexcept { return ort_value_name_idx_map_; } /** - * Adds an initialized tensor (weight) so that it can be used by the - * execution frame to setup the appropriate OrtValue vectors. - * This function will take a shallow copy of d if d is not NULL. - * If 'constant' is true the tensor value cannot be overridden by an input at runtime. - * If 'sparse' is true the tensor value represents a densified weight that was initially stored in the model - * as sparse tensor. - */ + * Adds an initialized tensor (weight) so that it can be used by the + * execution frame to set up the appropriate OrtValue vectors. + * This function will take a shallow copy of d if d is not NULL. + * If 'constant' is true the tensor value cannot be overridden by an input at runtime. + * If 'sparse' is true the tensor value represents a densified weight that was initially stored in the model + * as a sparse tensor. + */ Status AddInitializedTensor(int ort_value_index, const OrtValue& ort_value, const OrtCallback* d, bool constant, bool sparse); /** - * Gets the map of ort_value_index to initialized tensors (weights) so that it can be used by the - * execution frame to setup the appropriate OrtValue vectors. - * The lifetime of returned OrtValues are limited by this SessionState object. - */ + * Gets the map of ort_value_index to initialized tensors (weights) so that it can be used by the + * execution frame to set up the appropriate OrtValue vectors. + * The lifetime of the returned OrtValues is limited by this SessionState object. + */ const std::unordered_map& GetInitializedTensors() const; /** - * Gets the map of ort_value_index to initialized tensors (e.g. weights) that are constant - * and cannot be overridden at runtime. - * The lifetime of returned OrtValues are limited by this SessionState object. - */ + * Gets the map of ort_value_index to initialized tensors (e.g. weights) that are constant + * and cannot be overridden at runtime. + * The lifetime of the returned OrtValues is limited by this SessionState object. + */ const std::unordered_map& GetConstantInitializedTensors() const; #if !defined(DISABLE_SPARSE_TENSORS) @@ -355,9 +352,9 @@ class SessionState { void CleanInitializedTensorsFromGraph(); /** - * Prepack the constant initialized tensors for better performance. - * The original constant initialized tensors will be removed to save memory. - */ + * Prepack the constant initialized tensors for better performance. + * The original constant initialized tensors will be removed to save memory.
+ */ Status PrepackConstantInitializedTensors(std::unordered_map& constant_initializers_use_count, const std::unordered_map& initializers_to_share_map); @@ -402,7 +399,7 @@ class SessionState { std::unordered_map compiled_kernel_hashes_; // cache of the constructed kernels to avoid spending construction time per executor - std::vector session_kernels_; + std::vector> session_kernels_; Graph& graph_; std::unique_ptr graph_viewer_; // GraphViewer for const access to Graph diff --git a/onnxruntime/core/framework/sparse_tensor.cc b/onnxruntime/core/framework/sparse_tensor.cc index 223670392a..83ada96c05 100644 --- a/onnxruntime/core/framework/sparse_tensor.cc +++ b/onnxruntime/core/framework/sparse_tensor.cc @@ -22,7 +22,7 @@ std::ostream& operator<<(std::ostream& os, SparseFormat flags) { namespace { // Round up size to a multiple of int64 constexpr size_t kIndexAlignment = alignof(int64_t); -inline int64_t Roundup(int64_t size) { +constexpr inline int64_t Roundup(int64_t size) { return ((SafeInt(size) + kIndexAlignment - 1) / kIndexAlignment) * kIndexAlignment; } @@ -31,7 +31,7 @@ inline int64_t Roundup(int64_t size) { /// after data and make sure indices start at int64_t aligned place /// /// -inline int64_t CalculateRequiredBufferSize(int64_t data_size, int64_t indices_size) { +constexpr inline int64_t CalculateRequiredBufferSize(int64_t data_size, int64_t indices_size) { return SafeInt(Roundup(data_size)) + indices_size; } diff --git a/onnxruntime/core/framework/tensor_type_and_shape.cc b/onnxruntime/core/framework/tensor_type_and_shape.cc index e1dafc2f83..5f29fabcec 100644 --- a/onnxruntime/core/framework/tensor_type_and_shape.cc +++ b/onnxruntime/core/framework/tensor_type_and_shape.cc @@ -24,7 +24,9 @@ using onnxruntime::MLFloat16; using onnxruntime::SparseTensor; #endif using onnxruntime::Tensor; - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif ORT_API_STATUS_IMPL(OrtApis::CreateTensorTypeAndShapeInfo, _Outptr_ OrtTensorTypeAndShapeInfo** out) { API_IMPL_BEGIN *out = new OrtTensorTypeAndShapeInfo(); @@ -317,10 +319,10 @@ ORT_API_STATUS_IMPL(OrtApis::GetValueType, _In_ const OrtValue* v, _Out_ ONNXTyp } /** -* Get the type information of an OrtValue -* \param value -* \return The returned value should be freed by OrtReleaseTypeInfo after use -*/ + * Get the type information of an OrtValue + * \param value + * \return The returned value should be freed by OrtReleaseTypeInfo after use + */ ORT_API_STATUS_IMPL(OrtApis::GetTypeInfo, _In_ const OrtValue* v, _Outptr_result_maybenull_ struct OrtTypeInfo** out) { API_IMPL_BEGIN // TODO: This is consistent with the previous implementation but inconsistent with GetValueType which returns diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index ca7416fce0..2b6d2bbed1 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -533,7 +533,9 @@ ORT_API(void, OrtUninitializeBuffer, _In_opt_ void* input, size_t input_len, enu ptr[i].~string(); } } - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif class AutoDelete { public: OrtCallback d{nullptr, nullptr}; @@ -594,7 +596,7 @@ static Status GetFileContent( * @param tensor_proto tensor data in protobuf format * @param tensorp pre-allocated tensor object, where we store the data * @return -*/ + */ Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path, const ONNX_NAMESPACE::TensorProto& 
tensor_proto, Tensor& tensor) { @@ -830,7 +832,7 @@ common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& n break; } #else - ORT_UNUSED_PARAMETER(model_path); + ORT_UNUSED_PARAMETER(model_path); #endif default: ORT_THROW("Unsupported attribute value type of ", constant_attribute.type(), @@ -1098,7 +1100,6 @@ inline void CopyElement(void* dst, const void* src, int64_t dst_index, reinterpret_cast(dst)[dst_index] = reinterpret_cast(src)[src_index]; } - template static void SetIndices(gsl::span gathered_indices, std::string& raw_indices, @@ -1109,7 +1110,8 @@ static void SetIndices(gsl::span gathered_indices, for (auto src_index : gathered_indices) { ORT_IF_CONSTEXPR(sizeof(T) == sizeof(int8_t)) { ind_dest[dest_index] = static_cast(src_index); - } else { + } + else { auto* dst = ind_dest + dest_index; T v = static_cast(src_index); memcpy(dst, &v, sizeof(T)); diff --git a/onnxruntime/core/framework/utils.cc b/onnxruntime/core/framework/utils.cc index 840b0321ac..bf76d3bcbd 100644 --- a/onnxruntime/core/framework/utils.cc +++ b/onnxruntime/core/framework/utils.cc @@ -563,14 +563,14 @@ static common::Status ExecuteGraphImpl(const SessionState& session_state, const logging::Logger& logger, const bool only_execute_path_to_fetches = false) { std::unique_ptr p_exec; if (execution_mode == ExecutionMode::ORT_SEQUENTIAL) { - p_exec = std::unique_ptr(new SequentialExecutor(terminate_flag, only_execute_path_to_fetches)); + p_exec = std::make_unique(terminate_flag, only_execute_path_to_fetches); } else if (execution_mode == ExecutionMode::ORT_PARALLEL) { auto* p_inter_op_thread_pool = session_state.GetInterOpThreadPool(); if (!p_inter_op_thread_pool) { LOGS(logger, WARNING) << "Only one thread was configured for parallel execution. Hence will use sequential execution."; - p_exec = std::unique_ptr(new SequentialExecutor(terminate_flag, only_execute_path_to_fetches)); + p_exec = std::make_unique(terminate_flag, only_execute_path_to_fetches); } else { - p_exec = std::unique_ptr(new ParallelExecutor(session_state, terminate_flag)); + p_exec = std::make_unique(session_state, terminate_flag); } } diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 3e81f43850..f854839abe 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -705,7 +705,7 @@ flatbuffers::Offset Node::SaveEdgesToOrtFormat(flatbuffers::FlatB Status Node::LoadFromOrtFormat(const onnxruntime::fbs::Node& fbs_node, Graph& graph, const logging::Logger& logger, std::unique_ptr& node) { - node.reset(new Node(fbs_node.index(), graph)); + node = std::make_unique(fbs_node.index(), graph); return node->LoadFromOrtFormat(fbs_node, logger); } @@ -860,9 +860,9 @@ void Node::CreateSubgraph(const std::string& attr_name) { if (attr != attributes_.cend() && utils::HasGraph(attr->second)) { GraphProto& mutable_graph = *attr->second.mutable_g(); - std::unique_ptr subgraph{new Graph(*graph_, *this, mutable_graph)}; + std::unique_ptr subgraph = std::make_unique(*graph_, *this, mutable_graph); attr_to_subgraph_map_.insert({std::string(attr_name), gsl::not_null{subgraph.get()}}); - subgraphs_.push_back(std::move(subgraph)); + subgraphs_.emplace_back(std::move(subgraph)); } } @@ -3999,7 +3999,7 @@ Status Graph::InlineFunction(Node& node) { // main graph. 
const Graph& subgraph = node.GetFunctionBody()->Body(); auto output_edges = node.GetRelationships().output_edges; - for (auto output_edge : output_edges) { + for (const auto& output_edge : output_edges) { RemoveEdge(node.Index(), output_edge.GetNode().Index(), output_edge.GetSrcArgIndex(), output_edge.GetDstArgIndex()); } @@ -4130,7 +4130,7 @@ void Graph::SetNodeArgType(NodeArg& arg, const ONNX_NAMESPACE::TypeProto& type_p GraphResolveNeeded(true); } -#endif // !defined(ORT_MINIMAL_BUILD) +#endif // !defined(ORT_MINIMAL_BUILD) Graph::~Graph() { // nothing to do, but we put it here so we don't need to fully define types in Graph that are held in unique_ptr @@ -4216,12 +4216,11 @@ Status Graph::LoadFromOrtFormat(const onnxruntime::fbs::Graph& fbs_graph, IOnnxRuntimeOpSchemaCollectionPtr schema_registry, #endif const logging::Logger& logger, std::unique_ptr& graph) { - // can't use make_unique as we're calling a private ctor - graph.reset(new Graph(owning_model, domain_to_version, + graph = std::make_unique(owning_model, domain_to_version, #if !defined(ORT_MINIMAL_BUILD) - schema_registry, + schema_registry, #endif - nullptr, nullptr, logger)); + nullptr, nullptr, logger); ORT_RETURN_IF_ERROR(graph->LoadFromOrtFormat(fbs_graph)); @@ -4240,14 +4239,13 @@ Status Graph::LoadFromOrtFormat(const onnxruntime::fbs::Graph& fbs_graph, Status Graph::LoadFromOrtFormat(const onnxruntime::fbs::Graph& fbs_graph, Graph& parent_graph, const Node& parent_node, const logging::Logger& logger, std::unique_ptr& graph) { - // can't use make_unique as we're calling a private ctor - graph.reset(new Graph(parent_graph.owning_model_, - parent_graph.domain_to_version_, + graph = std::make_unique(parent_graph.owning_model_, + parent_graph.domain_to_version_, #if !defined(ORT_MINIMAL_BUILD) - parent_graph.schema_registry_, + parent_graph.schema_registry_, #endif - &parent_graph, &parent_node, - logger)); + &parent_graph, &parent_node, + logger); return graph->LoadFromOrtFormat(fbs_graph); } @@ -4348,8 +4346,7 @@ common::Status Graph::LoadFromOrtFormat(const onnxruntime::fbs::Graph& fbs_graph ORT_RETURN_IF(nullptr == fbs_value_info, "NodeArg is missing. 
Invalid ORT format model."); NodeArgInfo node_arg_info; ORT_RETURN_IF_ERROR(fbs::utils::LoadValueInfoOrtFormat(*fbs_value_info, node_arg_info)); - // NodeArg ctor is private, cannot use make_unique - node_args_[fbs_value_info->name()->str()] = std::unique_ptr(new NodeArg(std::move(node_arg_info))); + node_args_[fbs_value_info->name()->str()] = std::make_unique(std::move(node_arg_info)); } } diff --git a/onnxruntime/core/graph/model.cc b/onnxruntime/core/graph/model.cc index f04b32d6b6..e9e34c17cd 100644 --- a/onnxruntime/core/graph/model.cc +++ b/onnxruntime/core/graph/model.cc @@ -347,7 +347,7 @@ Status Model::Load(const ModelProto& model_proto, auto status = Status::OK(); ORT_TRY { - model.reset(new Model(model_proto, model_path, local_registries, logger)); + model = std::make_unique(model_proto, model_path, local_registries, logger); } ORT_CATCH(const std::exception& ex) { ORT_HANDLE_EXCEPTION([&]() { @@ -386,7 +386,7 @@ Status Model::Load(ModelProto&& model_proto, GSL_SUPPRESS(r .11) auto status = Status::OK(); ORT_TRY { - model.reset(new Model(std::move(model_proto), model_path, local_registries, logger, allow_released_opsets_only)); + model = std::make_unique(std::move(model_proto), model_path, local_registries, logger, allow_released_opsets_only); } ORT_CATCH(const std::exception& ex) { ORT_HANDLE_EXCEPTION([&]() { @@ -730,7 +730,7 @@ common::Status Model::LoadFromOrtFormat(const fbs::Model& fbs_model, #endif const logging::Logger& logger, std::unique_ptr& model) { - model.reset(new Model()); + model = std::make_unique(); // Load the model metadata if (const auto* fbs_metadata_props = fbs_model.metadata_props()) { diff --git a/onnxruntime/core/graph/model.h b/onnxruntime/core/graph/model.h index 77cd279284..5425c3ee08 100644 --- a/onnxruntime/core/graph/model.h +++ b/onnxruntime/core/graph/model.h @@ -272,9 +272,9 @@ class Model { const logging::Logger& logger, std::unique_ptr& model); - private: Model(); + private: // Model data. 
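// Illustrative aside (editor's sketch, not part of the diff): why `Model();`
// moves out of the private section above. std::make_unique performs the
// `new T(...)` itself, inside a library function with no special access, so
// it needs an accessible constructor; `ptr.reset(new T(...))` only needs
// access at the call site. Widget is a hypothetical demo type.
#include <memory>
class Widget {
  Widget() = default;  // private (default class access)
 public:
  static std::unique_ptr<Widget> Create() {
    // OK: `new Widget()` is evaluated inside the class, which has access.
    return std::unique_ptr<Widget>(new Widget());
    // return std::make_unique<Widget>();  // would NOT compile: private ctor
  }
};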
#if !defined(ORT_MINIMAL_BUILD) ONNX_NAMESPACE::ModelProto model_proto_; diff --git a/onnxruntime/core/mlas/lib/convolve.cpp b/onnxruntime/core/mlas/lib/convolve.cpp index 78eceda4fc..4b5c682384 100644 --- a/onnxruntime/core/mlas/lib/convolve.cpp +++ b/onnxruntime/core/mlas/lib/convolve.cpp @@ -1018,6 +1018,7 @@ Return Value: } #if defined(_MSC_VER) && !defined(__clang__) #pragma warning(push) +// Chance of arithmetic overflow could be reduced #pragma warning(disable : 26451) #endif void diff --git a/onnxruntime/core/mlas/lib/dgemm.cpp b/onnxruntime/core/mlas/lib/dgemm.cpp index 3688699843..b98d6f8bb2 100644 --- a/onnxruntime/core/mlas/lib/dgemm.cpp +++ b/onnxruntime/core/mlas/lib/dgemm.cpp @@ -805,6 +805,7 @@ Return Value: #if defined(_MSC_VER) && !defined(__clang__) #pragma warning(push) +// Chance of arithmetic overflow could be reduced #pragma warning(disable : 26451) #endif void diff --git a/onnxruntime/core/mlas/lib/qgemm_kernel_default.cpp b/onnxruntime/core/mlas/lib/qgemm_kernel_default.cpp index c968c2aae5..eb8ef4e274 100644 --- a/onnxruntime/core/mlas/lib/qgemm_kernel_default.cpp +++ b/onnxruntime/core/mlas/lib/qgemm_kernel_default.cpp @@ -34,7 +34,7 @@ constexpr MLAS_GEMM_QUANT_STRIDES MLAS_GEMM_QUANT_KERNEL_DEFAULT::Strides; constexpr MLAS_GEMM_QUANT_STRIDES MLAS_GEMM_QUANT_KERNEL_DEFAULT::PackedStrides; template<> -MLAS_FORCEINLINE constexpr +MLAS_FORCEINLINE constexpr int32_t MlasGemmQuantFixupZeroPointA( int32_t ZeroPointA, @@ -49,7 +49,7 @@ MlasGemmQuantFixupZeroPointA( } template<> -MLAS_FORCEINLINE +MLAS_FORCEINLINE constexpr int32_t MlasGemmQuantFixupZeroPointB( int32_t ZeroPointB, diff --git a/onnxruntime/core/mlas/lib/sgemm.cpp b/onnxruntime/core/mlas/lib/sgemm.cpp index 0360127ca0..62170a2573 100644 --- a/onnxruntime/core/mlas/lib/sgemm.cpp +++ b/onnxruntime/core/mlas/lib/sgemm.cpp @@ -1554,6 +1554,7 @@ Return Value: } #if defined(_MSC_VER) && !defined(__clang__) #pragma warning(push) +// Chance of arithmetic overflow could be reduced #pragma warning(disable : 26451) #endif void diff --git a/onnxruntime/core/optimizer/optimizer_execution_frame.cc b/onnxruntime/core/optimizer/optimizer_execution_frame.cc index 828bfdbf87..77161ed6a8 100644 --- a/onnxruntime/core/optimizer/optimizer_execution_frame.cc +++ b/onnxruntime/core/optimizer/optimizer_execution_frame.cc @@ -41,7 +41,7 @@ OptimizerExecutionFrame::Info::Info(const std::vector& nodes, size_t cpu_tensor_length; ORT_RETURN_IF_ERROR(utils::GetSizeInBytesFromTensorProto<0>(tensor_proto, &cpu_tensor_length)); OrtValue ort_value; - std::unique_ptr data(new char[cpu_tensor_length]); + std::unique_ptr data = std::make_unique(cpu_tensor_length); std::unique_ptr p_tensor; ORT_RETURN_IF_ERROR(utils::TensorProtoToMLValue(Env::Default(), model_path.IsEmpty() ? 
nullptr : model_path.ToPathString().c_str(), @@ -103,8 +103,9 @@ OptimizerExecutionFrame::Info::Info(const std::vector& nodes, std::unique_ptr OptimizerExecutionFrame::Info::CreateKernel(const Node* node) const { std::unique_ptr op_kernel; std::shared_ptr kernel_registry = execution_provider_.GetKernelRegistry(); + FuncManager func; auto status = kernel_registry->TryCreateKernel(*node, execution_provider_, initializers_, - ort_value_name_idx_map_, FuncManager(), data_transfer_mgr_, + ort_value_name_idx_map_, func, data_transfer_mgr_, op_kernel); // Kernel found in the CPU kernel registry diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index fb1c49e312..b5a90c36a9 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -28,10 +28,10 @@ void DropQDQNodesRules(SelectorsAndActions& qdq_selectors_and_actions) { MoveToSlot(dq, ArgType::kInput, 0, ArgType::kInput, 0), MoveToSlot(q, ArgType::kOutput, 0, ArgType::kOutput, 0)}; - std::unique_ptr action(new MergeIntoTarget(std::move(moves))); + std::unique_ptr action = std::make_unique(std::move(moves)); #if !defined(ORT_MINIMAL_BUILD) - std::unique_ptr selector(new QDQ::DropDQDNodesSelector()); + std::unique_ptr selector = std::make_unique(); qdq_selectors_and_actions.RegisterSelectorAndAction(action_name, SelectorAndAction::OpVersionsMap{{"Gather", {}}, {"Reshape", {}}, @@ -49,10 +49,10 @@ void UnaryOpQDQRules(SelectorsAndActions& qdq_selectors_and_actions, bool is_int // 3 nodes. DQ, target, Q // Replace with internal QLinear version of operator. Delete all original nodes. const std::string action_name{"1DQ"}; - std::unique_ptr action(new QDQ::UnaryReplaceWithQLinear(kMSDomain)); + std::unique_ptr action = std::make_unique(kMSDomain); #if !defined(ORT_MINIMAL_BUILD) - std::unique_ptr selector(new QDQ::UnarySelector(is_int8_allowed)); + std::unique_ptr selector = std::make_unique(is_int8_allowed); qdq_selectors_and_actions.RegisterSelectorAndAction(action_name, SelectorAndAction::OpVersionsMap{{"AveragePool", {}}}, std::move(selector), @@ -67,10 +67,10 @@ void BinaryOpQDQRules(SelectorsAndActions& qdq_selectors_and_actions) { // 4 nodes. 2 x DQ for inputs, target, Q // Replace with internal QLinear version of operator. Delete all original nodes. const std::string action_name{"2DQ"}; - std::unique_ptr action(new QDQ::BinaryReplaceWithQLinear(kMSDomain)); + std::unique_ptr action = std::make_unique(kMSDomain); #if !defined(ORT_MINIMAL_BUILD) - std::unique_ptr selector(new QDQ::BinarySelector()); + std::unique_ptr selector = std::make_unique(); qdq_selectors_and_actions.RegisterSelectorAndAction(action_name, SelectorAndAction::OpVersionsMap{{"Add", {}}, {"Mul", {}}}, @@ -86,10 +86,10 @@ void VariadicOpQDQRules(SelectorsAndActions& qdq_selectors_and_actions) { // 0=variadic DQ nodes 2=target, 3=Q // Replace with QLinear version of operator. Delete all original nodes. 
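// Illustrative aside (editor's sketch, not part of the diff): the pattern
// behind all the replacements in this file. Core Guidelines rule r.11
// (surfaced as warning C26409, and as GSL_SUPPRESS(r .11) elsewhere in this
// patch) flags any explicit `new`, even one immediately owned by a smart
// pointer. Foo is a hypothetical demo type.
#include <memory>
struct Foo { Foo(int, int) {} };
void Demo() {
  auto a = std::unique_ptr<Foo>(new Foo(1, 2));  // flagged: explicit new
  auto b = std::make_unique<Foo>(1, 2);          // preferred, same effect
}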
const std::string action_name{"*DQ"}; - std::unique_ptr action(new QDQ::VariadicReplaceWithQLinear(kMSDomain)); + std::unique_ptr action = std::make_unique(kMSDomain); #if !defined(ORT_MINIMAL_BUILD) - std::unique_ptr selector(new QDQ::VariadicSelector()); + std::unique_ptr selector = std::make_unique(); qdq_selectors_and_actions.RegisterSelectorAndAction(action_name, SelectorAndAction::OpVersionsMap{{"Concat", {}}}, @@ -107,10 +107,10 @@ void ConvQDQRules(SelectorsAndActions& qdq_selectors_and_actions, bool is_int8_a // Replace Conv with QLinearConv // Delete all original nodes const std::string action_name{"Conv"}; - std::unique_ptr action(new QDQ::ConvReplaceWithQLinear()); + std::unique_ptr action = std::make_unique(); #if !defined(ORT_MINIMAL_BUILD) - std::unique_ptr selector(new QDQ::ConvSelector(is_int8_allowed)); + std::unique_ptr selector = std::make_unique(is_int8_allowed); qdq_selectors_and_actions.RegisterSelectorAndAction(action_name, SelectorAndAction::OpVersionsMap{{"Conv", {}}}, @@ -129,10 +129,10 @@ void MatMulQDQRules(SelectorsAndActions& qdq_selectors_and_actions, bool is_int8 // Delete all original nodes. const std::string action_name{"MatMul"}; - std::unique_ptr action(new QDQ::MatMulReplaceWithQLinear()); + std::unique_ptr action = std::make_unique(); #if !defined(ORT_MINIMAL_BUILD) - std::unique_ptr selector(new QDQ::MatMulSelector(is_int8_allowed)); + std::unique_ptr selector = std::make_unique(is_int8_allowed); qdq_selectors_and_actions.RegisterSelectorAndAction(action_name, SelectorAndAction::OpVersionsMap{{"MatMul", {}}}, std::move(selector), diff --git a/onnxruntime/core/platform/windows/env.cc b/onnxruntime/core/platform/windows/env.cc index 01ad7b1521..6e370fffa2 100644 --- a/onnxruntime/core/platform/windows/env.cc +++ b/onnxruntime/core/platform/windows/env.cc @@ -54,6 +54,11 @@ class WindowsThread : public EnvThread { unsigned (*start_address)(int id, Eigen::ThreadPoolInterface* param); Eigen::ThreadPoolInterface* param; const ThreadOptions& thread_options; + Param(const ORTCHAR_T* name_prefix1, + int index1, + unsigned (*start_address1)(int id, Eigen::ThreadPoolInterface* param), + Eigen::ThreadPoolInterface* param1, + const ThreadOptions& thread_options1) : name_prefix(name_prefix1), index(index1), start_address(start_address1), param(param1), thread_options(thread_options1) {} }; public: @@ -63,15 +68,15 @@ class WindowsThread : public EnvThread { custom_create_thread_fn = thread_options.custom_create_thread_fn; custom_thread_creation_options = thread_options.custom_thread_creation_options; custom_join_thread_fn = thread_options.custom_join_thread_fn; - + std::unique_ptr local_param = std::make_unique(name_prefix, index, start_address, param, thread_options); if (custom_create_thread_fn) { - custom_thread_handle = custom_create_thread_fn(custom_thread_creation_options, (OrtThreadWorkerFn)CustomThreadMain, new Param{name_prefix, index, start_address, param, thread_options}); + custom_thread_handle = custom_create_thread_fn(custom_thread_creation_options, (OrtThreadWorkerFn)CustomThreadMain, local_param.release()); if (!custom_thread_handle) { - ORT_THROW("custom_create_thread_fn returned invalid handle."); + ORT_THROW("custom_create_thread_fn returned invalid handle."); } } else { hThread.reset(reinterpret_cast(_beginthreadex(nullptr, thread_options.stack_size, ThreadMain, - new Param{name_prefix, index, start_address, param, thread_options}, 0, + local_param.release(), 0, &threadID))); } } @@ -142,12 +147,18 @@ class WindowsThread : public EnvThread { 
class WindowsEnv : public Env { public: +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(push) +#pragma warning(disable : 26409) +#endif EnvThread* CreateThread(_In_opt_z_ const ORTCHAR_T* name_prefix, int index, unsigned (*start_address)(int id, Eigen::ThreadPoolInterface* param), Eigen::ThreadPoolInterface* param, const ThreadOptions& thread_options) { return new WindowsThread(name_prefix, index, start_address, param, thread_options); } - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(pop) +#endif void SleepForMicroseconds(int64_t micros) const override { Sleep(static_cast(micros) / 1000); } @@ -524,14 +535,32 @@ class WindowsEnv : public Env { } virtual Status LoadDynamicLibrary(const std::string& library_filename, bool /*global_symbols*/, void** handle) const override { + const std::wstring& wlibrary_filename = ToWideString(library_filename); #if WINAPI_FAMILY == WINAPI_FAMILY_PC_APP - *handle = ::LoadPackagedLibrary(ToWideString(library_filename).c_str(), 0); + *handle = ::LoadPackagedLibrary(wlibrary_filename.c_str(), 0); #else - *handle = ::LoadLibraryExA(library_filename.c_str(), nullptr, LOAD_WITH_ALTERED_SEARCH_PATH); + // TODO: in most cases, the path name is a relative path and the behavior of the following line of code is undefined. + *handle = ::LoadLibraryExW(wlibrary_filename.c_str(), nullptr, LOAD_WITH_ALTERED_SEARCH_PATH); #endif if (!*handle) { const auto error_code = GetLastError(); - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "LoadLibrary failed with error ", error_code, " \"", std::system_category().message(error_code), "\" when trying to load \"", library_filename, "\""); + LPVOID lpMsgBuf; + FormatMessageW( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error_code, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPWSTR)&lpMsgBuf, + 0, NULL); + std::wostringstream oss; + oss << L"LoadLibrary failed with error " << error_code << L" \"" << (LPWSTR)lpMsgBuf << L"\" when trying to load \"" << wlibrary_filename << L"\""; + std::wstring errmsg = oss.str(); + // TODO: errmsg should be converted to UTF-8 as it will be passed out to the C interface. + common::Status status(common::ONNXRUNTIME, common::FAIL, ToMBString(errmsg)); + LocalFree(lpMsgBuf); + return status; } return Status::OK(); } @@ -548,7 +577,23 @@ class WindowsEnv : public Env { *symbol = ::GetProcAddress(reinterpret_cast(handle), symbol_name.c_str()); if (!*symbol) { const auto error_code = GetLastError(); - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to find symbol in library, error code: ", error_code, " - ", std::system_category().message(error_code)); + LPVOID lpMsgBuf; + FormatMessageW( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error_code, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPWSTR)&lpMsgBuf, + 0, NULL); + std::wostringstream oss; + oss << L"Failed to find symbol " << ToWideString(symbol_name) << L" in library, error code: " << error_code << L" \"" << (LPWSTR)lpMsgBuf << L"\""; + std::wstring errmsg = oss.str(); + // TODO: errmsg should be converted to UTF-8 as it will be passed out to the C interface. 
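// Illustrative aside (editor's sketch, not part of the diff): one way to do
// the UTF-8 conversion the TODO above asks for, using the Win32 API. Minimal
// sketch with a hypothetical helper name; error handling is reduced to a
// single guard.
#include <string>
#include <windows.h>
std::string WideToUtf8(const std::wstring& w) {
  // First call sizes the output; a length of -1 includes the terminating NUL.
  int n = WideCharToMultiByte(CP_UTF8, 0, w.c_str(), -1, nullptr, 0, nullptr, nullptr);
  if (n <= 0) return {};
  std::string s(static_cast<size_t>(n), '\0');
  WideCharToMultiByte(CP_UTF8, 0, w.c_str(), -1, &s[0], n, nullptr, nullptr);
  s.resize(s.size() - 1);  // drop the embedded NUL the -1 length produced
  return s;
}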
+ common::Status status(common::ONNXRUNTIME, common::FAIL, ToMBString(errmsg)); + LocalFree(lpMsgBuf); + return status; } return Status::OK(); } @@ -576,11 +621,11 @@ class WindowsEnv : public Env { // Create buffer to hold the result std::string buffer(kBufferSize, '\0'); - //The last argument is the size of the buffer pointed to by the lpBuffer parameter, including the null-terminating character, in characters. - //If the function succeeds, the return value is the number of characters stored in the buffer pointed to by lpBuffer, not including the terminating null character. - //Therefore, If the function succeeds, kBufferSize should be larger than char_count. + // The last argument is the size of the buffer pointed to by the lpBuffer parameter, including the null-terminating character, in characters. + // If the function succeeds, the return value is the number of characters stored in the buffer pointed to by lpBuffer, not including the terminating null character. + // Therefore, if the function succeeds, kBufferSize should be larger than char_count. auto char_count = GetEnvironmentVariableA(var_name.c_str(), buffer.data(), kBufferSize); - + if (kBufferSize > char_count) { buffer.resize(char_count); return buffer; diff --git a/onnxruntime/core/platform/windows/stacktrace.cc b/onnxruntime/core/platform/windows/stacktrace.cc index 53e403a042..4e159b8edd 100644 --- a/onnxruntime/core/platform/windows/stacktrace.cc +++ b/onnxruntime/core/platform/windows/stacktrace.cc @@ -104,7 +104,7 @@ std::vector CaptureStackTrace::Trace() const { stacktrace.reserve(num_frames); // hide CaptureStackTrace::Trace and GetStackTrace so the output starts with the 'real' location - const int frames_to_skip = 2; + constexpr int frames_to_skip = 2; // we generally want to skip the first two frames, but if something weird is going on (e.g.
code coverage is // running) and we only have 1 or 2 frames, output them so there's at least something that may be meaningful diff --git a/onnxruntime/core/providers/common.h b/onnxruntime/core/providers/common.h index 0b9179c0b4..22949ed297 100644 --- a/onnxruntime/core/providers/common.h +++ b/onnxruntime/core/providers/common.h @@ -118,9 +118,9 @@ inline Status ComputePad(const int64_t in_dim, return Status::OK(); } -inline int64_t ComputeOutputShape(const int64_t in_dim, - const int64_t stride, const int64_t kernel, const int64_t dilation, - const int64_t pad_head, const int64_t pad_tail) { +constexpr inline int64_t ComputeOutputShape(const int64_t in_dim, + const int64_t stride, const int64_t kernel, const int64_t dilation, + const int64_t pad_head, const int64_t pad_tail) { const int64_t dkernel = dilation * (kernel - 1) + 1; return static_cast(static_cast(in_dim + pad_head + pad_tail - dkernel) / stride + 1); } diff --git a/onnxruntime/core/providers/cpu/activation/activations.h b/onnxruntime/core/providers/cpu/activation/activations.h index 75a73a818d..9b5fd84929 100644 --- a/onnxruntime/core/providers/cpu/activation/activations.h +++ b/onnxruntime/core/providers/cpu/activation/activations.h @@ -84,6 +84,7 @@ struct Softplus : public ElementWiseRangedTransform { Status Init(const onnxruntime::NodeAttributes&) { return Status::OK(); } + GSL_SUPPRESS(r .11) ElementWiseRangedTransform* Copy() const { using T1 = typename std::remove_pointer::type; using T2 = typename std::remove_const::type; @@ -106,6 +107,7 @@ struct Relu : public ElementWiseRangedTransform { Status Init(const onnxruntime::NodeAttributes&) { return Status::OK(); } + GSL_SUPPRESS(r .11) ElementWiseRangedTransform* Copy() const { // replace it with a macro. why this? using T1 = typename std::remove_pointer::type; using T2 = typename std::remove_const::type; //redundant? 
@@ -128,6 +130,7 @@ struct Sigmoid : public ElementWiseRangedTransform { Status Init(const onnxruntime::NodeAttributes&) { return Status::OK(); } + GSL_SUPPRESS(r .11) ElementWiseRangedTransform* Copy() const { using T1 = typename std::remove_pointer::type; using T2 = typename std::remove_const::type; @@ -153,6 +156,7 @@ struct Softsign : public ElementWiseRangedTransform { Status Init(const onnxruntime::NodeAttributes&) { return Status::OK(); } + GSL_SUPPRESS(r .11) ElementWiseRangedTransform* Copy() const { using T1 = typename std::remove_pointer::type; using T2 = typename std::remove_const::type; @@ -175,6 +179,7 @@ struct Tanh : public ElementWiseRangedTransform { Status Init(const onnxruntime::NodeAttributes&) { return Status::OK(); } + GSL_SUPPRESS(r .11) ElementWiseRangedTransform* Copy() const { using T1 = typename std::remove_pointer::type; using T2 = typename std::remove_const::type; @@ -192,6 +197,7 @@ struct Tanh : public ElementWiseRangedTransform { ym = xm.tanh(); } }; + template <> void Tanh::operator()(std::ptrdiff_t first, std::ptrdiff_t last) const; @@ -226,7 +232,6 @@ struct Selu : public ElementWiseRangedTransform { ym = (T)gamma * (xm.cwiseMax(0.0f) + ((T)alpha * (xm.array().exp() - 1.0f)).cwiseMin(0.0f)); } }; - } // namespace functors DEFINE_ELE_KERNEL(Celu); diff --git a/onnxruntime/core/providers/cpu/controlflow/loop.cc b/onnxruntime/core/providers/cpu/controlflow/loop.cc index b844d79206..fac00d83b0 100644 --- a/onnxruntime/core/providers/cpu/controlflow/loop.cc +++ b/onnxruntime/core/providers/cpu/controlflow/loop.cc @@ -142,7 +142,7 @@ Loop::Info::Info(const onnxruntime::Node& node, const GraphViewer& subgraph_in) const auto& node_input_types = node.InputDefs(); loop_carried_vars_types.reserve(num_subgraph_inputs); for (int i = 0; i < num_loop_carried_vars; ++i) { - loop_carried_vars_types.push_back(node_input_types[i + 2]->TypeAsProto()); + loop_carried_vars_types.push_back(node_input_types[static_cast(i) + 2]->TypeAsProto()); } auto& subgraph_inputs = subgraph.GetInputs(); @@ -280,7 +280,7 @@ common::Status Loop::SetupSubgraphExecutionInfo(const SessionState& session_stat // the Loop inputs are matched to subgraph feeds based on order. // we first need the names of the Loop inputs to determine what device they are available on std::vector feed_names; - feed_names.reserve(info_->num_subgraph_inputs + info_->num_implicit_inputs); + feed_names.reserve(static_cast(info_->num_subgraph_inputs) + info_->num_implicit_inputs); // iter_num and cond subgraph inputs - created by the LoopImpl::Initialize so the name doesn't matter // as we skip them when we call FindDevicesForValues, and default them to always being on CPU. 
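// Illustrative aside (editor's sketch, not part of the diff): the
// static_cast(i) + 2 pattern used throughout these control-flow changes.
// Widening the int operand first (to size_t or ptrdiff_t; the cast's target
// type is elided in this rendering of the patch) makes the index arithmetic
// itself happen in 64 bits, which is what C26451 asks for.
#include <cstddef>
#include <vector>
int At(const std::vector<int>& v, int i) {
  // return v[i + 2];                          // i + 2 evaluated in 32-bit int first
  return v[static_cast<std::size_t>(i) + 2];   // addition performed in size_t
}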
@@ -291,7 +291,7 @@ common::Status Loop::SetupSubgraphExecutionInfo(const SessionState& session_stat const auto& loop_inputs = node.InputDefs(); for (int i = 0; i < info_->num_loop_carried_vars; ++i) { // + 2 to skip 'M' and 'cond' Loop inputs - feed_names.push_back(loop_inputs[i + 2]->Name()); + feed_names.push_back(loop_inputs[static_cast(i) + 2]->Name()); } for (auto& entry : node.ImplicitInputDefs()) { @@ -306,7 +306,7 @@ common::Status Loop::SetupSubgraphExecutionInfo(const SessionState& session_stat // now update the feed names to use the subgraph input names for the loop carried vars so that we can determine // what device the subgraph needs them on - for (int i = 0; i < info_->num_loop_carried_vars; ++i) { + for (ptrdiff_t i = 0; i < info_->num_loop_carried_vars; ++i) { // +2 for both to skip the iter_num and cond values feed_names[i + 2] = info_->subgraph_input_names[i + 2]; } @@ -329,7 +329,7 @@ common::Status Loop::SetupSubgraphExecutionInfo(const SessionState& session_stat // Loop state variables need to be where we can feed them in to the next iteration, so set the fetch location // to match the feed location. - for (int i = 0; i < info_->num_loop_carried_vars; ++i) { + for (ptrdiff_t i = 0; i < info_->num_loop_carried_vars; ++i) { // +2 for both to skip the iter_num and cond input values const auto& alloc_info = utils::FindMemoryInfoForValue(session_state, loop_inputs[i + 2]->Name()); fetch_locations.push_back(&alloc_info); @@ -421,13 +421,13 @@ Status LoopImpl::Initialize() { iter_num_mlvalue_ = MakeScalarMLValue(cpu_allocator, 0, iter_num_rank != 0); condition_mlvalue_ = MakeScalarMLValue(cpu_allocator, condition_, condition_rank != 0); - loop_output_tensors_.resize(info_.num_outputs - info_.num_loop_carried_vars); + loop_output_tensors_.resize(static_cast(info_.num_outputs) - info_.num_loop_carried_vars); return status; } void LoopImpl::CreateInitialFeeds(std::vector& feeds) { - feeds.reserve(info_.num_subgraph_inputs + info_.num_implicit_inputs); + feeds.reserve(static_cast(info_.num_subgraph_inputs) + info_.num_implicit_inputs); // This ordering is the same as used in SetupSubgraphExecutionInfo feeds.push_back(iter_num_mlvalue_); @@ -450,12 +450,12 @@ void LoopImpl::SaveOutputsAndUpdateFeeds(const std::vector& last_outpu // next_input: iter_num, cond, loop_vars. iter_num is re-used // simple copy for cond and loop carried vars. 
   // simple copy for cond and loop carried vars. start at 1 to skip iter_num in input
-  for (int i = 1; i < info_.num_subgraph_inputs; ++i) {
+  for (ptrdiff_t i = 1; i < info_.num_subgraph_inputs; ++i) {
     next_inputs[i] = last_outputs[i - 1];
   }

   // save loop outputs as we have to concatenate at the end
-  for (int j = info_.num_loop_carried_vars; j < info_.num_outputs; ++j) {
+  for (ptrdiff_t j = info_.num_loop_carried_vars; j < info_.num_outputs; ++j) {
     ORT_ENFORCE(last_outputs[j + 1].IsTensor(), "All scan outputs MUST be tensors");
     loop_output_tensors_[j - info_.num_loop_carried_vars].push_back(last_outputs[j + 1]);  // skip 'cond' in output
   }
@@ -570,13 +570,13 @@ Status LoopImpl::Execute(const FeedsFetchesManager& ffm) {
   if (iter_num_value != 0) {
     for (int i = 0; i < info_.num_loop_carried_vars; ++i) {
       // need to allocate Loop output and copy OrtValue from fetches
-      ORT_RETURN_IF_ERROR(copy_mlvalue_to_output(fetches[i + 1], i, iter_num_value, *info_.loop_carried_vars_types[i]));  // skip cond
+      ORT_RETURN_IF_ERROR(copy_mlvalue_to_output(fetches[static_cast<size_t>(i) + 1], i, iter_num_value, *info_.loop_carried_vars_types[static_cast<size_t>(i)]));  // skip cond
     }

     for (int i = info_.num_loop_carried_vars; i < info_.num_outputs; ++i) {
       // add last output
-      auto& per_iteration_outputs = loop_output_tensors_[i - info_.num_loop_carried_vars];
-      per_iteration_outputs.push_back(fetches[i + 1]);  // skip cond
+      auto& per_iteration_outputs = loop_output_tensors_[static_cast<size_t>(i) - info_.num_loop_carried_vars];
+      per_iteration_outputs.push_back(fetches[static_cast<size_t>(i) + 1]);  // skip cond

       ORT_RETURN_IF_ERROR(ConcatenateLoopOutput(per_iteration_outputs, i));
     }
@@ -584,7 +584,7 @@ Status LoopImpl::Execute(const FeedsFetchesManager& ffm) {
     // no iterations.
     // copy input loop carried vars to output.
     for (int i = 0; i < info_.num_loop_carried_vars; ++i) {
-      ORT_RETURN_IF_ERROR(copy_mlvalue_to_output(feeds[i + 2], i, iter_num_value, *info_.loop_carried_vars_types[i]));  // skip iter# and cond
+      ORT_RETURN_IF_ERROR(copy_mlvalue_to_output(feeds[static_cast<size_t>(i) + 2], i, iter_num_value, *info_.loop_carried_vars_types[i]));  // skip iter# and cond
     }

     // create empty outputs for loop outputs using the subgraph output shapes for the rank
@@ -592,11 +592,11 @@ Status LoopImpl::Execute(const FeedsFetchesManager& ffm) {
     for (int i = info_.num_loop_carried_vars; i < info_.num_outputs; ++i) {
       // get shape from subgraph output if possible to attempt to have the correct rank
-      auto* graph_output = graph_outputs.at(i + 1);  // + 1 as first subgraph output is condition value
+      auto* graph_output = graph_outputs.at(static_cast<size_t>(i) + 1);  // + 1 as first subgraph output is condition value
       auto* graph_output_shape = graph_output->Shape();

       std::vector<int64_t> output_dims;
-      output_dims.reserve((graph_output_shape ? graph_output_shape->dim_size() : 0) + 1);
+      output_dims.reserve(static_cast<size_t>(graph_output_shape ? graph_output_shape->dim_size() : 0) + 1);
       output_dims.push_back(0);  // num iterations is first dim

       if (graph_output_shape) {
diff --git a/onnxruntime/core/providers/cpu/controlflow/scan_8.cc b/onnxruntime/core/providers/cpu/controlflow/scan_8.cc
index b5765bf5ca..53e7ae0e69 100644
--- a/onnxruntime/core/providers/cpu/controlflow/scan_8.cc
+++ b/onnxruntime/core/providers/cpu/controlflow/scan_8.cc
@@ -161,7 +161,7 @@ Status Scan<8>::SetupSubgraphExecutionInfo(const SessionState& session_state,
   const auto& node = Node();
   info_ = std::make_unique<Scan<8>::Info>(node, subgraph_session_state.GetGraphViewer(),
-                                          static_cast(num_scan_inputs_));
+                                          static_cast(num_scan_inputs_));
   auto status = scan::detail::CreateFeedsFetchesManager(node, *info_, session_state, subgraph_session_state,
                                                         /* is_v8 */ true, feeds_fetches_manager_);
@@ -396,13 +396,13 @@ Status Scan8Impl::Execute(const FeedsFetchesManager& ffm) {

   // Setup input OrtValue streams
   std::vector<OrtValueTensorSlicer<const OrtValue>::Iterator> scan_input_stream_iterators;
-  scan_input_stream_iterators.reserve(info_.num_variadic_inputs - info_.num_loop_state_variables);
+  scan_input_stream_iterators.reserve(static_cast<size_t>(info_.num_variadic_inputs) - info_.num_loop_state_variables);

   for (int i = info_.num_loop_state_variables, end = info_.num_variadic_inputs; i < end; ++i) {
     const auto& ort_value = GetSubgraphInputMLValue(context_, i);

     // forward
-    if (directions_[i - info_.num_loop_state_variables] == static_cast<int64_t>(ScanDirection::kForward)) {
+    if (directions_[static_cast<size_t>(i) - info_.num_loop_state_variables] == static_cast<int64_t>(ScanDirection::kForward)) {
       // the iterator is self contained, so we don't need to keep the OrtValueTensorSlicer instance around
       scan_input_stream_iterators.push_back(device_helpers_.create_const_slicer_func(ort_value, 1, b).begin());
     } else {  // reverse
diff --git a/onnxruntime/core/providers/cpu/controlflow/scan_9.cc b/onnxruntime/core/providers/cpu/controlflow/scan_9.cc
index 001c2d9ac8..c387a3da4c 100644
--- a/onnxruntime/core/providers/cpu/controlflow/scan_9.cc
+++ b/onnxruntime/core/providers/cpu/controlflow/scan_9.cc
@@ -205,7 +205,7 @@ Status Scan<9>::SetupSubgraphExecutionInfo(const SessionState& session_state,
   const auto& node = Node();
   info_ = std::make_unique<Scan<9>::Info>(node, subgraph_session_state.GetGraphViewer(),
-                                          static_cast(num_scan_inputs_));
+                                          static_cast(num_scan_inputs_));
   auto status = scan::detail::CreateFeedsFetchesManager(node, *info_, session_state, subgraph_session_state,
                                                         /* is_v8 */ false, feeds_fetches_manager_);
@@ -281,7 +281,7 @@ Status ScanImpl::ValidateSubgraphInput(int start_input, int end_input,
                            " Expected ", min_dims_required, " dimensions or more but input had shape of ", input_shape);

-    auto seq_len_dim = input_axes_[i - info_.num_loop_state_variables];
+    auto seq_len_dim = input_axes_[static_cast<size_t>(i) - info_.num_loop_state_variables];
     auto this_seq_len = input_shape[seq_len_dim];

     if (sequence_len_ < 0) {
@@ -432,7 +432,7 @@ Status ScanImpl::Execute(const FeedsFetchesManager& ffm) {

   // Setup input OrtValue streams
   std::vector<OrtValueTensorSlicer<const OrtValue>::Iterator> scan_input_stream_iterators;
-  scan_input_stream_iterators.reserve(info_.num_inputs - info_.num_loop_state_variables);
+  scan_input_stream_iterators.reserve(static_cast<size_t>(info_.num_inputs) - info_.num_loop_state_variables);

   for (int i = 0, end = info_.num_scan_inputs; i < end; ++i) {
     const auto& ort_value = inputs_[i];
diff --git a/onnxruntime/core/providers/cpu/controlflow/scan_utils.cc b/onnxruntime/core/providers/cpu/controlflow/scan_utils.cc
index 49a5cb19ec..e5b25b56a4 100644
---
a/onnxruntime/core/providers/cpu/controlflow/scan_utils.cc +++ b/onnxruntime/core/providers/cpu/controlflow/scan_utils.cc @@ -142,7 +142,7 @@ Status CreateFeedsFetchesManager(const Node& node, // we need the names of the Scan inputs to determine what device they are available on, // so first create a list using those value std::vector feed_names; - feed_names.reserve(info.num_variadic_inputs + info.num_implicit_inputs); + feed_names.reserve(static_cast(info.num_variadic_inputs) + info.num_implicit_inputs); const auto& scan_inputs = node.InputDefs(); int start = is_v8 ? 1 : 0; // skip sequence_lens for v8 @@ -217,7 +217,7 @@ Status IterateSequence(OpKernelContextInternal& context, const SessionState& ses feeds[input] = loop_state_variables[input].Input(); } else { // add sliced input - auto& iterator = scan_input_stream_iterators[input - num_loop_state_variables]; + auto& iterator = scan_input_stream_iterators[static_cast(input) - num_loop_state_variables]; feeds[input] = *iterator; ++iterator; diff --git a/onnxruntime/core/providers/cpu/controlflow/scan_utils.h b/onnxruntime/core/providers/cpu/controlflow/scan_utils.h index 6e36fe2f28..4eee03b6f2 100644 --- a/onnxruntime/core/providers/cpu/controlflow/scan_utils.h +++ b/onnxruntime/core/providers/cpu/controlflow/scan_utils.h @@ -52,7 +52,7 @@ class LoopStateVariable { const OrtValue original_value_; OrtValue final_value_; - /* we use original_value and final_value once, + /* we use original_value and final_value once, and alternate between a_ and b_ as input/output for each iteration to avoid copies Iteration Input Output @@ -88,8 +88,8 @@ class OutputIterator { ScanDirection direction = ScanDirection::kForward, bool temporary = false, MLDataType data_type = nullptr) { - iterator.reset(new OutputIterator(context, output_index, is_loop_state_var, is_v8, final_shape, - create_slicer_func, zero_data_func, direction, temporary, data_type)); + iterator = std::make_unique(context, output_index, is_loop_state_var, is_v8, final_shape, + create_slicer_func, zero_data_func, direction, temporary, data_type); return iterator->Initialize(); } @@ -115,8 +115,7 @@ class OutputIterator { ORT_ENFORCE(final_output_mlvalue_, "Attempt to retrieve final output before it was set."); return *final_output_mlvalue_; } - - private: + //std::unique_ptr needs to access this function. OutputIterator(OpKernelContextInternal& context, int output_index, bool is_loop_state_var, @@ -128,6 +127,7 @@ class OutputIterator { bool temporary, MLDataType data_type); + private: Status Initialize(); Status AllocateFinalBuffer(); @@ -201,7 +201,7 @@ void CalculateTransposedShapeForInput(const TensorShape& original_shape, int64_t /** Calculate the transpose permutations and shape by shifting the chosen axis FROM the first dimension. -e.g. if shape is {4, 2, 3} and axis 2 is chosen, dimension 0 will move to dimension 2, +e.g. 
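
The OutputIterator::Create hunk above swaps `iterator.reset(new ...)` for `std::make_unique`, and that is also why the constructor moves out of the private section: `std::make_unique` constructs the object itself and, unlike a member function calling `new`, has no access to private constructors. A reduced sketch of the constraint, using an illustrative class rather than the real OutputIterator:

    #include <memory>

    class OutputIter {
     public:
      static std::unique_ptr<OutputIter> Create(int index) {
        // before: iterator.reset(new OutputIter(index));  // flagged by C26409
        return std::make_unique<OutputIter>(index);        // needs a reachable ctor
      }

      // If this constructor were private, Create() could still write
      // 'new OutputIter(index)' (members may use private ctors), but
      // std::make_unique could not, since it is not a friend of the class.
      explicit OutputIter(int index) : index_(index) {}

     private:
      int index_;
    };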
if shape is {4, 2, 3} and axis 2 is chosen, dimension 0 will move to dimension 2, the permutations will be {1, 2, 0} and output shape will be {2, 3, 4} */ void CalculateTransposedShapeForOutput(const TensorShape& original_shape, int64_t axis, diff --git a/onnxruntime/core/providers/cpu/cpu_provider_factory.cc b/onnxruntime/core/providers/cpu/cpu_provider_factory.cc index a2dff68a39..00222f5642 100644 --- a/onnxruntime/core/providers/cpu/cpu_provider_factory.cc +++ b/onnxruntime/core/providers/cpu/cpu_provider_factory.cc @@ -37,7 +37,9 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_CPU, _In_ OrtSessio options->provider_factories.push_back(onnxruntime::CreateExecutionProviderFactory_CPU(use_arena)); return nullptr; } - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif ORT_API_STATUS_IMPL(OrtApis::CreateCpuMemoryInfo, enum OrtAllocatorType type, enum OrtMemType mem_type, _Outptr_ OrtMemoryInfo** out) { *out = new OrtMemoryInfo(onnxruntime::CPU, type, OrtDevice(), 0, mem_type); diff --git a/onnxruntime/core/providers/cpu/element_wise_ranged_transform.h b/onnxruntime/core/providers/cpu/element_wise_ranged_transform.h index b7fe209203..f6cfc64216 100644 --- a/onnxruntime/core/providers/cpu/element_wise_ranged_transform.h +++ b/onnxruntime/core/providers/cpu/element_wise_ranged_transform.h @@ -56,6 +56,7 @@ ElementWiseRangedTransform::~ElementWiseRangedTransform() { Status Init(const onnxruntime::NodeAttributes& attributes) { \ return (GetFloatParam(#X, attributes, X)); \ } \ + GSL_SUPPRESS(r .11) \ ElementWiseRangedTransform* Copy() const final { \ using T1 = typename std::remove_pointer::type; \ using T2 = typename std::remove_const::type; \ @@ -70,6 +71,7 @@ ElementWiseRangedTransform::~ElementWiseRangedTransform() { ORT_RETURN_IF_ERROR(GetFloatParam(#Y, attributes, Y)); \ return Status::OK(); \ } \ + GSL_SUPPRESS(r .11) \ ElementWiseRangedTransform* Copy() const final { \ using T1 = typename std::remove_pointer::type; \ using T2 = typename std::remove_const::type; \ diff --git a/onnxruntime/core/providers/cpu/generator/range.cc b/onnxruntime/core/providers/cpu/generator/range.cc index 05ef9f508a..97903fd074 100644 --- a/onnxruntime/core/providers/cpu/generator/range.cc +++ b/onnxruntime/core/providers/cpu/generator/range.cc @@ -6,7 +6,10 @@ #include #include "core/providers/op_kernel_type_control.h" - +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26451) +#endif namespace onnxruntime { namespace op_kernel_type_control { diff --git a/onnxruntime/core/providers/cpu/math/clip.h b/onnxruntime/core/providers/cpu/math/clip.h index cdbe86ee66..938f41f1f2 100644 --- a/onnxruntime/core/providers/cpu/math/clip.h +++ b/onnxruntime/core/providers/cpu/math/clip.h @@ -16,8 +16,8 @@ template class Clip_6Base { public: explicit Clip_6Base(const OpKernelInfo& info) { - auto min_val = std::numeric_limits::lowest(); - auto max_val = std::numeric_limits::max(); + constexpr auto min_val = std::numeric_limits::lowest(); + constexpr auto max_val = std::numeric_limits::max(); info.GetAttrOrDefault("min", &min_, min_val); info.GetAttrOrDefault("max", &max_, max_val); ORT_ENFORCE(min_ <= max_); diff --git a/onnxruntime/core/providers/cpu/math/det.cc b/onnxruntime/core/providers/cpu/math/det.cc index c7b2feffe4..996e8505f0 100644 --- a/onnxruntime/core/providers/cpu/math/det.cc +++ b/onnxruntime/core/providers/cpu/math/det.cc @@ -3,6 +3,11 @@ #include "core/providers/cpu/math/det.h" #include 
"core/util/math_cpuonly.h" +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +// Chance of arithmetic overflow could be reduced +#pragma warning(disable : 26451) +#endif using namespace onnxruntime::common; diff --git a/onnxruntime/core/providers/cpu/math/einsum_utils/einsum_compute_preprocessor.cc b/onnxruntime/core/providers/cpu/math/einsum_utils/einsum_compute_preprocessor.cc index 88ef7738b2..ab6944b011 100644 --- a/onnxruntime/core/providers/cpu/math/einsum_utils/einsum_compute_preprocessor.cc +++ b/onnxruntime/core/providers/cpu/math/einsum_utils/einsum_compute_preprocessor.cc @@ -114,7 +114,7 @@ Status EinsumComputePreprocessor::ProcessSubscripts() { // Example for the following line of code // Subscript "...ij" for an input of rank 6 // num_of_ellipsis_dims = 6 - 5 + 3 = 4 - int64_t current_num_of_ellipsis_dims = rank - subscript.length() + 3; + int64_t current_num_of_ellipsis_dims = static_cast(rank) - subscript.length() + 3; if (current_num_of_ellipsis_dims < 0) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Einsum subscripts string contains too many subscript labels when compared to the rank of the input"); diff --git a/onnxruntime/core/providers/cpu/math/element_wise_ops.cc b/onnxruntime/core/providers/cpu/math/element_wise_ops.cc index 3403023608..5224c7fef5 100644 --- a/onnxruntime/core/providers/cpu/math/element_wise_ops.cc +++ b/onnxruntime/core/providers/cpu/math/element_wise_ops.cc @@ -1794,8 +1794,8 @@ void UntypedBroadcastTwo(OpKernelContext& context, const ProcessBroadcastSpanFun concurrency::ThreadPool::TryParallelFor( tp, output_size / span_size, - TensorOpCost{static_cast(input_broadcaster.Input0ElementSize() * span_size), - static_cast(output_tensor.DataType()->Size() * span_size), + TensorOpCost{static_cast(input_broadcaster.Input0ElementSize()) * span_size, + static_cast(output_tensor.DataType()->Size()) * span_size, unit_cost * span_size}, [span_size, &const_input_broadcaster, &output_tensor, &funcs, user_data](std::ptrdiff_t first_span, std::ptrdiff_t last_span) { diff --git a/onnxruntime/core/providers/cpu/math/element_wise_ops.h b/onnxruntime/core/providers/cpu/math/element_wise_ops.h index b95738c878..a94d6b25c3 100644 --- a/onnxruntime/core/providers/cpu/math/element_wise_ops.h +++ b/onnxruntime/core/providers/cpu/math/element_wise_ops.h @@ -10,7 +10,12 @@ namespace onnxruntime { namespace functors { - +// TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(push) +// Do not use raw new/delete. 
+#pragma warning(disable : 26409) +#endif template struct Log final : public ElementWiseRangedTransform { Status Init(const onnxruntime::NodeAttributes) { @@ -982,4 +987,7 @@ struct TensorAllocator { private: AllocatorPtr allocator_; }; +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(pop) +#endif } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/ml/ml_common.h b/onnxruntime/core/providers/cpu/ml/ml_common.h index e2fb9139e1..e40b2a563f 100644 --- a/onnxruntime/core/providers/cpu/ml/ml_common.h +++ b/onnxruntime/core/providers/cpu/ml/ml_common.h @@ -241,7 +241,7 @@ static inline void multiclass_probability(int64_t classcount, } } -static const float ml_sqrt2 = 1.41421356f; +static constexpr float ml_sqrt2 = 1.41421356f; static inline float ComputeLogistic(float val) { float v = 1 / (1 + std::exp(-std::abs(val))); diff --git a/onnxruntime/core/providers/cpu/ml/svmclassifier.cc b/onnxruntime/core/providers/cpu/ml/svmclassifier.cc index e9bd36e846..070d0aec73 100644 --- a/onnxruntime/core/providers/cpu/ml/svmclassifier.cc +++ b/onnxruntime/core/providers/cpu/ml/svmclassifier.cc @@ -3,6 +3,11 @@ #include "core/providers/cpu/ml/svmclassifier.h" #include "core/platform/threadpool.h" +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +// Chance of arithmetic overflow could be reduced +#pragma warning(disable : 26451) +#endif namespace onnxruntime { namespace ml { diff --git a/onnxruntime/core/providers/cpu/nn/batch_norm.h b/onnxruntime/core/providers/cpu/nn/batch_norm.h index fc109dff76..bb8400094b 100644 --- a/onnxruntime/core/providers/cpu/nn/batch_norm.h +++ b/onnxruntime/core/providers/cpu/nn/batch_norm.h @@ -58,7 +58,10 @@ class BatchNorm : public OpKernel { #endif } } - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(push) +#pragma warning(disable : 26451) +#endif Status Compute(OpKernelContext* p_op_kernel_context) const override { const auto* X = p_op_kernel_context->Input(0); const auto* scale = p_op_kernel_context->Input(1); @@ -206,4 +209,7 @@ class BatchNorm : public OpKernel { const bool is_spatial_; int64_t is_train_; }; +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(pop) +#endif } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/nn/batch_norm_helper.h b/onnxruntime/core/providers/cpu/nn/batch_norm_helper.h index 85e555ef23..49afe6503c 100644 --- a/onnxruntime/core/providers/cpu/nn/batch_norm_helper.h +++ b/onnxruntime/core/providers/cpu/nn/batch_norm_helper.h @@ -8,7 +8,12 @@ #include "core/framework/tensor.h" #endif #include - +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(push) +// Chance of arithmetic overflow could be reduced +#pragma warning(disable : 26451) +#endif namespace onnxruntime { class BatchNormHelper { public: @@ -124,3 +129,6 @@ class BatchNormHelper { } }; } // namespace onnxruntime +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(pop) +#endif \ No newline at end of file diff --git a/onnxruntime/core/providers/cpu/nn/conv_attributes.h b/onnxruntime/core/providers/cpu/nn/conv_attributes.h index 7fff54137d..393a5ea992 100644 --- a/onnxruntime/core/providers/cpu/nn/conv_attributes.h +++ b/onnxruntime/core/providers/cpu/nn/conv_attributes.h @@ -243,7 +243,7 @@ struct ConvAttributes { } post_slicing_needed = true; - slice_axes.push_back(dim + 2); + slice_axes.push_back(static_cast(dim) + 2); slice_starts.push_back(excess_output_head); slice_ends.push_back(excess_output_head + output_dim_size); // we may 
modify this below output_shape_with_revised_pads.push_back(excess_output_head + output_dim_size); // we may modify this below @@ -273,7 +273,7 @@ struct ConvAttributes { // Head has not been over-padded. Only tail pads need to be modified. post_slicing_needed = true; - slice_axes.push_back(dim + 2); + slice_axes.push_back(static_cast(dim) + 2); slice_starts.push_back(0); slice_ends.push_back(output_dim_size - revised_dim_size); } diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose.cc b/onnxruntime/core/providers/cpu/nn/conv_transpose.cc index d7e39c16b6..1e2f4f60d3 100644 --- a/onnxruntime/core/providers/cpu/nn/conv_transpose.cc +++ b/onnxruntime/core/providers/cpu/nn/conv_transpose.cc @@ -76,7 +76,7 @@ Status ConvTranspose::PrePack(const Tensor& tensor, int input_idx, Alloca transposed_filter_ = BufferUniquePtr(packed_filter_data, BufferDeleter(alloc)); for (int64_t group_id = 0; group_id < conv_transpose_attrs_.group; ++group_id) { - MlasTranspose(tensor.Data() + (N * K * group_id), + MlasTranspose(tensor.Data() + (group_id * N * K), ((float*)packed_filter_data) + (group_id * packed_elements_per_group), K, N); } diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h b/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h index 9ded92912f..621a522344 100644 --- a/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h +++ b/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h @@ -50,7 +50,7 @@ struct ConvTransposeAttributes : public ConvAttributes { const TensorShape& F_Shape = (filter_shape != nullptr) ? *filter_shape : F->Shape(); const Tensor* Pads = dynamic_padding ? context->Input(2) : nullptr; const Tensor* B = has_bias ? (dynamic_padding ? context->Input(3) : context->Input(2)) : nullptr; - const TensorShape& input_shape = X->Shape().Slice(2); + TensorShape input_shape = X->Shape().Slice(2); const int64_t num_input_channels = X->Shape()[1]; const int64_t N = X->Shape()[0]; diff --git a/onnxruntime/core/providers/cpu/nn/lrn.cc b/onnxruntime/core/providers/cpu/nn/lrn.cc index a3d133358f..5111a1479e 100644 --- a/onnxruntime/core/providers/cpu/nn/lrn.cc +++ b/onnxruntime/core/providers/cpu/nn/lrn.cc @@ -21,7 +21,11 @@ #include "core/common/safeint.h" #include "core/util/math.h" #include "core/util/math_cpuonly.h" - +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +// Chance of arithmetic overflow could be reduced +#pragma warning(disable : 26451) +#endif namespace onnxruntime { namespace functors { @@ -74,7 +78,7 @@ Status LRN::Compute(OpKernelContext* context) const { auto* scale_data = static_cast(scale_buffer.get()); math::Set(Xsize, bias_, scale_data, &CPUMathUtil::Instance()); - const size_t padded_square_size = (C + size_ - 1) * H * W; + const size_t padded_square_size = (static_cast(C) + size_ - 1) * H * W; auto psdata = alloc->Alloc(SafeInt(sizeof(float)) * padded_square_size); BufferUniquePtr padded_square_buffer(psdata, BufferDeleter(alloc)); auto* padded_square_data = static_cast(padded_square_buffer.get()); diff --git a/onnxruntime/core/providers/cpu/nn/pool_attributes.h b/onnxruntime/core/providers/cpu/nn/pool_attributes.h index 11c11b6b6c..87f07e55f7 100644 --- a/onnxruntime/core/providers/cpu/nn/pool_attributes.h +++ b/onnxruntime/core/providers/cpu/nn/pool_attributes.h @@ -168,7 +168,11 @@ struct PoolAttributes { *out_size = ComputeOutputSize(in_size, stride, kernel, *pad_head + *pad_tail, dilation); } } - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(push) +// Chance of 
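
The lrn.cc hunk above also shows the second overflow strategy used in this patch: where a plain cast merely widens, `SafeInt` (from core/common/safeint.h) actually checks, turning a wrapped multiplication in an allocation size into an exception instead of a too-small buffer. A hand-rolled sketch of the same idea; `CheckedMul` is illustrative, not ORT's API:

    #include <cstddef>
    #include <limits>
    #include <stdexcept>

    size_t CheckedMul(size_t a, size_t b) {
      // mirror of what SafeInt<size_t>::operator* does: detect wraparound
      if (b != 0 && a > std::numeric_limits<size_t>::max() / b) {
        throw std::overflow_error("allocation size overflows size_t");
      }
      return a * b;
    }

    // usage: request CheckedMul(sizeof(float), padded_square_size) bytes
    // rather than multiplying unchecked before calling the allocator.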
arithmetic overflow could be reduced +#pragma warning(disable : 26451) +#endif int64_t ComputeOutputSize(int64_t in_size, int64_t stride, int64_t kernel, @@ -180,6 +184,9 @@ struct PoolAttributes { return static_cast( std::ceil(static_cast(in_size + pad_needed - dilation * (kernel - 1) - 1) / stride + 1)); } +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(pop) +#endif }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/nn/pool_base.h b/onnxruntime/core/providers/cpu/nn/pool_base.h index d03754eec9..00dd1b1520 100644 --- a/onnxruntime/core/providers/cpu/nn/pool_base.h +++ b/onnxruntime/core/providers/cpu/nn/pool_base.h @@ -36,7 +36,7 @@ class PoolProcessContext { class AveragePool { public: - static float Initialize() { + constexpr static float Initialize() { return 0.0; } @@ -59,7 +59,7 @@ class MaxPool; template <> class MaxPool<1 /*START_VERSION*/> { public: - static float Initialize() { + constexpr static float Initialize() { return std::numeric_limits::lowest(); } @@ -84,7 +84,7 @@ class MaxPool<8 /*START_VERSION*/> { class LpPool { public: - static float Initialize() { + constexpr static float Initialize() { return 0.0f; } diff --git a/onnxruntime/core/providers/cpu/nn/shrink.cc b/onnxruntime/core/providers/cpu/nn/shrink.cc index 6272eb409f..0c336da99e 100644 --- a/onnxruntime/core/providers/cpu/nn/shrink.cc +++ b/onnxruntime/core/providers/cpu/nn/shrink.cc @@ -31,7 +31,10 @@ ONNX_CPU_OPERATOR_KERNEL( BuildKernelDefConstraintsFromTypeList(), BuildKernelDefConstraintsFromTypeList()), Shrink); - +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26451) +#endif namespace shrink_internal { template inline T ShrinkCore(const T& val, float bias, float lambd) { diff --git a/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc index d6677a482c..464d027202 100644 --- a/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc +++ b/onnxruntime/core/providers/cpu/nn/tfidfvectorizer.cc @@ -125,7 +125,7 @@ struct TfIdfVectorizer::Impl { // Contains output indexes // represents ngram_indexes output gsl::span ngram_indexes_; - gsl::span weights_; + gsl::span weights_; // This map contains references to pool_string_ entries // of pool_strings attribute @@ -145,13 +145,13 @@ struct TfIdfVectorizer::Impl { assert(ngram_id != 0); --ngram_id; assert(ngram_id < ngram_indexes_.size()); - auto output_idx = row_num * output_size_ + ngram_indexes_[ngram_id]; + auto output_idx = static_cast(row_num) * output_size_ + ngram_indexes_[ngram_id]; assert(static_cast(output_idx) < frequencies.size()); ++frequencies[output_idx]; } }; -TfIdfVectorizer::TfIdfVectorizer(const OpKernelInfo& info) : OpKernel(info), impl_(new Impl) { +TfIdfVectorizer::TfIdfVectorizer(const OpKernelInfo& info) : OpKernel(info), impl_(std::make_unique()) { std::string mode; Status status = info.GetAttr("mode", &mode); ORT_ENFORCE(status.IsOK(), "mode is required"); diff --git a/onnxruntime/core/providers/cpu/object_detection/non_max_suppression.cc b/onnxruntime/core/providers/cpu/object_detection/non_max_suppression.cc index bc6b7e3543..ddc4432eba 100644 --- a/onnxruntime/core/providers/cpu/object_detection/non_max_suppression.cc +++ b/onnxruntime/core/providers/cpu/object_detection/non_max_suppression.cc @@ -194,7 +194,7 @@ Status NonMaxSuppression::Compute(OpKernelContext* ctx) const { } //for class_index } //for batch_index - const auto last_dim = 3; + constexpr auto last_dim = 3; const auto num_selected = 
selected_indices.size(); Tensor* output = ctx->Output(0, {static_cast(num_selected), last_dim}); ORT_ENFORCE(output != nullptr); diff --git a/onnxruntime/core/providers/cpu/object_detection/roialign.cc b/onnxruntime/core/providers/cpu/object_detection/roialign.cc index ed9f56d050..f316da7cb6 100644 --- a/onnxruntime/core/providers/cpu/object_detection/roialign.cc +++ b/onnxruntime/core/providers/cpu/object_detection/roialign.cc @@ -49,7 +49,11 @@ struct PreCalc { T w3; T w4; }; - +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +// Chance of arithmetic overflow could be reduced +#pragma warning(disable : 26451) +#endif template static void PreCalcForBilinearInterpolate(const int64_t height, const int64_t width, const int64_t pooled_height, const int64_t pooled_width, const int64_t iy_upper, const int64_t ix_upper, @@ -235,8 +239,8 @@ void RoiAlignForward(const TensorShape& output_shape, const T* bottom_data, floa } // namespace Status CheckROIAlignValidInput(const Tensor* X_ptr, const Tensor* rois_ptr, const Tensor* batch_indices_ptr) { - const int64_t EXPECTED_NUM_ROI_DIMS = 2; - const int64_t EXPECTED_SECOND_ROI_DIM = 4; + constexpr int64_t EXPECTED_NUM_ROI_DIMS = 2; + constexpr int64_t EXPECTED_SECOND_ROI_DIM = 4; if (!X_ptr) { return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Null input X ptr"); } diff --git a/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc b/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc index 27b23781b3..2b81637f7f 100644 --- a/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc +++ b/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc @@ -619,7 +619,7 @@ Status QLinearConv::Compute(OpKernelContext* context) const { auto conv_worker = [&](ptrdiff_t batch) { auto work = concurrency::ThreadPool::PartitionWork(batch, thread_count, static_cast(output_image_size)); int64_t output_start = static_cast(work.start); - int64_t output_count = static_cast(work.end - work.start); + int64_t output_count = static_cast(work.end) - work.start; ActType const** worker_indirection_buffer = nullptr; if (indirection_buffer) { diff --git a/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc b/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc index c8af3ffff3..5a8c659c96 100644 --- a/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc +++ b/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc @@ -3,7 +3,10 @@ #include "core/providers/cpu/reduction/reduction_ops.h" #include "core/providers/common.h" - +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26451) +#endif using namespace std; namespace onnxruntime { @@ -219,10 +222,6 @@ bool operator!=(FastReduceKind a, FastReduceKind b) { return static_cast(a) != static_cast(b); } -bool IsFastReduceKindAvailable(FastReduceKind scenario, FastReduceKind available) { - return (static_cast(scenario) & static_cast(available)) > 0; -} - bool ResultsNoTransposePrepareForReduce::equal(gsl::span local_input_shape, gsl::span local_reduced_axes) { if (gsl::make_span(input_shape) != local_input_shape) @@ -267,12 +266,6 @@ void ReduceAggregatorBase::FastReduceKRK(const Tensor&, const std::vector(n_row * n_col * element_size), - static_cast(n_row * element_size), - static_cast(n_row * n_col * element_size * n_ops)}; -} - void NoTransposePrepareForReduce(const TensorShape& new_input_shape, gsl::span reduced_axes, ResultsNoTransposePrepareForReduce& results) { diff --git 
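
reduction_ops.cc drops two function definitions here because the companion header change (next hunk) turns them into `constexpr` functions defined in the header; a `constexpr` function is implicitly inline, so one definition serves every includer, and callers can evaluate it at compile time. A reduced sketch of the pattern with a stand-in enum (the real FastReduceKind values live in reduction_ops.h):

    #include <cstdint>

    enum class FastReduceKind : uint8_t { kNone = 0, kK = 1, kR = 2 };

    // previously declared in the header and defined in the .cc; now a single
    // constexpr definition that is usable in constant expressions
    constexpr bool IsAvailable(FastReduceKind scenario, FastReduceKind available) {
      return (static_cast<uint8_t>(scenario) & static_cast<uint8_t>(available)) > 0;
    }

    static_assert(IsAvailable(FastReduceKind::kK, FastReduceKind::kK),
                  "evaluated entirely at compile time");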
a/onnxruntime/core/providers/cpu/reduction/reduction_ops.h b/onnxruntime/core/providers/cpu/reduction/reduction_ops.h index 9f064fa345..8acce4524d 100644 --- a/onnxruntime/core/providers/cpu/reduction/reduction_ops.h +++ b/onnxruntime/core/providers/cpu/reduction/reduction_ops.h @@ -34,10 +34,15 @@ bool operator==(FastReduceKind a, FastReduceKind b); bool operator!=(FastReduceKind a, FastReduceKind b); -bool IsFastReduceKindAvailable(FastReduceKind scenario, FastReduceKind available); - +constexpr bool IsFastReduceKindAvailable(FastReduceKind scenario, FastReduceKind available) { + return (static_cast(scenario) & static_cast(available)) > 0; +} /* Evaluate the cost of parallelized FastReduce implementations. */ -TensorOpCost ParallelReduceFastCost(int64_t n_row, int64_t n_col, int64_t element_size, int n_ops); +constexpr TensorOpCost ParallelReduceFastCost(int64_t n_row, int64_t n_col, int64_t element_size, int n_ops) { + return TensorOpCost{static_cast(n_row * n_col * element_size), + static_cast(n_row * element_size), + static_cast(n_row * n_col * element_size * n_ops)}; +} /** This only improves reduce function when reduced axes are contiguous: diff --git a/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc b/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc index 8f4f2aaa32..e53dbb4cf0 100644 --- a/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc +++ b/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc @@ -14,7 +14,11 @@ #ifdef _MSC_VER #pragma warning(pop) #endif - +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +// Chance of arithmetic overflow could be reduced +#pragma warning(disable : 26451) +#endif /* ONNX_OPERATOR_SCHEMA(GRU) .SetDoc(R"DOC( @@ -644,7 +648,7 @@ void UniDirectionalGru::Compute(const gsl::span& inputs_arg, for (int r = 0; r < batch_size_; r++) { const T* p_bias_r = use_bias_ ? SafeRawConstPointer(batched_bias_WRr_local + r * hidden_size_, batched_bias_WRr_local_end, hidden_size_) - : nullptr; + : nullptr; // initialize p_rt with input to calculate rt. outputZRH_ has Xt*(Wr^T) + Ht-1*(Rr^T). T* p_rt = SafeRawPointer(outputZRH_, out_added_offset + r * hidden_size_x3 + hidden_size_, hidden_size_); @@ -738,7 +742,7 @@ void UniDirectionalGru::Compute(const gsl::span& inputs_arg, const T* p_bias_z = use_bias_ ? 
SafeRawConstPointer(batched_bias_WRz_local, batched_bias_WRz_local_end, hidden_size_) - : nullptr; + : nullptr; // initialize p_zt with Xt*(Wz^T) + Ht-1*(Rz^T), which is most of the input to calculate zt: T* p_zt = SafeRawPointer(outputZRH_, out_added_offset + r * hidden_size_x3, hidden_size_); @@ -788,7 +792,7 @@ void UniDirectionalGru::Compute(const gsl::span& inputs_arg, prev_Ht = output; prev_Ht_end = output_end; } - } // End parallel section + } // End parallel section // copy last output to final_hidden_state for (int i = 0; i < batch_size_; i++) { diff --git a/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc b/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc index a711a59bd1..b602adb32e 100644 --- a/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc +++ b/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc @@ -185,7 +185,7 @@ Status DeepCpuLstmOp::TryPackWeights(const Tensor& weights, PackedWeights& packe const size_t N = static_cast(shape[1]); const size_t K = static_cast(shape[2]); - if ((shape[0] != num_directions_) || (N != static_cast(hidden_size_ * 4))) { + if ((shape[0] != num_directions_) || (N != static_cast(hidden_size_) * 4)) { return Status::OK(); } diff --git a/onnxruntime/core/providers/cpu/rnn/lstm_base.cc b/onnxruntime/core/providers/cpu/rnn/lstm_base.cc index 44d785b62b..cfabe3fb45 100644 --- a/onnxruntime/core/providers/cpu/rnn/lstm_base.cc +++ b/onnxruntime/core/providers/cpu/rnn/lstm_base.cc @@ -3,7 +3,10 @@ #include "lstm_base.h" #include "uni_directional_lstm.h" - +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26451) +#endif namespace onnxruntime { using namespace rnn::detail; diff --git a/onnxruntime/core/providers/cpu/rnn/rnn.cc b/onnxruntime/core/providers/cpu/rnn/rnn.cc index 1f0e0610da..a51129ceac 100644 --- a/onnxruntime/core/providers/cpu/rnn/rnn.cc +++ b/onnxruntime/core/providers/cpu/rnn/rnn.cc @@ -9,7 +9,11 @@ #include "core/providers/cpu/rnn/rnn_helpers.h" #include "core/util/math.h" #include "core/util/math_cpuonly.h" - +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +// Chance of arithmetic overflow could be reduced +#pragma warning(disable : 26451) +#endif namespace onnxruntime { ONNX_CPU_OPERATOR_VERSIONED_KERNEL( RNN, diff --git a/onnxruntime/core/providers/cpu/rnn/rnn_helpers.cc b/onnxruntime/core/providers/cpu/rnn/rnn_helpers.cc index a7c20b9ed6..ece449095c 100644 --- a/onnxruntime/core/providers/cpu/rnn/rnn_helpers.cc +++ b/onnxruntime/core/providers/cpu/rnn/rnn_helpers.cc @@ -17,7 +17,10 @@ #include "core/providers/cpu/rnn/rnn_activation_functors.h" #include "core/util/math.h" #include "core/util/math_cpuonly.h" - +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26451) +#endif namespace onnxruntime { namespace rnn { namespace detail { @@ -311,21 +314,21 @@ void ComputeGemm(const int M, namespace deepcpu { -const float alpha_1 = 4.89352455891786e-03f; -const float alpha_3 = 6.37261928875436e-04f; -const float alpha_5 = 1.48572235717979e-05f; -const float alpha_7 = 5.12229709037114e-08f; -const float alpha_9 = -8.60467152213735e-11f; -const float alpha_11 = 2.00018790482477e-13f; -const float alpha_13 = -2.76076847742355e-16f; +constexpr float alpha_1 = 4.89352455891786e-03f; +constexpr float alpha_3 = 6.37261928875436e-04f; +constexpr float alpha_5 = 1.48572235717979e-05f; +constexpr float alpha_7 = 5.12229709037114e-08f; +constexpr float alpha_9 = -8.60467152213735e-11f; +constexpr float alpha_11 = 
2.00018790482477e-13f; +constexpr float alpha_13 = -2.76076847742355e-16f; -const float beta_0 = 4.89352518554385e-03f; -const float beta_2 = 2.26843463243900e-03f; -const float beta_4 = 1.18534705686654e-04f; -const float beta_6 = 1.19825839466702e-06f; +constexpr float beta_0 = 4.89352518554385e-03f; +constexpr float beta_2 = 2.26843463243900e-03f; +constexpr float beta_4 = 1.18534705686654e-04f; +constexpr float beta_6 = 1.19825839466702e-06f; -const float sigmoid_bound = 20.0f; -const float tanh_bound = 10.0f; +constexpr float sigmoid_bound = 20.0f; +constexpr float tanh_bound = 10.0f; #if defined(__GNUC__) && !defined(__wasm__) #define restrict __restrict__ @@ -918,4 +921,3 @@ GruOutputGateFuncPtr GruOutputGateFuncByName(const std::string& func) { } // namespace detail } // namespace rnn } // namespace onnxruntime - diff --git a/onnxruntime/core/providers/cpu/rnn/rnn_helpers.h b/onnxruntime/core/providers/cpu/rnn/rnn_helpers.h index e23d516350..c94cdf14ab 100644 --- a/onnxruntime/core/providers/cpu/rnn/rnn_helpers.h +++ b/onnxruntime/core/providers/cpu/rnn/rnn_helpers.h @@ -110,7 +110,7 @@ void ReverseSequence(gsl::span inputs, for (int i = 0; i < batch_size; i++) { int seq_len = sequence_lengths[i]; - for (int j = 0; j < seq_len; j++) { + for (ptrdiff_t j = 0; j < seq_len; j++) { gsl::span src = inputs.subspan(j * batch_size * input_size + i * input_size, input_size); gsl::span dest = inputs_reverse.subspan(num_directions * (seq_len - j - 1) * batch_size * input_size + i * input_size, input_size); diff --git a/onnxruntime/core/providers/cpu/rnn/uni_directional_lstm.cc b/onnxruntime/core/providers/cpu/rnn/uni_directional_lstm.cc index 715aa931a3..605ebacaee 100644 --- a/onnxruntime/core/providers/cpu/rnn/uni_directional_lstm.cc +++ b/onnxruntime/core/providers/cpu/rnn/uni_directional_lstm.cc @@ -4,7 +4,11 @@ #include "uni_directional_lstm.h" #include "core/platform/threadpool.h" - +//TODO: fix the warnings +#if defined(_MSC_VER) && !defined(__clang__) +// Chance of arithmetic overflow could be reduced +#pragma warning(disable : 26451) +#endif namespace onnxruntime { namespace lstm { @@ -82,7 +86,7 @@ UniDirectionalLstm::UniDirectionalLstm( template void UniDirectionalLstm::AllocateBuffers() { // allocate and fill with zeroes - const bool fill = true; + constexpr bool fill = true; hidden0_ = Allocate(allocator_, hidden_size_, hidden0_ptr_, fill); internal_memory_prev_ = Allocate(allocator_, hidden_size_, internal_memory_prev_ptr_, fill); batched_hidden0_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_hidden0_ptr_); diff --git a/onnxruntime/core/providers/cpu/sequence/sequence_ops.cc b/onnxruntime/core/providers/cpu/sequence/sequence_ops.cc index 412c6d6aa0..9645028745 100644 --- a/onnxruntime/core/providers/cpu/sequence/sequence_ops.cc +++ b/onnxruntime/core/providers/cpu/sequence/sequence_ops.cc @@ -69,7 +69,7 @@ static int64_t GetSeqIdx(const Tensor& idx_tensor) { return seq_idx; } -bool ValidateSeqIdx(int64_t input_seq_idx, int64_t seq_size) { +constexpr bool ValidateSeqIdx(int64_t input_seq_idx, int64_t seq_size) { bool retval = false; if (input_seq_idx < 0) { retval = input_seq_idx <= -1 && input_seq_idx >= -seq_size; @@ -536,7 +536,7 @@ Status SplitToSequence::ComputeImpl(OpKernelContext& context, const Tensor& inpu copy_data(src, dst, count); }); - input_offset += split_size * after_dims_excluding_split; // offset by the N data we used in this iteration + input_offset += static_cast(split_size) * after_dims_excluding_split; // offset by the N data we used in this 
iteration

   // if keep_dims = 0, reshape the tensor by dropping the dimension corresponding to 'axis'
   if (use_keep_dims && keepdims_ == 0) {
diff --git a/onnxruntime/core/providers/cpu/tensor/expand.cc b/onnxruntime/core/providers/cpu/tensor/expand.cc
index 085214f290..466bb83ad1 100644
--- a/onnxruntime/core/providers/cpu/tensor/expand.cc
+++ b/onnxruntime/core/providers/cpu/tensor/expand.cc
@@ -78,9 +78,9 @@ Status Expand::Compute(OpKernelContext* context) const {
     return Status::OK();
   }

-  std::unique_ptr<int64_t[]> input_dim_group{new int64_t[max_dims_size]};
-  std::unique_ptr<int64_t[]> output_dim_group{new int64_t[max_dims_size]};
-  std::unique_ptr<int64_t[]> expand_dim_size{new int64_t[max_dims_size]};
+  std::unique_ptr<int64_t[]> input_dim_group = std::make_unique<int64_t[]>(max_dims_size);
+  std::unique_ptr<int64_t[]> output_dim_group = std::make_unique<int64_t[]>(max_dims_size);
+  std::unique_ptr<int64_t[]> expand_dim_size = std::make_unique<int64_t[]>(max_dims_size);
   auto dim_group_start = max_dims_size;
   for (int64_t input_dims_iter = input_dims_size - 1,
diff --git a/onnxruntime/core/providers/cpu/tensor/gather_nd.cc b/onnxruntime/core/providers/cpu/tensor/gather_nd.cc
index cec467e066..4e75de493f 100644
--- a/onnxruntime/core/providers/cpu/tensor/gather_nd.cc
+++ b/onnxruntime/core/providers/cpu/tensor/gather_nd.cc
@@ -100,7 +100,7 @@ Status GatherNDBase::PrepareForCompute(const TensorShape& input_shape, const Ten
       relative_slice_offset += index * sizes_from_slice_dims[dim_idx];
     }

-    p.slice_offsets[slice_idx] = input_base_offset + relative_slice_offset;
+    p.slice_offsets[slice_idx] = static_cast<uint64_t>(input_base_offset) + relative_slice_offset;
   };

   concurrency::ThreadPool::TryParallelFor(
diff --git a/onnxruntime/core/providers/cpu/tensor/nonzero_op.cc b/onnxruntime/core/providers/cpu/tensor/nonzero_op.cc
index 0b09f213b6..9fc0aeb2b9 100644
--- a/onnxruntime/core/providers/cpu/tensor/nonzero_op.cc
+++ b/onnxruntime/core/providers/cpu/tensor/nonzero_op.cc
@@ -77,7 +77,7 @@ Status NonZero<T>::Compute(OpKernelContext* context) const {
   // as we iterate the entries, increment the coordinate for the current entry
   // e.g.
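
The expand.cc hunk above replaces the raw array `new` with the array form of `std::make_unique`, the C26409-clean equivalent. One behavioral nuance worth knowing: `std::make_unique<T[]>(n)` value-initializes (zeroes) the elements, while `new T[n]` leaves scalars uninitialized. Sketch:

    #include <cstdint>
    #include <memory>

    void Demo(size_t max_dims_size) {
      // before: std::unique_ptr<int64_t[]> dims{new int64_t[max_dims_size]};
      // after: no raw new, and every element starts out as 0
      std::unique_ptr<int64_t[]> dims = std::make_unique<int64_t[]>(max_dims_size);
      if (max_dims_size > 0) dims[0] = 1;
    }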
if shape is {2,2}, we start with 0,0 increment to 0,1 increment to 1,0 and finally 1,1 auto increment_coordinate = [&coordinate, &coordinate_size, &X_shape]() { - for (int64_t idx = coordinate_size - 1; idx >= 0; --idx) { + for (Eigen::Index idx = coordinate_size - 1; idx >= 0; --idx) { int64_t& cur_coord = coordinate[idx]; if (cur_coord != X_shape[idx] - 1) { ++cur_coord; diff --git a/onnxruntime/core/providers/cpu/tensor/onehot.cc b/onnxruntime/core/providers/cpu/tensor/onehot.cc index cd621f968d..42e20284c5 100644 --- a/onnxruntime/core/providers/cpu/tensor/onehot.cc +++ b/onnxruntime/core/providers/cpu/tensor/onehot.cc @@ -94,7 +94,7 @@ Status PrepareOutputShape(const Tensor* indices, const int64_t depth_val, const output_shape = indices_shape.GetDimsAsVector(); // output rank is always 1 more than the input rank as a new dimension is added to the input shape - const auto output_rank = static_cast(indices_num_dims + 1); + const auto output_rank = static_cast(indices_num_dims) + 1; auto true_axis = HandleNegativeAxis(axis, output_rank); diff --git a/onnxruntime/core/providers/cpu/tensor/reverse_sequence.cc b/onnxruntime/core/providers/cpu/tensor/reverse_sequence.cc index 493ff62b1d..a76f82fb39 100644 --- a/onnxruntime/core/providers/cpu/tensor/reverse_sequence.cc +++ b/onnxruntime/core/providers/cpu/tensor/reverse_sequence.cc @@ -79,7 +79,7 @@ Status ReverseSequenceOp::Compute(OpKernelContext* context) const { return status; } -static int64_t TimeMajorInputOffset(const int64_t max_seq_len, +constexpr static int64_t TimeMajorInputOffset(const int64_t max_seq_len, const int64_t batch_size, const int64_t input_size, const int64_t batch_num, @@ -88,7 +88,7 @@ static int64_t TimeMajorInputOffset(const int64_t max_seq_len, return seq_num * batch_size * input_size + batch_num * input_size; } -static int64_t BatchMajorInputOffset(const int64_t max_seq_len, +constexpr static int64_t BatchMajorInputOffset(const int64_t max_seq_len, const int64_t batch_size, const int64_t input_size, const int64_t batch_num, @@ -97,7 +97,7 @@ static int64_t BatchMajorInputOffset(const int64_t max_seq_len, return batch_num * max_seq_len * input_size + seq_num * input_size; } -static int64_t TimeMajorOutputOffset(const int64_t max_seq_len, +constexpr static int64_t TimeMajorOutputOffset(const int64_t max_seq_len, const int64_t batch_size, const int64_t input_size, const int64_t batch_num, @@ -107,7 +107,7 @@ static int64_t TimeMajorOutputOffset(const int64_t max_seq_len, return (seq_len - seq_num - 1) * batch_size * input_size + batch_num * input_size; } -static int64_t BatchMajorOutputOffset(const int64_t max_seq_len, +constexpr static int64_t BatchMajorOutputOffset(const int64_t max_seq_len, const int64_t batch_size, const int64_t input_size, const int64_t batch_num, diff --git a/onnxruntime/core/providers/cpu/tensor/scatter_nd.cc b/onnxruntime/core/providers/cpu/tensor/scatter_nd.cc index b5a19fd44c..3fb3956cdc 100644 --- a/onnxruntime/core/providers/cpu/tensor/scatter_nd.cc +++ b/onnxruntime/core/providers/cpu/tensor/scatter_nd.cc @@ -64,7 +64,7 @@ Status ScatterNDBase::ValidateShapes(const TensorShape& input_shape, // Validate rank of update tensor // Per spec, the rank of the update tensor should be: // (Rank of input tensor) + (Rank of indices tensor) -1 - last_indice_dimension - if (update_rank != (input_rank + indice_rank - 1 - static_cast(last_indice_dimension))) { + if (update_rank != (input_rank + indice_rank - 1 - static_cast(last_indice_dimension))) { return true; } diff --git 
a/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc b/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc index e565c2467b..a62bad2178 100644 --- a/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc +++ b/onnxruntime/core/providers/cpu/tensor/space_depth_ops.cc @@ -62,7 +62,7 @@ ONNX_CPU_OPERATOR_KERNEL( // intermediate tensor shapes are: // (batch, blocksize, blocksize, input_depth / (blocksize * blocksize), input_height, input_width) for DepthToSpace // (batch, input_depth, input_height / blocksize, blocksize, input_width / blocksize, blocksize) for SpaceToDepth -const int IntermediateTensorRank = 6; +constexpr int IntermediateTensorRank = 6; template using EigenTensorMap = Eigen::TensorMap, Eigen::Aligned>; diff --git a/onnxruntime/core/providers/cpu/tensor/split.cc b/onnxruntime/core/providers/cpu/tensor/split.cc index 74d8be1fce..345c9d2be3 100644 --- a/onnxruntime/core/providers/cpu/tensor/split.cc +++ b/onnxruntime/core/providers/cpu/tensor/split.cc @@ -197,7 +197,7 @@ Status Split::ComputeImpl(OpKernelContext& context, const Tensor& input) const { copy_data(src, dst, count); }); - input_offset += split_size * after_dims_excluding_split; // offset by the N data we used in this iteration + input_offset += static_cast(split_size) * after_dims_excluding_split; // offset by the N data we used in this iteration } return Status::OK(); diff --git a/onnxruntime/core/providers/cpu/tensor/transpose.cc b/onnxruntime/core/providers/cpu/tensor/transpose.cc index 16d0f5c746..de6a1b33cf 100644 --- a/onnxruntime/core/providers/cpu/tensor/transpose.cc +++ b/onnxruntime/core/providers/cpu/tensor/transpose.cc @@ -78,7 +78,7 @@ static void IncrementIndexAndComputeOffsetSetup(MultiIndex& mindex, size_t num_a for (size_t i = 0; i < num_axes; ++i) { if (target_dims[i] == 1) continue; - mindex.InitAxis(naxes, 0, static_cast(target_dims[i]), stride[i] * element_size); + mindex.InitAxis(naxes, 0, static_cast(target_dims[i]), stride[i] * static_cast(element_size)); ++naxes; } ORT_ENFORCE(naxes > 0, "Method IncrementIndexAndComputeOffset assumes this value is strictly positive."); diff --git a/onnxruntime/core/providers/cpu/tensor/upsample.h b/onnxruntime/core/providers/cpu/tensor/upsample.h index 3d998187e8..dd9dc489de 100644 --- a/onnxruntime/core/providers/cpu/tensor/upsample.h +++ b/onnxruntime/core/providers/cpu/tensor/upsample.h @@ -7,7 +7,11 @@ #include "core/framework/op_kernel.h" #endif #include - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(push) +// Chance of arithmetic overflow could be reduced +#pragma warning(disable : 26451) +#endif namespace onnxruntime { constexpr const char* UpsampleModeNN = "nearest"; @@ -16,9 +20,9 @@ constexpr const char* UpsampleModeCubic = "cubic"; // In case of cubic mode the grid used to calculate the interpolation value // is a 4x4 matrix -const size_t CubicModeGridLength = 4; +constexpr size_t CubicModeGridLength = 4; -using GetNearestPixelFunc = int64_t(*)(float, bool); +using GetNearestPixelFunc = int64_t (*)(float, bool); using GetOriginalCoordinateFunc = float (*)(float, float, float, float, float, float); enum UpsampleMode { @@ -375,3 +379,6 @@ class Upsample : public UpsampleBase, public OpKernel { }; } // namespace onnxruntime +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(pop) +#endif \ No newline at end of file diff --git a/onnxruntime/core/providers/cuda/cuda_kernel.h b/onnxruntime/core/providers/cuda/cuda_kernel.h index 8268e40292..cb3232ffd5 100644 --- a/onnxruntime/core/providers/cuda/cuda_kernel.h +++ 
b/onnxruntime/core/providers/cuda/cuda_kernel.h @@ -30,7 +30,7 @@ class CudaKernel : public OpKernel { if (s.IsOK()) { auto err = cudaGetLastError(); if (err != cudaSuccess) { - s = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "CUDA error ", cudaGetErrorName(err), ":", cudaGetErrorString(err)); + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "CUDA error ", cudaGetErrorName(err), ":", cudaGetErrorString(err)); } } diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/AbiCustomRegistry.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/AbiCustomRegistry.cpp index 02733453f7..04d1e6859f 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/AbiCustomRegistry.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/AbiCustomRegistry.cpp @@ -450,9 +450,9 @@ HRESULT STDMETHODCALLTYPE AbiCustomRegistry::RegisterOperatorKernel( constantCpuInputCapture, shapeInferrerCapture, defaultAttributesCapture - ](const onnxruntime::OpKernelInfo& info) -> onnxruntime::OpKernel* + ](onnxruntime::FuncManager&, const onnxruntime::OpKernelInfo& info, std::unique_ptr& out) -> onnxruntime::common::Status { - return new AbiOpKernel( + out = std::make_unique( kernelFactoryCapture.Get(), info, requiresInputShapesAtCreation, @@ -461,6 +461,7 @@ HRESULT STDMETHODCALLTYPE AbiCustomRegistry::RegisterOperatorKernel( constantCpuInputCapture, shapeInferrerCapture.Get(), &defaultAttributesCapture); + return Status::OK(); }; onnxruntime::KernelCreateInfo create_info(builder.Build(), lotusKernelCreateFn); diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp index cac81590ed..ba65508d2b 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphPartitioner.cpp @@ -681,9 +681,10 @@ namespace Dml printf("\n"); #endif - auto fused_kernel_func = [partitionNodePropsMap, transferredInitializerMap](const onnxruntime::OpKernelInfo& info) mutable ->onnxruntime::OpKernel* + auto fused_kernel_func = [partitionNodePropsMap, transferredInitializerMap](onnxruntime::FuncManager& func_mgr, const onnxruntime::OpKernelInfo& info, std::unique_ptr& out) mutable ->onnxruntime::Status { - return CreateFusedGraphKernel(info, partitionNodePropsMap, *transferredInitializerMap); + out.reset(CreateFusedGraphKernel(info, partitionNodePropsMap, *transferredInitializerMap)); + return Status::OK(); }; // build the kernel definition on the fly, and register it to the fused_kernel_regisitry. diff --git a/onnxruntime/core/session/IOBinding.h b/onnxruntime/core/session/IOBinding.h index 2b97dbeb22..61f7557f03 100644 --- a/onnxruntime/core/session/IOBinding.h +++ b/onnxruntime/core/session/IOBinding.h @@ -50,30 +50,30 @@ class IOBinding { common::Status BindInput(const std::string& name, const OrtValue& ml_value); /** - * If the BindInput calls are async this function acts as a barrier to ensure all inputs are fully copied - * before you call the Run() method. There is no point calling Run() if you're inputs are not ready at the - * desired location. - * This is a blocking call and is a wrapper over IExecutionProvider::Sync(). - * Call InferenceSession::Run() only after calling this method or else you'll end up wasting cycles inside Run(). - */ + * If the BindInput calls are async this function acts as a barrier to ensure all inputs are fully copied + * before you call the Run() method. 
There is no point calling Run() if your inputs are not ready at the
-    * desired location.
-    * This is a blocking call and is a wrapper over IExecutionProvider::Sync().
-    * Call InferenceSession::Run() only after calling this method or else you'll end up wasting cycles inside Run().
-    */
+   * If the BindInput calls are async this function acts as a barrier to ensure all inputs are fully copied
+   * before you call the Run() method. There is no point calling Run() if your inputs are not ready at the
+   * desired location.
+   * This is a blocking call and is a wrapper over IExecutionProvider::Sync().
+   * Call InferenceSession::Run() only after calling this method or else you'll end up wasting cycles inside Run().
+   */
   common::Status SynchronizeInputs();
   common::Status SynchronizeOutputs();

   /**
-    * Bind an output name to a provided pre-allocated OrtValue.
-    */
+   * Bind an output name to a provided pre-allocated OrtValue.
+   */
   common::Status BindOutput(const std::string& name, const OrtValue& ml_value);

   /**
-    * Bind an output name to a device.
-    *
-    * @param device Device to allocate the output on. Default is CPU.
-    */
+   * Bind an output name to a device.
+   *
+   * @param device Device to allocate the output on. Default is CPU.
+   */
   common::Status BindOutput(const std::string& name, OrtDevice device = {});

   /**
-    * This simply collects the outputs obtained after calling Run() inside the @param outputs.
-    */
+   * This simply collects the outputs obtained after calling Run() inside the @param outputs.
+   */
   const std::vector<std::string>& GetOutputNames() const;
   const std::vector<OrtValue>& GetOutputs() const;
   std::vector<OrtValue>& GetOutputs();
@@ -82,22 +82,22 @@ class IOBinding {
   const std::vector<OrtValue>& GetInputs() const;

   /**
-    * Get a CPU allocator from provider for async copy later if the provider supports that
-    * If it doesn't support that, return the default allocator from CPU provider
-    * \return a nonnull pointer
-    */
+   * Get a CPU allocator from provider for async copy later if the provider supports that
+   * If it doesn't support that, return the default allocator from CPU provider
+   * \return a nonnull pointer
+   */
   AllocatorPtr GetCPUAllocator(int id, onnxruntime::ProviderType provider_type) const;

   /**
-    * clear inputs or outputs. IOBinding is stateful. There are cases we need to reset its state.
-    */
+   * clear inputs or outputs. IOBinding is stateful. There are cases we need to reset its state.
+   */
   void ClearOutputs();
   void ClearInputs();

+  IOBinding(const SessionState& session_state);
 private:
   friend InferenceSession;

-  IOBinding(const SessionState& session_state);
   const SessionState& session_state_;
   std::vector<std::string> feed_names_;
   std::vector<OrtValue> feeds_;
diff --git a/onnxruntime/core/session/abi_session_options.cc b/onnxruntime/core/session/abi_session_options.cc
index e5cb9fa176..cb721b65f8 100644
--- a/onnxruntime/core/session/abi_session_options.cc
+++ b/onnxruntime/core/session/abi_session_options.cc
@@ -2,6 +2,7 @@
 // Licensed under the MIT License.
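
The IOBinding hunk above, the two DML hunks earlier, and the custom_ops.cc hunk just below all adapt to the same interface change: kernel- and binding-object creation no longer returns a raw, owning pointer; failure is reported through Status and ownership is passed out through a std::unique_ptr parameter. A sketch with stand-in types, not the real ORT declarations (which also take a FuncManager and an OpKernelInfo):

    #include <memory>

    struct Status { static Status OK() { return {}; } };
    struct OpKernel { virtual ~OpKernel() = default; };
    struct MyKernel final : OpKernel {};

    // new-style creation function: no raw owning pointer escapes, and error
    // propagation does not depend on returning nullptr
    using KernelCreateFn = Status (*)(std::unique_ptr<OpKernel>& out);

    Status CreateMyKernel(std::unique_ptr<OpKernel>& out) {
      out = std::make_unique<MyKernel>();
      return Status::OK();
    }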
#include "core/graph/onnx_protobuf.h" +#include "core/common/gsl_suppress.h" #include "core/session/onnxruntime_c_api.h" #include "core/session/ort_apis.h" #include "core/framework/error_code_helper.h" diff --git a/onnxruntime/core/session/allocator_adapters.cc b/onnxruntime/core/session/allocator_adapters.cc index a3a8cb273a..6dd495e578 100644 --- a/onnxruntime/core/session/allocator_adapters.cc +++ b/onnxruntime/core/session/allocator_adapters.cc @@ -47,7 +47,9 @@ void IAllocatorImplWrappingOrtAllocator::Free(void* p) { } } // namespace onnxruntime - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif ORT_API_STATUS_IMPL(OrtApis::CreateAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, _Outptr_ OrtAllocator** out) { API_IMPL_BEGIN diff --git a/onnxruntime/core/session/custom_ops.cc b/onnxruntime/core/session/custom_ops.cc index 4d6b8b382b..6510888995 100644 --- a/onnxruntime/core/session/custom_ops.cc +++ b/onnxruntime/core/session/custom_ops.cc @@ -258,8 +258,9 @@ common::Status CreateCustomRegistry(const std::vector& op_do def_builder.Provider(onnxruntime::kCpuExecutionProvider); } - KernelCreateFn kernel_create_fn = [op](const OpKernelInfo& info) -> OpKernel* { - return new CustomOpKernel(info, *op); + KernelCreateFn kernel_create_fn = [op](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { + out = std::make_unique(info, *op); + return Status::OK(); }; KernelCreateInfo create_info(def_builder.Build(), kernel_create_fn); diff --git a/onnxruntime/core/session/environment.cc b/onnxruntime/core/session/environment.cc index 563159ca93..ba479a1b80 100644 --- a/onnxruntime/core/session/environment.cc +++ b/onnxruntime/core/session/environment.cc @@ -50,7 +50,7 @@ Status Environment::Create(std::unique_ptr logging_mana std::unique_ptr& environment, const OrtThreadingOptions* tp_options, bool create_global_thread_pools) { - environment = std::unique_ptr(new Environment()); + environment = std::make_unique(); auto status = environment->Initialize(std::move(logging_manager), tp_options, create_global_thread_pools); return status; } diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index b525224b79..4641e2da89 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -1982,8 +1982,7 @@ common::Status InferenceSession::NewIOBinding(std::unique_ptr* io_bin } } - // private constructor, can't use make_unique - *io_binding = std::unique_ptr(new IOBinding(*session_state_)); + *io_binding = std::make_unique(*session_state_); return Status::OK(); } diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 74880c7ba3..086c7236f3 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -2131,6 +2131,8 @@ ORT_API_STATUS_IMPL(OrtApis::CreateArenaCfgV2, _In_reads_(num_keys) const char* API_IMPL_END } +//Allow using raw new/delete because this is for C. 
+GSL_SUPPRESS(r .11)
 ORT_API(void, OrtApis::ReleaseArenaCfg, _Frees_ptr_opt_ OrtArenaCfg* ptr) {
   delete ptr;
 }
diff --git a/onnxruntime/core/session/ort_env.cc b/onnxruntime/core/session/ort_env.cc
index 834bbbeae9..dd10f0eb68 100644
--- a/onnxruntime/core/session/ort_env.cc
+++ b/onnxruntime/core/session/ort_env.cc
@@ -16,7 +16,7 @@
 using namespace onnxruntime;
 using namespace onnxruntime::logging;

-OrtEnv* OrtEnv::p_instance_ = nullptr;
+std::unique_ptr<OrtEnv> OrtEnv::p_instance_;
 int OrtEnv::ref_count_ = 0;
 onnxruntime::OrtMutex OrtEnv::m_;

@@ -52,19 +52,19 @@ OrtEnv* OrtEnv::GetInstance(const OrtEnv::LoggingManagerConstructionInfo& lm_inf
     if (lm_info.logging_function) {
       std::unique_ptr<ISink> logger = std::make_unique<LoggingWrapper>(lm_info.logging_function,
                                                                        lm_info.logger_param);
-      lmgr.reset(new LoggingManager(std::move(logger),
-                                    static_cast<Severity>(lm_info.default_warning_level),
-                                    false,
-                                    LoggingManager::InstanceType::Default,
-                                    &name));
+      lmgr = std::make_unique<LoggingManager>(std::move(logger),
+                                              static_cast<Severity>(lm_info.default_warning_level),
+                                              false,
+                                              LoggingManager::InstanceType::Default,
+                                              &name);
     } else {
       auto sink = MakePlatformDefaultLogSink();

-      lmgr.reset(new LoggingManager(std::move(sink),
-                                    static_cast<Severity>(lm_info.default_warning_level),
-                                    false,
-                                    LoggingManager::InstanceType::Default,
-                                    &name));
+      lmgr = std::make_unique<LoggingManager>(std::move(sink),
+                                              static_cast<Severity>(lm_info.default_warning_level),
+                                              false,
+                                              LoggingManager::InstanceType::Default,
+                                              &name);
     }

     std::unique_ptr<Environment> env;
     if (!tp_options) {
@@ -75,11 +75,11 @@ OrtEnv* OrtEnv::GetInstance(const OrtEnv::LoggingManagerConstructionInfo& lm_inf
     if (!status.IsOK()) {
       return nullptr;
     }
-    p_instance_ = new OrtEnv(std::move(env));
+    p_instance_ = std::make_unique<OrtEnv>(std::move(env));
   }

   ++ref_count_;
-  return p_instance_;
+  return p_instance_.get();
 }

 void OrtEnv::Release(OrtEnv* env_ptr) {
@@ -87,11 +87,10 @@ void OrtEnv::Release(OrtEnv* env_ptr) {
     return;
   }
   std::lock_guard<OrtMutex> lock(m_);
-  ORT_ENFORCE(env_ptr == p_instance_);  // sanity check
+  ORT_ENFORCE(env_ptr == p_instance_.get());  // sanity check
   --ref_count_;
   if (ref_count_ == 0) {
-    delete p_instance_;
-    p_instance_ = nullptr;
+    p_instance_.reset();
   }
 }
diff --git a/onnxruntime/core/session/ort_env.h b/onnxruntime/core/session/ort_env.h
index 8d85c1a4b3..da852b9425 100644
--- a/onnxruntime/core/session/ort_env.h
+++ b/onnxruntime/core/session/ort_env.h
@@ -59,30 +59,29 @@ struct OrtEnv {
   /**
    * Registers an allocator for sharing between multiple sessions.
    * Returns an error if an allocator with the same OrtMemoryInfo is already registered.
-    */
+   */
   onnxruntime::common::Status RegisterAllocator(onnxruntime::AllocatorPtr allocator);

   /**
    * Creates and registers an allocator for sharing between multiple sessions.
    * Return an error if an allocator with the same OrtMemoryInfo is already registered.
-    */
+   */
   onnxruntime::common::Status CreateAndRegisterAllocator(const OrtMemoryInfo& mem_info,
                                                          const OrtArenaCfg* arena_cfg = nullptr);

   /**
    * Removes registered allocator that was previously registered for sharing between multiple sessions.
- */ + */ onnxruntime::common::Status UnregisterAllocator(const OrtMemoryInfo& mem_info); + OrtEnv(std::unique_ptr value); + ~OrtEnv(); private: - static OrtEnv* p_instance_; + static std::unique_ptr p_instance_; static onnxruntime::OrtMutex m_; static int ref_count_; std::unique_ptr value_; - OrtEnv(std::unique_ptr value1); - ~OrtEnv(); - ORT_DISALLOW_COPY_AND_ASSIGNMENT(OrtEnv); }; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index bafb803e11..cb76556779 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -140,7 +140,9 @@ struct Node__EdgeIterator_Impl : Node__EdgeIterator { Node::EdgeConstIterator v_; }; - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif // wrapped = The internal object is exposed as an opaque pointer, so we wrap it in a class that forwards every call to the real calls. No members are ever directly accessed // direct = Same implementation is used for shared providers & core code, but some of the methods need to be routed through here to make the linker happy struct ProviderHostImpl : ProviderHost { diff --git a/onnxruntime/core/util/qmath.h b/onnxruntime/core/util/qmath.h index 9c272cde54..34c68ea4ff 100644 --- a/onnxruntime/core/util/qmath.h +++ b/onnxruntime/core/util/qmath.h @@ -59,12 +59,12 @@ void GetQuantizationParameter(const float* data, int64_t num_of_elements, float& block_size = num_of_elements; } - for (int i = 0; i < num_blocks; i++) { + for (int i = 0; i < num_blocks; i++) { aggregate[i].min = std::numeric_limits::max(); aggregate[i].max = std::numeric_limits::lowest(); } - const TensorOpCost unit_cost{static_cast(block_size * sizeof(float)), 2.0, static_cast(block_size)}; + const TensorOpCost unit_cost{static_cast(block_size) * sizeof(float), 2.0, static_cast(block_size)}; concurrency::ThreadPool::TryParallelFor(thread_pool, num_blocks, unit_cost, [&](std::ptrdiff_t begin, std::ptrdiff_t end) { auto begin_idx = begin * block_size; auto end_idx = std::min(std::ptrdiff_t(num_of_elements), end * block_size); @@ -105,8 +105,8 @@ void GetQuantizationParameter(const float* data, int64_t num_of_elements, float& } /** - * @brief Run MlasQuantizeLinear in parallel, with provided thread pool -*/ + * @brief Run MlasQuantizeLinear in parallel, with provided thread pool + */ template void ParQuantizeLinear(const float* Input, OutputType* Output, diff --git a/onnxruntime/core/util/thread_utils.cc b/onnxruntime/core/util/thread_utils.cc index 4446c2f7a4..312076de28 100644 --- a/onnxruntime/core/util/thread_utils.cc +++ b/onnxruntime/core/util/thread_utils.cc @@ -37,20 +37,23 @@ CreateThreadPoolHelper(Env* env, OrtThreadPoolParams options) { } return std::make_unique(env, to, options.name, options.thread_pool_size, - options.allow_spinning); + options.allow_spinning); } std::unique_ptr CreateThreadPool(Env* env, OrtThreadPoolParams options, ThreadPoolType tpool_type) { -// If openmp is enabled we don't want to create any additional threadpools for sequential execution. -// However, parallel execution relies on the existence of a separate threadpool. Hence we allow eigen threadpools -// to be created for parallel execution. + // If openmp is enabled we don't want to create any additional threadpools for sequential execution. + // However, parallel execution relies on the existence of a separate threadpool. Hence we allow eigen threadpools + // to be created for parallel execution. 
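The unit_cost change in the qmath.h hunk above is a behavioral fix, not a formatting one: in the old code the product block_size * sizeof(float) is evaluated in integer arithmetic and only the finished result is cast, so a large block_size can overflow before the widening happens (the pattern MSVC analysis flags as warning 26451). Casting one operand first makes the whole multiplication happen in double. The int64_t shape products in test_conv2d_nchwc.h further down get the same widen-before-multiply treatment. A reduced sketch, with CostBytes as an illustrative name:

    #include <cstddef>

    double CostBytes(std::ptrdiff_t block_size) {
      // Flagged: the product is formed in integer arithmetic and may already
      // have overflowed by the time it is converted.
      //   double bytes = static_cast<double>(block_size * sizeof(float));

      // Fixed: widen one operand first so the multiply itself is done in double.
      return static_cast<double>(block_size) * sizeof(float);
    }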
ORT_UNUSED_PARAMETER(tpool_type); return CreateThreadPoolHelper(env, options); } } // namespace concurrency } // namespace onnxruntime +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif namespace OrtApis { ORT_API_STATUS_IMPL(CreateThreadingOptions, _Outptr_ OrtThreadingOptions** out) { *out = new OrtThreadingOptions(); diff --git a/onnxruntime/python/numpy_helper.h b/onnxruntime/python/numpy_helper.h new file mode 100644 index 0000000000..d9165c3d31 --- /dev/null +++ b/onnxruntime/python/numpy_helper.h @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once +#include +namespace onnxruntime { +namespace python { +constexpr bool IsNumericNumpyType(int npy_type) { + return npy_type < NPY_OBJECT || npy_type == NPY_HALF; +} +} // namespace python +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc index f169d7f67d..480fdcef04 100644 --- a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc @@ -9,6 +9,7 @@ #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #define PY_ARRAY_UNIQUE_SYMBOL onnxruntime_python_ARRAY_API #include +#include "python/numpy_helper.h" #include "core/graph/graph.h" #include "core/framework/tensor_shape.h" @@ -41,10 +42,6 @@ bool IsNumpyArray(py::object& obj) { return PyObjectCheck_NumpyArray(obj.ptr()); } -bool IsNumericNumpyType(int npy_type) { - return npy_type < NPY_OBJECT || npy_type == NPY_HALF; -} - int GetNumpyArrayType(const py::object& obj) { return PyArray_TYPE(reinterpret_cast(obj.ptr())); } @@ -253,8 +250,7 @@ MLDataType NumpyTypeToOnnxRuntimeType(int numpy_type) { {NPY_UINT, DataTypeImpl::GetType()}, {NPY_LONGLONG, DataTypeImpl::GetType()}, {NPY_ULONGLONG, DataTypeImpl::GetType()}, - {NPY_OBJECT, DataTypeImpl::GetType()} - }; + {NPY_OBJECT, DataTypeImpl::GetType()}}; const auto it = type_map.find(numpy_type); if (it == type_map.end()) { throw std::runtime_error("No corresponding Numpy type for Tensor Type."); diff --git a/onnxruntime/python/onnxruntime_pybind_mlvalue.h b/onnxruntime/python/onnxruntime_pybind_mlvalue.h index eb1b43a55d..d253dfdcb1 100644 --- a/onnxruntime/python/onnxruntime_pybind_mlvalue.h +++ b/onnxruntime/python/onnxruntime_pybind_mlvalue.h @@ -23,8 +23,6 @@ namespace python { extern const char* PYTHON_ORTVALUE_OBJECT_NAME; extern const char* PYTHON_ORTVALUE_NATIVE_OBJECT_ATTR; -bool IsNumericNumpyType(int npy_type); - bool IsNumericNumpyArray(const pybind11::object& py_object); bool IsNumpyArray(pybind11::object& obj); diff --git a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc index a4e7523ecf..f37539906f 100644 --- a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc @@ -9,6 +9,7 @@ #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #define PY_ARRAY_UNIQUE_SYMBOL onnxruntime_python_ARRAY_API #include +#include "python/numpy_helper.h" #include "core/framework/ort_value.h" #include "core/framework/tensor.h" diff --git a/onnxruntime/python/onnxruntime_pybind_sparse_tensor.cc b/onnxruntime/python/onnxruntime_pybind_sparse_tensor.cc index f8f51d257d..5e2839ed5f 100644 --- a/onnxruntime/python/onnxruntime_pybind_sparse_tensor.cc +++ b/onnxruntime/python/onnxruntime_pybind_sparse_tensor.cc @@ -9,6 +9,7 @@ #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #define 
PY_ARRAY_UNIQUE_SYMBOL onnxruntime_python_ARRAY_API #include +#include "python/numpy_helper.h" #include "core/framework/tensor_shape.h" #include "core/framework/tensor.h" diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 2ed442df75..2c3a194ede 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -202,15 +202,15 @@ py::object GetPyObjectFromSparseTensor(size_t pos, const OrtValue& ort_value, co if (!data_transfer_manager) { LOGS(logger, WARNING) << "Returned OrtValue with sparse tensor at position: " << pos << " is on GPU but no data_transfer_manager provided." << " Returned it will have its data on GPU, you can copy it using numpy_array_to_cpu()"; - py_sparse_tensor.reset(new PySparseTensor(ort_value)); + py_sparse_tensor = std::make_unique(ort_value); } else { auto dst_sparse_tensor = std::make_unique(src_sparse_tensor.DataType(), src_sparse_tensor.DenseShape(), GetAllocator()); auto status = src_sparse_tensor.Copy(*data_transfer_manager, 0, *dst_sparse_tensor); OrtPybindThrowIfError(status); - py_sparse_tensor.reset(new PySparseTensor(std::move(dst_sparse_tensor))); + py_sparse_tensor = std::make_unique(std::move(dst_sparse_tensor)); } } else { - py_sparse_tensor.reset(new PySparseTensor(ort_value)); + py_sparse_tensor = std::make_unique(ort_value); } py::object result = py::cast(py_sparse_tensor.get(), py::return_value_policy::take_ownership); @@ -1301,7 +1301,7 @@ including arg name, arg type (contains both type and shape).)pbdoc") }, "node shape (assuming the node holds a tensor)"); - py::class_(m, "SessionObjectInitializer"); + py::class_ sessionObjectInitializer(m, "SessionObjectInitializer"); py::class_(m, "InferenceSession", R"pbdoc(This is the main class used to run a model.)pbdoc") // In Python3, a Python bytes object will be passed to C++ functions that accept std::string or char* // without any conversion. So this init method can be used for model file path (string) and model content (bytes) @@ -1542,7 +1542,7 @@ void InitializeEnv() { InitArray(); Env::Default().GetTelemetryProvider().SetLanguageProjection(OrtLanguageProjection::ORT_PROJECTION_PYTHON); OrtPybindThrowIfError(Environment::Create(std::make_unique( - std::unique_ptr{new CLogSink{}}, + std::make_unique(), Severity::kWARNING, false, LoggingManager::InstanceType::Default, &SessionObjectInitializer::default_logger_id), session_env)); diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 0218c2c5f1..db4eda1259 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -379,9 +379,9 @@ class PySparseTensor { std::unique_ptr AsOrtValue() const; private: - // instance_ represents data that comes as input. Thus we depend on numpy - // arrays that own the underlying memory to stay around. We store copies - // of py::objects for those arrays in backing_storage_ as an extra ref-count. + // instance_ represents data that comes as input. Thus we depend on numpy + //arrays that own the underlying memory to stay around. We store copies + //of py::objects for those arrays in backing_storage_ as an extra ref-count. // If we have and are able to copy from the OrtValue returned by run() to CPU, then this owns the data // and backing_storage_ is empty. 
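Giving the py::class_ temporary in onnxruntime_pybind_state.cc a name (sessionObjectInitializer) addresses the analyzer's objection to constructing and immediately discarding an object with a nontrivial constructor (the ES.84 family of checks). Here the constructor's side effect, registering the binding, is the whole point, so naming the object keeps the registration while making its lifetime explicit. The classic bug the rule guards against looks like the following sketch, which is analogous but not pybind code:

    #include <mutex>

    std::mutex g_mutex;

    void Critical() {
      // Flagged: this temporary locks the mutex and unlocks it again at the
      // end of the statement, leaving the code below unprotected.
      //   std::lock_guard<std::mutex>{g_mutex};

      std::lock_guard<std::mutex> guard(g_mutex);  // named: held until scope exit
      // ... protected work ...
    }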
@@ -393,6 +393,11 @@ class PySparseTensor { }; #endif // !defined(DISABLE_SPARSE_TENSORS) +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(push) +//You can attempt to make 'onnxruntime::python::SessionObjectInitializer::Get' constexpr +#pragma warning(disable : 26497) +#endif class SessionObjectInitializer { public: typedef const PySessionOptions& Arg1; @@ -413,7 +418,9 @@ class SessionObjectInitializer { return SessionObjectInitializer(); } }; - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(pop) +#endif Environment& GetEnv(); // Initialize an InferenceSession. diff --git a/onnxruntime/test/contrib_ops/tensor_op_test.cc b/onnxruntime/test/contrib_ops/tensor_op_test.cc index 2d4c836782..a72d2d3d83 100644 --- a/onnxruntime/test/contrib_ops/tensor_op_test.cc +++ b/onnxruntime/test/contrib_ops/tensor_op_test.cc @@ -14,7 +14,7 @@ namespace test { using ExpectResult = OpTester::ExpectResult; TEST(CropContribOpTest, CropBorderOnly) { - const int N = 2, C = 1, H = 3, W = 4; + constexpr int N = 2, C = 1, H = 3, W = 4; std::vector X = {1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 3.0f, 4.0f, 5.0f, 3.0f, 4.0f, 5.0f, 6.0f, @@ -37,7 +37,7 @@ TEST(CropContribOpTest, CropBorderOnly) { } TEST(CropContribOpTest, CropBorderAndScale) { - const int N = 2, C = 1, H = 3, W = 4; + constexpr int N = 2, C = 1, H = 3, W = 4; std::vector X = {1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 3.0f, 4.0f, 5.0f, 3.0f, 4.0f, 5.0f, 6.0f, @@ -65,7 +65,7 @@ TEST(CropContribOpTest, CropBorderAndScale) { } TEST(ImageScalerContribOpTest, ImageScalerTest) { - const int64_t N = 1, C = 2, H = 2, W = 2; + constexpr int64_t N = 1, C = 2, H = 2, W = 2; std::vector X = { 1.0f, 3.0f, 3.0f, 5.0f, @@ -92,9 +92,9 @@ TEST(ImageScalerContribOpTest, ImageScalerTest) { } void MeanVarianceNormalizationAcrossChannels(bool across_channels, bool normalize_variance) { - const int64_t N = 2, C = 2, H = 2, W = 3; - int64_t one = 1; - int64_t zero = 0; + constexpr int64_t N = 2, C = 2, H = 2, W = 3; + constexpr int64_t one = 1; + constexpr int64_t zero = 0; std::vector X = {3.0f, -3.0f, -1.0f, 1.0f, 2.0f, -1.0f, @@ -118,9 +118,9 @@ void MeanVarianceNormalizationAcrossChannels(bool across_channels, bool normaliz } void MeanVarianceNormalizationPerChannel(bool across_channels, bool normalize_variance) { - const int64_t N = 2, C = 2, H = 2, W = 3; - int64_t one = 1; - int64_t zero = 0; + constexpr int64_t N = 2, C = 2, H = 2, W = 3; + constexpr int64_t one = 1; + constexpr int64_t zero = 0; std::vector N1C1 = {3.0f, -3.0f, -1.0f, 1.0f, 2.0f, -1.0f}; diff --git a/onnxruntime/test/eager/ort_invoker_test.cc b/onnxruntime/test/eager/ort_invoker_test.cc index d5b0c02d09..ed2efccd76 100644 --- a/onnxruntime/test/eager/ort_invoker_test.cc +++ b/onnxruntime/test/eager/ort_invoker_test.cc @@ -87,8 +87,9 @@ class TestKernel final : public OpKernel { } }; -OpKernel* CreateTestKernel(const OpKernelInfo& info) { - return new TestKernel(info); +Status CreateTestKernel(FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) { + out = std::make_unique(info); + return Status::OK(); } TEST(InvokerTest, CustomOp) { diff --git a/onnxruntime/test/framework/allocation_planner_test.cc b/onnxruntime/test/framework/allocation_planner_test.cc index 379ef1d4e4..78b598dcf1 100644 --- a/onnxruntime/test/framework/allocation_planner_test.cc +++ b/onnxruntime/test/framework/allocation_planner_test.cc @@ -230,13 +230,13 @@ class PlannerTest : public ::testing::Test { ASSERT_NE(ep, nullptr); auto info = std::make_unique( *p_node, kernel_def, *ep, state_->GetInitializedTensors(), 
state_->GetOrtValueNameIdxMap(), - state_->GetFuncMgr(), state_->GetDataTransferMgr()); + state_->GetDataTransferMgr()); op_kernel_infos_.push_back(std::move(info)); if (!KernelRegistry::HasImplementationOf(*reg, *p_node, onnxruntime::kCpuExecutionProvider)) { auto st = reg->Register( KernelCreateInfo(std::make_unique(kernel_def), - [](const OpKernelInfo& info) -> OpKernel* { return new DummyOpKernel(info); })); + [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique(info); return Status::OK(); })); ORT_ENFORCE(st.IsOK(), st.ErrorMessage()); } @@ -575,7 +575,7 @@ TEST_F(PlannerTest, LocationPlanningForPassThroughExplicitAndImplicitSubgraphInp /* Inputs: iter_num, cond_in, loop carried state variables. iter_num_in cond_in [loop_state_var] (unused) | | - [Identity] [If] + [Identity] [If] | | cond_out loop_state_var_out */ diff --git a/onnxruntime/test/framework/float_16_test.cc b/onnxruntime/test/framework/float_16_test.cc index cf453d099e..f8bdb78660 100644 --- a/onnxruntime/test/framework/float_16_test.cc +++ b/onnxruntime/test/framework/float_16_test.cc @@ -145,7 +145,7 @@ TEST(Float16_Tests, Mul_16_Test) { auto def = MulFP16KernelDef(); //Register a foo kernel which is doing Add, but bind to Mul. - KernelCreateFn kernel_create_fn = [](const OpKernelInfo& info) -> OpKernel* { return new MulFP16Kernel(info); }; + KernelCreateFn kernel_create_fn = [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique(info); return Status::OK(); }; EXPECT_TRUE(registry->RegisterCustomKernel(def, kernel_create_fn).IsOK()); EXPECT_TRUE(session_object.Load(MUL_MODEL_URI).IsOK()); diff --git a/onnxruntime/test/framework/kernel_registry_test.cc b/onnxruntime/test/framework/kernel_registry_test.cc index be9dd2a260..e2490b6ff4 100644 --- a/onnxruntime/test/framework/kernel_registry_test.cc +++ b/onnxruntime/test/framework/kernel_registry_test.cc @@ -26,8 +26,9 @@ class FakeKernel final : public OpKernel { } }; -OpKernel* CreateFakeKernel(const OpKernelInfo& info) { - return new FakeKernel(info); +Status CreateFakeKernel(FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) { + out = std::make_unique(info); + return Status::OK(); } } // namespace diff --git a/onnxruntime/test/framework/local_kernel_registry_test.cc b/onnxruntime/test/framework/local_kernel_registry_test.cc index 1cfdf97311..67e65fee60 100644 --- a/onnxruntime/test/framework/local_kernel_registry_test.cc +++ b/onnxruntime/test/framework/local_kernel_registry_test.cc @@ -95,8 +95,9 @@ KernelDefBuilder FooKernelDef(const char* schema_name) { return def; } -OpKernel* CreateFooKernel(const OpKernelInfo& kernel_info) { - return new FooKernel(kernel_info); +Status CreateFooKernel(FuncManager&, const OpKernelInfo& kernel_info, std::unique_ptr& out) { + out = std::make_unique>(kernel_info); + return Status::OK(); } // kernel with optional outputs @@ -181,8 +182,9 @@ class OptionalOpKernel : public OpKernel { } }; -OpKernel* CreateOptionalOpKernel(const OpKernelInfo& kernel_info) { - return new OptionalOpKernel(kernel_info); +Status CreateOptionalOpKernel(FuncManager&, const OpKernelInfo& kernel_info, std::unique_ptr& out) { + out = std::make_unique>(kernel_info); + return Status::OK(); } static const std::string MUL_MODEL_URI = "testdata/mul_1.onnx"; diff --git a/onnxruntime/test/framework/opaque_kernels_test.cc b/onnxruntime/test/framework/opaque_kernels_test.cc index 632f68deb0..a65f9a19a8 100644 --- a/onnxruntime/test/framework/opaque_kernels_test.cc +++ 
b/onnxruntime/test/framework/opaque_kernels_test.cc @@ -291,9 +291,9 @@ TEST_F(OpaqueTypeTests, RunModel) { EXPECT_TRUE(registry->RegisterOpSet(schemas, onnxruntime::kMLDomain, 8, 9).IsOK()); // Register our kernels here auto ctor_def = ConstructSparseTensorDef(); - EXPECT_TRUE(registry->RegisterCustomKernel(ctor_def, [](const OpKernelInfo& info) { return new ConstructSparseTensor(info); }).IsOK()); + EXPECT_TRUE(registry->RegisterCustomKernel(ctor_def, [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) { out = std::make_unique(info); return Status::OK(); }).IsOK()); auto shape_def = ConstructFetchSparseShape(); - EXPECT_TRUE(registry->RegisterCustomKernel(shape_def, [](const OpKernelInfo& info) { return new FetchSparseTensorShape(info); }).IsOK()); + EXPECT_TRUE(registry->RegisterCustomKernel(shape_def, [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) { out = std::make_unique(info); return Status::OK(); }).IsOK()); IOnnxRuntimeOpSchemaRegistryList custom_schema_registries_ = {registry->GetOpschemaRegistry()}; std::unordered_map domain_to_version = {{onnxruntime::kMLDomain, 8}}; diff --git a/onnxruntime/test/framework/parallel_executor_test.cc b/onnxruntime/test/framework/parallel_executor_test.cc index 49c7800c1b..8dcaad10c5 100644 --- a/onnxruntime/test/framework/parallel_executor_test.cc +++ b/onnxruntime/test/framework/parallel_executor_test.cc @@ -82,7 +82,7 @@ TEST(ParallelExecutor, TestStatusPropagation) { std::vector schemas{TestOp::OpSchema()}; Status status; ASSERT_TRUE((status = registry->RegisterOpSet(schemas, TestOp::OpDomain, 10, 11)).IsOK()) << status; - KernelCreateFn kernel_create_fn = [](const OpKernelInfo& info) { return new typename TestOp::OpKernelImpl(info); }; + KernelCreateFn kernel_create_fn = [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) { out = std::make_unique(info); return Status::OK(); }; auto kernel_def = TestOp::KernelDef(); ASSERT_TRUE((status = registry->RegisterCustomKernel(kernel_def, kernel_create_fn)).IsOK()) << status; @@ -125,7 +125,7 @@ TEST_P(ParallelExecutorThreadPoolTest, TestNullInterOpThreadPool) { std::vector schemas{TestOp::OpSchema()}; Status status; ASSERT_TRUE((status = registry->RegisterOpSet(schemas, TestOp::OpDomain, 10, 11)).IsOK()) << status; - KernelCreateFn kernel_create_fn = [](const OpKernelInfo& info) { return new typename TestOp::OpKernelImpl(info); }; + KernelCreateFn kernel_create_fn = [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) { out = std::make_unique(info); return Status::OK(); }; auto kernel_def = TestOp::KernelDef(); ASSERT_TRUE((status = registry->RegisterCustomKernel(kernel_def, kernel_create_fn)).IsOK()) << status; @@ -144,6 +144,6 @@ TEST_P(ParallelExecutorThreadPoolTest, TestNullInterOpThreadPool) { } INSTANTIATE_TEST_SUITE_P(ParallelExecutorThreadPoolTests, ParallelExecutorThreadPoolTest, - testing::Values(1, 0)); + testing::Values(1, 0)); } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/framework/session_state_test.cc b/onnxruntime/test/framework/session_state_test.cc index c6d49e7356..344f563fab 100644 --- a/onnxruntime/test/framework/session_state_test.cc +++ b/onnxruntime/test/framework/session_state_test.cc @@ -76,7 +76,7 @@ TEST_P(SessionStateAddGetKernelTest, AddGetKernelTest) { auto kernel_def = KernelDefBuilder().SetName("Variable").Provider(kCpuExecutionProvider).SinceVersion(1, 10).Build(); OpKernelInfo p_info(node, *kernel_def, *cpu_execution_provider, s.GetConstantInitializedTensors(), - 
s.GetOrtValueNameIdxMap(), s.GetFuncMgr(), s.GetDataTransferMgr()); + s.GetOrtValueNameIdxMap(), s.GetDataTransferMgr()); unique_ptr p_kernel; p_kernel.reset(new TestOpKernel(p_info)); size_t orig_num_outputs = p_kernel->Node().OutputDefs().size(); @@ -88,7 +88,7 @@ TEST_P(SessionStateAddGetKernelTest, AddGetKernelTest) { node.SetExecutionProviderType(kCpuExecutionProvider); std::shared_ptr kernel_registry = std::make_shared(); ASSERT_STATUS_OK(kernel_registry->Register(KernelCreateInfo( - std::move(kernel_def), [](const OpKernelInfo& info) -> OpKernel* { return new TestOpKernel(info); }))); + std::move(kernel_def), [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique(info); return Status::OK(); }))); kernel_registry_manager.RegisterKernelRegistry(kernel_registry); ASSERT_STATUS_OK(s.FinalizeSessionState(ORT_TSTR(""), kernel_registry_manager)); @@ -511,7 +511,7 @@ TEST_P(SessionStatePrepackingTest, PrePackingTest) { auto kernel_def = KernelDefBuilder().SetName("PrePackingTest").Provider(kCpuExecutionProvider).SinceVersion(1).Build(); ASSERT_STATUS_OK(kernel_registry->Register( KernelCreateInfo(std::move(kernel_def), - [](const OpKernelInfo& info) -> OpKernel* { return new PrePackingTestOpKernel(info); }))); + [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique(info); return Status::OK(); }))); kernel_registry_manager.RegisterKernelRegistry(kernel_registry); PlaceAllNodesToCPUEP(model.MainGraph()); @@ -553,7 +553,7 @@ TEST(SessionStateTest, SharedInitalizersWithPrePackingTest) { auto kernel_def = KernelDefBuilder().SetName("PrePackingTest").Provider(kCpuExecutionProvider).SinceVersion(1).Build(); ASSERT_STATUS_OK(kernel_registry->Register( KernelCreateInfo(std::move(kernel_def), - [](const OpKernelInfo& info) -> OpKernel* { return new PrePackingTestOpKernel(info); }))); + [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique(info); return Status::OK(); }))); kernel_registry_manager.RegisterKernelRegistry(kernel_registry); // Part 1: Pre-packing enabled + no shared initializers = no pre-packed weights caching diff --git a/onnxruntime/test/framework/sparse_kernels_test.cc b/onnxruntime/test/framework/sparse_kernels_test.cc index 3bba5d1a9d..30d1232a7b 100644 --- a/onnxruntime/test/framework/sparse_kernels_test.cc +++ b/onnxruntime/test/framework/sparse_kernels_test.cc @@ -95,8 +95,8 @@ This operator constructs a sparse tensor from three tensors that provide a COO } /** - * @brief An implementation of the SparseFromCOO op. - */ + * @brief An implementation of the SparseFromCOO op. 
+ */ class OpKernelImpl final : public OpKernel { public: OpKernelImpl(const OpKernelInfo& info) : OpKernel{info} {} @@ -264,7 +264,7 @@ struct SparseToValues { return schema; } - // A kernel implementation of SparseToValues + // A kernel implementation of SparseToValues class OpKernelImpl final : public OpKernel { public: OpKernelImpl(const OpKernelInfo& info) : OpKernel{info} {} @@ -333,7 +333,7 @@ class SparseTensorTests : public testing::Test { .SetDomain(onnxruntime::kMLDomain) .SinceVersion(10) .Provider(onnxruntime::kCpuExecutionProvider); - KernelCreateFn kernel_create_fn = [](const OpKernelInfo& info) { return new typename Op::OpKernelImpl(info); }; + KernelCreateFn kernel_create_fn = [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) { out = std::make_unique(info); return Status::OK(); }; EXPECT_TRUE(registry2->RegisterCustomKernel(kernel_def_builder, kernel_create_fn).IsOK()); }; register_actions.push_back(register_kernel); @@ -698,9 +698,10 @@ struct InsertIndices { std::vector indices_data; insert_indices_data(indices_1D, values_size, shape_size, indices_data, indices_tp); indices_tp.set_data_type(utils::ToTensorProtoElementType()); - ORT_IF_CONSTEXPR (sizeof(T) == sizeof(int8_t)) { + ORT_IF_CONSTEXPR(sizeof(T) == sizeof(int8_t)) { indices_tp.mutable_raw_data()->assign(reinterpret_cast(indices_data.data()), indices_data.size()); - } else { + } + else { // Conversion on the fly to the target data type std::vector indices(indices_data.cbegin(), indices_data.cend()); indices_tp.mutable_raw_data()->assign(reinterpret_cast(indices.data()), indices.size() * sizeof(T)); @@ -823,8 +824,7 @@ static void TestConversion( TensorProto_DataType_INT8, TensorProto_DataType_INT16, TensorProto_DataType_INT32, - TensorProto_DataType_INT64 - }; + TensorProto_DataType_INT64}; for (auto dt : indices_types) { TestConversion(true, dt, inserter, checker); @@ -1037,7 +1037,7 @@ TensorProto CreateDenseTensor(size_t indices_start, std::vector& expected_values, std::vector& expected_indicies) { TensorProto result; std::vector values = CreateSparseValues(indices_start); - auto ind_start = static_cast(indices_start); + auto ind_start = static_cast(indices_start); expected_indicies = {ind_start, ind_start + 1}; for (const auto& ind : expected_indicies) { expected_values.push_back(values[ind]); @@ -1110,11 +1110,11 @@ void RawSparseDataChecker(gsl::span expected_bfloat, template static void TestDenseToSparseConversionValues(size_t indices_start, - std::function& values, TensorProto& tp)> inserter, - std::function expected, - gsl::span expected_indicies, - const SparseTensorProto& actual)> - checker) { + std::function& values, TensorProto& tp)> inserter, + std::function expected, + gsl::span expected_indicies, + const SparseTensorProto& actual)> + checker) { std::vector expected_values; std::vector expected_indicies; // Path is required for loading external data @@ -1862,7 +1862,7 @@ TEST(SparseTensorConversionTests, BlockSparse) { indices_span.cbegin(), indices_span.cend())); } } -#endif // !defined(DISABLE_SPARSE_TENSORS) +#endif // !defined(DISABLE_SPARSE_TENSORS) } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/mlas/bench/bench_qgemm.cpp b/onnxruntime/test/mlas/bench/bench_qgemm.cpp index d2bf9a57e6..1d21c8732b 100644 --- a/onnxruntime/test/mlas/bench/bench_qgemm.cpp +++ b/onnxruntime/test/mlas/bench/bench_qgemm.cpp @@ -13,9 +13,9 @@ static const std::vector qgemm_arg_names = {"M", "N", "K", "Batch", "Threads"}; void QGEMM(benchmark::State& state, bool pack_b, bool 
a_is_signed) { - const bool b_is_signed = true; - const uint8_t a_zero_point = 29; - const uint8_t b_zero_point = 179; + constexpr bool b_is_signed = true; + constexpr uint8_t a_zero_point = 29; + constexpr uint8_t b_zero_point = 179; if (state.range(0) <= 0) throw std::invalid_argument("M must greater than 0!"); if (state.range(1) <= 0) throw std::invalid_argument("N must greater than 0!"); diff --git a/onnxruntime/test/mlas/unittest/test_conv2d_nchwc.h b/onnxruntime/test/mlas/unittest/test_conv2d_nchwc.h index 439ebb027e..e516fa8a0b 100644 --- a/onnxruntime/test/mlas/unittest/test_conv2d_nchwc.h +++ b/onnxruntime/test/mlas/unittest/test_conv2d_nchwc.h @@ -31,9 +31,9 @@ class MlasNchwcConv2DTest : public MlasConv2DTest { const float* Filter, const float* Bias, float* Output) override { - int64_t InputShape[] = {int64_t(BatchCount), int64_t(GroupCount * InputChannels), int64_t(InputHeight), int64_t(InputWidth)}; - int64_t FilterShape[] = {int64_t(GroupCount * FilterCount), int64_t(InputChannels), int64_t(KernelHeight), int64_t(KernelWidth)}; - int64_t OutputShape[] = {int64_t(BatchCount), int64_t(GroupCount * FilterCount), int64_t(OutputHeight), int64_t(OutputWidth)}; + int64_t InputShape[] = {int64_t(BatchCount), int64_t(GroupCount) * int64_t(InputChannels), int64_t(InputHeight), int64_t(InputWidth)}; + int64_t FilterShape[] = {int64_t(GroupCount) * int64_t(FilterCount), int64_t(InputChannels), int64_t(KernelHeight), int64_t(KernelWidth)}; + int64_t OutputShape[] = {int64_t(BatchCount), int64_t(GroupCount) * int64_t(FilterCount), int64_t(OutputHeight), int64_t(OutputWidth)}; int64_t KernelShape[] = {int64_t(KernelHeight), int64_t(KernelWidth)}; int64_t DilationShape[] = {int64_t(DilationHeight), int64_t(DilationWidth)}; @@ -149,7 +149,7 @@ class MlasNchwcConv2DTest : public MlasConv2DTest { public: static const char* GetTestSuiteName(void) { - static const std::string suite_name(Threaded? "Conv2dNchwc_Threaded" : "Conv2dNchwc_SingleThread"); + static const std::string suite_name(Threaded ? 
"Conv2dNchwc_Threaded" : "Conv2dNchwc_SingleThread"); return suite_name.c_str(); } diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc index 0b1b16d301..f4fb214066 100644 --- a/onnxruntime/test/onnx/TestCase.cc +++ b/onnxruntime/test/onnx/TestCase.cc @@ -265,22 +265,22 @@ void LoopDataFile(int test_data_pb_fd, bool is_input, const TestModelInfo& model #if !defined(ORT_MINIMAL_BUILD) std::unique_ptr TestModelInfo::LoadOnnxModel(_In_ const PATH_CHAR_TYPE* model_url) { - return std::unique_ptr(new OnnxModelInfo(model_url)); + return std::make_unique(model_url); } #endif std::unique_ptr TestModelInfo::LoadOrtModel(_In_ const PATH_CHAR_TYPE* model_url) { - return std::unique_ptr(new OnnxModelInfo(model_url, true)); + return std::make_unique(model_url, true); } /** - * test_case_dir must have contents of: - * model.onnx - * ???/input_??.pb - * ???/output_??.pb - * ???/input_??.pb - * ???/output_??.pb - */ + * test_case_dir must have contents of: + * model.onnx + * ???/input_??.pb + * ???/output_??.pb + * ???/input_??.pb + * ???/output_??.pb + */ class OnnxTestCase : public ITestCase { private: std::string test_case_name_; @@ -352,9 +352,9 @@ std::unique_ptr CreateOnnxTestCase(const std::string& test_case_name, std::unique_ptr model, double default_per_sample_tolerance, double default_relative_per_sample_tolerance) { - return std::unique_ptr(new OnnxTestCase(test_case_name, std::move(model), - default_per_sample_tolerance, - default_relative_per_sample_tolerance)); + return std::make_unique(test_case_name, std::move(model), + default_per_sample_tolerance, + default_relative_per_sample_tolerance); } void OnnxTestCase::GetPerSampleTolerance(double* value) const { diff --git a/onnxruntime/test/onnx/dataitem_request.cc b/onnxruntime/test/onnx/dataitem_request.cc index 41e81baa13..abe8111a29 100644 --- a/onnxruntime/test/onnx/dataitem_request.cc +++ b/onnxruntime/test/onnx/dataitem_request.cc @@ -39,7 +39,7 @@ void DataTaskRequestContext::Request(const Callback& cb, concurrency::ThreadPool const ITestCase& c, Ort::Session& session, OrtAllocator* allocator, size_t task_id) { assert(cb); - std::unique_ptr self(new DataTaskRequestContext(cb, c, session, allocator, task_id)); + std::unique_ptr self = std::make_unique(cb, c, session, allocator, task_id); CallableFactory f(self.get()); auto runnable = f.GetCallable<&DataTaskRequestContext::RunAsync>(); onnxruntime::concurrency::ThreadPool::Schedule(tp, [runnable]() { runnable.Invoke(); }); diff --git a/onnxruntime/test/onnx/dataitem_request.h b/onnxruntime/test/onnx/dataitem_request.h index 417b800e5c..96a0e64cd8 100644 --- a/onnxruntime/test/onnx/dataitem_request.h +++ b/onnxruntime/test/onnx/dataitem_request.h @@ -8,7 +8,6 @@ #include "core/common/common.h" #include "core/platform/env_time.h" - class ITestCase; struct OrtAllocator; @@ -40,7 +39,7 @@ class DataTaskRequestContext { /// this task id /// execution result and elapsed time static std::pair Run(const ITestCase& c, ::Ort::Session& session, - OrtAllocator* allocator, size_t task_id); + OrtAllocator* allocator, size_t task_id); /// /// Schedules a data task to run on a threadpool. 
The function @@ -68,7 +67,6 @@ class DataTaskRequestContext { return spent_time_; } - private: DataTaskRequestContext(const Callback& cb, const ITestCase& test_case, ::Ort::Session& session, OrtAllocator* allocator, size_t task_id) @@ -80,6 +78,7 @@ class DataTaskRequestContext { SetTimeSpecToZero(&spent_time_); } + private: void RunAsync(); std::pair RunImpl(); diff --git a/onnxruntime/test/onnx/microbenchmark/activation.cc b/onnxruntime/test/onnx/microbenchmark/activation.cc index d0b417b84e..d763e89219 100644 --- a/onnxruntime/test/onnx/microbenchmark/activation.cc +++ b/onnxruntime/test/onnx/microbenchmark/activation.cc @@ -69,7 +69,7 @@ struct KernelAndDef { .SetDomain(domain) .TypeConstraint("T", DataTypeImpl::GetTensorType()) .Build(); - OpKernelInfo info(main_node, *out.def, *out.a, {}, {}, {}, {}); + OpKernelInfo info(main_node, *out.def, *out.a, {}, {}, {}); out.kernel = std::make_unique(info); return out; } diff --git a/onnxruntime/test/onnx/tensorprotoutils.cc b/onnxruntime/test/onnx/tensorprotoutils.cc index 4cff3880d5..902ee79d1b 100644 --- a/onnxruntime/test/onnx/tensorprotoutils.cc +++ b/onnxruntime/test/onnx/tensorprotoutils.cc @@ -69,7 +69,7 @@ static void UnpackTensorWithRawData(const void* raw_data, size_t raw_data_length ORT_CXX_API_THROW(MakeString("UnpackTensor: the pre-allocated size does not match the raw data size, expected ", expected_size_in_bytes, ", got ", raw_data_length), OrtErrorCode::ORT_FAIL); - if constexpr(endian::native != endian::little) { + if constexpr (endian::native != endian::little) { ORT_CXX_API_THROW("UnpackTensorWithRawData only handles little-endian native byte order for now.", OrtErrorCode::ORT_NOT_IMPLEMENTED); } @@ -298,7 +298,9 @@ OrtStatus* OrtInitializeBufferForTensor(void* input, size_t input_len, } ORT_API(void, OrtUninitializeBuffer, _In_opt_ void* input, size_t input_len, enum ONNXTensorElementDataType type); - +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 26409) +#endif static void UnInitTensor(void* param) noexcept { UnInitializeParam* p = reinterpret_cast(param); OrtUninitializeBuffer(p->preallocated, p->preallocated_size, p->ele_type); diff --git a/onnxruntime/test/onnx/testcase_request.cc b/onnxruntime/test/onnx/testcase_request.cc index ccb64e7131..bc6429905b 100644 --- a/onnxruntime/test/onnx/testcase_request.cc +++ b/onnxruntime/test/onnx/testcase_request.cc @@ -85,7 +85,7 @@ void TestCaseRequestContext::Request(const Callback& cb, PThreadPool tpool, concurrent_runs = 1; } - std::unique_ptr self(new TestCaseRequestContext(cb, tpool, c, env, session_opts, test_case_id)); + std::unique_ptr self = std::make_unique(cb, tpool, c, env, session_opts, test_case_id); CallableFactory f(self.get()); auto runnable = f.GetCallable<&TestCaseRequestContext::RunAsync>(); onnxruntime::concurrency::ThreadPool::Schedule(tpool, [runnable, concurrent_runs]() { runnable.Invoke(concurrent_runs); }); diff --git a/onnxruntime/test/onnx/testcase_request.h b/onnxruntime/test/onnx/testcase_request.h index 75d02db3d6..4d53a0867d 100644 --- a/onnxruntime/test/onnx/testcase_request.h +++ b/onnxruntime/test/onnx/testcase_request.h @@ -78,11 +78,10 @@ class TestCaseRequestContext { /// The impact is mitigated by the fact that __Ctor is still private. 
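Several of the make_unique conversions force a visibility change as well: std::make_unique constructs the object itself, so unlike a member function calling new it cannot reach a private constructor. That is why the patch removed the "private constructor, can't use make_unique" workaround in inference_session.cc, made OrtEnv's constructor public, and here shifts the private: label below the constructors of DataTaskRequestContext above and TestCaseRequestContext just below; construction stays effectively controlled because callers still go through the static factory methods. A minimal sketch of the shape, with Task and Create as illustrative names:

    #include <memory>

    class Task {
     public:
      explicit Task(int id) : id_(id) {}  // visible so make_unique can call it

      static std::unique_ptr<Task> Create(int id) {
        return std::make_unique<Task>(id);
      }

     private:
      int id_;
    };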
~TestCaseRequestContext() = default; - private: - TestCaseRequestContext(const Callback& cb, PThreadPool tp, const ITestCase& test_case, Ort::Env& env, const Ort::SessionOptions& session_opts, size_t test_case_id); + private: bool SetupSession(); std::shared_ptr GetResult() const { @@ -119,6 +118,5 @@ class TestCaseRequestContext { mutable bool finished_ = false; }; -} +} //namespace test } // namespace onnxruntime - diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc index af7f5739f7..abc080a31a 100644 --- a/onnxruntime/test/optimizer/graph_transform_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_test.cc @@ -1339,7 +1339,7 @@ TEST_F(GraphTransformationTests, GemmTransposeFusionOutput) { ASSERT_TRUE(new_input_defs[1]->Name() == "A"); } -// ((A')'B')' = BA' +// ((A')'B')' = BA' TEST_F(GraphTransformationTests, GemmTransposeFusionInputOutput) { auto model_uri = MODEL_FOLDER "fusion/gemm_transpose_inputs_output_transposed.onnx"; std::shared_ptr p_model; @@ -1368,7 +1368,7 @@ TEST_F(GraphTransformationTests, GemmTransposeFusionInputOutput) { ASSERT_TRUE(new_input_defs[1]->Name() == "A"); } -// (A'(B'))' = BA +// (A'(B'))' = BA TEST_F(GraphTransformationTests, GemmTransposeFusionInputOutput2) { auto model_uri = MODEL_FOLDER "fusion/gemm_transpose_inputs_output_transposed_2.onnx"; std::shared_ptr p_model; @@ -1497,8 +1497,8 @@ TEST_F(GraphTransformationTests, GemmSumFusionInternalNodes) { ASSERT_EQ(op_to_count["Identity"], 4); ASSERT_EQ(graph.NumberOfNodes(), 5); - for(Node &node : graph.Nodes()) { - if(node.OpType() == "Gemm") { + for (Node& node : graph.Nodes()) { + if (node.OpType() == "Gemm") { ASSERT_FALSE(static_cast(node.GetAttributes().at("transA").i())); ASSERT_FALSE(static_cast(node.GetAttributes().at("transB").i())); ASSERT_EQ(node.GetAttributes().at("alpha").f(), 1.0); @@ -4339,8 +4339,8 @@ TEST_F(GraphTransformationTests, ComputationReductionTransformer_GatherND_E2E) { values_unsqueezed_masked_lm_positions); ASSERT_TRUE(expected_ort_values.size() == actual_ort_values.size()); - const double per_sample_tolerance = 1e-4; - const double relative_per_sample_tolerance = 1e-4; + constexpr double per_sample_tolerance = 1e-4; + constexpr double relative_per_sample_tolerance = 1e-4; for (size_t i = 0; i < expected_ort_values.size(); i++) { auto ret = CompareOrtValue(actual_ort_values[i], expected_ort_values[i], per_sample_tolerance, relative_per_sample_tolerance, false); @@ -4411,7 +4411,7 @@ TEST_F(GraphTransformationTests, MatMulScaleFusionFusableModels) { EXPECT_EQ(transformed_op_counts["com.microsoft.FusedMatMul"], 1); // check combined scale, individual scales should all have the same value - const float scale_value = 3.0f; + constexpr float scale_value = 3.0f; const int num_scales = original_op_counts["Mul"] + original_op_counts["Div"] + original_op_counts["com.microsoft.FusedMatMul"]; @@ -4694,7 +4694,7 @@ TEST_F(GraphTransformationTests, PropagateCastOpsTests) { // Create a temporary directory, which will be deleted automatically, to save/load the transformed models. 
TemporaryDirectory temp_dir{ORT_TSTR("propagate_casts_test_output_dir")}; for (PropagateCastOpsTestSpecs test_case : test_cases) { - for (auto scenario : test_case.casts_count_map) { + for (const auto& scenario : test_case.casts_count_map) { Strategy strategy = scenario.first.first; int level = scenario.first.second; int expected_casts_count = scenario.second; diff --git a/onnxruntime/test/perftest/TFModelInfo.cc b/onnxruntime/test/perftest/TFModelInfo.cc index 8b161faf90..82f5359545 100644 --- a/onnxruntime/test/perftest/TFModelInfo.cc +++ b/onnxruntime/test/perftest/TFModelInfo.cc @@ -8,8 +8,7 @@ #include std::unique_ptr TFModelInfo::Create(_In_ const PATH_CHAR_TYPE* model_url) { - auto* model_info = new TFModelInfo{}; - std::unique_ptr ret(model_info); + std::unique_ptr model_info = std::make_unique(); model_info->model_url_ = model_url; std::basic_string meta_file_path = model_url; @@ -50,7 +49,7 @@ std::unique_ptr TFModelInfo::Create(_In_ const PATH_CHAR_TYPE* mo } } - return ret; + return model_info; } int TFModelInfo::GetInputCount() const { return static_cast(input_names_.size()); } diff --git a/onnxruntime/test/perftest/TFModelInfo.h b/onnxruntime/test/perftest/TFModelInfo.h index bf7f871a5a..2ca60010e3 100644 --- a/onnxruntime/test/perftest/TFModelInfo.h +++ b/onnxruntime/test/perftest/TFModelInfo.h @@ -22,9 +22,9 @@ class TFModelInfo : public TestModelInfo { ~TFModelInfo() override = default; static std::unique_ptr Create(_In_ const PATH_CHAR_TYPE* model_url); + TFModelInfo() = default; private: - TFModelInfo() = default; std::basic_string model_url_; std::vector input_names_; std::vector output_names_; diff --git a/onnxruntime/test/perftest/performance_runner.cc b/onnxruntime/test/perftest/performance_runner.cc index 8536085611..f789ed5745 100644 --- a/onnxruntime/test/perftest/performance_runner.cc +++ b/onnxruntime/test/perftest/performance_runner.cc @@ -35,7 +35,7 @@ static std::once_flag default_pool_init; Eigen::ThreadPoolInterface* GetDefaultThreadPool(const onnxruntime::Env& env) { std::call_once(default_pool_init, [&env] { int core_num = env.GetNumCpuCores(); - default_pool.reset(new DefaultThreadPoolType(core_num)); + default_pool = std::make_unique(core_num); }); return default_pool.get(); } @@ -258,8 +258,7 @@ static std::unique_ptr CreateSession(Ort::Env& env, std::random_dev const PerformanceTestConfig& performance_test_config_, const TestModelInfo& test_model_info) { if (CompareCString(performance_test_config_.backend.c_str(), ORT_TSTR("ort")) == 0) { - return std::unique_ptr( - new OnnxRuntimeTestSession(env, rd, performance_test_config_, test_model_info)); + return std::make_unique(env, rd, performance_test_config_, test_model_info); } #ifdef HAVE_TENSORFLOW if (CompareCString(performance_test_config_.backend.c_str(), ORT_TSTR("tf")) == 0) { diff --git a/onnxruntime/test/providers/cpu/activation/activation_op_test.cc b/onnxruntime/test/providers/cpu/activation/activation_op_test.cc index fb6ae200ab..a530bbb8c8 100644 --- a/onnxruntime/test/providers/cpu/activation/activation_op_test.cc +++ b/onnxruntime/test/providers/cpu/activation/activation_op_test.cc @@ -105,17 +105,19 @@ TEST_F(ActivationOpTest, Relu) { TestActivationOp("Relu", input_values, [](float x) { return std::max(x, 0.0f); }); - TestActivationOp("Relu", - input_values_double, - [](double x) { return std::max(x, 0.0); }, - {}, - /*is_tensorrt_supported=*/ false); - TestActivationOp("Relu", - input_values_int8, - [](int8_t x) { return std::max(x, static_cast(0)); }, - {}, - /*is_tensorrt_supported=*/ 
false, - /*opset_version= */ 14); + TestActivationOp( + "Relu", + input_values_double, + [](double x) { return std::max(x, 0.0); }, + {}, + /*is_tensorrt_supported=*/false); + TestActivationOp( + "Relu", + input_values_int8, + [](int8_t x) { return std::max(x, static_cast(0)); }, + {}, + /*is_tensorrt_supported=*/false, + /*opset_version= */ 14); } TEST_F(ActivationOpTest, Elu) { @@ -217,9 +219,9 @@ TEST_F(ActivationOpTest, PRelu_MultiChannel) { std::vector inputs{1.0f, 2.0f, -4.0f, 3.0f, 0.0f, 5.0f, -9.0f, 8.0f}; std::vector slopes{1.0f, -2.0f}; std::vector outputs; - const int64_t num_images = 2; - const int64_t num_channels = 2; - const int64_t num_pixels = 2; + constexpr int64_t num_images = 2; + constexpr int64_t num_channels = 2; + constexpr int64_t num_pixels = 2; for (unsigned i = 0; i < inputs.size(); i++) outputs.push_back(formula(inputs[i], slopes[i / num_pixels % num_channels])); diff --git a/onnxruntime/test/providers/cpu/generator/random_test.cc b/onnxruntime/test/providers/cpu/generator/random_test.cc index e1be1e28f9..a59e19f2f1 100644 --- a/onnxruntime/test/providers/cpu/generator/random_test.cc +++ b/onnxruntime/test/providers/cpu/generator/random_test.cc @@ -15,9 +15,9 @@ TEST(Random, RandomNormal2DDouble) { std::vector dims{20, 50}; - float scale = 10.f; - float mean = 0.f; - float seed = 123.f; + constexpr float scale = 10.f; + constexpr float mean = 0.f; + constexpr float seed = 123.f; test.AddAttribute("scale", scale); test.AddAttribute("mean", mean); @@ -44,9 +44,9 @@ void RunRandomNormalLike3DFloat(bool infer_dtype = false) { std::vector dims{2, 2, 3}; - float scale = 10.f; - float mean = 0.f; - float seed = 123.f; + constexpr float scale = 10.f; + constexpr float mean = 0.f; + constexpr float seed = 123.f; test.AddAttribute("scale", scale); test.AddAttribute("mean", mean); @@ -79,7 +79,7 @@ TEST(Random, RandomNormalLike3DDouble) { } TEST(Random, RandomNormalLikeInferDType) { - const bool infer_dtype = true; + constexpr bool infer_dtype = true; RunRandomNormalLike3DFloat(infer_dtype); } @@ -88,9 +88,9 @@ TEST(Random, RandomUniform1DFloat) { std::vector dims{10}; - float low = 0.f; - float high = 100.f; - float seed = 123.f; + constexpr float low = 0.f; + constexpr float high = 100.f; + constexpr float seed = 123.f; test.AddAttribute("low", low); test.AddAttribute("high", high); @@ -116,9 +116,9 @@ void RunRandomUniformLikeTest(bool infer_dtype = false) { std::vector dims{2, 6}; - float low = 0.f; - float high = 100.f; - float seed = 123.f; + constexpr float low = 0.f; + constexpr float high = 100.f; + constexpr float seed = 123.f; test.AddAttribute("low", low); test.AddAttribute("high", high); @@ -149,12 +149,12 @@ TEST(Random, RandomUniformLike2DDouble) { } TEST(Random, RandomUniformLikeInferDType) { - const bool infer_dtype = true; + constexpr bool infer_dtype = true; RunRandomUniformLikeTest(infer_dtype); } TEST(Random, InvalidDType) { - float seed = 123.f; + constexpr float seed = 123.f; std::vector dims{1, 4}; std::vector input{0, 0, 0, 0}; @@ -179,8 +179,8 @@ TEST(Random, InvalidDType) { { OpTester test("RandomUniform"); - float low = 0.f; - float high = 100.f; + constexpr float low = 0.f; + constexpr float high = 100.f; test.AddAttribute("low", low); test.AddAttribute("high", high); @@ -195,8 +195,8 @@ TEST(Random, InvalidDType) { { OpTester test("RandomNormalLike"); - float scale = 10.f; - float mean = 0.f; + constexpr float scale = 10.f; + constexpr float mean = 0.f; test.AddAttribute("scale", scale); test.AddAttribute("mean", mean); @@ -211,8 +211,8 @@ 
TEST(Random, InvalidDType) { { OpTester test("RandomUniformLike"); - float low = 0.f; - float high = 100.f; + constexpr float low = 0.f; + constexpr float high = 100.f; test.AddAttribute("low", low); test.AddAttribute("high", high); @@ -234,10 +234,10 @@ for verification. TEST(Random, MultinomialGoodCase) { OpTester test("Multinomial"); - const int64_t num_samples = 10; - const float seed = 1618.f; - const int batch_size = 2; - const int num_classes = 3; + constexpr int64_t num_samples = 10; + constexpr float seed = 1618.f; + constexpr int batch_size = 2; + constexpr int num_classes = 3; const std::vector input_dims{batch_size, num_classes}; std::vector input(TensorShape(input_dims).Size()); @@ -264,9 +264,9 @@ TEST(Random, MultinomialGoodCase) { TEST(Random, MultinomialDefaultDType) { auto run_test = [](int num_run_calls, const std::vector& expected_output) { OpTester test("Multinomial"); - const int64_t num_samples = 10; - const int batch_size = 2; - const float seed = 1618.f; + constexpr int64_t num_samples = 10; + constexpr int batch_size = 2; + constexpr float seed = 1618.f; const std::vector input_dims{2, 3}; std::vector input(TensorShape(input_dims).Size()); @@ -307,10 +307,10 @@ TEST(Random, MultinomialDefaultDType) { TEST(Random, MultinomialInvalidDtype) { OpTester test("Multinomial"); - const int64_t num_samples = 10; - const int batch_size = 2; - const int num_classes = 3; - const float seed = 1618.f; + constexpr int64_t num_samples = 10; + constexpr int batch_size = 2; + constexpr int num_classes = 3; + constexpr float seed = 1618.f; const std::vector input_dims{batch_size, num_classes}; std::vector input(TensorShape(input_dims).Size()); diff --git a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc index 68c764813f..841e075855 100644 --- a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc +++ b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc @@ -1572,7 +1572,6 @@ TEST(MathOpTest, Xor) { test.Run(); } - TEST(MathOpTest, Xor_Issue8880) { OpTester test("Xor"); test.AddInput("A", {1}, {true}); @@ -2247,7 +2246,7 @@ TEST(MathOpTest, ErfMoreData) { test.Run(); } -const int ModOp_ver = 10; +constexpr int ModOp_ver = 10; TEST(ModOpTest, Fmod_float_mixed_sign) { OpTester test("Mod", ModOp_ver); diff --git a/onnxruntime/test/providers/cpu/math/quantize_linear_matmul_test.cc b/onnxruntime/test/providers/cpu/math/quantize_linear_matmul_test.cc index aa5e103874..0cb078b456 100644 --- a/onnxruntime/test/providers/cpu/math/quantize_linear_matmul_test.cc +++ b/onnxruntime/test/providers/cpu/math/quantize_linear_matmul_test.cc @@ -415,8 +415,8 @@ TEST(QuantizeLinearMatmulOpTest, PerColumn_ND_S8S8) { } /** - * @brief Extend QLinearMatMul for verifying prepacking behavior -*/ + * @brief Extend QLinearMatMul for verifying prepacking behavior + */ struct PrePackTestOp { // TODO!! use template and macro to extract a common utility out of this // for grey box kernel testing by extending kernel classes. 
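The long run of test edits above and below applies one rule: a local constant whose initializer is known at compile time becomes constexpr instead of const (Core Guidelines con.5). constexpr guarantees compile-time evaluation and documents that the value can never depend on run-time inputs, while const remains the right tool when the initializer is only available at run time. Sketch, with ReadAxisFromConfig as a hypothetical run-time source:

    #include <cstdint>

    int64_t ReadAxisFromConfig() { return 1; }  // stub standing in for a run-time value

    void Example() {
      constexpr int64_t kAxis = 0;                    // compile-time constant: constexpr
      const int64_t cfg_axis = ReadAxisFromConfig();  // run-time value: const is the limit
      static_cast<void>(kAxis);
      static_cast<void>(cfg_axis);
    }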
@@ -487,7 +487,7 @@ TEST(QuantizeLinearMatmulOpTest, QLinearMatMulPrePack) { std::vector schemas{PrePackTestOp::OpSchema()}; Status status; ASSERT_TRUE((status = registry->RegisterOpSet(schemas, PrePackTestOp::OpDomain, 10, 11)).IsOK()) << status; - KernelCreateFn kernel_create_fn = [](const OpKernelInfo& info) { return new typename PrePackTestOp::QLinearMatMulPrePackT(info); }; + KernelCreateFn kernel_create_fn = [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) { out = std::make_unique(info); return Status::OK(); }; auto kernel_def = PrePackTestOp::KernelDef(); ASSERT_TRUE((status = registry->RegisterCustomKernel(kernel_def, kernel_create_fn)).IsOK()) << status; diff --git a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc index 55f516bed7..12e98e41f5 100644 --- a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc +++ b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc @@ -13,10 +13,10 @@ namespace onnxruntime { namespace test { -const float FLOAT_INF = std::numeric_limits::infinity(); -const float FLOAT_NINF = -std::numeric_limits::infinity(); -const double DOUBLE_INF = std::numeric_limits::infinity(); -const double DOUBLE_NINF = -std::numeric_limits::infinity(); +constexpr float FLOAT_INF = std::numeric_limits::infinity(); +constexpr float FLOAT_NINF = -std::numeric_limits::infinity(); +constexpr double DOUBLE_INF = std::numeric_limits::infinity(); +constexpr double DOUBLE_NINF = -std::numeric_limits::infinity(); // Disable TensorRT on some of the tests because the limit in its parser: axis >=0 && axis < nbDims template @@ -778,7 +778,7 @@ TEST(ReductionOpTest, ReduceMax_int32) { #if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_MYRIAD) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled temporarily #else - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0 + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0 #endif } @@ -799,7 +799,7 @@ TEST(ReductionOpTest, ReduceMax_int64) { #if defined(OPENVINO_CONFIG_GPU_FP32) || defined(OPENVINO_CONFIG_GPU_FP16) || defined(OPENVINO_CONFIG_MYRIAD) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled temporarily #else - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0 + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0 #endif } @@ -820,7 +820,7 @@ TEST(ReductionOpTest, ReduceMax_int8) { #if defined(OPENVINO_CONFIG_MYRIAD) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled temporarily #else - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0 + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0 #endif } @@ -841,7 +841,7 @@ TEST(ReductionOpTest, ReduceMax_uint8) { #if defined(OPENVINO_CONFIG_MYRIAD) test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); // OpenVINO: Disabled temporarily #else - 
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0 + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: axis must be 0 #endif } @@ -1049,7 +1049,7 @@ TEST(ReductionOpTest, ReduceMean) { 9.0f, 10.0f, 11.0f, 12.0f}); test.AddOutput("reduced", {1, 2, 1}, {5.5f, 7.5f}); - + test.Run(); } diff --git a/onnxruntime/test/providers/cpu/tensor/copy_test.cc b/onnxruntime/test/providers/cpu/tensor/copy_test.cc index 91c496a8ed..f8a72a5c8f 100644 --- a/onnxruntime/test/providers/cpu/tensor/copy_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/copy_test.cc @@ -51,17 +51,16 @@ TEST_F(CopyTest, Contiguous3D) { TEST_F(CopyTest, Transpose4D) { // Test performing a transpose using a strided copy - int64_t numel = 2 * 3 * 4 * 5; - double* src = new double[numel]; + constexpr int64_t numel = 2 * 3 * 4 * 5; + std::unique_ptr src = std::make_unique(numel); for (int i = 0; i < numel; i++) { src[i] = static_cast(i); } - - double* dst = new double[numel]; + std::unique_ptr dst = std::make_unique(numel); std::vector dst_strides = {60, 5, 15, 1}; std::vector src_strides = {60, 20, 5, 1}; - StridedCopy(tp.get(), dst, dst_strides, {2, 3, 4, 5}, src, src_strides); + StridedCopy(tp.get(), dst.get(), dst_strides, {2, 3, 4, 5}, src.get(), src_strides); // stride to access the dst tensor as if it were contiguous std::vector contig_dst_strides = {60, 15, 5, 1}; @@ -78,18 +77,15 @@ TEST_F(CopyTest, Transpose4D) { } } } - delete[] src; - delete[] dst; } TEST_F(CopyTest, Concat2D) { // test performing a concat using a strided copy - double* src = new double[6 * 2]; + std::unique_ptr src = std::make_unique(6 * 2); for (int i = 0; i < 6 * 2; i++) { src[i] = static_cast(i); } - - double* dst = new double[10 * 5]; + std::unique_ptr dst = std::make_unique(10 * 5); for (int i = 0; i < 10 * 5; i++) { dst[i] = 0; } @@ -97,7 +93,7 @@ TEST_F(CopyTest, Concat2D) { std::vector dst_strides = {5, 1}; std::vector src_strides = {2, 1}; std::ptrdiff_t offset = 3; - StridedCopy(tp.get(), dst + offset, dst_strides, {6, 2}, src, src_strides); + StridedCopy(tp.get(), dst.get() + offset, dst_strides, {6, 2}, src.get(), src_strides); for (int i0 = 0; i0 < 10; i0++) { for (int i1 = 0; i1 < 5; i1++) { @@ -110,8 +106,6 @@ TEST_F(CopyTest, Concat2D) { } } } - delete[] src; - delete[] dst; } TEST_F(CopyTest, CoalesceTensorsTest) { diff --git a/onnxruntime/test/providers/cpu/tensor/split_op_test.cc b/onnxruntime/test/providers/cpu/tensor/split_op_test.cc index b2dc3a44b3..c3b5b25ee7 100644 --- a/onnxruntime/test/providers/cpu/tensor/split_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/split_op_test.cc @@ -50,7 +50,7 @@ void RunTest(int64_t axis, const std::vector split_sizes, const ShapeAn } TEST(SplitOperatorTest, Axis0EqualSplitFloat) { - const int64_t axis = 0; + constexpr int64_t axis = 0; std::vector outputs; // input shape and data @@ -73,7 +73,7 @@ TEST(SplitOperatorTest, Axis0EqualSplitFloat) { template ::value, T>::type> static void SplitTestInt() { - const int64_t axis = 0; + constexpr int64_t axis = 0; std::vector> outputs; // input shape and data @@ -107,7 +107,7 @@ TEST(SplitOperatorTest, Axis0EqualSplitInt64) { } TEST(SplitOperatorTest, Axis0EqualSplitString) { - const int64_t axis = 0; + constexpr int64_t axis = 0; std::vector outputs; // input shape and data @@ -129,7 +129,7 @@ TEST(SplitOperatorTest, Axis0EqualSplitString) { } TEST(SplitOperatorTest, Axis0UnequalSplitFloat) { - const int64_t axis = 0; + constexpr int64_t axis 
= 0; std::vector outputs; // input shape and data @@ -152,7 +152,7 @@ TEST(SplitOperatorTest, Axis0UnequalSplitFloat) { } TEST(SplitOperatorTest, Axis0UnequalSplitString) { - const int64_t axis = 0; + constexpr int64_t axis = 0; std::vector outputs; // input shape and data @@ -175,7 +175,7 @@ TEST(SplitOperatorTest, Axis0UnequalSplitString) { } TEST(SplitOperatorTest, Axis1EqualSplitFloat) { - const int64_t axis = 1; + constexpr int64_t axis = 1; std::vector outputs; // input shape and data @@ -195,7 +195,7 @@ TEST(SplitOperatorTest, Axis1EqualSplitFloat) { } TEST(SplitOperatorTest, Axis1EqualSplitString) { - const int64_t axis = 1; + constexpr int64_t axis = 1; std::vector outputs; // input shape and data @@ -215,7 +215,7 @@ TEST(SplitOperatorTest, Axis1EqualSplitString) { } TEST(SplitOperatorTest, Axis1UnequalSplitFloat) { - const int64_t axis = 1; + constexpr int64_t axis = 1; std::vector outputs; // input shape and data @@ -237,7 +237,7 @@ TEST(SplitOperatorTest, Axis1UnequalSplitFloat) { } TEST(SplitOperatorTest, Axis1UnequalSplitString) { - const int64_t axis = 1; + constexpr int64_t axis = 1; std::vector outputs; // input shape and data @@ -271,7 +271,7 @@ ShapeAndData CreateInput(std::vector shape) { } TEST(SplitOperatorTest, Axis2EqualSplit) { - const int64_t axis = 2; + constexpr int64_t axis = 2; std::vector outputs; ShapeAndFloatData input = CreateInput({2, 2, 6}); @@ -301,7 +301,7 @@ TEST(SplitOperatorTest, Axis2EqualSplit) { } TEST(SplitOperatorTest, Axis2UnequalSplit) { - const int64_t axis = 2; + constexpr int64_t axis = 2; std::vector outputs; ShapeAndFloatData input = CreateInput({2, 2, 6}); @@ -333,7 +333,7 @@ TEST(SplitOperatorTest, Axis2UnequalSplit) { } TEST(SplitOperatorTest, ZeroSizeInput) { - const int64_t axis = -1; + constexpr int64_t axis = -1; std::vector outputs{{{0, 1}, {}}, {{0, 1}, {}}}; ShapeAndFloatData input = CreateInput({0, 2}); @@ -343,7 +343,7 @@ TEST(SplitOperatorTest, ZeroSizeInput) { // test a split of a dimension that has leading and trailing dimensions TEST(SplitOperatorTest, Axis1SplitMiddleDimensionEqually) { - const int64_t axis = 1; + constexpr int64_t axis = 1; std::vector outputs; ShapeAndFloatData input = CreateInput({2, 4, 4}); @@ -367,7 +367,7 @@ TEST(SplitOperatorTest, Axis1SplitMiddleDimensionEqually) { // test a split of a dimension that has leading and trailing dimensions TEST(SplitOperatorTest, Axis1SplitMiddleDimensionUnequally) { - const int64_t axis = 1; + constexpr int64_t axis = 1; std::vector outputs; ShapeAndFloatData input = CreateInput({2, 4, 4}); @@ -392,7 +392,7 @@ TEST(SplitOperatorTest, Axis1SplitMiddleDimensionUnequally) { } TEST(SplitOperatorTest, NegativeAxis) { - const int64_t axis = -1; // split last axis equally + constexpr int64_t axis = -1; // split last axis equally std::vector outputs; // input shape and data @@ -412,7 +412,7 @@ TEST(SplitOperatorTest, NegativeAxis) { } TEST(SplitOperatorTest, InvalidAxis) { - const int64_t axis = 2; + constexpr int64_t axis = 2; std::vector outputs; // input shape and data @@ -429,7 +429,7 @@ TEST(SplitOperatorTest, InvalidAxis) { // sum of values in splits is too small TEST(SplitOperatorTest, SplitAttributeSumTooSmall) { - const int64_t axis = 0; + constexpr int64_t axis = 0; std::vector outputs; // input shape and data @@ -448,7 +448,7 @@ TEST(SplitOperatorTest, SplitAttributeSumTooSmall) { } TEST(SplitOperatorTest, InvalidValueInSplitAttribute) { - const int64_t axis = -1; + constexpr int64_t axis = -1; std::vector outputs; // input shape and data @@ -467,7 +467,7 @@ 
diff --git a/onnxruntime/test/providers/cpu/tensor/split_op_test.cc b/onnxruntime/test/providers/cpu/tensor/split_op_test.cc
index b2dc3a44b3..c3b5b25ee7 100644
--- a/onnxruntime/test/providers/cpu/tensor/split_op_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/split_op_test.cc
@@ -50,7 +50,7 @@ void RunTest(int64_t axis, const std::vector<int64_t> split_sizes, const ShapeAn
 }
 
 TEST(SplitOperatorTest, Axis0EqualSplitFloat) {
-  const int64_t axis = 0;
+  constexpr int64_t axis = 0;
   std::vector<ShapeAndFloatData> outputs;
 
   // input shape and data
@@ -73,7 +73,7 @@ TEST(SplitOperatorTest, Axis0EqualSplitFloat) {
 
 template <typename T, typename std::enable_if<std::is_integral<T>::value, T>::type>
 static void SplitTestInt() {
-  const int64_t axis = 0;
+  constexpr int64_t axis = 0;
   std::vector<ShapeAndData<T>> outputs;
 
   // input shape and data
@@ -107,7 +107,7 @@ TEST(SplitOperatorTest, Axis0EqualSplitInt64) {
 }
 
 TEST(SplitOperatorTest, Axis0EqualSplitString) {
-  const int64_t axis = 0;
+  constexpr int64_t axis = 0;
   std::vector<ShapeAndStringData> outputs;
 
   // input shape and data
@@ -129,7 +129,7 @@ TEST(SplitOperatorTest, Axis0EqualSplitString) {
 }
 
 TEST(SplitOperatorTest, Axis0UnequalSplitFloat) {
-  const int64_t axis = 0;
+  constexpr int64_t axis = 0;
   std::vector<ShapeAndFloatData> outputs;
 
   // input shape and data
@@ -152,7 +152,7 @@ TEST(SplitOperatorTest, Axis0UnequalSplitFloat) {
 }
 
 TEST(SplitOperatorTest, Axis0UnequalSplitString) {
-  const int64_t axis = 0;
+  constexpr int64_t axis = 0;
   std::vector<ShapeAndStringData> outputs;
 
   // input shape and data
@@ -175,7 +175,7 @@ TEST(SplitOperatorTest, Axis0UnequalSplitString) {
 }
 
 TEST(SplitOperatorTest, Axis1EqualSplitFloat) {
-  const int64_t axis = 1;
+  constexpr int64_t axis = 1;
   std::vector<ShapeAndFloatData> outputs;
 
   // input shape and data
@@ -195,7 +195,7 @@ TEST(SplitOperatorTest, Axis1EqualSplitFloat) {
 }
 
 TEST(SplitOperatorTest, Axis1EqualSplitString) {
-  const int64_t axis = 1;
+  constexpr int64_t axis = 1;
   std::vector<ShapeAndStringData> outputs;
 
   // input shape and data
@@ -215,7 +215,7 @@ TEST(SplitOperatorTest, Axis1EqualSplitString) {
 }
 
 TEST(SplitOperatorTest, Axis1UnequalSplitFloat) {
-  const int64_t axis = 1;
+  constexpr int64_t axis = 1;
   std::vector<ShapeAndFloatData> outputs;
 
   // input shape and data
@@ -237,7 +237,7 @@ TEST(SplitOperatorTest, Axis1UnequalSplitFloat) {
 }
 
 TEST(SplitOperatorTest, Axis1UnequalSplitString) {
-  const int64_t axis = 1;
+  constexpr int64_t axis = 1;
   std::vector<ShapeAndStringData> outputs;
 
   // input shape and data
@@ -271,7 +271,7 @@ ShapeAndData<T> CreateInput(std::vector<int64_t> shape) {
 }
 
 TEST(SplitOperatorTest, Axis2EqualSplit) {
-  const int64_t axis = 2;
+  constexpr int64_t axis = 2;
   std::vector<ShapeAndFloatData> outputs;
 
   ShapeAndFloatData input = CreateInput<float>({2, 2, 6});
@@ -301,7 +301,7 @@ TEST(SplitOperatorTest, Axis2EqualSplit) {
 }
 
 TEST(SplitOperatorTest, Axis2UnequalSplit) {
-  const int64_t axis = 2;
+  constexpr int64_t axis = 2;
   std::vector<ShapeAndFloatData> outputs;
 
   ShapeAndFloatData input = CreateInput<float>({2, 2, 6});
@@ -333,7 +333,7 @@ TEST(SplitOperatorTest, Axis2UnequalSplit) {
 }
 
 TEST(SplitOperatorTest, ZeroSizeInput) {
-  const int64_t axis = -1;
+  constexpr int64_t axis = -1;
   std::vector<ShapeAndFloatData> outputs{{{0, 1}, {}}, {{0, 1}, {}}};
 
   ShapeAndFloatData input = CreateInput<float>({0, 2});
@@ -343,7 +343,7 @@ TEST(SplitOperatorTest, ZeroSizeInput) {
 
 // test a split of a dimension that has leading and trailing dimensions
 TEST(SplitOperatorTest, Axis1SplitMiddleDimensionEqually) {
-  const int64_t axis = 1;
+  constexpr int64_t axis = 1;
   std::vector<ShapeAndFloatData> outputs;
 
   ShapeAndFloatData input = CreateInput<float>({2, 4, 4});
@@ -367,7 +367,7 @@ TEST(SplitOperatorTest, Axis1SplitMiddleDimensionEqually) {
 
 // test a split of a dimension that has leading and trailing dimensions
 TEST(SplitOperatorTest, Axis1SplitMiddleDimensionUnequally) {
-  const int64_t axis = 1;
+  constexpr int64_t axis = 1;
   std::vector<ShapeAndFloatData> outputs;
 
   ShapeAndFloatData input = CreateInput<float>({2, 4, 4});
@@ -392,7 +392,7 @@ TEST(SplitOperatorTest, Axis1SplitMiddleDimensionUnequally) {
 }
 
 TEST(SplitOperatorTest, NegativeAxis) {
-  const int64_t axis = -1;  // split last axis equally
+  constexpr int64_t axis = -1;  // split last axis equally
   std::vector<ShapeAndFloatData> outputs;
 
   // input shape and data
@@ -412,7 +412,7 @@ TEST(SplitOperatorTest, NegativeAxis) {
 }
 
 TEST(SplitOperatorTest, InvalidAxis) {
-  const int64_t axis = 2;
+  constexpr int64_t axis = 2;
   std::vector<ShapeAndFloatData> outputs;
 
   // input shape and data
@@ -429,7 +429,7 @@ TEST(SplitOperatorTest, InvalidAxis) {
 
 // sum of values in splits is too small
 TEST(SplitOperatorTest, SplitAttributeSumTooSmall) {
-  const int64_t axis = 0;
+  constexpr int64_t axis = 0;
   std::vector<ShapeAndFloatData> outputs;
 
   // input shape and data
@@ -448,7 +448,7 @@ TEST(SplitOperatorTest, SplitAttributeSumTooSmall) {
 }
 
 TEST(SplitOperatorTest, InvalidValueInSplitAttribute) {
-  const int64_t axis = -1;
+  constexpr int64_t axis = -1;
   std::vector<ShapeAndFloatData> outputs;
 
   // input shape and data
@@ -467,7 +467,7 @@ TEST(SplitOperatorTest, InvalidValueInSplitAttribute) {
 
 // split as input
 TEST(SplitOperatorTest, Axis0UnequalSplitInputFloat) {
-  const int64_t axis = 0;
+  constexpr int64_t axis = 0;
   std::vector<ShapeAndFloatData> outputs;
 
   // input shape and data
@@ -491,7 +491,7 @@ TEST(SplitOperatorTest, Axis0UnequalSplitInputFloat) {
 
 // split as input
 TEST(SplitOperatorTest, Axis0UnequalSplitInputFloat_not_initializer) {
-  const int64_t axis = 0;
+  constexpr int64_t axis = 0;
   std::vector<ShapeAndFloatData> outputs;
 
   // input shape and data
@@ -576,7 +576,7 @@ SplitMiddleDimension()
 
 // test split for uint8_t data that has leading and trailing dimensions
 TEST(SplitOperatorTest, Uint8Axis1SplitMiddleDimensionUnequally) {
-  const int64_t axis = 1;
+  constexpr int64_t axis = 1;
   std::vector<ShapeAndData<uint8_t>> outputs;
 
   ShapeAndData<uint8_t> input = CreateInput<uint8_t>({2, 4, 4});
@@ -602,7 +602,7 @@ TEST(SplitOperatorTest, Uint8Axis1SplitMiddleDimensionUnequally) {
 
 // test split for uint8_t data on the last axis equally
 TEST(SplitOperatorTest, Uint8NegativeAxis) {
-  const int64_t axis = -1;
+  constexpr int64_t axis = -1;
   std::vector<ShapeAndData<uint8_t>> outputs;
 
   ShapeAndData<uint8_t> input = {{2, 4},
@@ -621,7 +621,7 @@ TEST(SplitOperatorTest, Uint8NegativeAxis) {
 }
 
 TEST(SplitOperatorTest, MissingOptionalInputAdded) {
-  const int64_t axis = 1;  // split last axis equally
+  constexpr int64_t axis = 1;  // split last axis equally
   std::vector<ShapeAndFloatData> outputs;
 
   // input shape and data
diff --git a/onnxruntime/test/providers/cpu/tensor/tensor_op_test.cc b/onnxruntime/test/providers/cpu/tensor/tensor_op_test.cc
index 1a7fada780..f2eeb06640 100644
--- a/onnxruntime/test/providers/cpu/tensor/tensor_op_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/tensor_op_test.cc
@@ -146,7 +146,7 @@ TEST(TensorOpTest, ShapeTest3D) {
 }
 
 void MeanVarianceNormalizationFunctionDefaultPerChannel() {
-  const int64_t N = 2, C = 2, H = 2, W = 3;
+  constexpr int64_t N = 2, C = 2, H = 2, W = 3;
 
   std::vector<float> N1C1 = {3.0f, -3.0f, -1.0f, 1.0f, 2.0f, -1.0f};
@@ -209,7 +209,7 @@ void MeanVarianceNormalizationFunctionDefaultPerChannel() {
 }
 
 void MeanVarianceNormalizationFunctionAcrossChannels(std::vector<int64_t> axes) {
-  const int64_t N = 2, C = 2, H = 2, W = 3;
+  constexpr int64_t N = 2, C = 2, H = 2, W = 3;
 
   std::vector<float> X = {3.0f, -3.0f, -1.0f, 1.0f,
                           2.0f, -1.0f,
diff --git a/onnxruntime/test/providers/cpu/tensor/unique_op_test.cc b/onnxruntime/test/providers/cpu/tensor/unique_op_test.cc
index a8e43fdc53..c539e59b5d 100644
--- a/onnxruntime/test/providers/cpu/tensor/unique_op_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/unique_op_test.cc
@@ -142,7 +142,7 @@ TEST(Unique, Axis0_Unsorted) {
                              0.f, 1.f,
                              1.f, 0.f};
 
-  const int64_t axis = 0;
+  constexpr int64_t axis = 0;
   bool sorted = false;
   const std::vector<int64_t> Y_dims{3, 2};
   const std::vector<float> Y{0.f, 1.f,
@@ -167,7 +167,7 @@ TEST(Unique, Axis0_Sorted) {
                              0.f, 1.f,
                              1.f, 0.f};
 
-  const int64_t axis = 0;
+  constexpr int64_t axis = 0;
   bool sorted = true;
   const std::vector<int64_t> Y_dims{3, 2};
   const std::vector<float> Y{0.f, 1.f,
@@ -192,7 +192,7 @@ TEST(Unique, Axis0_Unsorted_String) {
                                    "0.f", "1.f",
                                    "1.f", "0.f"};
 
-  const int64_t axis = 0;
+  constexpr int64_t axis = 0;
   bool sorted = false;
   const std::vector<int64_t> Y_dims{3, 2};
   const std::vector<std::string> Y{"0.f", "1.f",
@@ -222,7 +222,7 @@ TEST(Unique, Axis1_Unsorted) {
                                2, 1,
                                0, 1};
 
-  const int64_t axis = 1;
+  constexpr int64_t axis = 1;
   bool sorted = false;
   const std::vector<int64_t> Y_dims{2, 3, 2};
   const std::vector<int64_t> Y{1, 1,
@@ -256,7 +256,7 @@ TEST(Unique, Axis1_Sorted) {
                                2, 1,
                                0, 1};
 
-  const int64_t axis = 1;
+  constexpr int64_t axis = 1;
   bool sorted = true;
   const std::vector<int64_t> Y_dims{2, 3, 2};
   const std::vector<int64_t> Y{0, 1,
@@ -286,7 +286,7 @@ TEST(Unique, Axis2_Unsorted) {
                                1, 1, 0, 1,
                                2, 1, 0, 1};
 
-  const int64_t axis = 2;
+  constexpr int64_t axis = 2;
   bool sorted = false;
   const std::vector<int64_t> Y_dims{2, 2, 3};
   const std::vector<int64_t> Y{1, 1, 0,
@@ -314,7 +314,7 @@ TEST(Unique, Axis2_Sorted) {
                                1, 1, 0, 1,
                                2, 1, 0, 1};
 
-  const int64_t axis = 2;
+  constexpr int64_t axis = 2;
   bool sorted = true;
   const std::vector<int64_t> Y_dims{2, 2, 3};
   const std::vector<int64_t> Y{0, 1, 1,
@@ -335,7 +335,7 @@ TEST(Unique, Axis2_Sorted) {
 }
 
 TEST(Unique, InvalidAxis) {
-  const int64_t axis = 12;
+  constexpr int64_t axis = 12;
   const std::vector<int64_t> X_dims{2, 3};
   const std::vector<float> X{1.f, 4.f, 1.f, 2.f, 2.f, 0.f};
   const std::vector<int64_t> Y_dims{};
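The recurring change through split_op_test.cc, tensor_op_test.cc, and unique_op_test.cc is const -> constexpr for values that are known at compile time, which is what analysis rule C26814 ("use constexpr for values that can be computed at compile time") recommends. A small illustrative sketch of the distinction; this is not patch code:

#include <cstdint>

inline int64_t AxisFromConfig() { return 1; }  // hypothetical runtime source of a value

void Example() {
  const int64_t a = AxisFromConfig();  // legal: const, but initialized at run time
  constexpr int64_t axis = 1;          // guaranteed compile-time constant
  static_assert(axis == 1, "constexpr values are usable in constant expressions");
  (void)a;
}

Because every test axis is a literal, the change is purely a strengthening of intent: the compiler now rejects any future edit that would make the value runtime-dependent.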
diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc
index d59ec0f4bd..0aacbf14ce 100644
--- a/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc
+++ b/onnxruntime/test/providers/internal_testing/internal_testing_partitioning_tests.cc
@@ -49,8 +49,8 @@ TEST(InternalTestingEP, TestSortResultsInSinglePartition) {
 
   ASSERT_STATUS_OK(session->Initialize());
 
-  const auto& func_mgr = session->GetSessionState().GetFuncMgr();
-  NodeComputeInfo* compute_func = nullptr;
+  auto& func_mgr = const_cast<SessionState&>(session->GetSessionState()).GetMutableFuncMgr();
+  const NodeComputeInfo* compute_func = nullptr;
 
   int num_partitions{0}, num_other_nodes{0};
 
@@ -94,8 +94,8 @@ TEST(InternalTestingEP, TestDependenciesCorrectlyHandled) {
   ASSERT_STATUS_OK(session->Initialize());
 
   // this should fail if we don't process dependencies correctly
-  const auto& func_mgr = session->GetSessionState().GetFuncMgr();
-  NodeComputeInfo* compute_func = nullptr;
+  auto& func_mgr = const_cast<SessionState&>(session->GetSessionState()).GetMutableFuncMgr();
+  const NodeComputeInfo* compute_func = nullptr;
 
   int num_partitions{0};
   int num_other_nodes{0};
@@ -235,8 +235,8 @@ static void TestNnapiPartitioning(const std::string& test_name, const std::strin
     unsupported_op_str = oss.str();
   }
 
-  const auto& func_mgr = session->GetSessionState().GetFuncMgr();
-  NodeComputeInfo* compute_func = nullptr;
+  auto& func_mgr = const_cast<SessionState&>(session->GetSessionState()).GetMutableFuncMgr();
+  const NodeComputeInfo* compute_func = nullptr;
 
   stats.num_nodes_not_handled = 0;
   stats.num_compiled_nodes = 0;
diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
index cdd2eb6445..f456809f0b 100644
--- a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
+++ b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
@@ -174,8 +174,8 @@ TEST(InternalTestingEP, TestLoadOrtModelWithReducedOpCoverage) {
   // Conv+Add gets fused by level 1 optimizer into single node. The 'Conv'/'Add'/'Relu' nodes should be compiled and
   // handled by the custom EP. fallback to CPU for MaxPool.
   ASSERT_EQ(graph.NumberOfNodes(), 6);
 
-  const auto& func_mgr = session->GetSessionState().GetFuncMgr();
-  NodeComputeInfo* compute_func = nullptr;
+  auto& func_mgr = const_cast<SessionState&>(session->GetSessionState()).GetMutableFuncMgr();
+  const NodeComputeInfo* compute_func = nullptr;
 
   // the generated op type should have a hash for the model based on the model path
   const std::string expected_op_type_prefix = "InternalTestingEP_9611636968429821767_";
@@ -197,14 +197,14 @@ TEST(InternalTestingEP, TestLoadOrtModelWithReducedOpCoverage) {
 
 // count nodes assigned to the test EP and make sure they all have valid compute funcs
 static int CountAndValidateAssignedNodes(const Graph& current_graph,
                                          const std::unordered_set<std::string>& supported_ops,
-                                         const FuncManager& func_mgr) {
+                                         FuncManager& func_mgr) {
   int count = 0;
 
   for (const auto& node : current_graph.Nodes()) {
     EXPECT_EQ(supported_ops.count(node.OpType()), size_t(0))
         << "Nodes with supported op types should have been replaced. Node with type " << node.OpType() << " was not.";
     if (node.GetExecutionProviderType() == utils::kInternalTestingExecutionProvider) {
-      NodeComputeInfo* compute_func = nullptr;
+      const NodeComputeInfo* compute_func = nullptr;
       EXPECT_STATUS_OK(func_mgr.GetFuncs(node.Name(), compute_func));
       EXPECT_NE(compute_func, nullptr);
       ++count;
@@ -232,7 +232,7 @@ TEST(InternalTestingEP, TestModelWithSubgraph) {
   CreateSession(SessionOptions{}, session, ort_model_path, enable_custom_ep, &supported_ops);
 
   const auto& graph = session->GetGraph();
-  const auto& func_mgr = session->GetSessionState().GetFuncMgr();
+  auto& func_mgr = const_cast<SessionState&>(session->GetSessionState()).GetMutableFuncMgr();
 
   int num_replaced_nodes = CountAndValidateAssignedNodes(graph, supported_ops, func_mgr);
 
@@ -317,7 +317,7 @@ TEST(InternalTestingEP, TestOrtModelWithCompileFailure) {
     ASSERT_STATUS_OK(session.Initialize());
 
     int num_replaced_nodes = CountAndValidateAssignedNodes(
-        session.GetGraph(), supported_ops, session.GetSessionState().GetFuncMgr());
+        session.GetGraph(), supported_ops, const_cast<SessionState&>(session.GetSessionState()).GetMutableFuncMgr());
     ASSERT_EQ(num_replaced_nodes, 3);
   }
 
@@ -335,7 +335,7 @@ TEST(InternalTestingEP, TestOrtModelWithCompileFailure) {
     // 2 Conv nodes should be replaced with fused nodes
     const auto& graph = session.GetGraph();
     int num_replaced_nodes = CountAndValidateAssignedNodes(
-        session.GetGraph(), {"Conv"}, session.GetSessionState().GetFuncMgr());
+        session.GetGraph(), {"Conv"}, const_cast<SessionState&>(session.GetSessionState()).GetMutableFuncMgr());
 
     ASSERT_EQ(num_replaced_nodes, 2);
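The internal_testing test changes above drop the const GetFuncMgr() accessor in favor of GetMutableFuncMgr(), casting away constness at the call sites because the session only exposes a const SessionState. The sketch below shows the shape of that pattern; the class and method signatures are simplified assumptions for illustration, not the real onnxruntime declarations:

#include <string>

struct NodeComputeInfoLike {};  // stand-in for onnxruntime::NodeComputeInfo

class FuncManagerLike {  // stand-in for onnxruntime::FuncManager
 public:
  // Hands back a pointer to internal state: the caller receives a
  // pointer-to-const, and the manager itself must be non-const.
  void GetFuncs(const std::string& /*name*/, const NodeComputeInfoLike*& out) {
    out = &info_;
  }

 private:
  NodeComputeInfoLike info_;
};

class SessionStateLike {  // stand-in for onnxruntime::SessionState
 public:
  FuncManagerLike& GetMutableFuncMgr() { return fused_funcs_mgr_; }

 private:
  FuncManagerLike fused_funcs_mgr_;
};

void Inspect(const SessionStateLike& state) {
  // The test only holds a const reference, so it casts away constness to
  // reach the mutable accessor; safe here because the underlying object is
  // not actually const.
  auto& func_mgr = const_cast<SessionStateLike&>(state).GetMutableFuncMgr();
  const NodeComputeInfoLike* compute_func = nullptr;
  func_mgr.GetFuncs("node_name", compute_func);
}

The const_cast is confined to test code; production callers are expected to hold a mutable SessionState to begin with.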
diff --git a/onnxruntime/test/testdata/custom_execution_provider_library/my_allocator.cc b/onnxruntime/test/testdata/custom_execution_provider_library/my_allocator.cc
index ac053eca95..aca03e7a6b 100644
--- a/onnxruntime/test/testdata/custom_execution_provider_library/my_allocator.cc
+++ b/onnxruntime/test/testdata/custom_execution_provider_library/my_allocator.cc
@@ -11,6 +11,7 @@ MyEPAllocator::MyEPAllocator(OrtDevice::DeviceId device_id)
 }
 
 #if defined(_MSC_VER) && !defined(__clang__)
 #pragma warning(disable : 26400)
+#pragma warning(disable : 26409)
 #endif
 void* MyEPAllocator::Alloc(size_t size) {
   void* device_address = new (std::nothrow) uint8_t[size];
diff --git a/onnxruntime/test/util/test_allocator.cc b/onnxruntime/test/util/test_allocator.cc
index 1383894372..002e759bd0 100644
--- a/onnxruntime/test/util/test_allocator.cc
+++ b/onnxruntime/test/util/test_allocator.cc
@@ -17,6 +17,7 @@ MockedOrtAllocator::~MockedOrtAllocator() {
 }
 
 #if defined(_MSC_VER) && !defined(__clang__)
 #pragma warning(disable : 26400)
+#pragma warning(disable : 26409)
 #endif
 void* MockedOrtAllocator::Alloc(size_t size) {
   constexpr size_t extra_len = sizeof(size_t);
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 82c984608a..f9c900295f 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -1620,7 +1620,8 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
                 if not args.disable_ml_ops:
                     run_subprocess([sys.executable, 'onnxruntime_test_python_mlops.py'], cwd=cwd, dll_path=dll_path)
 
-                if args.enable_training and args.use_cuda:
+                # The following test has multiple failures on Windows
+                if args.enable_training and args.use_cuda and not is_windows():
                     # run basic frontend tests
                     run_training_python_frontend_tests(cwd=cwd)
 
diff --git a/tools/ci_build/github/azure-pipelines/nuget/templates/gpu.yml b/tools/ci_build/github/azure-pipelines/nuget/templates/gpu.yml
index 4278039acd..1236a78627 100644
--- a/tools/ci_build/github/azure-pipelines/nuget/templates/gpu.yml
+++ b/tools/ci_build/github/azure-pipelines/nuget/templates/gpu.yml
@@ -2,7 +2,7 @@ parameters:
   DoEsrp: 'false'
 
 jobs:
-- template: ../../templates/win-ci-2019.yml
+- template: win-ci-2019.yml
   parameters:
     AgentPool : 'onnxruntime-gpu-winbuild'
     ArtifactName: 'drop-nuget-dml'
@@ -25,7 +25,7 @@ jobs:
       mkdir $(Build.ArtifactStagingDirectory)\testdata
       copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\custom_op_library.* $(Build.ArtifactStagingDirectory)\testdata
 
-- template: ../../templates/win-ci-2019.yml
+- template: win-ci-2019.yml
   parameters:
     AgentPool : 'onnxruntime-gpu-winbuild'
     ArtifactName: 'drop-win-dml-x86-zip'
@@ -47,7 +47,7 @@ jobs:
      mkdir $(Build.ArtifactStagingDirectory)\testdata
      copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\custom_op_library.* $(Build.ArtifactStagingDirectory)\testdata
 
-- template: ../../templates/win-ci-2019.yml
+- template: win-ci-2019.yml
   parameters:
     AgentPool : 'onnxruntime-gpu-winbuild'
     ArtifactName: 'drop-win-dml-arm64-zip'
@@ -69,7 +69,7 @@ jobs:
      mkdir $(Build.ArtifactStagingDirectory)\testdata
      copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\custom_op_library.* $(Build.ArtifactStagingDirectory)\testdata
 
-- template: ../../templates/win-ci-2019.yml
+- template: win-ci-2019.yml
   parameters:
     AgentPool : 'onnxruntime-gpu-winbuild'
     ArtifactName: 'drop-win-dml-arm-zip'
diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci-2019.yml b/tools/ci_build/github/azure-pipelines/nuget/templates/win-ci-2019.yml
similarity index 97%
rename from tools/ci_build/github/azure-pipelines/templates/win-ci-2019.yml
rename to tools/ci_build/github/azure-pipelines/nuget/templates/win-ci-2019.yml
index 04e21bcd22..976ae5b921 100644
--- a/tools/ci_build/github/azure-pipelines/templates/win-ci-2019.yml
+++ b/tools/ci_build/github/azure-pipelines/nuget/templates/win-ci-2019.yml
@@ -197,14 +197,14 @@ jobs:
          arguments: ${{ parameters.BuildArch }}
          modifyEnvironment: true
       # Esrp signing
-      - template: win-esrp-dll.yml
+      - template: ../../templates/win-esrp-dll.yml
        parameters:
          FolderPath: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)'
          DisplayName: 'ESRP - Sign Native dlls'
          DoEsrp: ${{ parameters.DoEsrp }}
          Pattern: 'onnx_test_runner.exe, onnxruntime_perf_test.exe,*.dll'
       #keep sync with src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
-      - template: win-esrp-dll.yml
+      - template: ../../templates/win-esrp-dll.yml
        parameters:
          FolderPath: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\bin\$(BuildConfig)'
          DisplayName: 'ESRP - Sign C# dlls'
@@ -238,7 +238,7 @@ jobs:
          filename: 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat'
          arguments: ${{ parameters.BuildArch }}
          modifyEnvironment: true
-      - template: win-esrp-dll.yml
+      - template: ../../templates/win-esrp-dll.yml
        parameters:
          FolderPath: '$(Build.SourcesDirectory)\js\node\bin\napi-v3\win32\x64'
          DisplayName: 'ESRP - Sign Node.js binding binaries'
@@ -267,11 +267,11 @@ jobs:
        workingDirectory: '$(Build.ArtifactStagingDirectory)'
 
     - ${{ if eq(parameters['DoCompliance'], 'true') }}:
-      - template: compliance.yml
+      - template: ../../templates/compliance.yml
        parameters :
          msbuildPlatform: ${{ parameters.sln_platform }}
 
-    - template: component-governance-component-detection-steps.yml
+    - template: ../../templates/component-governance-component-detection-steps.yml
      parameters :
        condition : 'succeeded'
diff --git a/tools/ci_build/github/azure-pipelines/templates/win-cpu-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-cpu-ci.yml
index 8189eda2cc..47fc0e388e 100644
--- a/tools/ci_build/github/azure-pipelines/templates/win-cpu-ci.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/win-cpu-ci.yml
@@ -173,6 +173,7 @@ jobs:
 
     - ${{ if eq(parameters.EnablePython, true) }}:
       - powershell: |
+          python3 -m pip uninstall -y ort-nightly-gpu ort-nightly onnxruntime onnxruntime-gpu -qq
          Get-ChildItem -Path dist/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname}
        workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}'
 
@@ -196,26 +197,32 @@ jobs:
            **/*.pdb
            **/*.dll
 
-    - task: PythonScript@0
-      displayName: 'Generate cmake config'
+    #Manually set msBuildCommandline so that we can also set CAExcludePath
+    #build_dir must be a sub folder of $(Build.SourcesDirectory)
+    - task: SDLNativeRules@3
+      displayName: 'Run the PREfast SDL Native Rules for MSBuild'
       inputs:
-        scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
-        arguments: '--config RelWithDebInfo --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_shared_lib --enable_onnx_tests ${{ parameters.additionalBuildFlags }} --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON'
-      workingDirectory: '$(Build.BinariesDirectory)'
+        msBuildArchitecture: amd64
+        setupCommandlines: 'python $(Build.SourcesDirectory)\tools\ci_build\build.py --config RelWithDebInfo --build_dir $(Build.SourcesDirectory)\b --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_shared_lib --enable_onnx_tests ${{ parameters.additionalBuildFlags }} --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON'
+        msBuildCommandline: '"C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin\amd64\msbuild.exe" "$(Build.SourcesDirectory)\b\RelWithDebInfo\onnxruntime.sln" /p:RunCodeAnalysis=true /p:platform=${{ parameters.msbuildPlatform }} /p:configuration=RelWithDebInfo /p:VisualStudioVersion="16.0" /m /p:PreferredToolArchitecture=x64'
+        excludedPaths: '$(Build.SourcesDirectory)\b#$(Build.SourcesDirectory)\cmake#C:\program files (x86)'
+        rulesetName: Custom
+        customRuleset: $(Build.SourcesDirectory)\cmake\Sdl.ruleset
 
-    - task: VSBuild@1
-      displayName: 'Build'
+    - task: SdtReport@2
+      displayName: 'Create Security Analysis Report'
       inputs:
-        solution: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\onnxruntime.sln'
-        platform: ${{ parameters.msbuildPlatform }}
-        configuration: ${{ parameters.BuildConfig }}
-        msbuildArgs: /m /p:CAExcludePath="$(Build.BinariesDirectory);$(Build.SourcesDirectory)\cmake;C:\program files (x86)" /p:PreferredToolArchitecture=x64
-        msbuildArchitecture: ${{ parameters.buildArch }}
-        maximumCpuCount: true
-        logProjectEvents: false
-        workingFolder: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}'
-        createLogFile: true
+        SDLNativeRules: true
+
+    - task: PublishSecurityAnalysisLogs@3
+      displayName: 'Publish Security Analysis Logs'
+      continueOnError: true
+
+    - task: PostAnalysis@2
+      displayName: 'Guardian Break'
+      inputs:
+        GdnBreakGdnToolSDLNativeRulesSeverity: Warning
+        GdnBreakGdnToolSDLNativeRules: true
 
     - ${{ if eq(parameters.RunOnnxRuntimeTests, true) }}:
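In the my_allocator.cc and test_allocator.cc hunks earlier in this patch, warning C26409 ("avoid calling new and delete explicitly") is suppressed rather than fixed: an OrtAllocator must hand raw memory across a C ABI, so returning a smart pointer is not an option there. A hedged sketch of the idiom follows; it is simplified and does not reproduce the real MyEPAllocator/MockedOrtAllocator surrounding code:

#include <cstddef>
#include <cstdint>
#include <new>

#if defined(_MSC_VER) && !defined(__clang__)
// Scoped to this translation unit: an allocator is one of the few places
// where raw new/delete is the intended design, so the rule is disabled
// here instead of rewriting the code.
#pragma warning(disable : 26409)
#endif

// new (std::nothrow) returns nullptr on failure instead of throwing,
// matching the OrtAllocator contract of signalling OOM with a null pointer.
void* AllocExample(size_t size) {
  return new (std::nothrow) uint8_t[size];
}

void FreeExample(void* p) {
  delete[] static_cast<uint8_t*>(p);
}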
diff --git a/tools/ci_build/github/azure-pipelines/templates/win-gpu-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-gpu-ci.yml
index e919400234..e9167d3684 100644
--- a/tools/ci_build/github/azure-pipelines/templates/win-gpu-ci.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/win-gpu-ci.yml
@@ -20,6 +20,10 @@ parameters:
 - name: isX86
   type: boolean
   default: false
+
+- name: isTraining
+  type: boolean
+  default: false
 
 - name: EnablePython
   type: boolean
@@ -109,6 +113,13 @@ jobs:
        workingDirectory: '$(Build.SourcesDirectory)\cmake\external\onnx'
        displayName: 'Install ONNX'
 
+    - ${{ if eq(parameters.isTraining, true) }}:
+      - script: |
+          python -m pip install -r $(Build.SourcesDirectory)\tools\ci_build\github\linux\docker\scripts\training\ortmodule\stage1\requirements_torch1.9.0_cu11.1.txt
+          python -m pip install -r $(Build.SourcesDirectory)\tools\ci_build\github\linux\docker\scripts\training\requirements.txt
+        workingDirectory: '$(Build.BinariesDirectory)'
+        displayName: 'Install python modules'
+
     - task: NuGetToolInstaller@0
       displayName: Use Nuget 5.7.0
       inputs:
@@ -182,6 +193,7 @@ jobs:
 
     - ${{ if eq(parameters.EnablePython, true) }}:
       - powershell: |
+          python3 -m pip uninstall -y ort-nightly-gpu ort-nightly onnxruntime onnxruntime-gpu -qq
          Get-ChildItem -Path dist/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname}
        workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}'
 
@@ -205,26 +217,35 @@ jobs:
            **/*.pdb
            **/*.dll
 
-    - task: PythonScript@0
-      displayName: 'Generate cmake config'
-      inputs:
-        scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
-        arguments: '--config Debug --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_shared_lib --enable_onnx_tests ${{ parameters.additionalBuildFlags }} onnxruntime_ENABLE_STATIC_ANALYSIS=ON'
-      workingDirectory: '$(Build.BinariesDirectory)'
-
-    - task: VSBuild@1
-      displayName: 'Build'
+    #Manually set msBuildCommandline so that we can also set CAExcludePath
+    #build_dir must be a sub folder of $(Build.SourcesDirectory)
+    #TODO: move this step to a CPU-only machine to save GPU resources.
+    - task: SDLNativeRules@3
+      displayName: 'Run the PREfast SDL Native Rules for MSBuild'
       inputs:
-        solution: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\onnxruntime.sln'
-        platform: ${{ parameters.msbuildPlatform }}
-        configuration: ${{ parameters.BuildConfig }}
-        msbuildArgs: /m /p:CAExcludePath="$(Build.BinariesDirectory);$(Build.SourcesDirectory)\cmake;C:\program files (x86)" /p:PreferredToolArchitecture=x64
-        msbuildArchitecture: ${{ parameters.buildArch }}
-        maximumCpuCount: true
-        logProjectEvents: false
-        workingFolder: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}'
-        createLogFile: true
+        msBuildArchitecture: amd64
+        setupCommandlines: 'python $(Build.SourcesDirectory)\tools\ci_build\build.py --config RelWithDebInfo --build_dir $(Build.SourcesDirectory)\b --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_shared_lib --enable_onnx_tests ${{ parameters.additionalBuildFlags }} onnxruntime_ENABLE_STATIC_ANALYSIS=ON'
+        msBuildCommandline: '"C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin\amd64\msbuild.exe" "$(Build.SourcesDirectory)\b\RelWithDebInfo\onnxruntime.sln" /p:RunCodeAnalysis=true /p:platform=${{ parameters.msbuildPlatform }} /p:configuration=RelWithDebInfo /p:VisualStudioVersion="16.0" /m /p:PreferredToolArchitecture=x64'
+        excludedPaths: '$(Build.SourcesDirectory)\b#$(Build.SourcesDirectory)\cmake#C:\program files (x86)'
+        rulesetName: Custom
+        customRuleset: $(Build.SourcesDirectory)\cmake\Sdl.ruleset
+        publishXML: true
+
+    - task: SdtReport@2
+      displayName: 'Create Security Analysis Report'
+      inputs:
+        SDLNativeRules: true
+
+    - task: PublishSecurityAnalysisLogs@3
+      displayName: 'Publish Security Analysis Logs'
+      continueOnError: true
+
+    - task: PostAnalysis@2
+      displayName: 'Guardian Break v2'
+      inputs:
+        GdnBreakGdnToolSDLNativeRulesSeverity: Warning
+        GdnBreakGdnToolSDLNativeRules: true
 
     - ${{ if eq(parameters.RunOnnxRuntimeTests, true) }}:
diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-ci-pipeline.yml
index a2d9dc3048..3fd5f46721 100644
--- a/tools/ci_build/github/azure-pipelines/win-gpu-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/win-gpu-ci-pipeline.yml
@@ -5,229 +5,54 @@ parameters:
   default: true
 
 stages:
-- stage: inference
+- stage: cuda
   dependsOn: []
   jobs:
-  - job: 'build'
-    pool: 'Win-GPU-2019'
-    strategy:
-      matrix:
-        cuda:
-          additionalBuildFlags: --build_java --build_nodejs --use_cuda --cuda_version=11.4 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4" --cudnn_home="C:\local\cudnn-11.4-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=52 --gen_doc validate --enable_cuda_profiling
-          EnvSetupScript: setup_env_cuda_11.bat
-          ORT_EP_NAME: CUDA
-        dml:
-          additionalBuildFlags: --use_dml --cmake_extra_defines CMAKE_SYSTEM_VERSION=10.0.18362.0 --enable_wcos --use_winml
-          EnvSetupScript: setup_env.bat
-          ORT_EP_NAME: DML
-    variables:
-      OrtPackageId: 'Microsoft.ML.OnnxRuntime.Gpu'
-      MsbuildArguments: '-maxcpucount'
-      TESTONGPU: 'ON'
-      OnnxRuntimeBuildDirectory: '$(Build.BinariesDirectory)'
-      DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
-      setVcvars: true
+  - template: templates/win-gpu-ci.yml
+    parameters:
       BuildConfig: 'RelWithDebInfo'
-      UseOmp: ''
+      EnvSetupScript: setup_env_cuda_11.bat
       buildArch: x64
+      additionalBuildFlags: --enable_pybind --build_java --build_nodejs --use_cuda --cuda_version=11.4 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4" --cudnn_home="C:\local\cudnn-11.4-windows-x64-v8.2.2.26\cuda" --gen_doc validate --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=52
       msbuildPlatform: x64
       isX86: false
-      ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
-      DocUpdateNeeded: false
-    timeoutInMinutes: 180
-    workspace:
-      clean: all
-    steps:
-    - task: UsePythonVersion@0
-      inputs:
-        versionSpec: '3.7'
-        addToPath: true
-        architecture: $(buildArch)
+      job_name_suffix: x64_RelWithDebInfo
+      RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
+      RunStaticCodeAnalysis: false
+      ORT_EP_NAME: CUDA
+      DocUpdateNeeded: true
 
-    - task: NodeTool@0
-      inputs:
-        versionSpec: '12.x'
-        force32bit: $(isX86)
-
-    - task: JavaToolInstaller@0
-      #Our build machine doesn't have java x86
-      condition: and(succeeded(), eq(variables['buildArch'], 'x64'))
-      inputs:
-        versionSpec: '11'
-        jdkArchitectureOption: $(buildArch)
-        jdkSourceOption: 'PreInstalled'
-
-    - task: BatchScript@1
-      displayName: 'setup env'
-      inputs:
-        filename: '$(Build.SourcesDirectory)\tools\ci_build\github\windows\$(EnvSetupScript)'
-        modifyEnvironment: true
-        workingFolder: '$(Build.BinariesDirectory)'
-
-    - script: |
-        set ORT_DOXY_SRC=$(Build.SourcesDirectory)
-        set ORT_DOXY_OUT=$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)
-        mkdir %ORT_DOXY_SRC%
-        mkdir %ORT_DOXY_OUT%
-        "C:\Program Files\doxygen\bin\doxygen.exe" $(Build.SourcesDirectory)\tools\ci_build\github\Doxyfile_csharp.cfg
-
-      workingDirectory: '$(Build.SourcesDirectory)'
-      displayName: 'API Documentation Check and generate'
-
-    - script: |
-        python -m pip install -q setuptools wheel numpy
-      workingDirectory: '$(Build.BinariesDirectory)'
-      displayName: 'Install python modules'
-
-    - powershell: |
-        $Env:USE_MSVC_STATIC_RUNTIME=1
-        $Env:ONNX_ML=1
-        $Env:CMAKE_ARGS="-DONNX_USE_PROTOBUF_SHARED_LIBS=OFF -DProtobuf_USE_STATIC_LIBS=ON -DONNX_USE_LITE_PROTO=ON -DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=$(buildArch)-windows-static"
-        python setup.py bdist_wheel
-        python -m pip uninstall -y onnx -qq
-        Get-ChildItem -Path dist/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname}
-      workingDirectory: '$(Build.SourcesDirectory)\cmake\external\onnx'
-      displayName: 'Install ONNX'
-
-    - task: NuGetToolInstaller@0
-      displayName: Use Nuget 5.7.0
-      inputs:
-        versionSpec: 5.7.0
-
-    - task: NuGetCommand@2
-      displayName: 'NuGet restore'
-      inputs:
-        command: 'restore'
-        feedsToUse: 'config'
-        restoreSolution: '$(Build.SourcesDirectory)\packages.config'
-        nugetConfigPath: '$(Build.SourcesDirectory)\NuGet.config'
-        restoreDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
-
-    - task: PythonScript@0
-      displayName: 'Generate cmake config'
-      inputs:
-        scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
-        arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) $(UseOmp) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --enable_onnx_tests $(additionalBuildFlags)'
-      workingDirectory: '$(Build.BinariesDirectory)'
-
-    - task: VSBuild@1
-      displayName: 'Build'
-      inputs:
-        solution: '$(Build.BinariesDirectory)\$(BuildConfig)\onnxruntime.sln'
-        platform: $(msbuildPlatform)
-        configuration: $(BuildConfig)
-        msbuildArgs: $(MsbuildArguments)
-        msbuildArchitecture: $(buildArch)
-        maximumCpuCount: true
-        logProjectEvents: false
-        workingFolder: '$(Build.BinariesDirectory)\$(BuildConfig)'
-        createLogFile: true
-    - task: PythonScript@0
-      displayName: 'Build wheel'
-      inputs:
-        scriptPath: '$(Build.SourcesDirectory)\setup.py'
-        arguments: 'bdist_wheel'
-      workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)'
-
-    - task: MSBuild@1
-      displayName: 'Restore NuGet Packages'
-      inputs:
-        solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
-        platform: 'Any CPU'
-        configuration: '$(BuildConfig)'
-        msbuildArguments: '-t:restore -p:OrtPackageId=$(OrtPackageId)'
-        workingDirectory: '$(Build.SourcesDirectory)\csharp'
-
-    - task: MSBuild@1
-      displayName: 'Build C#'
-      inputs:
-        solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln'
-        platform: 'Any CPU'
-        configuration: '$(BuildConfig)'
-        msbuildArguments: '-p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:DefineConstants=USE_$(ORT_EP_NAME)'
-        workingDirectory: '$(Build.SourcesDirectory)\csharp'
-
-    - task: DotNetCoreCLI@2
-      displayName: 'Test C#'
-      condition: and(and(succeeded(), eq(variables['BuildConfig'], 'RelWithDebInfo')),eq('${{ parameters.RunOnnxRuntimeTests}}', true))
-      inputs:
-        command: test
-        projects: '$(Build.SourcesDirectory)\csharp\test\Microsoft.ML.OnnxRuntime.Tests.NetCoreApp\Microsoft.ML.OnnxRuntime.Tests.NetCoreApp.csproj'
-        configuration: '$(BuildConfig)'
-        arguments: '--configuration $(BuildConfig) -p:Platform="Any CPU" -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=$(OrtPackageId) -p:DefineConstants=USE_$(ORT_EP_NAME)'
-        workingDirectory: '$(Build.SourcesDirectory)\csharp'
-
-    - powershell: |
-        Get-ChildItem -Path dist/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname}
-        python $(Build.SourcesDirectory)\tools\ci_build\build.py --config $(BuildConfig) --build_dir $(Build.BinariesDirectory) $(UseOmp) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 16 2019" --build_wheel --enable_onnx_tests $(additionalBuildFlags)
-      workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)'
-      condition: and(succeeded(), eq('${{ parameters.RunOnnxRuntimeTests}}', true))
-      displayName: 'Run tests'
-
-    # if the validation from --gen_doc failed it sets a variable so we can publish the latest version of the docs
-    # as an artifact, allowing a developer to download this and replace the current version instead of having to build
-    # and generate the docs locally themselves. handle each of the two md files separately - simpler than copying
-    # them to another location and publishing from there in a single task.
-    - task: PublishBuildArtifacts@1
-      condition: and(failed(), eq(variables['DocUpdateNeeded'], 'true'))
-      inputs:
-        pathtoPublish: '$(Build.SourcesDirectory)/docs/OperatorKernels.md'
-        artifactName: 'OperatorKernels.md'
-
-    - task: PublishBuildArtifacts@1
-      condition: and(failed(), eq(variables['DocUpdateNeeded'], 'true'))
-      inputs:
-        pathtoPublish: '$(Build.SourcesDirectory)/docs/ContribOperators.md'
-        artifactName: 'ContribOperators.md'
-
-    - task: PublishSecurityAnalysisLogs@3
-      displayName: 'Publish Security Analysis Logs'
-      condition: and(succeeded(), eq(variables['BuildConfig'], 'RelWithDebInfo'))
-      inputs:
-        ArtifactName: CodeAnalysisLogs
-
-    - task: PublishTestResults@2
-      displayName: 'Publish unit test results'
-      inputs:
-        testResultsFiles: '**/*.results.xml'
-        searchFolder: '$(Build.BinariesDirectory)'
-        testRunTitle: 'Unit Test Run'
-      condition: succeededOrFailed()
-
-    - template: templates/component-governance-component-detection-steps.yml
-      parameters :
-        condition : 'succeeded'
-
 - stage: training
   dependsOn: []
   jobs:
-  - template: templates/win-ci-2019.yml
-    parameters:
-      AgentPool : 'Win-GPU-2019'
-      JobName: 'Win_GPU_Training'
-      # TODO fix test failures and remove --skip_onnx_tests
-      BuildCommand: >-
-        --build_dir $(Build.BinariesDirectory)
-        --build_shared_lib
-        --cmake_generator "Visual Studio 16 2019"
-        --enable_onnx_tests
-        --enable_training
-        --skip_onnx_tests
-        --skip_submodule_sync
-        --use_cuda
-        --cuda_version 10.2
-        --cuda_home "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2"
-        --cudnn_home "C:\local\cudnn-10.2-windows10-x64-v8.0.3.33\cuda"
-        --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52"
-      BuildArch: 'x64'
-      EnvSetupScript: 'setup_env_cuda.bat'
-      sln_platform: 'x64'
-      CudaVersion: '10.2'
-      OrtPackageId: 'Microsoft.ML.OnnxRuntime.Gpu'
-      BuildConfigurations: ['RelWithDebInfo']
-      # Enable unreleased onnx opsets in CI builds
-      # This facilitates testing the implementation for the new opsets
-      AllowReleasedOpsetOnly: '0'
-      DoCompliance: 'true'
\ No newline at end of file
+  - template: templates/win-gpu-ci.yml
+    parameters:
+      BuildConfig: 'RelWithDebInfo'
+      EnvSetupScript: setup_env_cuda_11.bat
+      buildArch: x64
+      additionalBuildFlags: --enable_pybind --enable_training --use_cuda --cuda_version=11.4 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4" --cudnn_home="C:\local\cudnn-11.4-windows-x64-v8.2.2.26\cuda" --skip_onnx_tests --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=52
+      msbuildPlatform: x64
+      isX86: false
+      job_name_suffix: x64_RelWithDebInfo
+      RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
+      RunStaticCodeAnalysis: false
+      ORT_EP_NAME: CUDA
+      isTraining: true
+
+- stage: dml
+  dependsOn: []
+  jobs:
+  - template: templates/win-gpu-ci.yml
+    parameters:
+      BuildConfig: 'RelWithDebInfo'
+      EnvSetupScript: setup_env.bat
+      buildArch: x64
+      additionalBuildFlags: --enable_pybind --use_dml --enable_wcos --use_winml --cmake_extra_defines CMAKE_SYSTEM_VERSION=10.0.18362.0
+      msbuildPlatform: x64
+      isX86: false
+      job_name_suffix: x64_RelWithDebInfo
+      RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
+      RunStaticCodeAnalysis: false
+      ORT_EP_NAME: DML