Update ruff and clang-format versions (#21479)

ruff -> 0.5.4
clang-format -> 18
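Most of the churn below is mechanical. On the C++ side, the clang-format 18 style now puts a space between the closing parenthesis and an empty {} body and re-aligns trailing end-of-scope comments; on the Python side, ruff 0.5.4 fixes account for changes such as iterating a file object directly rather than calling readlines() and replacing printf-style formatting with f-strings. A minimal before/after sketch of the C++ change (illustrative only, not code from this PR):

// Illustration of the clang-format 18 change applied throughout this diff; not from the PR itself.
// Before: the empty body is attached to the signature.
//   explicit Widget(int v) noexcept : value_(v){};
//   virtual ~Widget(){};
// After: a space separates ")" and the empty body.
struct Widget {
  explicit Widget(int v) noexcept : value_(v) {};  // ") {};" instead of "){};"
  virtual ~Widget() {};
  int value_ = 0;
};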
This commit is contained in:
Justin Chu 2024-07-24 11:50:11 -07:00 committed by GitHub
Parent eb9b377306
Commit c203d89958
No known key found for this signature
GPG key ID: B5690EEEBB952194
152 changed files with 3725 additions and 3786 deletions

View file

@@ -73,7 +73,7 @@ def add_github_dep(name, parsed_url):
return
# Make a REST call to convert to tag to a git commit
url = f"https://api.github.com/repos/{org_name}/{repo_name}/git/refs/tags/{tag}"
print("requesting %s ..." % url)
print("requesting {url} ...")
res = requests.get(url, auth=(args.username, args.token))
response_json = res.json()
tag_object = response_json["object"]

View file

@@ -19,7 +19,7 @@ def check_all_delegates_have_unmanaged_function_pointer_attribute(file: pathlib.
line_num = 0
with open(str(file.resolve(strict=True))) as f:
prev_line = ""
for line in f.readlines():
for line in f:
line_num += 1
# strip so it's easier to deal with commented out lines.

View file

@@ -17,13 +17,13 @@ namespace onnxruntime {
class NotImplementedException : public std::logic_error {
public:
explicit NotImplementedException(const char* _Message = "Function not yet implemented") noexcept : std::logic_error(_Message){};
explicit NotImplementedException(const std::string& _Message = "Function not yet implemented") noexcept : std::logic_error(_Message){};
explicit NotImplementedException(const char* _Message = "Function not yet implemented") noexcept : std::logic_error(_Message) {};
explicit NotImplementedException(const std::string& _Message = "Function not yet implemented") noexcept : std::logic_error(_Message) {};
};
class TypeMismatchException : public std::logic_error {
public:
TypeMismatchException() noexcept : logic_error("Type mismatch"){};
TypeMismatchException() noexcept : logic_error("Type mismatch") {};
};
class OnnxRuntimeException : public std::exception {

View file

@@ -32,7 +32,7 @@ class Stream {
return {};
};
// block the host thread until all the tasks in the stream finished.
virtual void Flush(){};
virtual void Flush() {};
// The framework may reuse the stream instance for multiple iterations.
// This is the API that provide a chance to let the device stream cleanup
// resource at the end of a iteration.

View file

@@ -76,6 +76,6 @@ class Barrier {
// Multiple threads can wait on the same Notification object,
// but only one caller must call Notify() on the object.
struct Notification : Barrier {
Notification() : Barrier(1){};
Notification() : Barrier(1) {};
};
} // namespace onnxruntime
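The comment above spells out the contract: any number of threads may wait, exactly one thread notifies. A minimal usage sketch, assuming the Wait()/Notify() members implied by that comment and an assumed header path (neither is part of this diff):

// Sketch only: Wait()/Notify() member names and the include path are assumptions.
// #include "core/platform/Barrier.h"
#include <thread>

void WaitForSingleProducer() {
  onnxruntime::Notification done;                // a Barrier initialized with a count of 1
  std::thread producer([&] {
    // ... produce the shared result ...
    done.Notify();                               // exactly one thread calls Notify()
  });
  std::thread consumer_a([&] { done.Wait(); });  // multiple threads may wait
  std::thread consumer_b([&] { done.Wait(); });
  producer.join();
  consumer_a.join();
  consumer_b.join();
}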

View file

@@ -219,18 +219,18 @@ class ThreadPoolProfiler {
WAIT_REVOKE,
MAX_EVENT
};
ThreadPoolProfiler(int, const CHAR_TYPE*){};
ThreadPoolProfiler(int, const CHAR_TYPE*) {};
~ThreadPoolProfiler() = default;
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ThreadPoolProfiler);
void Start(){};
void Start() {};
std::string Stop() { return "not available for minimal build"; }
void LogStart(){};
void LogStart() {};
void LogEnd(ThreadPoolEvent){};
void LogEndAndStart(ThreadPoolEvent){};
void LogStartAndCoreAndBlock(std::ptrdiff_t){};
void LogCoreAndBlock(std::ptrdiff_t){};
void LogThreadId(int){};
void LogRun(int){};
void LogThreadId(int) {};
void LogRun(int) {};
std::string DumpChildThreadStat() { return {}; }
};
#else

View file

@@ -6,5 +6,5 @@
// CustomOpContext defines an interface allowing a custom op to access ep-specific resources.
struct CustomOpContext {
CustomOpContext() = default;
virtual ~CustomOpContext(){};
virtual ~CustomOpContext() {};
};
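CustomOpContext itself has no members beyond the virtual destructor; the idea is that an execution provider derives from it to expose its own resources to custom ops. A hypothetical sketch (the derived type, its fields, and the stream handle are assumptions, not part of this diff):

// Hypothetical EP-specific context; every name below is illustrative.
struct MyEpCustomOpContext : CustomOpContext {
  void* device_stream = nullptr;  // e.g. the EP's native stream handle
  int device_id = 0;
  ~MyEpCustomOpContext() override = default;
};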

View file

@@ -24,9 +24,9 @@ namespace Experimental {
struct Session : Ort::Session {
Session(Env& env, std::basic_string<ORTCHAR_T>& model_path, SessionOptions& options)
: Ort::Session(env, model_path.data(), options){};
: Ort::Session(env, model_path.data(), options) {};
Session(Env& env, void* model_data, size_t model_data_length, SessionOptions& options)
: Ort::Session(env, model_data, model_data_length, options){};
: Ort::Session(env, model_data, model_data_length, options) {};
// overloaded Run() with sensible defaults
std::vector<Ort::Value> Run(const std::vector<std::string>& input_names,
@@ -52,7 +52,7 @@ struct Session : Ort::Session {
struct Value : Ort::Value {
Value(OrtValue* p)
: Ort::Value(p){};
: Ort::Value(p) {};
template <typename T>
static Ort::Value CreateTensor(T* p_data, size_t p_data_element_count, const std::vector<int64_t>& shape);

View file

@@ -2175,8 +2175,8 @@ struct Op : detail::Base<OrtOp> {
/// </summary>
struct ShapeInferContext {
struct SymbolicInteger {
SymbolicInteger(int64_t i) : i_(i), is_int_(true){};
SymbolicInteger(const char* s) : s_(s), is_int_(false){};
SymbolicInteger(int64_t i) : i_(i), is_int_(true) {};
SymbolicInteger(const char* s) : s_(s), is_int_(false) {};
SymbolicInteger(const SymbolicInteger&) = default;
SymbolicInteger(SymbolicInteger&&) = default;

View file

@@ -29,7 +29,7 @@ class ArgBase {
ArgBase(OrtKernelContext* ctx,
size_t indice,
bool is_input) : ctx_(ctx), indice_(indice), is_input_(is_input) {}
virtual ~ArgBase(){};
virtual ~ArgBase() {};
protected:
struct KernelContext ctx_;

View file

@@ -267,83 +267,83 @@ Status RegisterQuantizationKernels(KernelRegistry& kernel_registry) {
Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) {
static const BuildKernelCreateInfoFn function_table[] = {
BuildKernelCreateInfo<void>, // default entry to avoid the list become empty after ops-reducing
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SampleOp)>,
BuildKernelCreateInfo<void>, // default entry to avoid the list become empty after ops-reducing
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SampleOp)>,
// add more kernels here
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GridSample)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, Attention)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, BeamSearch)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, WhisperBeamSearch)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, EmbedLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ExpandDims)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedConv)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedGemm)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GreedySearch)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, MultiHeadAttention)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GroupQueryAttention)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SparseAttention)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, RotaryEmbedding)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, Sampling)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, AttnLSTM)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Tokenizer)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Range)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, WordConvEmbedding)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, GatherND)>,
// add more kernels here
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GridSample)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, Attention)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, BeamSearch)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, WhisperBeamSearch)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, EmbedLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ExpandDims)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedConv)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedGemm)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GreedySearch)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, MultiHeadAttention)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GroupQueryAttention)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SparseAttention)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, RotaryEmbedding)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, Sampling)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, AttnLSTM)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, string, Tokenizer)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Range)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, WordConvEmbedding)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, GatherND)>,
#if !defined(DISABLE_SPARSE_TENSORS)
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, SparseToDenseMatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, SparseToDenseMatMul)>,
#endif
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MurmurHash3)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, TransposeMatMul)>, // backward compatibility
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, FusedMatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulNBits)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulBnb4)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MurmurHash3)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, TransposeMatMul)>, // backward compatibility
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, FusedMatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulNBits)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulBnb4)>,
#ifndef ORT_MINIMAL_BUILD
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulFpQ4)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulFpQ4)>,
#endif
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, MaxpoolWithMask)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Pad)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Unique)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ConvTransposeWithDynamicPads)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, CropAndResize)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, CDist)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, CDist)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, BiasGelu)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Gelu)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, FastGelu)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, NGramRepeatBlock)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, BifurcationDetector)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QuickGelu)>,
// These ops were experimental ops in onnx domain which have been removed now. We add them here as
// contrib ops to maintain backward compatibility
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, Affine)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, Crop)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, DynamicSlice)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, ImageScaler)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 8, MeanVarianceNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, ParametricSoftplus)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, ScaledTanh)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 9, ThresholdedRelu)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, Scale)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 16, float, LayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 16, double, LayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, float, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, double, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SkipLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, SkipLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SkipSimplifiedLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, SkipSimplifiedLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Inverse)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Trilu)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, MaxpoolWithMask)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Pad)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Unique)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ConvTransposeWithDynamicPads)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, CropAndResize)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, CDist)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, CDist)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, BiasGelu)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Gelu)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, FastGelu)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, NGramRepeatBlock)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, BifurcationDetector)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QuickGelu)>,
// These ops were experimental ops in onnx domain which have been removed now. We add them here as
// contrib ops to maintain backward compatibility
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, Affine)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, Crop)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, DynamicSlice)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, ImageScaler)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 8, MeanVarianceNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, ParametricSoftplus)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, ScaledTanh)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 9, ThresholdedRelu)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, Scale)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 16, float, LayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 16, double, LayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, float, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, double, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SkipLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, SkipLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SkipSimplifiedLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, double, SkipSimplifiedLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Inverse)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, Trilu)>,
#ifdef ENABLE_ATEN
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kPytorchAtenDomain, 1, ATen)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kPytorchAtenDomain, 1, ATen)>,
#endif
#ifdef ENABLE_TRAINING_OPS
// Should remove the shrunken_gather include from ENABLE_TRAINING_OPS once 1). compute optimizer is enabled for inference or
// 2). this is needed by inference for other purpose.
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, ShrunkenGather)>,
// Should remove the shrunken_gather include from ENABLE_TRAINING_OPS once 1). compute optimizer is enabled for inference or
// 2). this is needed by inference for other purpose.
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, ShrunkenGather)>,
#endif
};

View file

@@ -173,7 +173,7 @@ void CropAndResizeForward(const TensorShape& output_shape,
}
}
} // for pw
} // for ph
} // for ph
},
0); // for n
}

View file

@@ -17,7 +17,7 @@ struct Alibi {
const int max_seqlen_k, max_seqlen_q;
__forceinline__ __device__ Alibi(const float alibi_slope, const int max_seqlen_k, const int max_seqlen_q)
: alibi_slope(alibi_slope), max_seqlen_k(max_seqlen_k), max_seqlen_q(max_seqlen_q){};
: alibi_slope(alibi_slope), max_seqlen_k(max_seqlen_k), max_seqlen_q(max_seqlen_q) {};
template <typename Engine, typename Layout>
__forceinline__ __device__ void apply_alibi(Tensor<Engine, Layout>& tensor,

View file

@@ -116,7 +116,7 @@ struct Mask {
__forceinline__ __device__ Mask(const int max_seqlen_k, const int max_seqlen_q,
const int window_size_left, const int window_size_right,
const float alibi_slope = 0.f)
: max_seqlen_k(max_seqlen_k), max_seqlen_q(max_seqlen_q), window_size_left(window_size_left), window_size_right(window_size_right), alibi_slope(!Has_alibi ? 0.0 : alibi_slope){};
: max_seqlen_k(max_seqlen_k), max_seqlen_q(max_seqlen_q), window_size_left(window_size_left), window_size_right(window_size_right), alibi_slope(!Has_alibi ? 0.0 : alibi_slope) {};
// Causal_mask: whether this particular iteration needs causal masking
template <bool Causal_mask = false, bool Is_even_MN = true, typename Engine, typename Layout>

View file

@@ -121,7 +121,7 @@ struct Softmax {
using TensorT = decltype(make_tensor<float>(Shape<Int<kNRows>>{}));
TensorT row_max, row_sum;
__forceinline__ __device__ Softmax(){};
__forceinline__ __device__ Softmax() {};
template <bool Is_first, bool Check_inf = false, typename Tensor0, typename Tensor1>
__forceinline__ __device__ void softmax_rescale_o(Tensor0& acc_s, Tensor1& acc_o, float softmax_scale_log2) {

View file

@@ -231,206 +231,206 @@ KernelCreateInfo BuildKernelCreateInfo<void>() {
Status RegisterCudaContribKernels(KernelRegistry& kernel_registry) {
static const BuildKernelCreateInfoFn function_table[] = {
BuildKernelCreateInfo<void>, // default entry to avoid the list become empty after ops-reducing
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, GridSample)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, FastGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, FastGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, Gelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, double, Gelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, Gelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, BiasGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, BiasSplitGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, BiasSplitGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, BiasAdd)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, BiasAdd)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, QuickGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, double, QuickGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, QuickGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, TransposeMatMul)>, // backward compatibility
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, double, TransposeMatMul)>, // backward compatibility
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, TransposeMatMul)>, // backward compatibility
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, FusedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, double, FusedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, FusedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, RelativePositionBias)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, RelativePositionBias)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, GatedRelativePositionBias)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, GatedRelativePositionBias)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, RemovePadding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, RemovePadding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, RestorePadding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, RestorePadding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, Rfft)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, double, Rfft)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, Rfft)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, Irfft)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, double, Irfft)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, Irfft)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, ComplexMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, ComplexMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, ComplexMulConj)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, ComplexMulConj)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, NGramRepeatBlock)>,
BuildKernelCreateInfo<void>, // default entry to avoid the list become empty after ops-reducing
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, GridSample)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, FastGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, FastGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, Gelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, double, Gelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, Gelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, BiasGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, BiasSplitGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, BiasSplitGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, BiasAdd)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, BiasAdd)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, QuickGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, double, QuickGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, QuickGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, TransposeMatMul)>, // backward compatibility
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, double, TransposeMatMul)>, // backward compatibility
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, TransposeMatMul)>, // backward compatibility
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, FusedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, double, FusedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, FusedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, RelativePositionBias)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, RelativePositionBias)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, GatedRelativePositionBias)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, GatedRelativePositionBias)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, RemovePadding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, RemovePadding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, RestorePadding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, RestorePadding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, Rfft)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, double, Rfft)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, Rfft)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, Irfft)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, double, Irfft)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, Irfft)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, ComplexMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, ComplexMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, ComplexMulConj)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, ComplexMulConj)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, NGramRepeatBlock)>,
// These ops were experimental ops in onnx domain which have been removed now. We add them here as
// contrib ops to maintain backward compatibility
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float, Affine)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double, Affine)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16, Affine)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, Attention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, Attention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, PackedAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, PackedAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, PackedMultiHeadAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, PackedMultiHeadAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, BeamSearch)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, WhisperBeamSearch)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, ConvTransposeWithDynamicPads)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float, Crop)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double, Crop)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16, Crop)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, MoE)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, MoE)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QMoE)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, MultiHeadAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, MultiHeadAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, GroupQueryAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, GroupQueryAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DecoderAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DecoderAttention)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, int32_t, DynamicSlice)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, int64_t, DynamicSlice)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, EmbedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, EmbedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, GreedySearch)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, GroupNorm)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, NhwcConv)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, NhwcConv)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float, ImageScaler)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double, ImageScaler)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16, ImageScaler)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, LongformerAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, LongformerAttention)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float, ParametricSoftplus)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double, ParametricSoftplus)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16, ParametricSoftplus)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, RotaryEmbedding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, RotaryEmbedding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, RotaryEmbedding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, GemmaRotaryEmbedding)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, Sampling)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float, ScaledTanh)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double, ScaledTanh)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16, ScaledTanh)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, SkipGroupNorm)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, SkipLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, SkipLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, SkipSimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, SkipSimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float, ThresholdedRelu)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double, ThresholdedRelu)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16, ThresholdedRelu)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_VERSIONED_TYPED_CLASS_NAME(1, 16, float_float_float, LayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_VERSIONED_TYPED_CLASS_NAME(1, 16, double_double_double, LayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_VERSIONED_TYPED_CLASS_NAME(1, 16, MLFloat16_float_MLFloat16, LayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_VERSIONED_TYPED_CLASS_NAME(1, 16, float_float_MLFloat16, LayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_VERSIONED_TYPED_CLASS_NAME(1, 16, MLFloat16_float_float, LayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_VERSIONED_TYPED_CLASS_NAME(1, 16, BFloat16_float_BFloat16, LayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float_float_float, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double_double_double, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16_float_MLFloat16, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float_float_MLFloat16, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16_float_float, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, BFloat16_float_BFloat16, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, Inverse)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, MatMulNBits)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, MatMulNBits)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, MatMulBnb4)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, MatMulBnb4)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, MatMulBnb4)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, BiasSoftmax)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, BiasDropout)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, BitmaskDropout)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, BitmaskBiasDropout)>,
// These ops were experimental ops in onnx domain which have been removed now. We add them here as
// contrib ops to maintain backward compatibility
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float, Affine)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double, Affine)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16, Affine)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, Attention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, Attention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, PackedAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, PackedAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, PackedMultiHeadAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, PackedMultiHeadAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, BeamSearch)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, WhisperBeamSearch)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, ConvTransposeWithDynamicPads)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float, Crop)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double, Crop)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16, Crop)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, MoE)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, MoE)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QMoE)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, MultiHeadAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, MultiHeadAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, GroupQueryAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, GroupQueryAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DecoderAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DecoderAttention)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, int32_t, DynamicSlice)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, int64_t, DynamicSlice)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, EmbedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, EmbedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, GreedySearch)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, GroupNorm)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, NhwcConv)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, NhwcConv)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float, ImageScaler)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double, ImageScaler)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16, ImageScaler)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, LongformerAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, LongformerAttention)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float, ParametricSoftplus)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double, ParametricSoftplus)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16, ParametricSoftplus)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, RotaryEmbedding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, RotaryEmbedding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, RotaryEmbedding)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, GemmaRotaryEmbedding)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, Sampling)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float, ScaledTanh)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double, ScaledTanh)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16, ScaledTanh)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, SkipGroupNorm)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, SkipLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, SkipLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, SkipSimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, SkipSimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float, ThresholdedRelu)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double, ThresholdedRelu)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16, ThresholdedRelu)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_VERSIONED_TYPED_CLASS_NAME(1, 16, float_float_float, LayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_VERSIONED_TYPED_CLASS_NAME(1, 16, double_double_double, LayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_VERSIONED_TYPED_CLASS_NAME(1, 16, MLFloat16_float_MLFloat16, LayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_VERSIONED_TYPED_CLASS_NAME(1, 16, float_float_MLFloat16, LayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_VERSIONED_TYPED_CLASS_NAME(1, 16, MLFloat16_float_float, LayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_VERSIONED_TYPED_CLASS_NAME(1, 16, BFloat16_float_BFloat16, LayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float_float_float, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, double_double_double, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16_float_MLFloat16, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, float_float_MLFloat16, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, MLFloat16_float_float, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_ONNX_OP_TYPED_CLASS_NAME(1, BFloat16_float_BFloat16, SimplifiedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, Inverse)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, MatMulNBits)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, MatMulNBits)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, MatMulBnb4)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, MatMulBnb4)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, MatMulBnb4)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, BiasSoftmax)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, BiasDropout)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, BitmaskDropout)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, BitmaskBiasDropout)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, int8_t_MLFloat16, QuantizeLinear)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, uint8_t_MLFloat16, QuantizeLinear)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, int8_t_MLFloat16, DequantizeLinear)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, uint8_t_MLFloat16, DequantizeLinear)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float_int8_t, QAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16_int8_t, QAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, UnfoldTensor)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, DynamicTimeWarping)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, Trilu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, FastGelu)>,
// TransposedMatMul is still here for backward compatibility
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, TransposeMatMul)>, // backward compatibility
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, FusedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, FusedConv)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QOrderedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QOrderedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QOrderedGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QuantizeWithOrder)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, DequantizeWithOrder)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QOrderedAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QOrderedLongformerAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DecoderMaskedSelfAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DecoderMaskedSelfAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DecoderMaskedMultiHeadAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DecoderMaskedMultiHeadAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, GemmFloat8)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, SparseAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, SparseAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, int8_t_MLFloat16, QuantizeLinear)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, uint8_t_MLFloat16, QuantizeLinear)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, int8_t_MLFloat16, DequantizeLinear)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, uint8_t_MLFloat16, DequantizeLinear)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float_int8_t, QAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16_int8_t, QAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, UnfoldTensor)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, DynamicTimeWarping)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, Trilu)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, FastGelu)>,
// TransposedMatMul is still here for backward compatibility
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, TransposeMatMul)>, // backward compatibility
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, FusedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, FusedConv)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QOrderedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QOrderedLayerNormalization)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QOrderedGelu)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QuantizeWithOrder)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, DequantizeWithOrder)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QOrderedAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, QOrderedLongformerAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DecoderMaskedSelfAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DecoderMaskedSelfAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DecoderMaskedMultiHeadAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DecoderMaskedMultiHeadAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, GemmFloat8)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, SparseAttention)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, BFloat16, SparseAttention)>,
#ifdef ENABLE_ATEN
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kPytorchAtenDomain, 1, ATen)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCudaExecutionProvider, kPytorchAtenDomain, 1, ATen)>,
#endif
#ifdef ENABLE_TRAINING_OPS
// Should remove the shrunken_gather include from ENABLE_TRAINING_OPS once
// 1). compute optimizer is enabled for inference or
// 2). this is needed by inference for other purpose.
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, ShrunkenGather)>,
// Should remove the shrunken_gather include from ENABLE_TRAINING_OPS once
// 1). compute optimizer is enabled for inference or
// 2). this is needed by inference for other purpose.
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, ShrunkenGather)>,
#endif
#if defined(ORT_USE_NCCL)
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, AllReduce)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, AllGather)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, AllToAll)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, AllReduce)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, AllGather)>,
BuildKernelCreateInfo<CUDA_MS_OP_CLASS_NAME(1, AllToAll)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, ShardedMoE)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, ShardedMoE)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, ShardedMoE)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, ShardedMoE)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedMatMul)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedSlice)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedSlice)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedSlice)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedSlice)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, int64_t, DistributedReshape)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedReshape)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedReshape)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, int64_t, DistributedReshape)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedReshape)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedReshape)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, int64_t, DistributedExpand)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedExpand)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedExpand)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, int64_t, DistributedExpand)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedExpand)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedExpand)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedReduceSum)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedReduceSum)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedReduceSum)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedReduceSum)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedReduceMax)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedReduceMax)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedReduceMax)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedReduceMax)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedReduceMean)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedReduceMean)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedReduceMean)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedReduceMean)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, int64_t, DistributedUnsqueeze)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedUnsqueeze)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedUnsqueeze)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, int64_t, DistributedUnsqueeze)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedUnsqueeze)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedUnsqueeze)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, int64_t, DistributedSqueeze)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedSqueeze)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedSqueeze)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, int64_t, DistributedSqueeze)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, float, DistributedSqueeze)>,
BuildKernelCreateInfo<CUDA_MS_OP_TYPED_CLASS_NAME(1, MLFloat16, DistributedSqueeze)>,
#endif
};

View file

@@ -20,7 +20,7 @@ class ExLibLoader {
virtual ~ExLibLoader();
protected:
virtual void PreUnloadLibrary(void* /*handle*/){};
virtual void PreUnloadLibrary(void* /*handle*/) {};
std::map<std::string, void*> dso_name_data_map_;

View file

@@ -2665,10 +2665,10 @@ ONNX_MS_OPERATOR_SET_SCHEMA(CropAndResize, 1,
#if !defined(DISABLE_FLOAT8_TYPES)
#define GEMM_FLOAT8_TYPES \
{ "tensor(float8e4m3fn)", "tensor(float8e5m2)", "tensor(float16)", "tensor(bfloat16)", "tensor(float)" }
{"tensor(float8e4m3fn)", "tensor(float8e5m2)", "tensor(float16)", "tensor(bfloat16)", "tensor(float)"}
#else
#define GEMM_FLOAT8_TYPES \
{ "tensor(float16)", "tensor(bfloat16)", "tensor(float)" }
{"tensor(float16)", "tensor(bfloat16)", "tensor(float)"}
#endif
ONNX_MS_OPERATOR_SET_SCHEMA(GemmFloat8, 1,

View file

@@ -86,7 +86,7 @@ class TensorRef {
/// <returns>Flattened tensor data in bytes</returns>
virtual std::vector<uint8_t> Data() const = 0;
virtual ~TensorRef(){};
virtual ~TensorRef() {};
};
/// <summary>
@@ -131,7 +131,7 @@ class ValueInfoRef {
/// <param name="axes">Indices of dimensions to add. Indices are relative to final shape.</param>
virtual void UnsqueezeDims(const std::vector<int64_t>& axes) = 0;
virtual ~ValueInfoRef(){};
virtual ~ValueInfoRef() {};
};
/// <summary>
@@ -248,7 +248,7 @@ class NodeRef {
/// <returns>Id</returns>
virtual int64_t Id() const = 0;
virtual ~NodeRef(){};
virtual ~NodeRef() {};
};
/// <summary>
@@ -449,7 +449,7 @@ class GraphRef {
/// <returns>True if output of the Graph.</returns>
virtual bool IsGraphOutput(std::string_view name) const = 0;
virtual ~GraphRef(){};
virtual ~GraphRef() {};
};
} // namespace api

View file

@@ -228,11 +228,9 @@ inline std::basic_string<PATH_CHAR_TYPE> GetLastComponent(const std::basic_strin
typename std::basic_string<PATH_CHAR_TYPE>::size_type pos = input.length();
PATH_CHAR_TYPE sep = GetPathSep<PATH_CHAR_TYPE>();
// remove trailing backslash
for (; pos > 1 && input[pos - 1] == sep; --pos)
;
for (; pos > 1 && input[pos - 1] == sep; --pos);
input.resize(pos);
for (; pos != 0 && input[pos - 1] != sep; --pos)
;
for (; pos != 0 && input[pos - 1] != sep; --pos);
return input.substr(pos);
}

View file

@@ -502,7 +502,7 @@ Status GetMLMultiArrayCopyInfo(const MLMultiArray* _Nonnull array,
class Execution {
public:
Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags);
~Execution(){};
~Execution() {};
Status LoadModel();
Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,

The file diff is not shown because it is too large. Load diff

View file

@@ -41,16 +41,16 @@ TreeEnsembleClassifier<T>::TreeEnsembleClassifier(const OpKernelInfo& info) : Op
template <typename T>
Status TreeEnsembleClassifier<T>::GetRemovableAttributes(InlinedVector<std::string>& removable_attributes) const {
InlinedVector<std::string> names {
"base_values", "nodes_falsenodeids", "nodes_featureids", "nodes_hitrates",
"nodes_missing_value_tracks_true", "nodes_modes", "nodes_nodeids", "nodes_treeids",
"nodes_truenodeids", "nodes_values", "class_ids", "class_treeids", "class_nodeids",
"class_weights", "classlabels_strings",
"classlabels_int64s"
InlinedVector<std::string> names{
"base_values", "nodes_falsenodeids", "nodes_featureids", "nodes_hitrates",
"nodes_missing_value_tracks_true", "nodes_modes", "nodes_nodeids", "nodes_treeids",
"nodes_truenodeids", "nodes_values", "class_ids", "class_treeids", "class_nodeids",
"class_weights", "classlabels_strings",
"classlabels_int64s"
#if !defined(ORT_MINIMAL_BUILD)
"base_values_as_tensor",
"nodes_hitrates_as_tensor", "nodes_values_as_tensor",
"class_weights_as_tensor"
"base_values_as_tensor",
"nodes_hitrates_as_tensor", "nodes_values_as_tensor",
"class_weights_as_tensor"
#endif
};
removable_attributes.swap(names);

View file

@@ -48,16 +48,16 @@ TreeEnsembleRegressor<T>::TreeEnsembleRegressor(const OpKernelInfo& info) : OpKe
template <typename T>
Status TreeEnsembleRegressor<T>::GetRemovableAttributes(InlinedVector<std::string>& removable_attributes) const {
InlinedVector<std::string> names {
"base_values", "nodes_falsenodeids", "nodes_featureids", "nodes_hitrates",
"nodes_missing_value_tracks_true", "nodes_modes", "nodes_nodeids", "nodes_treeids",
"nodes_truenodeids", "nodes_values",
"target_ids", "target_treeids", "target_nodeids",
"target_weights"
InlinedVector<std::string> names{
"base_values", "nodes_falsenodeids", "nodes_featureids", "nodes_hitrates",
"nodes_missing_value_tracks_true", "nodes_modes", "nodes_nodeids", "nodes_treeids",
"nodes_truenodeids", "nodes_values",
"target_ids", "target_treeids", "target_nodeids",
"target_weights"
#if !defined(ORT_MINIMAL_BUILD)
"base_values_as_tensor",
"nodes_hitrates_as_tensor", "nodes_values_as_tensor",
"class_weights_as_tensor"
"base_values_as_tensor",
"nodes_hitrates_as_tensor", "nodes_values_as_tensor",
"class_weights_as_tensor"
#endif
};
removable_attributes.swap(names);

View file

@@ -195,8 +195,8 @@ Status NonMaxSuppression::Compute(OpKernelContext* ctx) const {
}
sorted_boxes.pop();
} // while
} // for class_index
} // for batch_index
} // for class_index
} // for batch_index
constexpr auto last_dim = 3;
const auto num_selected = selected_indices.size();

Просмотреть файл

@ -251,9 +251,9 @@ void RoiAlignForward(const TensorShape& output_shape, const T* bottom_data, floa
top_data[index] = output_val;
} // for pw
} // for ph
} // for c
} // for n
} // for ph
} // for c
} // for n
});
}
} // namespace

Просмотреть файл

@ -128,7 +128,7 @@ Status Expand<T>::Compute(OpKernelContext* context) const {
memcpy(output_data + output_offset, input_data + input_offset, onnxruntime::narrow<size_t>(copy_byte));
output_offsets[onnxruntime::narrow<size_t>(i)] = output_offset;
} // for i
}; // distribute_fn
}; // distribute_fn
auto per_thread_tasks =
distribute_count / concurrency::ThreadPool::DegreeOfParallelism(context->GetOperatorThreadPool());
@ -169,9 +169,9 @@ Status Expand<T>::Compute(OpKernelContext* context) const {
copy_byte >>= 1;
}
} // while
} // if
} // for
}; // copy_fn
} // if
} // for
}; // copy_fn
if (per_thread_tasks > 20) {
concurrency::ThreadPool::TryParallelFor(
context->GetOperatorThreadPool(),
@ -181,7 +181,7 @@ Status Expand<T>::Compute(OpKernelContext* context) const {
} else {
copy_fn(0, onnxruntime::narrow<std::ptrdiff_t>(distribute_count));
} // else
} // for
} // for
return Status::OK();
} // Expand::compute

The diff for this file is not shown because of its large size. Load diff

Просмотреть файл

@ -18,7 +18,7 @@ constexpr CudaGraphAnnotation_t kCudaGraphAnnotationSkip = -1;
constexpr CudaGraphAnnotation_t kCudaGraphAnnotationDefault = 0;
struct CudaGraphSet {
CudaGraphSet(){};
CudaGraphSet() {};
~CudaGraphSet();
void Clear();
@ -31,7 +31,7 @@ struct CudaGraphSet {
};
struct CUDAGraphManager {
CUDAGraphManager(){};
CUDAGraphManager() {};
CUDAGraphManager(cudaStream_t stream);
~CUDAGraphManager();

Просмотреть файл

@ -33,7 +33,7 @@ class CudaProfiler final : public EpProfiler {
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CudaProfiler);
~CudaProfiler() {}
bool StartProfiling(TimePoint) override { return true; }
void EndProfiling(TimePoint, Events&) override{};
void EndProfiling(TimePoint, Events&) override {};
void Start(uint64_t) override{};
void Stop(uint64_t) override{};
};

Просмотреть файл

@ -18,7 +18,7 @@ namespace cuda {
template <typename T, bool NHWC>
class ConvTranspose : public CudaKernel {
public:
ConvTranspose(const OpKernelInfo& info) : CudaKernel(info), conv_transpose_attrs_(info){};
ConvTranspose(const OpKernelInfo& info) : CudaKernel(info), conv_transpose_attrs_(info) {};
Status PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
bool& is_packed, [[maybe_unused]] PrePackedWeights* prepacked_weights) override;
Status ComputeInternal(OpKernelContext* context) const override;

Просмотреть файл

@ -45,7 +45,7 @@ enum class Color : uint32_t {
class RangeCreatorBase {
public:
RangeCreatorBase(const std::string message, const Color color)
: message_(message), color_(color), is_begin_called_(false), is_end_called_(false){};
: message_(message), color_(color), is_begin_called_(false), is_end_called_(false) {};
// Check if Begin and End are both called.
// It's pointless if not all of them are called.
@ -100,7 +100,7 @@ class RangeCreatorBase {
class NvtxRangeCreator final : public RangeCreatorBase {
public:
NvtxRangeCreator(const std::string message, const Color color)
: RangeCreatorBase(message, color){};
: RangeCreatorBase(message, color) {};
void BeginImpl() override;
void EndImpl() override;
@ -114,7 +114,7 @@ class NvtxRangeCreator final : public RangeCreatorBase {
class NvtxNestedRangeCreator final : public RangeCreatorBase {
public:
NvtxNestedRangeCreator(const std::string message, const Color color)
: RangeCreatorBase(message, color){};
: RangeCreatorBase(message, color) {};
void BeginImpl() override;
void EndImpl() override;
@ -123,7 +123,7 @@ class NvtxNestedRangeCreator final : public RangeCreatorBase {
class NvtxMarkerCreator final {
public:
NvtxMarkerCreator(const std::string message, const Color color)
: message_(message), color_(color){};
: message_(message), color_(color) {};
void Mark();
private:

Просмотреть файл

@ -35,7 +35,7 @@ enum class BroadcastIndexType : int32_t {
template <typename T>
class IConstantBuffer {
public:
virtual ~IConstantBuffer(){};
virtual ~IConstantBuffer() {};
virtual const T* GetBuffer(cudaStream_t stream, size_t count) = 0;
};

Просмотреть файл

@ -13,23 +13,23 @@ const std::vector<MLDataType>& CastOpTypeConstraints() {
// Must be done as a local static for a shared provider, to avoid the prefast warning:
// Global initializer calls a non-constexpr function 'onnxruntime::DataTypeImpl::GetTensorType<onnxruntime::MLFloat16>'
// In a shared provider, GetTensorType is a function call into Onnxruntime and isn't constexpr
static std::vector<MLDataType> types {
DataTypeImpl::GetTensorType<MLFloat16>(),
DataTypeImpl::GetTensorType<BFloat16>(),
DataTypeImpl::GetTensorType<float>(),
DataTypeImpl::GetTensorType<double>(),
DataTypeImpl::GetTensorType<int8_t>(),
DataTypeImpl::GetTensorType<int16_t>(),
DataTypeImpl::GetTensorType<int32_t>(),
DataTypeImpl::GetTensorType<int64_t>(),
DataTypeImpl::GetTensorType<uint8_t>(),
DataTypeImpl::GetTensorType<uint16_t>(),
DataTypeImpl::GetTensorType<uint32_t>(),
DataTypeImpl::GetTensorType<uint64_t>(),
DataTypeImpl::GetTensorType<bool>()
static std::vector<MLDataType> types{
DataTypeImpl::GetTensorType<MLFloat16>(),
DataTypeImpl::GetTensorType<BFloat16>(),
DataTypeImpl::GetTensorType<float>(),
DataTypeImpl::GetTensorType<double>(),
DataTypeImpl::GetTensorType<int8_t>(),
DataTypeImpl::GetTensorType<int16_t>(),
DataTypeImpl::GetTensorType<int32_t>(),
DataTypeImpl::GetTensorType<int64_t>(),
DataTypeImpl::GetTensorType<uint8_t>(),
DataTypeImpl::GetTensorType<uint16_t>(),
DataTypeImpl::GetTensorType<uint32_t>(),
DataTypeImpl::GetTensorType<uint64_t>(),
DataTypeImpl::GetTensorType<bool>()
#if !defined(DISABLE_FLOAT8_TYPES)
,
DataTypeImpl::GetTensorType<Float8E4M3FN>(), DataTypeImpl::GetTensorType<Float8E5M2>()
,
DataTypeImpl::GetTensorType<Float8E4M3FN>(), DataTypeImpl::GetTensorType<Float8E5M2>()
#endif
};
return types;

Просмотреть файл

@ -42,7 +42,7 @@ enum ORT_DataType : int {
*/
class DnnlNodeCapability {
public:
virtual ~DnnlNodeCapability(){};
virtual ~DnnlNodeCapability() {};
/**
* virtual function expected to be implemented for different node
* types.

Просмотреть файл

@ -18,7 +18,7 @@ class DnnlNode;
class DnnlNodeArg {
public:
DnnlNodeArg(DnnlNode* node, size_t index, bool is_output)
: node_(node), index_(index), is_output_(is_output){};
: node_(node), index_(index), is_output_(is_output) {};
DnnlNodeArg() = default;
DnnlNode* GetNode() { return node_; };
size_t GetIndex() { return index_; };

Просмотреть файл

@ -15,7 +15,7 @@ class JsCPUAllocator : public CPUAllocator {
: CPUAllocator(
OrtMemoryInfo("JsCPUAllocator", OrtAllocatorType::OrtDeviceAllocator,
OrtDevice(OrtDevice::CPU, OrtDevice::MemType::DEFAULT, 0),
0, OrtMemTypeCPU)){};
0, OrtMemTypeCPU)) {};
};
class JsCustomAllocator : public IAllocator {

Просмотреть файл

@ -11,8 +11,8 @@ namespace js {
class DataTransfer : public IDataTransfer {
public:
DataTransfer(){};
~DataTransfer(){};
DataTransfer() {};
~DataTransfer() {};
bool CanCopy(const OrtDevice& src_device, const OrtDevice& dst_device) const override;

Просмотреть файл

@ -125,7 +125,7 @@ Status ExpandOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
default:
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Type not supported.");
} // switch
} // if-else
} // if-else
const std::string& output_name = node_unit.Outputs()[0].node_arg.Name();
std::string shape_input_name(input_name + "_" + output_name);

Просмотреть файл

@ -163,7 +163,7 @@ Status ProcessConstantValue(QnnModelWrapper& qnn_model_wrapper,
default:
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Type not supported.");
} // switch
} // if-else
} // if-else
QnnParamWrapper constant_value_param(node_unit.Index(),
node_unit.Name(),

Просмотреть файл

@ -10,7 +10,7 @@
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
#define ALIGN_PTR_UP(ptr, align, type) \
reinterpret_cast<type>((reinterpret_cast<std::uintptr_t>(ptr) + (align)-1) & ~((align)-1))
reinterpret_cast<type>((reinterpret_cast<std::uintptr_t>(ptr) + (align) - 1) & ~((align) - 1))
namespace onnxruntime {
namespace qnn {

Просмотреть файл

@ -34,7 +34,7 @@ class RocmProfiler final : public EpProfiler {
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(RocmProfiler);
~RocmProfiler() {}
bool StartProfiling(TimePoint) override { return true; }
void EndProfiling(TimePoint, Events&) override{};
void EndProfiling(TimePoint, Events&) override {};
void Start(uint64_t) override{};
void Stop(uint64_t) override{};
};

Просмотреть файл

@ -24,10 +24,10 @@ struct Provider {
virtual ProviderOptions GetProviderOptions(const void* /*provider options struct*/) { return {}; }
// Update provider options from key-value string configuration
virtual void UpdateProviderOptions(void* /*provider options to be configured*/, const ProviderOptions& /*key-value string provider options*/){};
virtual void UpdateProviderOptions(void* /*provider options to be configured*/, const ProviderOptions& /*key-value string provider options*/) {};
// Get provider specific custom op domain list. Provider has the resposibility to release OrtCustomOpDomain instances it creates.
virtual void GetCustomOpDomainList(IExecutionProviderFactory* /*pointer to factory instance*/, std::vector<OrtCustomOpDomain*>& /*provider custom op domain list*/){};
virtual void GetCustomOpDomainList(IExecutionProviderFactory* /*pointer to factory instance*/, std::vector<OrtCustomOpDomain*>& /*provider custom op domain list*/) {};
virtual void Initialize() = 0; // Called right after loading the shared library, if this throws any errors Shutdown() will be called and the library unloaded
virtual void Shutdown() = 0; // Called right before unloading the shared library

Просмотреть файл

@ -24,8 +24,8 @@ struct TensorRTCustomKernel {
: compute_stream_(compute_stream) {
}
void Compute(OrtKernelContext* /*context*/){
// The implementation is in TensorRT plugin. No need to implement it here.
void Compute(OrtKernelContext* /*context*/) {
// The implementation is in TensorRT plugin. No need to implement it here.
};
private:

Просмотреть файл

@ -46,7 +46,7 @@ struct VitisAI_Provider : Provider {
}
};
// Get provider specific custom op domain list. Provider has the resposibility to release OrtCustomOpDomain instances it creates.
void GetCustomOpDomainList(IExecutionProviderFactory*, std::vector<OrtCustomOpDomain*>&) override{};
void GetCustomOpDomainList(IExecutionProviderFactory*, std::vector<OrtCustomOpDomain*>&) override {};
// Called right after loading the shared library, if this throws any errors Shutdown() will be called and the library unloaded
void Initialize() override { initialize_vitisai_ep(); }
// Called right before unloading the shared library

Просмотреть файл

@ -47,7 +47,7 @@ namespace npu {
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs, \
const NodeUnit& node_unit) override { \
LOGS_DEFAULT(INFO) << "Creating " << #onnx_op_type << " Op"; \
auto op = graph_ep->GetGraph() -> CreateOperation<tim::vx::ops::vsinpu_op_kind>(); \
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::vsinpu_op_kind>(); \
(*op).BindInputs(inputs).BindOutputs(outputs); \
return true; \
; \

Просмотреть файл

@ -60,10 +60,9 @@ using createIOpBuildItemFunc = std::function<std::unique_ptr<IOpBuilder>()>;
using OpBuildItemType = std::map<std::string, std::unique_ptr<IOpBuilder>>;
static const std::map<std::string, createIOpBuildItemFunc> reg = {
#define REGISTER_OP_BUILDER(ONNX_NODE_TYPE, BUILDER_TYPE) \
{ \
ONNX_NODE_TYPE, [] { return std::make_unique<BUILDER_TYPE>(); } \
}
#define REGISTER_OP_BUILDER(ONNX_NODE_TYPE, BUILDER_TYPE) \
{ \
ONNX_NODE_TYPE, [] { return std::make_unique<BUILDER_TYPE>(); }}
REGISTER_OP_BUILDER("Add", AddOpBuilder),
REGISTER_OP_BUILDER("Sub", SubOpBuilder),

Просмотреть файл

@ -155,11 +155,7 @@ void addIoBindingMethods(pybind11::module& m) {
.def("clear_binding_outputs", [](SessionIOBinding* io_binding) -> void {
io_binding->Get()->ClearOutputs();
})
.def(
"get_outputs", [](const SessionIOBinding* io_binding) -> const std::vector<OrtValue>& {
return io_binding->Get()->GetOutputs();
},
py::return_value_policy::reference_internal)
.def("get_outputs", [](const SessionIOBinding* io_binding) -> const std::vector<OrtValue>& { return io_binding->Get()->GetOutputs(); }, py::return_value_policy::reference_internal)
.def("copy_outputs_to_cpu", [](const SessionIOBinding* io_binding) -> py::list {
const std::vector<OrtValue>& outputs = io_binding->Get()->GetOutputs();
@ -180,8 +176,7 @@ void addIoBindingMethods(pybind11::module& m) {
}
++pos;
}
return result;
});
return result; });
}
} // namespace python

Просмотреть файл

@ -226,7 +226,7 @@ void addOrtValueMethods(pybind11::module& m) {
ORT_THROW("Only OrtValues that are Tensors/SparseTensors are currently supported");
#else
ORT_THROW("Only OrtValues that are Tensors are supported in this build");
ORT_THROW("Only OrtValues that are Tensors are supported in this build");
#endif
})
.def("shape", [](const OrtValue* ort_value) -> py::list {
@ -275,26 +275,15 @@ void addOrtValueMethods(pybind11::module& m) {
return *ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(*type_proto);
})
.def(
"element_type", [](const OrtValue* ort_value) -> int32_t {
return GetTensorProtoType(*ort_value);
},
"Returns an integer equal to the ONNX tensor proto type of the tensor or sequence. "
"This integer is one type defined by ONNX TensorProto_DataType "
"(such as onnx.TensorProto.FLOAT)."
"Raises an exception in any other case.")
.def("has_value", [](const OrtValue* ort_value) -> bool {
return ort_value->IsAllocated();
})
.def("is_tensor", [](const OrtValue* ort_value) -> bool {
return ort_value->IsTensor();
})
.def("is_sparse_tensor", [](const OrtValue* ort_value) -> bool {
return ort_value->IsSparseTensor();
})
.def("is_tensor_sequence", [](const OrtValue* ort_value) -> bool {
return ort_value->IsTensorSequence();
})
.def("element_type", [](const OrtValue* ort_value) -> int32_t { return GetTensorProtoType(*ort_value); },
"Returns an integer equal to the ONNX tensor proto type of the tensor or sequence. "
"This integer is one type defined by ONNX TensorProto_DataType "
"(such as onnx.TensorProto.FLOAT)."
"Raises an exception in any other case.")
.def("has_value", [](const OrtValue* ort_value) -> bool { return ort_value->IsAllocated(); })
.def("is_tensor", [](const OrtValue* ort_value) -> bool { return ort_value->IsTensor(); })
.def("is_sparse_tensor", [](const OrtValue* ort_value) -> bool { return ort_value->IsSparseTensor(); })
.def("is_tensor_sequence", [](const OrtValue* ort_value) -> bool { return ort_value->IsTensorSequence(); })
// Converts Tensor into a numpy array
.def("numpy", [](const OrtValue* ml_value) -> py::object {
ORT_ENFORCE(ml_value->IsTensor(), "Only OrtValues that are Tensors are convertible to Numpy objects");
@ -310,37 +299,22 @@ void addOrtValueMethods(pybind11::module& m) {
#else
py::object obj = GetPyObjFromTensor(*ml_value, nullptr, nullptr);
#endif
return obj;
})
return obj; })
#ifdef ENABLE_TRAINING
.def(
"to_dlpack", [](OrtValue* ort_value) -> py::object {
return py::reinterpret_steal<py::object>(ToDlpack(*ort_value));
},
"Returns a DLPack representing the tensor. This method does not copy the pointer shape, "
"instead, it copies the pointer value. The OrtValue must be persist until the dlpack structure "
"is consumed.")
.def_static(
"from_dlpack", [](py::object data, bool is_bool_tensor) {
return FromDlpack(data.ptr(), is_bool_tensor);
},
py::arg("data"), py::arg("is_bool_tensor") = false, "Converts a tensor from a external library into an OrtValue by means of the __dlpack__ protocol.")
.def(
"__dlpack__", [](OrtValue* ort_value, py::object /* stream */) -> py::object {
return py::reinterpret_steal<py::object>(ToDlpack(*ort_value));
},
py::arg("stream") = py::none(),
"Returns a DLPack representing the tensor (part of __dlpack__ protocol). "
"This method does not copy the pointer shape, instead, it copies the pointer value. "
"The OrtValue must persist until the dlpack structure is consumed.")
.def(
"__dlpack_device__", [](const OrtValue* ort_value) -> py::tuple {
.def("to_dlpack", [](OrtValue* ort_value) -> py::object { return py::reinterpret_steal<py::object>(ToDlpack(*ort_value)); },
"Returns a DLPack representing the tensor. This method does not copy the pointer shape, "
"instead, it copies the pointer value. The OrtValue must be persist until the dlpack structure "
"is consumed.")
.def_static("from_dlpack", [](py::object data, bool is_bool_tensor) { return FromDlpack(data.ptr(), is_bool_tensor); }, py::arg("data"), py::arg("is_bool_tensor") = false, "Converts a tensor from a external library into an OrtValue by means of the __dlpack__ protocol.")
.def("__dlpack__", [](OrtValue* ort_value, py::object /* stream */) -> py::object { return py::reinterpret_steal<py::object>(ToDlpack(*ort_value)); }, py::arg("stream") = py::none(),
"Returns a DLPack representing the tensor (part of __dlpack__ protocol). "
"This method does not copy the pointer shape, instead, it copies the pointer value. "
"The OrtValue must persist until the dlpack structure is consumed.")
.def("__dlpack_device__", [](const OrtValue* ort_value) -> py::tuple {
ORT_ENFORCE(ort_value->IsTensor(), "Only tensor type OrtValues are supported");
const onnxruntime::Tensor& tensor = ort_value->Get<Tensor>();
DLDevice device = onnxruntime::dlpack::GetDlpackDevice(*ort_value, tensor.Location().device.Id());
return py::make_tuple(static_cast<int>(device.device_type), device.device_id);
},
"Returns a tuple of integers, (device, device index) (part of __dlpack__ protocol).")
return py::make_tuple(static_cast<int>(device.device_type), device.device_id); }, "Returns a tuple of integers, (device, device index) (part of __dlpack__ protocol).")
#endif
;
@ -350,13 +324,8 @@ void addOrtValueMethods(pybind11::module& m) {
v->push_back(ortvalue);
})
#ifdef ENABLE_TRAINING
.def(
"push_back", [](std::vector<OrtValue>* v, py::object dlpack_tensor, const bool is_bool_tensor) {
v->push_back(FromDlpack(dlpack_tensor.ptr(), is_bool_tensor));
},
"Add a new OrtValue after being ownership was transferred from the DLPack structure.", py::arg("dlpack_tensor"), py::arg("is_bool_tensor") = false)
.def(
"push_back_batch", [](std::vector<OrtValue>* v, std::vector<py::object>& torch_tensors, std::vector<int64_t>& data_ptrs, std::vector<py::object>& element_types, const std::vector<std::vector<int64_t>>& shapes, const std::vector<OrtDevice>& devices) {
.def("push_back", [](std::vector<OrtValue>* v, py::object dlpack_tensor, const bool is_bool_tensor) { v->push_back(FromDlpack(dlpack_tensor.ptr(), is_bool_tensor)); }, "Add a new OrtValue after being ownership was transferred from the DLPack structure.", py::arg("dlpack_tensor"), py::arg("is_bool_tensor") = false)
.def("push_back_batch", [](std::vector<OrtValue>* v, std::vector<py::object>& torch_tensors, std::vector<int64_t>& data_ptrs, std::vector<py::object>& element_types, const std::vector<std::vector<int64_t>>& shapes, const std::vector<OrtDevice>& devices) {
for (size_t i = 0; i < torch_tensors.size(); ++i) {
py::object& element_type = element_types.at(i);
const std::vector<int64_t>& shape = shapes.at(i);
@ -377,52 +346,36 @@ void addOrtValueMethods(pybind11::module& m) {
OrtValue ml_value;
Tensor::InitOrtValue(ml_type, gsl::make_span(shape), reinterpret_cast<void*>(data_ptr), info, ml_value);
v->push_back(ml_value);
}
},
"Add a batch of OrtValue's by wrapping PyTorch tensors.")
} }, "Add a batch of OrtValue's by wrapping PyTorch tensors.")
#endif
.def("reserve", [](std::vector<OrtValue>* v, const size_t len) { v->reserve(len); })
.def("shrink_to_fit", [](std::vector<OrtValue>* v) { v->shrink_to_fit(); })
.def("__len__", [](const std::vector<OrtValue>& v) { return v.size(); })
.def(
"__iter__", [](const std::vector<OrtValue>& v) {
return py::make_iterator(v.cbegin(), v.cend());
},
py::keep_alive<0, 1>())
.def("__getitem__", [](const std::vector<OrtValue>& v, const size_t idx) {
return v.at(idx);
})
.def(
"bool_tensor_indices", [](std::vector<OrtValue>* v) -> std::vector<int64_t> {
.def("__iter__", [](const std::vector<OrtValue>& v) { return py::make_iterator(v.cbegin(), v.cend()); }, py::keep_alive<0, 1>())
.def("__getitem__", [](const std::vector<OrtValue>& v, const size_t idx) { return v.at(idx); })
.def("bool_tensor_indices", [](std::vector<OrtValue>* v) -> std::vector<int64_t> {
std::vector<int64_t> indices;
for (size_t i = 0; i < v->size(); ++i) {
if (GetTensorProtoType((*v)[i]) == ONNX_NAMESPACE::TensorProto_DataType_BOOL) {
indices.push_back(static_cast<int64_t>(i));
}
}
return indices;
},
"Returns the indices of every boolean tensor in this vector of OrtValue. "
"In case of a boolean tensor, method to_dlpacks returns a uint8 tensor instead of a boolean tensor. "
"If torch consumes the dlpack structure, `.to(torch.bool)` must be applied to the torch tensor "
"to get a boolean tensor.")
return indices; },
"Returns the indices of every boolean tensor in this vector of OrtValue. "
"In case of a boolean tensor, method to_dlpacks returns a uint8 tensor instead of a boolean tensor. "
"If torch consumes the dlpack structure, `.to(torch.bool)` must be applied to the torch tensor "
"to get a boolean tensor.")
#ifdef ENABLE_TRAINING
.def("dlpack_at", [](std::vector<OrtValue>* v, const size_t idx) {
return py::reinterpret_steal<py::object>(ToDlpack(v->at(idx)));
})
.def("dlpack_at", [](std::vector<OrtValue>* v, const size_t idx) { return py::reinterpret_steal<py::object>(ToDlpack(v->at(idx))); })
#endif
.def(
"element_type_at", [](std::vector<OrtValue>* v, const size_t idx) -> int32_t {
return GetTensorProtoType(v->at(idx));
},
"Returns an integer equal to the ONNX proto type of the tensor at position i. "
"This integer is one type defined by ONNX TensorProto_DataType "
"(such as onnx.TensorProto.FLOAT)."
"Raises an exception in any other case.",
py::arg("idx"))
.def("element_type_at", [](std::vector<OrtValue>* v, const size_t idx) -> int32_t { return GetTensorProtoType(v->at(idx)); },
"Returns an integer equal to the ONNX proto type of the tensor at position i. "
"This integer is one type defined by ONNX TensorProto_DataType "
"(such as onnx.TensorProto.FLOAT)."
"Raises an exception in any other case.",
py::arg("idx"))
#ifdef ENABLE_TRAINING
.def(
"to_dlpacks", [](const std::vector<OrtValue>& v, py::object to_tensor) -> py::list {
.def("to_dlpacks", [](const std::vector<OrtValue>& v, py::object to_tensor) -> py::list {
if (v.size() == 0)
return py::list();
@ -469,9 +422,8 @@ void addOrtValueMethods(pybind11::module& m) {
Py_DECREF(capsule);
}
}
return list_dlpacks;
},
R"pbdoc(Converts all OrtValue into tensors through DLPack protocol, the method creates
return list_dlpacks; },
R"pbdoc(Converts all OrtValue into tensors through DLPack protocol, the method creates
a DLPack structure for every tensors, then calls python function `to_tensor` to a new object
consuming the DLPack structure or return a list of capsule if this function is None.
@ -488,7 +440,7 @@ It creates many tensors acquiring ownership of existing OrtValue.
This method saves one object creation and an C++ allocation
for every transferred tensor.
)pbdoc",
py::arg("to_tensor"))
py::arg("to_tensor"))
#endif
;

Просмотреть файл

@ -397,8 +397,7 @@ void addSparseTensorMethods(pybind11::module& m) {
// pybind apparently has a bug with returning enums from def_property_readonly or methods
// returning a method object instead of the enumeration value
// so we are using def_property and throw on a potential modification
.def_property(
"format", [](const PySparseTensor* py_tensor) -> OrtSparseFormat {
.def_property("format", [](const PySparseTensor* py_tensor) -> OrtSparseFormat {
const SparseTensor& tensor = py_tensor->Instance();
auto retval = OrtSparseFormat::ORT_SPARSE_UNDEFINED;
switch (tensor.Format()) {

Просмотреть файл

@ -1425,7 +1425,7 @@ void addGlobalMethods(py::module& m) {
ORT_UNUSED_PARAMETER(algo);
ORT_THROW("set_cudnn_conv_algo_search is not supported in ROCM");
#else
cudnn_conv_algo_search = algo;
cudnn_conv_algo_search = algo;
#endif
});
// TODO remove deprecated global config
@ -1436,7 +1436,7 @@ void addGlobalMethods(py::module& m) {
ORT_UNUSED_PARAMETER(use_single_stream);
ORT_THROW("set_do_copy_in_default_stream is not supported in ROCM");
#else
do_copy_in_default_stream = use_single_stream;
do_copy_in_default_stream = use_single_stream;
#endif
});
// TODO remove deprecated global config
@ -1801,10 +1801,10 @@ Applies to session load, initialization, etc. Default is 0.)pbdoc")
}
ORT_THROW_IF_ERROR(options->value.AddExternalInitializers(names_ptrs, values_ptrs));
#else
ORT_UNUSED_PARAMETER(options);
ORT_UNUSED_PARAMETER(names);
ORT_UNUSED_PARAMETER(ort_values);
ORT_THROW("External initializers are not supported in this build.");
ORT_UNUSED_PARAMETER(options);
ORT_UNUSED_PARAMETER(names);
ORT_UNUSED_PARAMETER(ort_values);
ORT_THROW("External initializers are not supported in this build.");
#endif
});
@ -1866,8 +1866,7 @@ including arg name, arg type (contains both type and shape).)pbdoc")
return *(na.Type());
},
"node type")
.def(
"__str__", [](const onnxruntime::NodeArg& na) -> std::string {
.def("__str__", [](const onnxruntime::NodeArg& na) -> std::string {
std::ostringstream res;
res << "NodeArg(name='" << na.Name() << "', type='" << *(na.Type()) << "', shape=";
auto shape = na.Shape();
@ -1893,11 +1892,8 @@ including arg name, arg type (contains both type and shape).)pbdoc")
}
res << ")";
return std::string(res.str());
},
"converts the node into a readable string")
.def_property_readonly(
"shape", [](const onnxruntime::NodeArg& na) -> std::vector<py::object> {
return std::string(res.str()); }, "converts the node into a readable string")
.def_property_readonly("shape", [](const onnxruntime::NodeArg& na) -> std::vector<py::object> {
auto shape = na.Shape();
std::vector<py::object> arr;
if (shape == nullptr || shape->dim_size() == 0) {
@ -1914,9 +1910,7 @@ including arg name, arg type (contains both type and shape).)pbdoc")
arr[i] = py::none();
}
}
return arr;
},
"node shape (assuming the node holds a tensor)");
return arr; }, "node shape (assuming the node holds a tensor)");
py::class_<SessionObjectInitializer> sessionObjectInitializer(m, "SessionObjectInitializer");
py::class_<PyInferenceSession>(m, "InferenceSession", R"pbdoc(This is the main class used to run a model.)pbdoc")
@ -2107,51 +2101,28 @@ including arg name, arg type (contains both type and shape).)pbdoc")
.def_property_readonly("get_profiling_start_time_ns", [](const PyInferenceSession* sess) -> uint64_t {
return sess->GetSessionHandle()->GetProfiling().GetStartTimeNs();
})
.def(
"get_providers", [](const PyInferenceSession* sess) -> const std::vector<std::string>& {
return sess->GetSessionHandle()->GetRegisteredProviderTypes();
},
py::return_value_policy::reference_internal)
.def(
"get_provider_options", [](const PyInferenceSession* sess) -> const ProviderOptionsMap& {
return sess->GetSessionHandle()->GetAllProviderOptions();
},
py::return_value_policy::reference_internal)
.def_property_readonly(
"session_options", [](const PyInferenceSession* sess) -> PySessionOptions* {
.def("get_providers", [](const PyInferenceSession* sess) -> const std::vector<std::string>& { return sess->GetSessionHandle()->GetRegisteredProviderTypes(); }, py::return_value_policy::reference_internal)
.def("get_provider_options", [](const PyInferenceSession* sess) -> const ProviderOptionsMap& { return sess->GetSessionHandle()->GetAllProviderOptions(); }, py::return_value_policy::reference_internal)
.def_property_readonly("session_options", [](const PyInferenceSession* sess) -> PySessionOptions* {
auto session_options = std::make_unique<PySessionOptions>();
session_options->value = sess->GetSessionHandle()->GetSessionOptions();
return session_options.release();
},
py::return_value_policy::take_ownership)
.def_property_readonly(
"inputs_meta", [](const PyInferenceSession* sess) -> const std::vector<const onnxruntime::NodeArg*>& {
return session_options.release(); }, py::return_value_policy::take_ownership)
.def_property_readonly("inputs_meta", [](const PyInferenceSession* sess) -> const std::vector<const onnxruntime::NodeArg*>& {
auto res = sess->GetSessionHandle()->GetModelInputs();
OrtPybindThrowIfError(res.first);
return *(res.second);
},
py::return_value_policy::reference_internal)
.def_property_readonly(
"outputs_meta", [](const PyInferenceSession* sess) -> const std::vector<const onnxruntime::NodeArg*>& {
return *(res.second); }, py::return_value_policy::reference_internal)
.def_property_readonly("outputs_meta", [](const PyInferenceSession* sess) -> const std::vector<const onnxruntime::NodeArg*>& {
auto res = sess->GetSessionHandle()->GetModelOutputs();
OrtPybindThrowIfError(res.first);
return *(res.second);
},
py::return_value_policy::reference_internal)
.def_property_readonly(
"overridable_initializers", [](const PyInferenceSession* sess) -> const std::vector<const onnxruntime::NodeArg*>& {
return *(res.second); }, py::return_value_policy::reference_internal)
.def_property_readonly("overridable_initializers", [](const PyInferenceSession* sess) -> const std::vector<const onnxruntime::NodeArg*>& {
auto res = sess->GetSessionHandle()->GetOverridableInitializers();
OrtPybindThrowIfError(res.first);
return *(res.second);
},
py::return_value_policy::reference_internal)
.def_property_readonly(
"model_meta", [](const PyInferenceSession* sess) -> const onnxruntime::ModelMetadata& {
return *(res.second); }, py::return_value_policy::reference_internal)
.def_property_readonly("model_meta", [](const PyInferenceSession* sess) -> const onnxruntime::ModelMetadata& {
auto res = sess->GetSessionHandle()->GetModelMetadata();
OrtPybindThrowIfError(res.first);
return *(res.second);
},
py::return_value_policy::reference_internal)
return *(res.second); }, py::return_value_policy::reference_internal)
.def("run_with_iobinding", [](PyInferenceSession* sess, SessionIOBinding& io_binding, RunOptions* run_options = nullptr) -> void {
Status status;
// release GIL to allow multiple python threads to invoke Run() in parallel.
@ -2161,8 +2132,7 @@ including arg name, arg type (contains both type and shape).)pbdoc")
else
status = sess->GetSessionHandle()->Run(*run_options, *io_binding.Get());
if (!status.IsOK())
throw std::runtime_error("Error in execution: " + status.ErrorMessage());
})
throw std::runtime_error("Error in execution: " + status.ErrorMessage()); })
.def("get_tuning_results", [](PyInferenceSession* sess) -> py::list {
#if !defined(ORT_MINIMAL_BUILD)
auto results = sess->GetSessionHandle()->GetTuningResults();
@ -2177,8 +2147,8 @@ including arg name, arg type (contains both type and shape).)pbdoc")
return ret;
#else
ORT_UNUSED_PARAMETER(sess);
ORT_THROW("TunableOp and get_tuning_results are not supported in this build.");
ORT_UNUSED_PARAMETER(sess);
ORT_THROW("TunableOp and get_tuning_results are not supported in this build.");
#endif
})
.def("set_tuning_results", [](PyInferenceSession* sess, py::list results, bool error_on_invalid) -> void {
@ -2209,10 +2179,10 @@ including arg name, arg type (contains both type and shape).)pbdoc")
throw std::runtime_error("Error in execution: " + status.ErrorMessage());
}
#else
ORT_UNUSED_PARAMETER(sess);
ORT_UNUSED_PARAMETER(results);
ORT_UNUSED_PARAMETER(error_on_invalid);
ORT_THROW("TunableOp and set_tuning_results are not supported in this build.");
ORT_UNUSED_PARAMETER(sess);
ORT_UNUSED_PARAMETER(results);
ORT_UNUSED_PARAMETER(error_on_invalid);
ORT_THROW("TunableOp and set_tuning_results are not supported in this build.");
#endif
});

Просмотреть файл

@ -24,8 +24,7 @@ def check_distro_info():
if __my_distro_ver__ not in ["10", "11"]:
warnings.warn(
"Unsupported Windows version (%s). ONNX Runtime supports Windows 10 and above, only."
% __my_distro_ver__
f"Unsupported Windows version ({__my_distro_ver__}). ONNX Runtime supports Windows 10 and above, only."
)
elif __my_system__ == "linux":
"""Although the 'platform' python module for getting Distro information works well on standard OS images
@ -54,11 +53,11 @@ def check_distro_info():
if int(__my_distro_ver__.split(".")[0]) < 11:
warnings.warn(
"Unsupported macOS version (%s). ONNX Runtime supports macOS 11.0 or later." % (__my_distro_ver__)
f"Unsupported macOS version ({__my_distro_ver__}). ONNX Runtime supports macOS 11.0 or later."
)
else:
warnings.warn(
"Unsupported platform (%s). ONNX Runtime supports Linux, macOS and Windows platforms, only." % __my_system__
f"Unsupported platform ({__my_system__}). ONNX Runtime supports Linux, macOS and Windows platforms, only."
)
@ -115,10 +114,10 @@ def validate_build_package_info():
cudart_version = None
def print_build_package_info():
warnings.warn("onnxruntime training package info: package_name: %s" % package_name)
warnings.warn("onnxruntime training package info: __version__: %s" % version)
warnings.warn("onnxruntime training package info: cuda_version: %s" % cuda_version)
warnings.warn("onnxruntime build info: cudart_version: %s" % cudart_version)
warnings.warn(f"onnxruntime training package info: package_name: {package_name}")
warnings.warn(f"onnxruntime training package info: __version__: {version}")
warnings.warn(f"onnxruntime training package info: cuda_version: {cuda_version}")
warnings.warn(f"onnxruntime build info: cudart_version: {cudart_version}")
# collection cuda library info from current environment.
from onnxruntime.capi.onnxruntime_collect_build_info import find_cudart_versions
@ -127,7 +126,7 @@ def validate_build_package_info():
if cudart_version and local_cudart_versions and cudart_version not in local_cudart_versions:
print_build_package_info()
warnings.warn("WARNING: failed to find cudart version that matches onnxruntime build info")
warnings.warn("WARNING: found cudart versions: %s" % local_cudart_versions)
warnings.warn(f"WARNING: found cudart versions: {local_cudart_versions}")
else:
# TODO: rcom
pass
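
Note: the hunk above shows the pattern ruff applies throughout this commit, printf-style "%" interpolation rewritten as f-strings (pyupgrade-family rules). A minimal sketch with hypothetical values, not code from the file:

    import warnings

    distro_ver = "8.1"  # hypothetical value
    # before: eager %-interpolation
    warnings.warn("Unsupported Windows version (%s). ONNX Runtime supports Windows 10 and above, only." % distro_ver)
    # after: f-string producing the same message
    warnings.warn(f"Unsupported Windows version ({distro_ver}). ONNX Runtime supports Windows 10 and above, only.")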

Просмотреть файл

@ -22,7 +22,7 @@ _registered_ops: typing.AbstractSet[str] = set()
def _reg(symbolic_fn: typing.Callable):
name = "::%s" % symbolic_fn.__name__
name = f"::{symbolic_fn.__name__}"
torch.onnx.register_custom_op_symbolic(name, symbolic_fn, _OPSET_VERSION)
_registered_ops.add(name)

Просмотреть файл

@ -1076,7 +1076,7 @@ class HistogramCollector(CalibrationDataCollector):
for i in range(num_half_quantized_bin, zero_bin_index + 1, 1):
start_index = zero_bin_index - i
end_index = zero_bin_index + i + 1 if (zero_bin_index + i + 1) <= num_bins else num_bins
end_index = min(zero_bin_index + i + 1, num_bins)
thresholds[i - num_half_quantized_bin] = (hist_edges[start_index], hist_edges[end_index])
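
Note: the conditional expression and the min() call above are equivalent for these integer indices; min() just states the clamping intent directly. A quick check with made-up values:

    zero_bin_index, i, num_bins = 128, 4, 256  # hypothetical values
    old_end = zero_bin_index + i + 1 if (zero_bin_index + i + 1) <= num_bins else num_bins
    new_end = min(zero_bin_index + i + 1, num_bins)
    assert old_end == new_end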

Просмотреть файл

@ -24,7 +24,7 @@ def get_attribute(node, attr_name, default_value=None):
def get_dim_from_proto(dim):
return getattr(dim, dim.WhichOneof("value")) if type(dim.WhichOneof("value")) is str else None # noqa: E721
return getattr(dim, dim.WhichOneof("value")) if type(dim.WhichOneof("value")) is str else None
def is_sequence(type_proto):
@ -92,19 +92,19 @@ def get_opset(mp, domain=None):
def as_scalar(x):
if type(x) == list: # noqa: E721
if type(x) is list:
assert len(x) == 1
return x[0]
elif type(x) == np.ndarray:
elif type(x) is np.ndarray:
return x.item()
else:
return x
def as_list(x, keep_none):
if type(x) == list: # noqa: E721
if type(x) is list:
return x
elif type(x) == np.ndarray:
elif type(x) is np.ndarray:
return list(x)
elif keep_none and x is None:
return None
@ -113,7 +113,7 @@ def as_list(x, keep_none):
def sympy_reduce_product(x):
if type(x) == list: # noqa: E721
if type(x) is list:
value = sympy.Integer(1)
for v in x:
value = value * v
@ -258,7 +258,7 @@ class SymbolicShapeInference:
self.prefix_ = prefix
def _add_suggested_merge(self, symbols, apply=False):
assert all([(type(s) == str and s in self.symbolic_dims_) or is_literal(s) for s in symbols]) # noqa: E721
assert all([(type(s) is str and s in self.symbolic_dims_) or is_literal(s) for s in symbols])
symbols = set(symbols)
for k, v in self.suggested_merge_.items():
if k in symbols:
@ -278,7 +278,7 @@ class SymbolicShapeInference:
break
if map_to is None:
for s in symbols:
if type(self.symbolic_dims_[s]) == sympy.Symbol:
if type(self.symbolic_dims_[s]) is sympy.Symbol:
map_to = s
break
# when nothing to map to, use the shorter one
@ -328,7 +328,7 @@ class SymbolicShapeInference:
)
def _merge_symbols(self, dims):
if not all([type(d) == str for d in dims]): # noqa: E721
if not all([type(d) is str for d in dims]):
if self.auto_merge_:
unique_dims = list(set(dims))
is_int = [is_literal(d) for d in unique_dims]
@ -408,7 +408,7 @@ class SymbolicShapeInference:
def _get_sympy_shape(self, node, idx):
sympy_shape = []
for d in self._get_shape(node, idx):
if type(d) == str: # noqa: E721
if type(d) is str:
sympy_shape.append(
self.symbolic_dims_[d]
if d in self.symbolic_dims_
@ -590,7 +590,7 @@ class SymbolicShapeInference:
# for new symbolic dims from subgraph output, add to main graph symbolic dims
subgraph_shapes = [get_shape_from_value_info(o) for o in symbolic_shape_inference.out_mp_.graph.output]
subgraph_new_symbolic_dims = {
d for s in subgraph_shapes if s for d in s if type(d) == str and d not in self.symbolic_dims_ # noqa: E721
d for s in subgraph_shapes if s for d in s if type(d) is str and d not in self.symbolic_dims_
}
new_dims = {}
for d in subgraph_new_symbolic_dims:
@ -610,7 +610,7 @@ class SymbolicShapeInference:
if all([v is not None for v in values]):
# some shape compute is in floating point, cast to int for sympy
for i, v in enumerate(values):
if type(v) != np.ndarray:
if type(v) is not np.ndarray:
continue
if len(v.shape) > 1:
new_v = None # ignore value for rank > 1
@ -924,7 +924,7 @@ class SymbolicShapeInference:
if all([d == dims[0] for d in dims]):
continue
merged = self._merge_symbols(dims)
if type(merged) == str: # noqa: E721
if type(merged) is str:
sympy_shape[d] = self.symbolic_dims_[merged] if merged else None
else:
sympy_shape[d] = merged
@ -1060,7 +1060,7 @@ class SymbolicShapeInference:
dim = shape[-i]
if letter not in letter_to_dim:
letter_to_dim[letter] = dim
elif type(dim) != sympy.Symbol:
elif type(dim) is not sympy.Symbol:
letter_to_dim[letter] = dim
num_operands = num_operands + 1
@ -1127,8 +1127,8 @@ class SymbolicShapeInference:
idx = self._try_get_value(node, 1)
if idx is not None:
data = self.sympy_data_[node.input[0]]
if type(data) == list: # noqa: E721
if type(idx) == np.ndarray and len(idx.shape) == 1:
if type(data) is list:
if type(idx) is np.ndarray and len(idx.shape) == 1:
self.sympy_data_[node.output[0]] = [data[int(i)] for i in idx]
else:
self.sympy_data_[node.output[0]] = data[int(idx)]
@ -1530,7 +1530,7 @@ class SymbolicShapeInference:
new_shape = input_shape[:2]
output_size = self._try_get_value(node, 1)
if output_size is not None:
new_shape += [dim_size.item() if type(dim_size) == np.int64 else dim_size for dim_size in output_size]
new_shape += [dim_size.item() if type(dim_size) is np.int64 else dim_size for dim_size in output_size]
else:
rank = len(input_shape)
new_shape += [str(self._new_symbolic_dim_from_output(node, 0, i)) for i in range(2, rank)]
@ -1645,7 +1645,7 @@ class SymbolicShapeInference:
deferred_dim_idx = -1
non_deferred_size = 1
for i, d in enumerate(shape_value):
if type(d) == sympy.Symbol:
if type(d) is sympy.Symbol:
new_sympy_shape.append(d)
elif d == 0:
new_sympy_shape.append(input_sympy_shape[i])
@ -1940,7 +1940,7 @@ class SymbolicShapeInference:
# handle sympy_data if needed, for slice in shape computation
if (
node.input[0] in self.sympy_data_
and [0] == axes
and axes == [0]
and starts is not None
and len(starts) == 1
and ends is not None
@ -1949,8 +1949,8 @@ class SymbolicShapeInference:
and len(steps) == 1
):
input_sympy_data = self.sympy_data_[node.input[0]]
if type(input_sympy_data) == list or ( # noqa: E721
type(input_sympy_data) == np.array and len(input_sympy_data.shape) == 1
if type(input_sympy_data) is list or (
type(input_sympy_data) is np.array and len(input_sympy_data.shape) == 1
):
self.sympy_data_[node.output[0]] = input_sympy_data[starts[0] : ends[0] : steps[0]]
@ -2616,7 +2616,7 @@ class SymbolicShapeInference:
# some models use None for symbolic dim in input, replace it with a string
input_dims[i_dim].dim_param = str(self._new_symbolic_dim(i.name, i_dim))
self.input_symbols_.update([d for d in input_shape if type(d) == str]) # noqa: E721
self.input_symbols_.update([d for d in input_shape if type(d) is str])
for s in self.input_symbols_:
if s in self.suggested_merge_:
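
Note: the "# noqa: E721" suppressions above become unnecessary because the comparisons now use identity ("is") rather than equality ("==") on type objects, which is what pycodestyle's E721 asks for. A standalone illustration, not code from the file:

    x = [1, 2, 3]
    print(type(x) == list)      # True, but flagged by E721
    print(type(x) is list)      # True, preferred exact-type check
    print(isinstance(x, list))  # True, and also accepts subclasses of list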

Просмотреть файл

@ -925,8 +925,8 @@ def find_model_path(path):
logger.info(target_model_path)
if len(target_model_path) > 1:
logger.error("We expect to find only one model in " + path) # noqa: G003
raise
logger.error("We expect to find only one model in %s", path)
raise RuntimeError
return target_model_path[0]
@ -1007,7 +1007,7 @@ def parse_models_info_from_file(root_dir, path, models):
models[row["model_name"]] = {}
else:
logger.error("Model name must be provided in models_info.json")
raise
raise RuntimeError
model = models[row["model_name"]]
@ -1018,19 +1018,19 @@ def parse_models_info_from_file(root_dir, path, models):
model["working_directory"] = os.path.join(root_working_directory, row["working_directory"])
else:
logger.error("Model path must be provided in models_info.json")
raise
raise RuntimeError
if "model_path" in row:
model["model_path"] = row["model_path"]
else:
logger.error("Model path must be provided in models_info.json")
raise
raise RuntimeError
if "test_data_path" in row:
model["test_data_path"] = row["test_data_path"]
else:
logger.error("Test data path must be provided in models_info.json")
raise
raise RuntimeError
if "model_path_fp16" in row:
model["model_path_fp16"] = row["model_path_fp16"]
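
Note: a bare "raise" is only valid while an exception is being handled; at points like the ones above it would itself fail with "RuntimeError: No active exception to re-raise", so raising RuntimeError explicitly states the intent. A minimal sketch with hypothetical function names:

    def fail_old():
        raise  # no active exception here, so calling this raises RuntimeError anyway

    def fail_new():
        raise RuntimeError("Model path must be provided in models_info.json")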

Просмотреть файл

@ -234,7 +234,7 @@ def calculate_trt_op_percentage(trt_op_map, cuda_op_map):
if total_ops == 0:
print("Error ...")
raise
raise RuntimeError
if len(trt_op_map) == 0:
total_cuda_and_cpu_ops = total_ops

Просмотреть файл

@ -71,7 +71,7 @@ def write_json(models):
def main():
links = []
with open("links.txt") as fh:
links = [link.rstrip() for link in fh.readlines()]
links = [link.rstrip() for link in fh]
model_list = []
for link in links:
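
Note: iterating the file object directly, as in the new line above, yields the same lines as readlines() without building the intermediate list first. A self-contained sketch; the file name is illustrative:

    with open("links.txt") as fh:
        links = [line.rstrip() for line in fh]  # same result as iterating fh.readlines()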

Просмотреть файл

@ -802,7 +802,7 @@ def main():
try:
os.mkdir(args.cache_dir)
except OSError:
logger.error("Creation of the directory %s failed" % args.cache_dir) # noqa: G002
logger.error("Creation of the directory %s failed", args.cache_dir)
enable_torch = "torch" in args.engines
enable_torch2 = "torch2" in args.engines
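
Note: the removed line needed "# noqa: G002" because the message was formatted eagerly with "%"; passing the argument to the logger instead defers formatting until the record is emitted, which is what the flake8-logging-format rules expect. Sketch with a hypothetical directory name:

    import logging

    logger = logging.getLogger(__name__)
    cache_dir = "./cache_dir"  # hypothetical value
    logger.error("Creation of the directory %s failed" % cache_dir)  # eager formatting (G002)
    logger.error("Creation of the directory %s failed", cache_dir)   # deferred formatting, preferred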

Просмотреть файл

@ -168,11 +168,11 @@ def output_test_data(directory: str, inputs: Dict[str, np.ndarray]):
try:
os.mkdir(directory)
except OSError:
print("Creation of the directory %s failed" % directory)
print(f"Creation of the directory {directory} failed")
else:
print("Successfully created the directory %s " % directory)
print(f"Successfully created the directory {directory} ")
else:
print("Warning: directory %s existed. Files will be overwritten." % directory)
print(f"Warning: directory {directory} existed. Files will be overwritten.")
for index, (name, data) in enumerate(inputs.items()):
tensor = numpy_helper.from_array(data, name)

Просмотреть файл

@ -672,7 +672,7 @@ class FusionAttention(Fusion):
q_matmul, k_matmul, v_matmul, q_add, k_add, v_add, num_heads
)
mha_inputs.extend([q_slice.output[0], k_slice.output[0], v_slice.output[0]])
elif type(k_matmul) == NodeProto and type(v_matmul) == NodeProto:
elif type(k_matmul) is NodeProto and type(v_matmul) is NodeProto:
if self.disable_multi_head_attention_bias:
mha_inputs.extend([q_add.output[0], k_matmul.output[0], v_add.output[0]])
else:

Просмотреть файл

@ -159,7 +159,7 @@ class FusionUtils:
tensor (TensorProto): transposed tensor
"""
if not isinstance(tensor, onnx_proto.TensorProto):
raise ValueError("Expected input type is an ONNX TensorProto but got %s" % type(tensor))
raise ValueError(f"Expected input type is an ONNX TensorProto but got {type(tensor)}")
if len(tensor.dims) != 2 or tensor.data_type != onnx_proto.TensorProto.INT8:
raise ValueError("Only INT8 2-D tensors can be transposed")

Просмотреть файл

@ -205,5 +205,5 @@ def export_encoder(args):
no_repeat_ngram_size=no_repeat_ngram_size,
)
time_cost = time.time() - start_time
print("--- %s seconds ---" % (time_cost))
print(f"--- {time_cost} seconds ---")
print(tokenizer.decode(pred_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False))

Просмотреть файл

@ -266,5 +266,5 @@ def export_decoder(args):
use_cache=True,
)
time_cost = time.time() - start_time
print("--- %s seconds ---" % (time_cost))
print(f"--- {time_cost} seconds ---")
print(tokenizer.decode(pred_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False))

Просмотреть файл

@ -49,7 +49,7 @@ def run_inference(args):
no_repeat_ngram_size=no_repeat_ngram_size,
)
time_cost = time.time() - start_time
print("--- %s seconds ---" % (time_cost))
print(f"--- {time_cost} seconds ---")
for j in range(batch_num):
for i in range(beam):
print(
@ -81,7 +81,7 @@ def run_inference(args):
start_time = time.time()
out = sess.run(None, ort_inputs)
time_cost = time.time() - start_time
print("--- %s seconds ---" % (time_cost))
print(f"--- {time_cost} seconds ---")
for j in range(batch_num):
for i in range(beam):
print(

Просмотреть файл

@ -117,7 +117,7 @@ class EngineBuilder:
model_name = model_name + "_" + "_".join(self.pipeline_info.controlnet)
if hash_source:
model_name += "_" + hashlib.md5("\t".join(hash_source).encode("utf-8")).digest().hex()[:8]
model_name += "_" + hashlib.md5("\t".join(hash_source).encode("utf-8")).hexdigest()[:8]
# TODO: When we support original VAE, we shall save custom VAE to another directory.
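
Note: digest().hex() and hexdigest() return the same string, so the change above only drops the intermediate bytes object. A quick check with a hypothetical input:

    import hashlib

    h = hashlib.md5("\t".join(["a", "b"]).encode("utf-8"))
    assert h.digest().hex() == h.hexdigest()
    print(h.hexdigest()[:8])  # first 8 hex characters, as used for the model name suffix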

Просмотреть файл

@ -459,9 +459,9 @@ class StableDiffusionPipeline:
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance * (noise_pred_text - noise_pred_uncond)
if type(self.scheduler) == UniPCMultistepScheduler:
if type(self.scheduler) is UniPCMultistepScheduler:
latents = self.scheduler.step(noise_pred, timestep, latents, return_dict=False)[0]
elif type(self.scheduler) == LCMScheduler:
elif type(self.scheduler) is LCMScheduler:
latents = self.scheduler.step(noise_pred, timestep, latents, generator=self.generator)[0]
else:
latents = self.scheduler.step(noise_pred, latents, step_offset + step_index, timestep)

Просмотреть файл

@ -1883,7 +1883,7 @@ TEST_F(PlannerTest, ParaPlanCreation) {
ORT_ENFORCE(main_graph_ort_value_index_map.GetName(per_value_plan.reused_buffer, reused).IsOK());
reuse_pairs.erase(reused);
} // if
} // for
} // for
ASSERT_TRUE(reuse_pairs.empty());
}

Просмотреть файл

@ -14,7 +14,7 @@ class OrtValueArray {
public:
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OrtValueArray);
// n must be non-negative
OrtValueArray(int n) : values(static_cast<size_t>(n), nullptr){};
OrtValueArray(int n) : values(static_cast<size_t>(n), nullptr) {};
~OrtValueArray() {
for (OrtValue* v : values) {
if (v != nullptr) Ort::GetApi().ReleaseValue(v);

Просмотреть файл

@ -27,7 +27,7 @@ class Allocs : public IExecutionProvider {
std::shared_ptr<CPUAllocator> alloc = std::make_shared<CPUAllocator>();
public:
Allocs() : IExecutionProvider("fake"){};
Allocs() : IExecutionProvider("fake") {};
AllocatorPtr GetAllocator(OrtMemType) const {
return alloc;
}

Просмотреть файл

@ -401,7 +401,7 @@ void QDQTransformerGemmTests(bool has_output_q, bool has_bias, bool beta_not_one
auto check_binary_op_graph = [&](InferenceSessionWrapper& session) {
auto op_to_count = CountOpsInGraph(session.GetGraph());
const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq);
if ((!has_output_q || std::is_same_v<Input1Type, OutputType>)&&(!has_bias || (std::is_same_v<BiasType, int32_t> && !beta_not_one)) &&
if ((!has_output_q || std::is_same_v<Input1Type, OutputType>) && (!has_bias || (std::is_same_v<BiasType, int32_t> && !beta_not_one)) &&
(std::is_same_v<Input1Type, uint8_t> || std::is_same_v<Input2Type, int8_t>)) {
EXPECT_EQ(op_to_count["com.microsoft.QGemm"], 1);
EXPECT_EQ(op_to_count["Gemm"], 0);

Просмотреть файл

@ -786,7 +786,7 @@ void QDQTransformerGemmTests(bool has_output_q, bool has_bias, bool beta_not_one
auto check_binary_op_graph = [&](InferenceSessionWrapper& session) {
auto op_to_count = CountOpsInGraph(session.GetGraph());
const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq);
if ((!has_output_q || std::is_same_v<Input1Type, OutputType>)&&(!has_bias || (std::is_same_v<BiasType, int32_t> && !beta_not_one)) &&
if ((!has_output_q || std::is_same_v<Input1Type, OutputType>) && (!has_bias || (std::is_same_v<BiasType, int32_t> && !beta_not_one)) &&
(std::is_same_v<Input1Type, uint8_t> || std::is_same_v<Input2Type, int8_t>)) {
EXPECT_EQ(op_to_count["com.microsoft.QGemm"], 1);
EXPECT_EQ(op_to_count["Gemm"], 0);

Просмотреть файл

@ -40,13 +40,13 @@ def TestReduction(op, data, axes, keepdims): # noqa: N802
def PrintResult(op, axes, keepdims, res): # noqa: N802
print(' {"%s",' % op)
print(f' {{"{op}",')
print("OpAttributesResult(")
print(" // ReductionAttribute")
print(" {")
print(" // axes_")
print("{", end="")
print(*axes, sep=", ", end="") if axes else print("")
print(*axes, sep=", ", end="") if axes else print()
print("},")
print(" // keep_dims_")
print(keepdims, ",")
@ -60,7 +60,7 @@ def PrintResult(op, axes, keepdims, res): # noqa: N802
print(" // expected values")
print("{", end="")
for i in range(res.size):
print("%5.6ff," % res.item(i))
print(f"{res.item(i):5.6f}f,")
print("})},")
@ -130,7 +130,7 @@ if __name__ == "__main__":
print("{")
for i in range(input_data.size):
print(
"%5.6ff," % input_data.item(i),
f"{input_data.item(i):5.6f}f,",
)
print("},")
print("// input_dims")

Просмотреть файл

@ -66,13 +66,13 @@ static void RunAllOpsetAllDomainPadTests(
bool pads_is_initializer;
bool value_is_initializer;
};
const std::vector<TestParams> all_test_params {
{false, false},
const std::vector<TestParams> all_test_params{
{false, false},
#if (defined(USE_NNAPI) && defined(__ANDROID__)) || (defined(USE_COREML) && defined(__APPLE__))
// only enable when building NNAPI EP on Android or building CoreML EP for Apple environment
// test runs out of memory in QEMU aarch64 environment, so don't enable otherwise
// TODO try to enable when we move from QEMU to arm64 CI machines
{true, true},
// only enable when building NNAPI EP on Android or building CoreML EP for Apple environment
// test runs out of memory in QEMU aarch64 environment, so don't enable otherwise
// TODO try to enable when we move from QEMU to arm64 CI machines
{true, true},
#endif
};
for (const auto& test_params : all_test_params) {

Просмотреть файл

@ -835,14 +835,14 @@ TEST_F(QnnHTPBackendTests, HTPGraphFinalizationOptimizationModes) {
// Test that models run with various SoC model values
TEST_F(QnnHTPBackendTests, HTPSocModels) {
constexpr std::array<const char*, 3> soc_models = { "", // No explicit SoC model specified
"0", // "Unknown"
constexpr std::array<const char*, 3> soc_models = {"", // No explicit SoC model specified
"0", // "Unknown"
#if defined(_M_ARM64)
"37" }; // SC8280X
"37"}; // SC8280X
#elif defined(__linux__)
"30" }; // SM8350
"30"}; // SM8350
#else
"" };
""};
#endif
for (auto soc_model : soc_models) {

Просмотреть файл

@ -76,7 +76,7 @@ def apply_filters(filters, category):
opset_version = f"opset{onnx.defs.onnx_opset_version()}"
validated_filters = []
for f in filters[category]:
if type(f) is list: # noqa: E721
if type(f) is list:
opset_regex = f[0]
filter_regex = f[1]
opset_match = re.match(opset_regex, opset_version)

Просмотреть файл

@ -486,9 +486,6 @@ class ApplyRotaryEmbKV(torch.autograd.Function):
return dkv, None, None, None, None
apply_rotary_emb_kv_ = ApplyRotaryEmbKV.apply
def apply_rotary_emb_kv_(
kv,
cos,

Просмотреть файл

@ -343,9 +343,9 @@ def generate_test_data(
try:
os.mkdir(path)
except OSError:
print("Creation of the directory %s failed" % path)
print(f"Creation of the directory {path} failed")
else:
print("Successfully created the directory %s " % path)
print(f"Successfully created the directory {path} ")
if input_tensor_only:
return

Просмотреть файл

@ -452,9 +452,9 @@ def generate_test_data(
try:
os.mkdir(path)
except OSError:
print("Creation of the directory %s failed" % path)
print(f"Creation of the directory {path} failed")
else:
print("Successfully created the directory %s " % path)
print(f"Successfully created the directory {path} ")
sess_options = onnxruntime.SessionOptions()
sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL

Просмотреть файл

@ -381,9 +381,9 @@ struct StandaloneCustomOp : Ort::CustomOpBase<StandaloneCustomOp, StandaloneCust
/////////////// structures to test multi-kernls-single-schema ///////////////
struct MulTopKernelFloat {
MulTopKernelFloat(const OrtKernelInfo*){};
MulTopKernelFloat(const OrtKernelInfo*) {};
~MulTopKernelFloat() = default;
void Compute(OrtKernelContext*){};
void Compute(OrtKernelContext*) {};
};
struct MulTopOpFloat : Ort::CustomOpBase<MulTopOpFloat, MulTopKernelFloat> {
@ -397,9 +397,9 @@ struct MulTopOpFloat : Ort::CustomOpBase<MulTopOpFloat, MulTopKernelFloat> {
};
struct MulTopKernelInt32 {
MulTopKernelInt32(const OrtKernelInfo*){};
MulTopKernelInt32(const OrtKernelInfo*) {};
~MulTopKernelInt32() = default;
void Compute(OrtKernelContext*){};
void Compute(OrtKernelContext*) {};
};
struct MulTopOpInt32 : Ort::CustomOpBase<MulTopOpInt32, MulTopKernelInt32> {
@ -413,9 +413,9 @@ struct MulTopOpInt32 : Ort::CustomOpBase<MulTopOpInt32, MulTopKernelInt32> {
};
struct MulTopKernelDouble {
MulTopKernelDouble(const OrtKernelInfo*){};
MulTopKernelDouble(const OrtKernelInfo*) {};
~MulTopKernelDouble() = default;
void Compute(OrtKernelContext*){};
void Compute(OrtKernelContext*) {};
};
// MulTopOpDouble and MulTopOpFloat has input count mismatch
@ -430,9 +430,9 @@ struct MulTopOpDouble : Ort::CustomOpBase<MulTopOpDouble, MulTopKernelDouble> {
};
struct MulTopKernelInt16 {
MulTopKernelInt16(const OrtKernelInfo*){};
MulTopKernelInt16(const OrtKernelInfo*) {};
~MulTopKernelInt16() = default;
void Compute(OrtKernelContext*){};
void Compute(OrtKernelContext*) {};
};
// MulTopOpInt16 and MulTopOpFloat has output count mismatch
@ -448,9 +448,9 @@ struct MulTopOpInt16 : Ort::CustomOpBase<MulTopOpInt16, MulTopKernelInt16> {
// MulTopKernelFloat16 and MulTopOpFloat has input characteristic mismatch
struct MulTopKernelFloat16 {
MulTopKernelFloat16(const OrtKernelInfo*){};
MulTopKernelFloat16(const OrtKernelInfo*) {};
~MulTopKernelFloat16() = default;
void Compute(OrtKernelContext*){};
void Compute(OrtKernelContext*) {};
};
struct MulTopOpFloat16 : Ort::CustomOpBase<MulTopOpFloat16, MulTopKernelFloat16> {

4 onnxruntime/test/testdata/CNTK/gen.py (vendored)
Просмотреть файл

@ -48,10 +48,10 @@ def Save(dir, func, feed, outputs): # noqa: N802
if actual_input_name.startswith(cntk_name):
cntk_to_actual_names[cntk_name] = actual_input_name
if type(feed) is not dict: # noqa: E721
if type(feed) is not dict:
feed = {func.arguments[0]: feed}
if type(outputs) is not dict: # noqa: E721
if type(outputs) is not dict:
outputs = {func.outputs[0]: outputs}
test_data_dir = os.path.join(dir, data_dir)

Просмотреть файл

@ -35,8 +35,7 @@ void AdasumMPI::InitializeVHDDReductionComms(WorkerGroupType worker_group) {
int nearest_power_2 = 1;
int log_size;
for (nearest_power_2 = 1, log_size = 0; (nearest_power_2 << 1) <= size;
nearest_power_2 = (nearest_power_2 << 1), log_size++)
;
nearest_power_2 = (nearest_power_2 << 1), log_size++);
int shift_val;
int level;
reduction_comms_ = std::make_unique<std::vector<MPI_Comm>>();

Просмотреть файл

@ -247,7 +247,7 @@ struct PipelineWorkerState {
struct PipelineWorkerPool {
PipelineWorkerPool() = default;
PipelineWorkerPool(size_t num_workers) : workers(num_workers), worker_states(num_workers){};
PipelineWorkerPool(size_t num_workers) : workers(num_workers), worker_states(num_workers) {};
void Join(size_t worker_id);
void JoinAll();

Просмотреть файл

@ -102,7 +102,7 @@ class OrtTorchFunctionPool final {
void UnRegisterFunctions();
private:
OrtTorchFunctionPool(){};
OrtTorchFunctionPool() {};
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OrtTorchFunctionPool);
void UnRegisterGlobalFunctions();

Просмотреть файл

@ -13,7 +13,7 @@
// See https://docs.python.org/3/c-api/init.html#non-python-created-threads for details.
class GilGuard {
public:
GilGuard() : state_(PyGILState_Ensure()){};
GilGuard() : state_(PyGILState_Ensure()) {};
~GilGuard() { PyGILState_Release(state_); };
private:

Просмотреть файл

@ -95,8 +95,8 @@ class TorchProxy {
std::vector<int64_t>& bw_output_to_input_alias_map);
private:
TorchProxy(){};
~TorchProxy(){};
TorchProxy() {};
~TorchProxy() {};
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TorchProxy);

View file

@ -33,7 +33,7 @@ struct OpDef {
OpDef(const std::string& type, const std::string& domain = kOnnxDomain, const int opset_version = 9)
: type(type),
domain(domain),
opset_version(opset_version){};
opset_version(opset_version) {};
std::string type;
std::string domain;
@ -52,7 +52,7 @@ struct NodeDef {
output_args(output_args),
attributes(attributes),
name(name),
priority(priority){};
priority(priority) {};
NodeDef(const std::string& op_type,
const std::vector<ArgDef>& input_args,
@ -64,7 +64,7 @@ struct NodeDef {
output_args(output_args),
attributes(attributes),
name(name),
priority(priority){};
priority(priority) {};
NodeDef(const OpDef& op_def,
const std::vector<ArgDef>& input_args,

View file

@ -21,7 +21,7 @@ struct LossFunctionInfo {
struct ILossFunction {
virtual GraphAugmenter::GraphDefs operator()(const Graph& graph, const LossFunctionInfo& loss_func_info) = 0;
virtual ~ILossFunction(){};
virtual ~ILossFunction() {};
};
TypeProto* GetSparseTypeProto(const NodeArg* input_arg,

View file

@ -887,7 +887,7 @@ struct PipelineStageNodeGroup {
// a particular initializer can have more than one consumer node, so we need a vector to store those
// nodes.
std::vector<Node*> nodes;
PipelineStageNodeGroup(const size_t stage, std::vector<Node*>& node_group) : stage_id(stage), nodes(std::move(node_group)){};
PipelineStageNodeGroup(const size_t stage, std::vector<Node*>& node_group) : stage_id(stage), nodes(std::move(node_group)) {};
};
// This function passes through the given initializer across stages specified in node_groups[i].stage_id.

View file

@ -21,7 +21,7 @@ struct OpInfo {
const size_t output_count = 1) : op_type(op_type),
supported_versions(supported_versions),
domain(domain),
output_count(output_count){};
output_count(output_count) {};
std::string op_type;
std::initializer_list<OperatorSetVersion> supported_versions;
@ -53,7 +53,7 @@ const OpInfo where_info = OpInfo("Where", opset_v9);
struct NodeInfo {
NodeInfo(const std::vector<OpInfo>& op_infos,
const bool required = true) : op_infos(op_infos),
required(required){};
required(required) {};
std::vector<OpInfo> op_infos;
bool required;

View file

@ -46,7 +46,7 @@ class TrainingSession : public InferenceSession {
TrainingSession(const SessionOptions& session_options, const Environment& env)
: InferenceSession(session_options, env), is_mixed_precision_enabled_(false) {}
virtual ~TrainingSession(){};
virtual ~TrainingSession() {};
/**
* The training configuration options.
@ -215,11 +215,11 @@ class TrainingSession : public InferenceSession {
// If the edge is unique, i.e. it only has one consumer node, or all the edges
// with the same node_arg_name need to be cut, specifying the node_arg_name
// suffices.
CutEdge(std::string edge) : node_arg_name(edge){};
CutEdge(std::string edge) : node_arg_name(edge) {};
// If the edges with the same node_arg_name belong to different cuts, i.e. some of the
// consumer nodes belong to one partition and some belong to another, specify
// the consumer node names on which you want to perform the cut.
CutEdge(std::string edge, std::vector<std::string> nodes) : node_arg_name(edge), consumer_nodes(nodes){};
CutEdge(std::string edge, std::vector<std::string> nodes) : node_arg_name(edge), consumer_nodes(nodes) {};
};
// CutInfo is a group of CutEdges that describes a specific cut composed of splitting those edges.
typedef std::vector<CutEdge> CutInfo;
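A minimal sketch of describing one pipeline cut with these types; the node_arg and consumer node names are hypothetical, only CutEdge and CutInfo come from the declarations above:

// One cut between two adjacent pipeline stages, built from two edges.
CutInfo cut = {
    CutEdge("encoder_out"),                            // unique edge: the node_arg name alone suffices
    CutEdge("shared_embedding", {"decoder_matmul_1"})  // shared edge: cut only for this consumer node
};
std::vector<CutInfo> pipeline_cuts = {cut};  // one CutInfo per stage boundary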

View file

@ -60,7 +60,7 @@ class DynamicSettings {
}
private:
DynamicSettings() : onnx_fusion_status_(true){};
DynamicSettings() : onnx_fusion_status_(true) {};
bool onnx_fusion_status_;
};

View file

@ -861,8 +861,7 @@ int main(int argc, char* argv[]) {
OrtParameters ort_params{};
RETURN_IF_FAIL(ParseArguments(argc, argv, params, ort_params));
bool keep_looping = params.debug_break;
while (keep_looping)
;
while (keep_looping);
// Set up the logger. Note: LOGS_DEFAULT must be used only after the logging manager is initialized.
string default_logger_id{"Default"};

View file

@ -86,36 +86,36 @@ int main(int argc, char* argv[]) {
// setup onnxruntime env
std::vector<FreeDimensionOverride> overrides = {};
SessionOptions so = {
ExecutionMode::ORT_SEQUENTIAL, // execution_mode
ExecutionOrder::DEFAULT, // execution_order
false, // enable_profiling
ORT_TSTR(""), // optimized_model_filepath
true, // enable_mem_pattern
true, // enable_mem_reuse
true, // enable_cpu_mem_arena
ORT_TSTR("onnxruntime_profile_"), // profile_file_prefix
"", // session_logid
-1, // session_log_severity_level
0, // session_log_verbosity_level
5, // max_num_graph_transformation_steps
TransformerLevel::Level1, // graph_optimization_level
{}, // intra_op_param
{}, // inter_op_param
overrides, // free_dimension_overrides
true, // use_per_session_threads
true, // thread_pool_allow_spinning
false, // use_deterministic_compute
{}, // session_configurations
{}, // initializers_to_share_map
ExecutionMode::ORT_SEQUENTIAL, // execution_mode
ExecutionOrder::DEFAULT, // execution_order
false, // enable_profiling
ORT_TSTR(""), // optimized_model_filepath
true, // enable_mem_pattern
true, // enable_mem_reuse
true, // enable_cpu_mem_arena
ORT_TSTR("onnxruntime_profile_"), // profile_file_prefix
"", // session_logid
-1, // session_log_severity_level
0, // session_log_verbosity_level
5, // max_num_graph_transformation_steps
TransformerLevel::Level1, // graph_optimization_level
{}, // intra_op_param
{}, // inter_op_param
overrides, // free_dimension_overrides
true, // use_per_session_threads
true, // thread_pool_allow_spinning
false, // use_deterministic_compute
{}, // session_configurations
{}, // initializers_to_share_map
#if !defined(ORT_MINIMAL_BUILD) && !defined(DISABLE_EXTERNAL_INITIALIZERS)
{}, // external_initializers
{}, // external_initializer_files
{}, // external_initializers
{}, // external_initializer_files
#endif
nullptr, // custom_create_thread_fn
nullptr, // custom_thread_creation_options
nullptr, // custom_join_thread_fn
nullptr, // custom_create_thread_fn
nullptr, // custom_thread_creation_options
nullptr, // custom_join_thread_fn
#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS)
{}, // custom_op_libs
{}, // custom_op_libs
#endif
};

View file

@ -98,7 +98,7 @@ class RandomDataSet : public DataSet {
: DataSet(tensor_names),
num_samples_(num_samples),
tensor_shapes_(tensor_shapes),
tensor_types_(tensor_types){};
tensor_types_(tensor_types) {};
virtual ~RandomDataSet() {}
@ -189,7 +189,7 @@ class LossScaler {
min_loss_scale_(min_loss_scale),
max_loss_scale_(max_loss_scale),
loss_scale_(loss_scale),
stable_steps_(0){};
stable_steps_(0) {};
std::string GetLossScaleInputName() const { return loss_scale_input_name_; }

View file

@ -319,7 +319,7 @@ void addObjectMethodsForTraining(py::module& m) {
auto& pool = onnxruntime::language_interop_ops::torch::OrtTorchFunctionPool::GetInstance();
pool.RegisterForwardRunner(function_address);
#else
ORT_UNUSED_PARAMETER(obj);
ORT_UNUSED_PARAMETER(obj);
#endif
});
m.def("register_backward_runner", [](py::object obj) -> void {
@ -328,7 +328,7 @@ void addObjectMethodsForTraining(py::module& m) {
auto& pool = onnxruntime::language_interop_ops::torch::OrtTorchFunctionPool::GetInstance();
pool.RegisterBackwardRunner(function_address);
#else
ORT_UNUSED_PARAMETER(obj);
ORT_UNUSED_PARAMETER(obj);
#endif
});
m.def("register_torch_autograd_function", [](std::string function_full_qual_name, py::object obj) -> void {
@ -336,8 +336,8 @@ void addObjectMethodsForTraining(py::module& m) {
auto& pool = onnxruntime::language_interop_ops::torch::OrtTorchFunctionPool::GetInstance();
pool.RegisterTorchAutogradFunction(function_full_qual_name, obj.ptr());
#else
ORT_UNUSED_PARAMETER(function_full_qual_name);
ORT_UNUSED_PARAMETER(obj);
ORT_UNUSED_PARAMETER(function_full_qual_name);
ORT_UNUSED_PARAMETER(obj);
#endif
});
m.def("register_shape_inference_function", [](std::string function_full_qual_name, py::object obj) -> void {
@ -345,8 +345,8 @@ void addObjectMethodsForTraining(py::module& m) {
auto& pool = onnxruntime::language_interop_ops::torch::OrtTorchFunctionPool::GetInstance();
pool.RegisterShapeInferenceFunction(function_full_qual_name, obj.ptr());
#else
ORT_UNUSED_PARAMETER(function_full_qual_name);
ORT_UNUSED_PARAMETER(obj);
ORT_UNUSED_PARAMETER(function_full_qual_name);
ORT_UNUSED_PARAMETER(obj);
#endif
});
m.def("get_shape_inference_function", [](std::string function_full_qual_name) -> py::object {
@ -368,8 +368,8 @@ void addObjectMethodsForTraining(py::module& m) {
auto& pool = onnxruntime::language_interop_ops::torch::OrtTorchFunctionPool::GetInstance();
pool.RegisterInputAliasFunction(function_full_qual_name, obj.ptr());
#else
ORT_UNUSED_PARAMETER(function_full_qual_name);
ORT_UNUSED_PARAMETER(obj);
ORT_UNUSED_PARAMETER(function_full_qual_name);
ORT_UNUSED_PARAMETER(obj);
#endif
});
m.def("register_miscellaneous_const_input", [](py::object obj) -> void {
@ -377,7 +377,7 @@ void addObjectMethodsForTraining(py::module& m) {
auto& pool = onnxruntime::language_interop_ops::torch::OrtTorchFunctionPool::GetInstance();
pool.RegisterMiscellaneousConstInput(obj.ptr());
#else
ORT_UNUSED_PARAMETER(obj);
ORT_UNUSED_PARAMETER(obj);
#endif
});
m.def("unregister_python_functions", []() -> void {
@ -391,14 +391,14 @@ void addObjectMethodsForTraining(py::module& m) {
#ifdef ENABLE_TRAINING_TORCH_INTEROP
return true;
#else
return false;
return false;
#endif
});
m.def("is_triton_enabled", []() -> bool {
#ifdef ENABLE_TRITON
return true;
#else
return false;
return false;
#endif
});
#ifdef ENABLE_TRITON
@ -1036,7 +1036,7 @@ void addObjectMethodsForTraining(py::module& m) {
#ifdef __linux__
return true;
#else
return false;
return false;
#endif
});
#endif

View file

@ -372,7 +372,7 @@ def _gen_bmm_module(
) -> Tuple[str, ModuleType]:
func_name = gen_unique_name("bmm")
kwargs = _mm_configs(dtype, m, n, k, trans_a, trans_b, alpha, func_name)
batch = batch_a if batch_a >= batch_b else batch_b
batch = max(batch_a, batch_b)
kwargs["stride_aq"] = m * k if batch_a == batch else 0
kwargs["stride_bq"] = k * n if batch_b == batch else 0
kwargs["batch"] = batch

View file

@ -74,7 +74,7 @@ def _ortvalues_to_torch_tensor(
return tuple(C.to_aten_ort_device_tensor(ov) for ov in ortvalues)
if not isinstance(ortvalues, C.OrtValueVector):
raise TypeError("ortvalues must be an instance of OrtValueVector not %r." % type(ortvalues))
raise TypeError(f"ortvalues must be an instance of OrtValueVector not {type(ortvalues)!r}.")
res: List[torch.Tensor] = ortvalues.to_dlpacks(_from_dlpack)
bool_indices = ortvalues.bool_tensor_indices()

View file

@ -58,8 +58,8 @@ class PyNodeSharedPointerPool {
}
private:
PyNodeSharedPointerPool(){};
~PyNodeSharedPointerPool(){};
PyNodeSharedPointerPool() {};
~PyNodeSharedPointerPool() {};
PyNodeSharedPointerPool(const PyNodeSharedPointerPool&) = delete;
PyNodeSharedPointerPool& operator=(const PyNodeSharedPointerPool&) = delete;

View file

@ -159,7 +159,7 @@ struct PipelineStageNodeGroup {
// a particular initializer can have more than one consumer node, so we need a vector to store those
// nodes.
std::vector<Node*> nodes;
PipelineStageNodeGroup(const size_t stage, std::vector<Node*>& node_group) : stage_id(stage), nodes(std::move(node_group)){};
PipelineStageNodeGroup(const size_t stage, std::vector<Node*>& node_group) : stage_id(stage), nodes(std::move(node_group)) {};
};
// This function passes through the given initializer across stages specified in node_groups[i].stage_id.

Some files were not shown because too many files have changed in this diff