Refactor CoreMLExecution to C++ bridge class (#21857)

Refactor the Objective-C++ class `CoreMLExecution` into the existing C++ bridge class `onnxruntime::coreml::Execution`.
This commit is contained in:
Lennart Hannink 2024-09-12 01:05:37 +02:00 committed by GitHub
Parent 0309c5f02f
Commit d8e64bb529
No key found matching this signature
GPG key ID: B5690EEEBB952194
1 changed file with 171 additions and 227 deletions
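For context on the pattern: under ARC, Objective-C++ lets a plain C++ class hold strong Objective-C object pointers directly, so the `NSObject` wrapper can be folded into the bridge class. A minimal sketch of the idea, assuming ARC is enabled (`ExecutionSketch` is a hypothetical name, not code from this commit):

// Illustrative sketch, not part of the diff (assumes #import <CoreML/CoreML.h>
// and #include <string>). Under ARC, Obj-C pointer members of a C++ class are
// __strong by default; the compiler retains and releases them in the
// constructor and destructor.
class ExecutionSketch {
 public:
  explicit ExecutionSketch(const std::string& path)
      : coreml_model_path_([NSString stringWithUTF8String:path.c_str()]) {}
  // No manual dealloc needed: ARC releases both members on destruction.

 private:
  NSString* coreml_model_path_{nil};  // retained by ARC
  MLModel* model_{nil};               // retained by ARC
};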


@@ -30,8 +30,8 @@
// to manually do this
asm(".linker_option \"-framework\", \"CoreML\"");
using namespace onnxruntime;
using namespace onnxruntime::coreml;
namespace onnxruntime {
namespace coreml {
namespace {
/**
@@ -247,213 +247,6 @@ Status CopyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buff
}
} // namespace
NS_ASSUME_NONNULL_BEGIN
// Execution for a CoreML model, it performs
// 1. Compile the model at the given path for execution
// 2. Predict using the given OnnxTensorFeatureProvider input and copy the output data back to ORT
// 3. The compiled model is removed in dealloc or via the cleanup function
@interface CoreMLExecution : NSObject {
NSString* coreml_model_path_;
NSString* compiled_model_path_;
const logging::Logger* logger_;
uint32_t coreml_flags_;
}
- (instancetype)initWithPath:(const std::string&)path
logger:(const logging::Logger&)logger
coreml_flags:(uint32_t)coreml_flags;
- (void)cleanup;
- (void)dealloc;
- (Status)loadModel API_AVAILABLE_COREML3;
- (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
outputs:(const std::unordered_map<std::string, OnnxTensorInfo>&)outputs
getOutputTensorDataFn:(const GetOutputTensorMutableRawDataFn&)get_output_tensor_mutable_raw_data_fn
API_AVAILABLE_COREML3;
@property(nullable) MLModel* model API_AVAILABLE_COREML3;
@end
@implementation CoreMLExecution
- (instancetype)initWithPath:(const std::string&)path
logger:(const logging::Logger&)logger
coreml_flags:(uint32_t)coreml_flags {
if (self = [super init]) {
coreml_model_path_ = util::Utf8StringToNSString(path.c_str());
logger_ = &logger;
coreml_flags_ = coreml_flags;
}
return self;
}
- (void)cleanup {
NSError* error = nil;
if (compiled_model_path_ != nil) {
[[NSFileManager defaultManager] removeItemAtPath:compiled_model_path_ error:&error];
if (error != nil) {
LOGS(*logger_, ERROR) << "Failed cleaning up the compiled model: " << [compiled_model_path_ UTF8String]
<< ", error message: " << [[error localizedDescription] UTF8String];
}
compiled_model_path_ = nil;
}
#if !defined(NDEBUG)
std::string path_override = Env::Default().GetEnvironmentVar(util::kOverrideModelOutputDirectoryEnvVar);
if (!path_override.empty()) {
// don't cleanup
coreml_model_path_ = nil;
}
#endif
if (coreml_model_path_ != nil) {
error = nil;
[[NSFileManager defaultManager] removeItemAtPath:coreml_model_path_ error:&error];
if (error != nil) {
LOGS(*logger_, ERROR) << "Failed cleaning up the coreml model: " << [coreml_model_path_ UTF8String]
<< ", error message: " << [[error localizedDescription] UTF8String];
}
coreml_model_path_ = nil;
}
}
- (void)dealloc {
[self cleanup];
}
- (Status)loadModel {
NSURL* modelUrl = [NSURL URLWithString:coreml_model_path_];
if (modelUrl == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create model URL from path");
}
// TODO: Update this to version with callback handler as the API used here is deprecated.
// https://developer.apple.com/documentation/coreml/mlmodel/3929553-compilemodelaturl
// As we call loadModel during EP Compile there shouldn't be an issue letting the actual compile run in the
// background. We will have to check for completion in `predict` and block until it is done.
NSError* error = nil;
NSURL* compileUrl = [MLModel compileModelAtURL:modelUrl error:&error];
if (error != nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error compiling model: ",
[[error localizedDescription] UTF8String]);
}
compiled_model_path_ = [compileUrl path];
MLModelConfiguration* config = [[MLModelConfiguration alloc] init];
config.computeUnits = (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY)
? MLComputeUnitsCPUOnly
: MLComputeUnitsAll;
_model = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error];
if (error != nil || _model == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create MLModel",
(error != nil) ? MakeString(", error: ", [[error localizedDescription] UTF8String]) : "");
}
return Status::OK();
}
- (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
outputs:(const std::unordered_map<std::string, OnnxTensorInfo>&)outputs
getOutputTensorDataFn:(const GetOutputTensorMutableRawDataFn&)get_output_tensor_mutable_raw_data_fn {
Status status = Status::OK();
ORT_TRY {
if (_model == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Model is not loaded");
}
id<MLFeatureProvider> input_features;
InlinedVector<std::unique_ptr<int32_t[]>> conversion_buffers;
ORT_RETURN_IF_ERROR(CreateInputFeatureProvider(inputs, *logger_, &input_features, conversion_buffers));
MLPredictionOptions* options = [[MLPredictionOptions alloc] init];
NSError* error = nil;
id<MLFeatureProvider> output_features = [_model predictionFromFeatures:input_features
options:options
error:&error];
if (error != nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error executing model: ",
[[error localizedDescription] UTF8String]);
}
for (const auto& [output_name, output_tensor_info] : outputs) {
MLFeatureValue* output_value =
[output_features featureValueForName:util::Utf8StringToNSString(output_name.c_str())];
if (output_value == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "output_features has no value for ", output_name);
}
MLMultiArray* data = [output_value multiArrayValue];
const auto coreml_static_output_shape = [data]() {
InlinedVector<int64_t> result;
result.reserve(data.shape.count);
for (NSNumber* dim in data.shape) {
const auto dim_value = dim.longLongValue;
result.push_back(dim_value);
}
return result;
}();
const auto static_output_shape = GetStaticOutputShape(output_tensor_info.shape, coreml_static_output_shape,
*logger_);
void* output_buffer = get_output_tensor_mutable_raw_data_fn(output_name, output_tensor_info.data_type,
static_output_shape);
if (const size_t num_elements = data.count; num_elements > 0) {
if (const auto shape_size = ShapeSize(static_output_shape);
shape_size < 0 || num_elements != static_cast<size_t>(shape_size)) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"CoreML MLMultiArray count (", num_elements, ") and shape size (", shape_size,
") do not match");
}
// support a non-contiguous array, provided only one dimension is not contiguous
int64_t num_blocks = 0;
int64_t block_size = 0;
int64_t stride = 0;
ORT_RETURN_IF_ERROR(GetMLMultiArrayCopyInfo(data, num_blocks, block_size, stride));
__block Status copy_status;
const auto* tensor_info = &output_tensor_info;
// `getBytesWithHandler` replaces deprecated `.dataPointer` on new versions
if (@available(macOS 12.3, iOS 15.4, *)) {
[data getBytesWithHandler:^(const void* bytes, NSInteger size) {
copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data,
num_blocks, block_size, stride, tensor_info);
}];
} else {
copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data,
num_blocks, block_size, stride, tensor_info);
}
ORT_RETURN_IF_ERROR(copy_status);
}
}
}
ORT_CATCH(const std::exception& e) {
ORT_HANDLE_EXCEPTION([&]() {
status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Exception: ", e.what());
});
}
return status;
}
@end
NS_ASSUME_NONNULL_END
namespace onnxruntime {
namespace coreml {
Status GetMLMultiArrayCopyInfo(const MLMultiArray* _Nonnull array,
int64_t& num_blocks, int64_t& block_size, int64_t& stride) {
const auto* shape = array.shape;
@@ -498,11 +291,14 @@ Status GetMLMultiArrayCopyInfo(const MLMultiArray* _Nonnull array,
}
// Internal Execution class
// This class will bridge Model (c++) with CoreMLExecution (objective c++)
// This class is part of the model class and handles the calls into CoreML. Specifically, it performs
// 1. Compile the model at the given path for execution
// 2. Predict using the given OnnxTensorFeatureProvider input and copy the output data back to ORT
// 3. The compiled model is removed in dealloc or via the cleanup function
class Execution {
public:
Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags);
~Execution() {};
~Execution();
Status LoadModel();
Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
@@ -510,30 +306,97 @@ class Execution {
const GetOutputTensorMutableRawDataFn& get_output_tensor_mutable_raw_data_fn);
private:
bool model_loaded{false};
CoreMLExecution* execution_;
void cleanup();
NSString* coreml_model_path_{nil};
NSString* compiled_model_path_{nil};
const logging::Logger& logger_;
uint32_t coreml_flags_{0};
MLModel* model_{nil};
};
Execution::Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags) {
Execution::Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags)
: logger_(logger),
coreml_flags_(coreml_flags) {
@autoreleasepool {
execution_ = [[CoreMLExecution alloc] initWithPath:path
logger:logger
coreml_flags:coreml_flags];
coreml_model_path_ = util::Utf8StringToNSString(path.c_str());
}
}
Execution::~Execution() {
@autoreleasepool {
cleanup();
}
}
void Execution::cleanup() {
NSError* error = nil;
if (compiled_model_path_ != nil) {
[[NSFileManager defaultManager] removeItemAtPath:compiled_model_path_ error:&error];
if (error != nil) {
LOGS(logger_, ERROR) << "Failed cleaning up the compiled model: " << [compiled_model_path_ UTF8String]
<< ", error message: " << [[error localizedDescription] UTF8String];
}
compiled_model_path_ = nil;
}
#if !defined(NDEBUG)
std::string path_override = Env::Default().GetEnvironmentVar(util::kOverrideModelOutputDirectoryEnvVar);
if (!path_override.empty()) {
// don't cleanup
coreml_model_path_ = nil;
}
#endif
if (coreml_model_path_ != nil) {
error = nil;
[[NSFileManager defaultManager] removeItemAtPath:coreml_model_path_ error:&error];
if (error != nil) {
LOGS(logger_, ERROR) << "Failed cleaning up the coreml model: " << [coreml_model_path_ UTF8String]
<< ", error message: " << [[error localizedDescription] UTF8String];
}
coreml_model_path_ = nil;
}
}
Status Execution::LoadModel() {
if (model_loaded) {
if (model_ != nil) {
return Status::OK();
}
if (HAS_COREML3_OR_LATER) {
Status status{};
@autoreleasepool {
status = [execution_ loadModel];
NSError* error = nil;
NSURL* modelUrl = [NSURL URLWithString:coreml_model_path_];
if (modelUrl == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create model URL from path");
}
// TODO: Update this to version with callback handler as the API used here is deprecated.
// https://developer.apple.com/documentation/coreml/mlmodel/3929553-compilemodelaturl
// As we call loadModel during EP Compile there shouldn't be an issue letting the actual compile run in the
// background. We will have to check for completion in `predict` and block until it is done.
NSURL* compileUrl = [MLModel compileModelAtURL:modelUrl error:&error];
if (error != nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error compiling model: ",
[[error localizedDescription] UTF8String]);
}
compiled_model_path_ = [compileUrl path];
MLModelConfiguration* config = [[MLModelConfiguration alloc] init];
config.computeUnits = (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY)
? MLComputeUnitsCPUOnly
: MLComputeUnitsAll;
model_ = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error];
if (error != nil || model_ == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create MLModel",
(error != nil) ? MakeString(", error: ", [[error localizedDescription] UTF8String]) : "");
}
return Status::OK();
}
model_loaded = status.IsOK();
return status;
}
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Execution::LoadModel requires macos 10.15+ or ios 13+");
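The TODO above references the deprecated synchronous `compileModelAtURL:error:`. For reference, a hedged sketch of the callback-based replacement it points to, `compileModelAtURL:completionHandler:` (macOS 13 / iOS 16 and later; not part of this commit, and `Predict` would then have to wait for the handler to fire before using the model):

// Illustrative sketch, not part of the diff.
if (@available(macOS 13.0, iOS 16.0, *)) {
  [MLModel compileModelAtURL:modelUrl
           completionHandler:^(NSURL* _Nullable compiledModelURL, NSError* _Nullable compileError) {
             // Stash compiledModelURL (or compileError) for later; Predict
             // would block until this handler has run.
           }];
}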
@@ -542,13 +405,94 @@ Status Execution::LoadModel() {
Status Execution::Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
const std::unordered_map<std::string, OnnxTensorInfo>& outputs,
const GetOutputTensorMutableRawDataFn& get_output_tensor_mutable_raw_data_fn) {
ORT_RETURN_IF_NOT(model_loaded, "Execution::Predict requires Execution::LoadModel");
if (HAS_COREML3_OR_LATER) {
@autoreleasepool {
return [execution_ predict:inputs
outputs:outputs
getOutputTensorDataFn:get_output_tensor_mutable_raw_data_fn];
Status status = Status::OK();
ORT_TRY {
if (model_ == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Model is not loaded");
}
id<MLFeatureProvider> input_features;
InlinedVector<std::unique_ptr<int32_t[]>> conversion_buffers;
ORT_RETURN_IF_ERROR(CreateInputFeatureProvider(inputs, logger_, &input_features, conversion_buffers));
MLPredictionOptions* options = [[MLPredictionOptions alloc] init];
NSError* error = nil;
id<MLFeatureProvider> output_features = [model_ predictionFromFeatures:input_features
options:options
error:&error];
if (error != nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error executing model: ",
[[error localizedDescription] UTF8String]);
}
for (const auto& [output_name, output_tensor_info] : outputs) {
MLFeatureValue* output_value =
[output_features featureValueForName:util::Utf8StringToNSString(output_name.c_str())];
if (output_value == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "output_features has no value for ", output_name);
}
MLMultiArray* data = [output_value multiArrayValue];
const auto coreml_static_output_shape = [data]() {
InlinedVector<int64_t> result;
result.reserve(data.shape.count);
for (NSNumber* dim in data.shape) {
const auto dim_value = dim.longLongValue;
result.push_back(dim_value);
}
return result;
}();
const auto static_output_shape = GetStaticOutputShape(output_tensor_info.shape, coreml_static_output_shape,
logger_);
void* output_buffer = get_output_tensor_mutable_raw_data_fn(output_name, output_tensor_info.data_type,
static_output_shape);
if (const size_t num_elements = data.count; num_elements > 0) {
if (const auto shape_size = ShapeSize(static_output_shape);
shape_size < 0 || num_elements != static_cast<size_t>(shape_size)) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"CoreML MLMultiArray count (", num_elements, ") and shape size (", shape_size,
") do not match");
}
// support a non-contiguous array, provided only one dimension is not contiguous
int64_t num_blocks = 0;
int64_t block_size = 0;
int64_t stride = 0;
ORT_RETURN_IF_ERROR(GetMLMultiArrayCopyInfo(data, num_blocks, block_size, stride));
__block Status copy_status;
const auto* tensor_info = &output_tensor_info;
// `getBytesWithHandler` replaces deprecated `.dataPointer` on new versions
if (@available(macOS 12.3, iOS 15.4, *)) {
[data getBytesWithHandler:^(const void* bytes, NSInteger size) {
copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data,
num_blocks, block_size, stride, tensor_info);
}];
} else {
copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data,
num_blocks, block_size, stride, tensor_info);
}
ORT_RETURN_IF_ERROR(copy_status);
}
}
}
ORT_CATCH(const std::exception& e) {
ORT_HANDLE_EXCEPTION([&]() {
status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Exception: ", e.what());
});
}
return status;
}
}
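For reference, the `num_blocks`/`block_size`/`stride` triple computed by `GetMLMultiArrayCopyInfo` describes a copy in which at most one dimension is non-contiguous: `num_blocks` runs of `block_size` contiguous elements, with consecutive runs starting `stride` elements apart in the source. A minimal sketch of the copy loop this implies (illustrative only; `CopyBlocksSketch` is a hypothetical name, and the real `CopyMLMultiArrayBuffer` derives the element size from `tensor_info`):

// Illustrative sketch, not part of the diff (assumes #include <cstring>).
void CopyBlocksSketch(const void* src, void* dst, size_t element_size,
                      int64_t num_blocks, int64_t block_size, int64_t stride) {
  const char* src_bytes = static_cast<const char*>(src);
  char* dst_bytes = static_cast<char*>(dst);
  for (int64_t i = 0; i < num_blocks; ++i) {
    std::memcpy(dst_bytes + i * block_size * element_size,  // dest is contiguous
                src_bytes + i * stride * element_size,      // source may be padded
                static_cast<size_t>(block_size) * element_size);
  }
}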