Refactor CoreMLExecution to C++ bridge class (#21857)

Refactor the Objective-C++ class `CoreMLExecution` into the existing C++ bridge class `onnxruntime::coreml::Execution`.
This commit is contained in:
Lennart Hannink 2024-09-12 01:05:37 +02:00 committed by GitHub
Parent 0309c5f02f
Commit d8e64bb529
No key found matching this signature
GPG key ID: B5690EEEBB952194
1 changed file with 171 additions and 227 deletions
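For context on the pattern: under ARC, Objective-C++ lets a plain C++ class hold strong Objective-C object pointers directly, so the `NSObject` wrapper can be folded into the bridge class. A minimal sketch of the idea, assuming ARC is enabled (`ExecutionSketch` is a hypothetical name, not code from this commit):

// Illustrative sketch, not part of the diff (assumes #import <CoreML/CoreML.h>
// and #include <string>). Under ARC, Obj-C pointer members of a C++ class are
// __strong by default; the compiler retains and releases them in the
// constructor and destructor.
class ExecutionSketch {
 public:
  explicit ExecutionSketch(const std::string& path)
      : coreml_model_path_([NSString stringWithUTF8String:path.c_str()]) {}
  // No manual dealloc needed: ARC releases both members on destruction.

 private:
  NSString* coreml_model_path_{nil};  // retained by ARC
  MLModel* model_{nil};               // retained by ARC
};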


@@ -30,8 +30,8 @@
// to manually do this
asm(".linker_option \"-framework\", \"CoreML\"");
using namespace onnxruntime;
using namespace onnxruntime::coreml;
namespace onnxruntime {
namespace coreml {
namespace {
/**
@@ -247,213 +247,6 @@ Status CopyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buff
}
} // namespace
NS_ASSUME_NONNULL_BEGIN
// Execution for a CoreML model, it performs
// 1. Compile the model at the given path for execution
// 2. Predict using the given OnnxTensorFeatureProvider input and copy the output data back to ORT
// 3. The compiled model is removed in dealloc or via the cleanup function
@interface CoreMLExecution : NSObject {
NSString* coreml_model_path_;
NSString* compiled_model_path_;
const logging::Logger* logger_;
uint32_t coreml_flags_;
}
- (instancetype)initWithPath:(const std::string&)path
logger:(const logging::Logger&)logger
coreml_flags:(uint32_t)coreml_flags;
- (void)cleanup;
- (void)dealloc;
- (Status)loadModel API_AVAILABLE_COREML3;
- (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
outputs:(const std::unordered_map<std::string, OnnxTensorInfo>&)outputs
getOutputTensorDataFn:(const GetOutputTensorMutableRawDataFn&)get_output_tensor_mutable_raw_data_fn
API_AVAILABLE_COREML3;
@property(nullable) MLModel* model API_AVAILABLE_COREML3;
@end
@implementation CoreMLExecution
- (instancetype)initWithPath:(const std::string&)path
logger:(const logging::Logger&)logger
coreml_flags:(uint32_t)coreml_flags {
if (self = [super init]) {
coreml_model_path_ = util::Utf8StringToNSString(path.c_str());
logger_ = &logger;
coreml_flags_ = coreml_flags;
}
return self;
}
- (void)cleanup {
NSError* error = nil;
if (compiled_model_path_ != nil) {
[[NSFileManager defaultManager] removeItemAtPath:compiled_model_path_ error:&error];
if (error != nil) {
LOGS(*logger_, ERROR) << "Failed cleaning up the compiled model: " << [compiled_model_path_ UTF8String]
<< ", error message: " << [[error localizedDescription] UTF8String];
}
compiled_model_path_ = nil;
}
#if !defined(NDEBUG)
std::string path_override = Env::Default().GetEnvironmentVar(util::kOverrideModelOutputDirectoryEnvVar);
if (!path_override.empty()) {
// don't cleanup
coreml_model_path_ = nil;
}
#endif
if (coreml_model_path_ != nil) {
error = nil;
[[NSFileManager defaultManager] removeItemAtPath:coreml_model_path_ error:&error];
if (error != nil) {
LOGS(*logger_, ERROR) << "Failed cleaning up the coreml model: " << [coreml_model_path_ UTF8String]
<< ", error message: " << [[error localizedDescription] UTF8String];
}
coreml_model_path_ = nil;
}
}
- (void)dealloc {
[self cleanup];
}
- (Status)loadModel {
NSURL* modelUrl = [NSURL URLWithString:coreml_model_path_];
if (modelUrl == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create model URL from path");
}
// TODO: Update this to version with callback handler as the API used here is deprecated.
// https://developer.apple.com/documentation/coreml/mlmodel/3929553-compilemodelaturl
// As we call loadModel during EP Compile there shouldn't be an issue letting the actual compile run in the
// background. We will have to check for completion in `predict` and block until it is done.
NSError* error = nil;
NSURL* compileUrl = [MLModel compileModelAtURL:modelUrl error:&error];
if (error != nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error compiling model: ",
[[error localizedDescription] UTF8String]);
}
compiled_model_path_ = [compileUrl path];
MLModelConfiguration* config = [[MLModelConfiguration alloc] init];
config.computeUnits = (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY)
? MLComputeUnitsCPUOnly
: MLComputeUnitsAll;
_model = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error];
if (error != nil || _model == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create MLModel",
(error != nil) ? MakeString(", error: ", [[error localizedDescription] UTF8String]) : "");
}
return Status::OK();
}
- (Status)predict:(const std::unordered_map<std::string, OnnxTensorData>&)inputs
outputs:(const std::unordered_map<std::string, OnnxTensorInfo>&)outputs
getOutputTensorDataFn:(const GetOutputTensorMutableRawDataFn&)get_output_tensor_mutable_raw_data_fn {
Status status = Status::OK();
ORT_TRY {
if (_model == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Model is not loaded");
}
id<MLFeatureProvider> input_features;
InlinedVector<std::unique_ptr<int32_t[]>> conversion_buffers;
ORT_RETURN_IF_ERROR(CreateInputFeatureProvider(inputs, *logger_, &input_features, conversion_buffers));
MLPredictionOptions* options = [[MLPredictionOptions alloc] init];
NSError* error = nil;
id<MLFeatureProvider> output_features = [_model predictionFromFeatures:input_features
options:options
error:&error];
if (error != nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error executing model: ",
[[error localizedDescription] UTF8String]);
}
for (const auto& [output_name, output_tensor_info] : outputs) {
MLFeatureValue* output_value =
[output_features featureValueForName:util::Utf8StringToNSString(output_name.c_str())];
if (output_value == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "output_features has no value for ", output_name);
}
MLMultiArray* data = [output_value multiArrayValue];
const auto coreml_static_output_shape = [data]() {
InlinedVector<int64_t> result;
result.reserve(data.shape.count);
for (NSNumber* dim in data.shape) {
const auto dim_value = dim.longLongValue;
result.push_back(dim_value);
}
return result;
}();
const auto static_output_shape = GetStaticOutputShape(output_tensor_info.shape, coreml_static_output_shape,
*logger_);
void* output_buffer = get_output_tensor_mutable_raw_data_fn(output_name, output_tensor_info.data_type,
static_output_shape);
if (const size_t num_elements = data.count; num_elements > 0) {
if (const auto shape_size = ShapeSize(static_output_shape);
shape_size < 0 || num_elements != static_cast<size_t>(shape_size)) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"CoreML MLMultiArray count (", num_elements, ") and shape size (", shape_size,
") do not match");
}
// support a non-contiguous array, provided only one dimension is not contiguous
int64_t num_blocks = 0;
int64_t block_size = 0;
int64_t stride = 0;
ORT_RETURN_IF_ERROR(GetMLMultiArrayCopyInfo(data, num_blocks, block_size, stride));
__block Status copy_status;
const auto* tensor_info = &output_tensor_info;
// `getBytesWithHandler` replaces deprecated `.dataPointer` on new versions
if (@available(macOS 12.3, iOS 15.4, *)) {
[data getBytesWithHandler:^(const void* bytes, NSInteger size) {
copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data,
num_blocks, block_size, stride, tensor_info);
}];
} else {
copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data,
num_blocks, block_size, stride, tensor_info);
}
ORT_RETURN_IF_ERROR(copy_status);
}
}
}
ORT_CATCH(const std::exception& e) {
ORT_HANDLE_EXCEPTION([&]() {
status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Exception: ", e.what());
});
}
return status;
}
@end
NS_ASSUME_NONNULL_END
namespace onnxruntime {
namespace coreml {
Status GetMLMultiArrayCopyInfo(const MLMultiArray* _Nonnull array,
int64_t& num_blocks, int64_t& block_size, int64_t& stride) {
const auto* shape = array.shape;
@@ -498,11 +291,14 @@ Status GetMLMultiArrayCopyInfo(const MLMultiArray* _Nonnull array,
}
// Internal Execution class
// This class will bridge Model (c++) with CoreMLExecution (objective c++)
// This class is part of the model class and handles the calls into CoreML. Specifically, it performs
// 1. Compile the model at the given path for execution
// 2. Predict using the given OnnxTensorFeatureProvider input and copy the output data back to ORT
// 3. The compiled model is removed in dealloc or via the cleanup function
class Execution {
public:
Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags);
~Execution() {};
~Execution();
Status LoadModel();
Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
@@ -510,30 +306,97 @@ class Execution {
const GetOutputTensorMutableRawDataFn& get_output_tensor_mutable_raw_data_fn);
private:
bool model_loaded{false};
CoreMLExecution* execution_;
void cleanup();
NSString* coreml_model_path_{nil};
NSString* compiled_model_path_{nil};
const logging::Logger& logger_;
uint32_t coreml_flags_{0};
MLModel* model_{nil};
};
Execution::Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags) {
Execution::Execution(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags)
: logger_(logger),
coreml_flags_(coreml_flags) {
@autoreleasepool {
execution_ = [[CoreMLExecution alloc] initWithPath:path
logger:logger
coreml_flags:coreml_flags];
coreml_model_path_ = util::Utf8StringToNSString(path.c_str());
}
}
Execution::~Execution() {
@autoreleasepool {
cleanup();
}
}
void Execution::cleanup() {
NSError* error = nil;
if (compiled_model_path_ != nil) {
[[NSFileManager defaultManager] removeItemAtPath:compiled_model_path_ error:&error];
if (error != nil) {
LOGS(logger_, ERROR) << "Failed cleaning up the compiled model: " << [compiled_model_path_ UTF8String]
<< ", error message: " << [[error localizedDescription] UTF8String];
}
compiled_model_path_ = nil;
}
#if !defined(NDEBUG)
std::string path_override = Env::Default().GetEnvironmentVar(util::kOverrideModelOutputDirectoryEnvVar);
if (!path_override.empty()) {
// don't cleanup
coreml_model_path_ = nil;
}
#endif
if (coreml_model_path_ != nil) {
error = nil;
[[NSFileManager defaultManager] removeItemAtPath:coreml_model_path_ error:&error];
if (error != nil) {
LOGS(logger_, ERROR) << "Failed cleaning up the coreml model: " << [coreml_model_path_ UTF8String]
<< ", error message: " << [[error localizedDescription] UTF8String];
}
coreml_model_path_ = nil;
}
}
Status Execution::LoadModel() {
if (model_loaded) {
if (model_ != nil) {
return Status::OK();
}
if (HAS_COREML3_OR_LATER) {
Status status{};
@autoreleasepool {
status = [execution_ loadModel];
NSError* error = nil;
NSURL* modelUrl = [NSURL URLWithString:coreml_model_path_];
if (modelUrl == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create model URL from path");
}
// TODO: Update this to version with callback handler as the API used here is deprecated.
// https://developer.apple.com/documentation/coreml/mlmodel/3929553-compilemodelaturl
// As we call loadModel during EP Compile there shouldn't be an issue letting the actual compile run in the
// background. We will have to check for completion in `predict` and block until it is done.
NSURL* compileUrl = [MLModel compileModelAtURL:modelUrl error:&error];
if (error != nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error compiling model: ",
[[error localizedDescription] UTF8String]);
}
compiled_model_path_ = [compileUrl path];
MLModelConfiguration* config = [[MLModelConfiguration alloc] init];
config.computeUnits = (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY)
? MLComputeUnitsCPUOnly
: MLComputeUnitsAll;
model_ = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error];
if (error != nil || model_ == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create MLModel",
(error != nil) ? MakeString(", error: ", [[error localizedDescription] UTF8String]) : "");
}
return Status::OK();
}
model_loaded = status.IsOK();
return status;
}
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Execution::LoadModel requires macos 10.15+ or ios 13+");
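The TODO above references the deprecated synchronous `compileModelAtURL:error:`. For reference, a hedged sketch of the callback-based replacement it points to, `compileModelAtURL:completionHandler:` (macOS 13 / iOS 16 and later; not part of this commit, and `Predict` would then have to wait for the handler to fire before using the model):

// Illustrative sketch, not part of the diff.
if (@available(macOS 13.0, iOS 16.0, *)) {
  [MLModel compileModelAtURL:modelUrl
           completionHandler:^(NSURL* _Nullable compiledModelURL, NSError* _Nullable compileError) {
             // Stash compiledModelURL (or compileError) for later; Predict
             // would block until this handler has run.
           }];
}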
@@ -542,13 +405,94 @@ Status Execution::LoadModel() {
Status Execution::Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
const std::unordered_map<std::string, OnnxTensorInfo>& outputs,
const GetOutputTensorMutableRawDataFn& get_output_tensor_mutable_raw_data_fn) {
ORT_RETURN_IF_NOT(model_loaded, "Execution::Predict requires Execution::LoadModel");
if (HAS_COREML3_OR_LATER) {
@autoreleasepool {
return [execution_ predict:inputs
outputs:outputs
getOutputTensorDataFn:get_output_tensor_mutable_raw_data_fn];
Status status = Status::OK();
ORT_TRY {
if (model_ == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Model is not loaded");
}
id<MLFeatureProvider> input_features;
InlinedVector<std::unique_ptr<int32_t[]>> conversion_buffers;
ORT_RETURN_IF_ERROR(CreateInputFeatureProvider(inputs, logger_, &input_features, conversion_buffers));
MLPredictionOptions* options = [[MLPredictionOptions alloc] init];
NSError* error = nil;
id<MLFeatureProvider> output_features = [model_ predictionFromFeatures:input_features
options:options
error:&error];
if (error != nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Error executing model: ",
[[error localizedDescription] UTF8String]);
}
for (const auto& [output_name, output_tensor_info] : outputs) {
MLFeatureValue* output_value =
[output_features featureValueForName:util::Utf8StringToNSString(output_name.c_str())];
if (output_value == nil) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "output_features has no value for ", output_name);
}
MLMultiArray* data = [output_value multiArrayValue];
const auto coreml_static_output_shape = [data]() {
InlinedVector<int64_t> result;
result.reserve(data.shape.count);
for (NSNumber* dim in data.shape) {
const auto dim_value = dim.longLongValue;
result.push_back(dim_value);
}
return result;
}();
const auto static_output_shape = GetStaticOutputShape(output_tensor_info.shape, coreml_static_output_shape,
logger_);
void* output_buffer = get_output_tensor_mutable_raw_data_fn(output_name, output_tensor_info.data_type,
static_output_shape);
if (const size_t num_elements = data.count; num_elements > 0) {
if (const auto shape_size = ShapeSize(static_output_shape);
shape_size < 0 || num_elements != static_cast<size_t>(shape_size)) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"CoreML MLMultiArray count (", num_elements, ") and shape size (", shape_size,
") do not match");
}
// support a non-contiguous array, provided only one dimension is not contiguous
int64_t num_blocks = 0;
int64_t block_size = 0;
int64_t stride = 0;
ORT_RETURN_IF_ERROR(GetMLMultiArrayCopyInfo(data, num_blocks, block_size, stride));
__block Status copy_status;
const auto* tensor_info = &output_tensor_info;
// `getBytesWithHandler` replaces deprecated `.dataPointer` on new versions
if (@available(macOS 12.3, iOS 15.4, *)) {
[data getBytesWithHandler:^(const void* bytes, NSInteger size) {
copy_status = CopyMLMultiArrayBuffer(bytes, output_buffer, data,
num_blocks, block_size, stride, tensor_info);
}];
} else {
copy_status = CopyMLMultiArrayBuffer(data.dataPointer, output_buffer, data,
num_blocks, block_size, stride, tensor_info);
}
ORT_RETURN_IF_ERROR(copy_status);
}
}
}
ORT_CATCH(const std::exception& e) {
ORT_HANDLE_EXCEPTION([&]() {
status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Exception: ", e.what());
});
}
return status;
}
}
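For reference, the `num_blocks`/`block_size`/`stride` triple computed by `GetMLMultiArrayCopyInfo` describes a copy in which at most one dimension is non-contiguous: `num_blocks` runs of `block_size` contiguous elements, with consecutive runs starting `stride` elements apart in the source. A minimal sketch of the copy loop this implies (illustrative only; `CopyBlocksSketch` is a hypothetical name, and the real `CopyMLMultiArrayBuffer` derives the element size from `tensor_info`):

// Illustrative sketch, not part of the diff (assumes #include <cstring>).
void CopyBlocksSketch(const void* src, void* dst, size_t element_size,
                      int64_t num_blocks, int64_t block_size, int64_t stride) {
  const char* src_bytes = static_cast<const char*>(src);
  char* dst_bytes = static_cast<char*>(dst);
  for (int64_t i = 0; i < num_blocks; ++i) {
    std::memcpy(dst_bytes + i * block_size * element_size,  // dest is contiguous
                src_bytes + i * stride * element_size,      // source may be padded
                static_cast<size_t>(block_size) * element_size);
  }
}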