ort-customops/operators/azure/openai_invokers.cc

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "openai_invokers.hpp"

namespace ort_extensions {

// Constructs the invoker for OpenAI audio-to-text requests.
//
// Reads the audio format attribute (empty string if not set), records the positions of the optional 'filename' and
// 'model_name' request properties, and throws ORT_INVALID_ARGUMENT when there is no 'file' property or when no
// model name is available from either ModelName() or a 'model_name' input.
OpenAIAudioToTextInvoker::OpenAIAudioToTextInvoker(const OrtApi& api, const OrtKernelInfo& info)
    : CurlInvoker(api, info) {
  audio_format_ = TryToGetAttributeWithDefault<std::string>(kAudioFormat, "");

  const auto& names = RequestPropertyNames();
  const auto names_begin = names.begin();
  const auto names_end = names.end();

  // Position of `wanted` within the request property names, or an empty optional when it is not present.
  const auto index_of = [&](const std::string& wanted) -> std::optional<size_t> {
    const auto match = std::find(names_begin, names_end, wanted);
    if (match == names_end) {
      return std::nullopt;
    }

    return static_cast<size_t>(match - names_begin);
  };

  filename_input_ = index_of("filename");
  model_name_input_ = index_of("model_name");

  // OpenAI audio endpoints require 'file' and 'model'.
  const bool has_file = std::find(names_begin, names_end, "file") != names_end;
  if (!has_file) {
    ORTX_CXX_API_THROW("Required 'file' input was not found", ORT_INVALID_ARGUMENT);
  }

  // The model name may come from ModelName() or from a 'model_name' input; one of the two must exist.
  const bool model_available = !ModelName().empty() || model_name_input_.has_value();
  if (!model_available) {
    ORTX_CXX_API_THROW("Required 'model_name' input was not found", ORT_INVALID_ARGUMENT);
  }
}

// Validates the node configuration before a request is built. `inputs` is not inspected here.
// Throws ORT_INVALID_ARGUMENT unless exactly one output is declared.
void OpenAIAudioToTextInvoker::ValidateInputs(const ortc::Variadic& inputs) const {
  // The custom op API gives us no way to query the output type at this point.
  // A type mismatch will instead fail in the Compute when the output tensor is allocated.
  const bool has_single_output = OutputNames().size() == 1;
  if (!has_single_output) {
    ORTX_CXX_API_THROW("Expected single output", ORT_INVALID_ARGUMENT);
  }
}

// Populates `curl_handler` with the multipart/form-data request for the OpenAI audio-to-text endpoint.
//
// - Adds the 'model' form field, taken from the 'model_name' input when one is present and large enough,
//   otherwise from ModelName().
// - Walks inputs 1..N-1 (input 0 is not forwarded by this function - presumably it is consumed elsewhere, e.g. as
//   the auth token; confirm against the base class). String inputs become form string fields named after their
//   request property; the uint8 'file' input becomes a form buffer.
//
// Throws ORT_INVALID_ARGUMENT for a uint8 input that is not 'file' and for any input of another element type.
//
// NOTE(review): because the loop forwards every string input, the 'filename' and 'model_name' inputs (when present)
// are also sent as form fields of those names in addition to being used below - confirm that is intended.
void OpenAIAudioToTextInvoker::SetupRequest(CurlHandler& curl_handler, const ortc::Variadic& inputs) const {
  // Theoretically the filename the content was buffered from. Provides the extension indicating the audio format.
  //
  // The name is rebuilt on every call so that it always reflects *this* instance's audio_format_. A function-local
  // `static const` would be initialised exactly once, by whichever instance happened to call first, and every other
  // instance (possibly configured with a different audio format) would silently reuse that first value.
  // thread_local rather than a plain local keeps the storage alive after this function returns, in case
  // curl_handler retains the pointer until the request is sent (assumes the request is sent from the same thread
  // before the next SetupRequest call on it - TODO confirm against CurlHandler).
  thread_local std::string fake_filename;
  fake_filename = "audio." + audio_format_;

  const auto& property_names = RequestPropertyNames();

  // Returns the raw data of the optional input at `input_idx` when that input exists and holds more than `min_size`
  // bytes; otherwise returns `default_value`. The returned pointer is borrowed from the input or from
  // `default_value`, so both must outlive its use.
  const auto get_optional_input =
      [&inputs](const std::optional<size_t>& input_idx, const std::string& default_value,
                size_t min_size = 1) -> const char* {
    const bool use_input = input_idx.has_value() && inputs[*input_idx]->SizeInBytes() > min_size;
    return use_input ? static_cast<const char*>(inputs[*input_idx]->DataRaw())
                     : default_value.c_str();
  };

  // filename_input_ is optional in a model. if it's not present, use a fake filename.
  // if it's present make sure it's not a default empty value. as the filename needs to have an extension of
  // mp3, mp4, mpeg, mpga, m4a, wav, or webm it must be at least 4 characters long.
  const char* filename = get_optional_input(filename_input_, fake_filename, 4);

  curl_handler.AddHeader("Content-Type: multipart/form-data");
  // model name could be input or attribute
  curl_handler.AddFormString("model", get_optional_input(model_name_input_, ModelName()));

  for (size_t ith_input = 1; ith_input < inputs.Size(); ++ith_input) {
    switch (inputs[ith_input]->Type()) {
      case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING:
        curl_handler.AddFormString(property_names[ith_input].c_str(),
                                   // assumes null terminated.
                                   // might be safer to pass pointer and length and use CURLFORM_CONTENTSLENGTH
                                   static_cast<const char*>(inputs[ith_input]->DataRaw()));
        break;
      case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
        // only the 'file' input is uint8
        if (property_names[ith_input] != "file") {
          ORTX_CXX_API_THROW("Only the 'file' input should be uint8 data. Invalid input:" + InputNames()[ith_input],
                             ORT_INVALID_ARGUMENT);
        }

        curl_handler.AddFormBuffer(property_names[ith_input].c_str(),
                                   filename,
                                   inputs[ith_input]->DataRaw(),
                                   inputs[ith_input]->SizeInBytes());
        break;
      case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
        // TODO - required to support 'temperature' input. Until then float inputs are rejected like any other
        // unsupported type.
      default:
        ORTX_CXX_API_THROW("input must be either text or binary", ORT_INVALID_ARGUMENT);
        break;
    }
  }
}

// Writes the response text, unmodified, to output 0 as a string tensor holding one element with dims {1}.
void OpenAIAudioToTextInvoker::ProcessResponse(const std::string& response, ortc::Variadic& outputs) const {
  auto& output_tensor = outputs.AllocateStringTensor(0);
  // One string (the whole response) and a matching one-element shape.
  output_tensor.SetStringOutput(std::vector<std::string>(1, response), std::vector<int64_t>{1});
}
}  // namespace ort_extensions
Refactor setup for Azure ops. Add Android support. (#507) * Refactor setup for Azure ops to try and make common things more re-usable, and for the actual ops to simply layer in the specific input/output constraints for that type of request. Currently builds on Linux, Windows (x64 only) and Android Android requires a manual pre-build of openssl and curl. Linux requires a manual pre-install of openssl. Windows currently only works for x64. Other targets need the triplet adjusted. * Address PR comments * Fix could of android build warnings. * Update .gitignore to remove old path * Fix build break from merge 2023-08-08 12:54:30 +03:00			`// Copyright (c) Microsoft Corporation. All rights reserved.`
			`// Licensed under the MIT License.`

			`#include "openai_invokers.hpp"`

			`namespace ort_extensions {`

			`OpenAIAudioToTextInvoker::OpenAIAudioToTextInvoker(const OrtApi& api, const OrtKernelInfo& info)`
			`: CurlInvoker(api, info) {`
			`audio_format_ = TryToGetAttributeWithDefault<std::string>(kAudioFormat, "");`

			`const auto& property_names = RequestPropertyNames();`

			`const auto find_optional_input = [&property_names](const std::string& property_name) {`
			`std::optional<size_t> result;`
			`auto optional_input = std::find_if(property_names.begin(), property_names.end(),`
			`[&property_name](const auto& name) { return name == property_name; });`

			`if (optional_input != property_names.end()) {`
			`result = optional_input - property_names.begin();`
			`}`

			`return result;`
			`};`

			`filename_input_ = find_optional_input("filename");`
Update domain in triton test models (#519) * Update domain in triton test models * Use 'model_name' everywhere. Test py and model/op were inconsistent. 2023-08-12 05:40:21 +03:00			`model_name_input_ = find_optional_input("model_name");`
Refactor setup for Azure ops. Add Android support. (#507) * Refactor setup for Azure ops to try and make common things more re-usable, and for the actual ops to simply layer in the specific input/output constraints for that type of request. Currently builds on Linux, Windows (x64 only) and Android Android requires a manual pre-build of openssl and curl. Linux requires a manual pre-install of openssl. Windows currently only works for x64. Other targets need the triplet adjusted. * Address PR comments * Fix could of android build warnings. * Update .gitignore to remove old path * Fix build break from merge 2023-08-08 12:54:30 +03:00
			`// OpenAI audio endpoints require 'file' and 'model'.`
			`if (!std::any_of(property_names.begin(), property_names.end(),`
			`[](const auto& name) { return name == "file"; })) {`
			`ORTX_CXX_API_THROW("Required 'file' input was not found", ORT_INVALID_ARGUMENT);`
			`}`

			`if (ModelName().empty() && !model_name_input_) {`
Update domain in triton test models (#519) * Update domain in triton test models * Use 'model_name' everywhere. Test py and model/op were inconsistent. 2023-08-12 05:40:21 +03:00			`ORTX_CXX_API_THROW("Required 'model_name' input was not found", ORT_INVALID_ARGUMENT);`
Refactor setup for Azure ops. Add Android support. (#507) * Refactor setup for Azure ops to try and make common things more re-usable, and for the actual ops to simply layer in the specific input/output constraints for that type of request. Currently builds on Linux, Windows (x64 only) and Android Android requires a manual pre-build of openssl and curl. Linux requires a manual pre-install of openssl. Windows currently only works for x64. Other targets need the triplet adjusted. * Address PR comments * Fix could of android build warnings. * Update .gitignore to remove old path * Fix build break from merge 2023-08-08 12:54:30 +03:00			`}`
			`}`

			`void OpenAIAudioToTextInvoker::ValidateInputs(const ortc::Variadic& inputs) const {`
			`// We don't have a way to get the output type from the custom op API.`
			`// If there's a mismatch it will fail in the Compute when it allocates the output tensor.`
			`if (OutputNames().size() != 1) {`
			`ORTX_CXX_API_THROW("Expected single output", ORT_INVALID_ARGUMENT);`
			`}`
			`}`

			`void OpenAIAudioToTextInvoker::SetupRequest(CurlHandler& curl_handler, const ortc::Variadic& inputs) const {`
			`// theoretically the filename the content was buffered from. provides the extensions indicating the audio format`
			`static const std::string fake_filename = "audio." + audio_format_;`

			`const auto& property_names = RequestPropertyNames();`

Make CIs pass with Azure ops enabled by default. (#518) * Make CIs pass with Azure ops enabled by default. 2023-08-12 10:45:59 +03:00			`const auto get_optional_input =`
Refactor setup for Azure ops. Add Android support. (#507) * Refactor setup for Azure ops to try and make common things more re-usable, and for the actual ops to simply layer in the specific input/output constraints for that type of request. Currently builds on Linux, Windows (x64 only) and Android Android requires a manual pre-build of openssl and curl. Linux requires a manual pre-install of openssl. Windows currently only works for x64. Other targets need the triplet adjusted. * Address PR comments * Fix could of android build warnings. * Update .gitignore to remove old path * Fix build break from merge 2023-08-08 12:54:30 +03:00			`[&](const std::optional<size_t>& input_idx, const std::string& default_value, size_t min_size = 1) {`
			`return (input_idx.has_value() && inputs[*input_idx]->SizeInBytes() > min_size)`
			`? static_cast<const char*>(inputs[*input_idx]->DataRaw())`
			`: default_value.c_str();`
			`};`

			`// filename_input_ is optional in a model. if it's not present, use a fake filename.`
			`// if it's present make sure it's not a default empty value. as the filename needs to have an extension of`
			`// mp3, mp4, mpeg, mpga, m4a, wav, or webm it must be at least 4 characters long.`
			`const char* filename = get_optional_input(filename_input_, fake_filename, 4);`

			`curl_handler.AddHeader("Content-Type: multipart/form-data");`
			`// model name could be input or attribute`
			`curl_handler.AddFormString("model", get_optional_input(model_name_input_, ModelName()));`

			`for (size_t ith_input = 1; ith_input < inputs.Size(); ++ith_input) {`
			`switch (inputs[ith_input]->Type()) {`
			`case ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING:`
			`curl_handler.AddFormString(property_names[ith_input].c_str(),`
			`// assumes null terminated.`
			`// might be safer to pass pointer and length and add use CURLFORM_CONTENTSLENGTH`
			`static_cast<const char*>(inputs[ith_input]->DataRaw()));`
			`break;`
			`case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:`
			`// only the 'file' input is uint8`
			`if (property_names[ith_input] != "file") {`
			`ORTX_CXX_API_THROW("Only the 'file' input should be uint8 data. Invalid input:" + InputNames()[ith_input],`
			`ORT_INVALID_ARGUMENT);`
			`}`

			`curl_handler.AddFormBuffer(property_names[ith_input].c_str(),`
			`filename,`
			`inputs[ith_input]->DataRaw(),`
			`inputs[ith_input]->SizeInBytes());`
			`break;`
			`case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:`
			`// TODO - required to support 'temperature' input.`
			`default:`
			`ORTX_CXX_API_THROW("input must be either text or binary", ORT_INVALID_ARGUMENT);`
			`break;`
			`}`
			`}`
			`}`

			`void OpenAIAudioToTextInvoker::ProcessResponse(const std::string& response, ortc::Variadic& outputs) const {`
			`auto& string_tensor = outputs.AllocateStringTensor(0);`
			`string_tensor.SetStringOutput(std::vector<std::string>{response}, std::vector<int64_t>{1});`
			`}`
			`} // namespace ort_extensions`