Merge branch 'main' into fix-transformers_and_bert-arguments

This commit is contained in:
Sebastian Gallese 2024-09-05 11:58:21 -07:00 committed by GitHub
Parents 383def2ab9 90d8f33172
Commit ed09a5a0ab
No key found matching this signature
GPG key ID: B5690EEEBB952194
18 changed files with 589 additions and 45 deletions

View file

@@ -197,7 +197,7 @@ stages:
# compiled as only one operator selected.
- bash: |
set -e -x -u
./build.sh -DOCOS_ENABLE_C_API=ON
./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
cd out/Linux/RelWithDebInfo
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests
@@ -281,7 +281,7 @@ stages:
# compiled as only one operator selected.
- bash: |
set -e -x -u
./build.sh -DOCOS_ENABLE_C_API=ON
./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
cd out/Darwin/RelWithDebInfo
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests
@@ -431,7 +431,7 @@ stages:
steps:
- script: |
call .\build.bat -DOCOS_ENABLE_C_API=ON
call .\build.bat -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
cd out\Windows
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests

View file

@@ -212,6 +212,9 @@ class CmdBuildCMakeExt(_build_ext):
'-DOCOS_ENABLE_VISION=OFF']
if self.pp_api:
if not self.no_opencv:
raise RuntimeError(
"Cannot enable PP C API Python Wrapper without disabling OpenCV.")
cmake_args += ['-DOCOS_ENABLE_C_API=ON']
if self.no_azure is not None:

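The guard above makes the pre-processing C API Python wrapper (pp_api) and the OpenCV-based operators mutually exclusive at build time. A minimal standalone sketch of that validation, written as a hypothetical helper (the option names pp_api/no_opencv come from the hunk; the exact flag list behind no_opencv is not fully visible here, so it is assumed from the CI flags elsewhere in this change):

# Hypothetical helper mirroring the guard added to CmdBuildCMakeExt above.
def pp_api_cmake_args(pp_api, no_opencv):
    cmake_args = []
    if no_opencv:
        # Assumed to match the existing no_opencv branch that turns the vision ops off.
        cmake_args += ['-DOCOS_ENABLE_CV2=OFF',
                       '-DOCOS_ENABLE_OPENCV_CODECS=OFF',
                       '-DOCOS_ENABLE_VISION=OFF']
    if pp_api:
        if not no_opencv:
            raise RuntimeError(
                "Cannot enable PP C API Python Wrapper without disabling OpenCV.")
        cmake_args += ['-DOCOS_ENABLE_C_API=ON']
    return cmake_args

# e.g. pp_api_cmake_args(True, True) -> OpenCV disabled plus '-DOCOS_ENABLE_C_API=ON'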
View file

@@ -206,7 +206,6 @@ if(NOT PROJECT_IS_TOP_LEVEL AND ONNXRUNTIME_ROOT)
set(_ONNXRUNTIME_EMBEDDED TRUE)
endif()
if (OCOS_ENABLE_SELECTED_OPLIST OR OCOS_BUILD_PRESET)
disable_all_operators()
if(OCOS_ENABLE_SELECTED_OPLIST)
@@ -737,9 +736,12 @@ if(OCOS_ENABLE_C_API)
file(GLOB audio_TARGET_SRC "shared/api/c_api_feature_extraction.*" "shared/api/speech_*")
list(APPEND _TARGET_LIB_SRC ${audio_TARGET_SRC})
endif()
if(OCOS_ENABLE_CV2)
if(OCOS_ENABLE_DLIB)
include(ext_imgcodecs)
file(GLOB cv2_TARGET_SRC "shared/api/c_api_processor.*" "shared/api/image_*.*")
list(APPEND _TARGET_LIB_SRC ${cv2_TARGET_SRC})
target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY} ${ZLIB_LIBRARY})
endif()
endif()
@@ -852,8 +854,8 @@ target_link_libraries(ortcustomops PUBLIC ocos_operators)
if(OCOS_BUILD_SHARED_LIB)
file(GLOB shared_TARGET_SRC "shared/*.cc" "shared/*.h")
if (OCOS_ENABLE_C_API)
if (NOT _HAS_TOKENIZER OR NOT OCOS_ENABLE_CV2 OR NOT OCOS_ENABLE_AUDIO)
message(FATAL_ERROR "Shared library build requires GPT2_TOKENIZER, CV2 and AUDIO to be enabled.")
if (NOT _HAS_TOKENIZER OR NOT OCOS_ENABLE_AUDIO)
message(FATAL_ERROR "Shared library build requires GPT2_TOKENIZER and AUDIO to be enabled.")
endif()
list(APPEND shared_TARGET_SRC "shared/extensions_c.def")
else()

View file

@@ -5,11 +5,3 @@ recursive-include include *.*
recursive-include operators *.*
recursive-include pyop *.*
recursive-include shared *.*
prune ci_build
prune docs
prune test
prune _subbuild
prune out
exclude *.bat
exclude *.yaml
exclude *.git*

View file

@@ -1,7 +1,7 @@
#!/bin/bash
# Example build script for building the source on Linux-like platforms
set -e -x -u
set -e -u
cuda_arch=''
if [[ $@ == *"DOCOS_USE_CUDA=ON"* && $@ != *"DCMAKE_CUDA_ARCHITECTURES"* ]]; then

cmake/ext_imgcodecs.cmake (new file, 131 lines)
View file

@@ -0,0 +1,131 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
set(_IMGCODEC_ROOT_DIR ${dlib_SOURCE_DIR}/dlib/external)
# ----------------------------------------------------------------------------
# project libpng
#
# ----------------------------------------------------------------------------
set (PNG_LIBRARY "libpng_static_c")
set (libPNG_SOURCE_DIR ${_IMGCODEC_ROOT_DIR}/libpng)
set (zlib_SOURCE_DIR ${_IMGCODEC_ROOT_DIR}/zlib)
if(NOT WIN32)
find_library(M_LIBRARY
NAMES m
PATHS /usr/lib /usr/local/lib
)
if(NOT M_LIBRARY)
message(STATUS "math lib 'libm' not found; floating point support disabled")
endif()
else()
# not needed on windows
set(M_LIBRARY "")
endif()
set(lib_srcs
${libPNG_SOURCE_DIR}/arm/arm_init.c
${libPNG_SOURCE_DIR}/arm/filter_neon_intrinsics.c
${libPNG_SOURCE_DIR}/arm/palette_neon_intrinsics.c
${libPNG_SOURCE_DIR}/png.c
${libPNG_SOURCE_DIR}/pngerror.c
${libPNG_SOURCE_DIR}/pngget.c
${libPNG_SOURCE_DIR}/pngmem.c
${libPNG_SOURCE_DIR}/pngpread.c
${libPNG_SOURCE_DIR}/pngread.c
${libPNG_SOURCE_DIR}/pngrio.c
${libPNG_SOURCE_DIR}/pngrtran.c
${libPNG_SOURCE_DIR}/pngrutil.c
${libPNG_SOURCE_DIR}/pngset.c
${libPNG_SOURCE_DIR}/pngtrans.c
${libPNG_SOURCE_DIR}/pngwio.c
${libPNG_SOURCE_DIR}/pngwrite.c
${libPNG_SOURCE_DIR}/pngwtran.c
${libPNG_SOURCE_DIR}/pngwutil.c
${zlib_SOURCE_DIR}/adler32.c
${zlib_SOURCE_DIR}/compress.c
${zlib_SOURCE_DIR}/crc32.c
${zlib_SOURCE_DIR}/deflate.c
${zlib_SOURCE_DIR}/gzclose.c
${zlib_SOURCE_DIR}/gzlib.c
${zlib_SOURCE_DIR}/gzread.c
${zlib_SOURCE_DIR}/gzwrite.c
${zlib_SOURCE_DIR}/infback.c
${zlib_SOURCE_DIR}/inffast.c
${zlib_SOURCE_DIR}/inflate.c
${zlib_SOURCE_DIR}/inftrees.c
${zlib_SOURCE_DIR}/trees.c
${zlib_SOURCE_DIR}/uncompr.c
${zlib_SOURCE_DIR}/zutil.c
)
add_library(${PNG_LIBRARY} STATIC EXCLUDE_FROM_ALL ${lib_srcs})
target_include_directories(${PNG_LIBRARY} BEFORE PRIVATE ${zlib_SOURCE_DIR})
if(MSVC)
target_compile_definitions(${PNG_LIBRARY} PRIVATE -D_CRT_SECURE_NO_DEPRECATE)
else()
target_compile_options(${PNG_LIBRARY} PRIVATE -Wno-deprecated-non-prototype)
endif()
# ----------------------------------------------------------------------------
# project libjpeg
#
# ----------------------------------------------------------------------------
set(JPEG_LIBRARY "libjpeg_static_c")
set(libJPEG_SOURCE_DIR ${_IMGCODEC_ROOT_DIR}/libjpeg)
set(lib_srcs
${libJPEG_SOURCE_DIR}/jaricom.c
${libJPEG_SOURCE_DIR}/jcapimin.c
${libJPEG_SOURCE_DIR}/jcapistd.c
${libJPEG_SOURCE_DIR}/jcarith.c
${libJPEG_SOURCE_DIR}/jccoefct.c
${libJPEG_SOURCE_DIR}/jccolor.c
${libJPEG_SOURCE_DIR}/jcdctmgr.c
${libJPEG_SOURCE_DIR}/jchuff.c
${libJPEG_SOURCE_DIR}/jcinit.c
${libJPEG_SOURCE_DIR}/jcmainct.c
${libJPEG_SOURCE_DIR}/jcmarker.c
${libJPEG_SOURCE_DIR}/jcmaster.c
${libJPEG_SOURCE_DIR}/jcomapi.c
${libJPEG_SOURCE_DIR}/jcparam.c
${libJPEG_SOURCE_DIR}/jcprepct.c
${libJPEG_SOURCE_DIR}/jcsample.c
${libJPEG_SOURCE_DIR}/jdapimin.c
${libJPEG_SOURCE_DIR}/jdapistd.c
${libJPEG_SOURCE_DIR}/jdarith.c
${libJPEG_SOURCE_DIR}/jdatadst.c
${libJPEG_SOURCE_DIR}/jdatasrc.c
${libJPEG_SOURCE_DIR}/jdcoefct.c
${libJPEG_SOURCE_DIR}/jdcolor.c
${libJPEG_SOURCE_DIR}/jddctmgr.c
${libJPEG_SOURCE_DIR}/jdhuff.c
${libJPEG_SOURCE_DIR}/jdinput.c
${libJPEG_SOURCE_DIR}/jdmainct.c
${libJPEG_SOURCE_DIR}/jdmarker.c
${libJPEG_SOURCE_DIR}/jdmaster.c
${libJPEG_SOURCE_DIR}/jdmerge.c
${libJPEG_SOURCE_DIR}/jdpostct.c
${libJPEG_SOURCE_DIR}/jdsample.c
${libJPEG_SOURCE_DIR}/jerror.c
${libJPEG_SOURCE_DIR}/jfdctflt.c
${libJPEG_SOURCE_DIR}/jfdctfst.c
${libJPEG_SOURCE_DIR}/jfdctint.c
${libJPEG_SOURCE_DIR}/jidctflt.c
${libJPEG_SOURCE_DIR}/jidctfst.c
${libJPEG_SOURCE_DIR}/jidctint.c
${libJPEG_SOURCE_DIR}/jmemmgr.c
${libJPEG_SOURCE_DIR}/jmemnobs.c
${libJPEG_SOURCE_DIR}/jquant1.c
${libJPEG_SOURCE_DIR}/jquant2.c
${libJPEG_SOURCE_DIR}/jutils.c
)
file(GLOB lib_hdrs ${libJPEG_SOURCE_DIR}/*.h)
add_library(${JPEG_LIBRARY} STATIC EXCLUDE_FROM_ALL ${lib_srcs} ${lib_hdrs})
if(NOT MSVC)
set_source_files_properties(jcdctmgr.c PROPERTIES COMPILE_FLAGS "-O1")
endif()
target_compile_definitions(${JPEG_LIBRARY} PRIVATE -DNO_MKTEMP)

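As the CMakeLists.txt hunk earlier in this change shows, this module is pulled in via include(ext_imgcodecs) when OCOS_ENABLE_DLIB is on, and consumers then link the two static targets it defines. A rough consumer sketch (my_ops is a placeholder target name; zlib is compiled directly into the PNG static library above, so no separate zlib target is defined by this file):

# Sketch of consuming ext_imgcodecs.cmake, mirroring the OCOS_ENABLE_DLIB branch above.
include(ext_imgcodecs)
target_include_directories(my_ops PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
target_link_libraries(my_ops PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})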
View file

@@ -189,6 +189,10 @@ if (OCOS_BUILD_SHARED_LIB)
list(APPEND extensions_test_libraries stdc++fs -pthread)
endif()
if (NOT MSVC)
list(APPEND extensions_test_libraries ${CMAKE_DL_LIBS})
endif()
add_test_target(TARGET extensions_test
TEST_SOURCES ${shared_TEST_SRC}
LIBRARIES ${extensions_test_libraries}

View file

@@ -3,8 +3,6 @@
set(OCOS_ENABLE_GPT2_TOKENIZER ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_C_API ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_CV2 ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_OPENCV_CODECS ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_DLIB ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_MATH ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_AUDIO ON CACHE INTERNAL "" FORCE)

View file

@@ -105,6 +105,17 @@ extError_t ORTX_API_CALL OrtxTensorResultGetAt(OrtxTensorResult* result, size_t
*/
extError_t ORTX_API_CALL OrtxGetTensorType(OrtxTensor* tensor, extDataType_t* type);
/**
* @brief Retrieves the size of each element in the given tensor.
*
* This function calculates the size of each element in the specified tensor and stores it in the provided size variable.
*
* @param tensor A pointer to the OrtxTensor object.
* @param size A pointer to a size_t variable to store the size of each element.
* @return An extError_t value indicating the success or failure of the operation.
*/
extError_t ORTX_API_CALL OrtxGetTensorSizeOfElement(OrtxTensor* tensor, size_t* size);
/** \brief Get the data from the tensor
*
* \param tensor The tensor object

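Together with the existing OrtxGetTensorData, the new OrtxGetTensorSizeOfElement lets a caller compute a tensor's total byte size without hard-coding per-type element sizes. A hedged C sketch (error handling omitted; how the OrtxTensor* is obtained and the header name are assumptions, not part of this hunk):

#include <stddef.h>
#include <stdint.h>
#include "ortx_utils.h"   /* assumption: the public header declaring the Ortx* tensor APIs */

/* Sketch: total byte size of a tensor via the new element-size query.
   `tensor` is assumed to have been obtained elsewhere (e.g. from a tensor result). */
static size_t tensor_byte_size(OrtxTensor* tensor) {
  size_t elem_size = 0;
  OrtxGetTensorSizeOfElement(tensor, &elem_size);   /* added in this change */

  const void* data = NULL;
  const int64_t* shape = NULL;
  size_t num_dims = 0;
  OrtxGetTensorData(tensor, &data, &shape, &num_dims);

  size_t total = elem_size;
  for (size_t i = 0; i < num_dims; ++i) {
    total *= (size_t)shape[i];
  }
  return total;
}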
View file

@@ -65,8 +65,16 @@ class ImageProcessor:
self.processor = create_processor(processor_json)
def pre_process(self, images):
if isinstance(images, str):
images = [images]
if isinstance(images, list):
images = load_images(images)
return image_pre_process(self.processor, images)
@staticmethod
def to_numpy(result):
return tensor_result_get_at(result, 0)
def __del__(self):
if delete_object and self.processor:
delete_object(self.processor)

View file

@@ -85,15 +85,12 @@ void AddGlobalMethodsCApi(pybind11::module& m) {
const int64_t* shape{};
size_t num_dims;
const void* data{};
size_t elem_size = 0;
if (tensor_type == extDataType_t::kOrtxInt64 || tensor_type == extDataType_t::kOrtxFloat) {
size_t elem_size = 1;
if (tensor_type == extDataType_t::kOrtxInt64 ||
tensor_type == extDataType_t::kOrtxFloat ||
tensor_type == extDataType_t::kOrtxUint8) {
OrtxGetTensorData(tensor, reinterpret_cast<const void**>(&data), &shape, &num_dims);
elem_size = 4;
if (tensor_type == extDataType_t::kOrtxInt64) {
elem_size = 8;
}
} else if (tensor_type == extDataType_t::kOrtxUnknownType) {
throw std::runtime_error("Failed to get tensor type");
OrtxGetTensorSizeOfElement(tensor, &elem_size);
} else if (tensor_type == extDataType_t::kOrtxUnknownType) {
throw std::runtime_error("unsupported tensor type");
}
@@ -108,6 +105,8 @@ void AddGlobalMethodsCApi(pybind11::module& m) {
obj = py::array_t<float>(npy_dims);
} else if (tensor_type == extDataType_t::kOrtxInt64) {
obj = py::array_t<int64_t>(npy_dims);
} else if (tensor_type == extDataType_t::kOrtxUint8) {
obj = py::array_t<uint8_t>(npy_dims);
}
void* out_ptr = obj.mutable_data();

View file

@@ -103,7 +103,6 @@ extError_t ORTX_API_CALL OrtxTensorResultGetAt(OrtxTensorResult* result, size_t
auto tensor_ptr = std::make_unique<TensorObject>();
tensor_ptr->SetTensor(ts);
tensor_ptr->SetTensorType(result_ptr->GetTensorType(index));
*tensor = static_cast<OrtxTensor*>(tensor_ptr.release());
return extError_t();
}
@@ -124,6 +123,24 @@ extError_t ORTX_API_CALL OrtxGetTensorType(OrtxTensor* tensor, extDataType_t* ty
return extError_t();
}
extError_t ORTX_API_CALL OrtxGetTensorSizeOfElement(OrtxTensor* tensor, size_t* size) {
if (tensor == nullptr || size == nullptr) {
ReturnableStatus::last_error_message_ = "Invalid argument";
return kOrtxErrorInvalidArgument;
}
auto tensor_impl = static_cast<TensorObject*>(tensor);
if (tensor_impl->ortx_kind() != extObjectKind_t::kOrtxKindTensor) {
ReturnableStatus::last_error_message_ = "Invalid argument";
return kOrtxErrorInvalidArgument;
}
auto tb = tensor_impl->GetTensor();
assert(tb != nullptr);
*size = tb->SizeInBytes() / tb->NumberOfElement();
return extError_t();
}
extError_t ORTX_API_CALL OrtxGetTensorData(OrtxTensor* tensor, const void** data, const int64_t** shape,
size_t* num_dims) {
if (tensor == nullptr) {
@@ -158,3 +175,11 @@ extError_t ORTX_API_CALL OrtxGetTensorDataFloat(OrtxTensor* tensor, const float*
*data = reinterpret_cast<const float*>(data_ptr); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
return err;
}
extError_t ORTX_API_CALL OrtxGetTensorDataUint8(OrtxTensor* tensor, const uint8_t** data, const int64_t** shape,
size_t* num_dims) {
const void* data_ptr{};
auto err = OrtxGetTensorData(tensor, &data_ptr, shape, num_dims);
*data = reinterpret_cast<const uint8_t*>(data_ptr); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
return err;
}

View file

@@ -99,15 +99,56 @@ class TensorObject : public OrtxObjectImpl {
~TensorObject() override = default;
void SetTensor(ortc::TensorBase* tensor) { tensor_ = tensor; }
void SetTensorType(extDataType_t type) { tensor_type_ = type; }
[[nodiscard]] extDataType_t GetTensorType() const { return tensor_type_; }
static extDataType_t GetDataType(ONNXTensorElementDataType dt) {
if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
return extDataType_t::kOrtxFloat;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) {
return extDataType_t::kOrtxUint8;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8) {
return extDataType_t::kOrtxInt8;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16) {
return extDataType_t::kOrtxUint16;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16) {
return extDataType_t::kOrtxInt16;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
return extDataType_t::kOrtxInt32;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
return extDataType_t::kOrtxInt64;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) {
return extDataType_t::kOrtxString;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL) {
return extDataType_t::kOrtxBool;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16) {
return extDataType_t::kOrtxFloat16;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
return extDataType_t::kOrtxDouble;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32) {
return extDataType_t::kOrtxUint32;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64) {
return extDataType_t::kOrtxUint64;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64) {
return extDataType_t::kOrtxComplex64;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128) {
return extDataType_t::kOrtxComplex128;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16) {
return extDataType_t::kOrtxBFloat16;
} else {
return extDataType_t::kOrtxUnknownType;
}
}
[[nodiscard]] extDataType_t GetTensorType() const {
if (tensor_ == nullptr) {
return extDataType_t::kOrtxUnknownType;
}
return GetDataType(tensor_->Type());
}
[[nodiscard]] ortc::TensorBase* GetTensor() const { return tensor_; }
private:
ortc::TensorBase* tensor_{};
extDataType_t tensor_type_{extDataType_t::kOrtxUnknownType};
};
class TensorResult : public OrtxObjectImpl {
@@ -116,13 +157,8 @@ class TensorResult : public OrtxObjectImpl {
~TensorResult() override = default;
void SetTensors(std::vector<std::unique_ptr<ortc::TensorBase>>&& tensors) { tensors_ = std::move(tensors); }
void SetTensorTypes(const std::vector<extDataType_t>& types) { tensor_types_ = types; }
[[nodiscard]] size_t NumTensors() const { return tensors_.size(); }
[[nodiscard]] const std::vector<extDataType_t>& tensor_types() const { return tensor_types_; }
[[nodiscard]] const std::vector<std::unique_ptr<ortc::TensorBase>>& tensors() const { return tensors_; }
[[nodiscard]] std::vector<ortc::TensorBase*> GetTensors() const {
std::vector<ortc::TensorBase*> ts;
ts.reserve(tensors_.size());
@@ -139,16 +175,8 @@ class TensorResult : public OrtxObjectImpl {
return nullptr;
}
extDataType_t GetTensorType(size_t i) const {
if (i < tensor_types_.size()) {
return tensor_types_[i];
}
return extDataType_t::kOrtxUnknownType;
}
private:
std::vector<std::unique_ptr<ortc::TensorBase>> tensors_;
std::vector<extDataType_t> tensor_types_;
};
struct ReturnableStatus {

View file

@@ -0,0 +1,145 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include <cstdint>
#include <cstring>
#include <vector>
#include "png.h"
#include "jpeglib.h"
#include "op_def_struct.h"
#include "ext_status.h"
class JMemorySourceManager : public jpeg_source_mgr {
public:
// Constructor
JMemorySourceManager(const uint8_t* encoded_image_data, const int64_t encoded_image_data_len) {
// Initialize source fields
next_input_byte = reinterpret_cast<const JOCTET*>(encoded_image_data);
bytes_in_buffer = static_cast<size_t>(encoded_image_data_len);
init_source = &JMemorySourceManager::initSource;
fill_input_buffer = &JMemorySourceManager::fillInputBuffer;
skip_input_data = &JMemorySourceManager::skipInputData;
resync_to_restart = jpeg_resync_to_restart;
term_source = &JMemorySourceManager::termSource;
}
// Initialize source (no-op)
static void initSource(j_decompress_ptr cinfo) {
// No initialization needed
}
// Fill input buffer (not used here, always return FALSE)
static boolean fillInputBuffer(j_decompress_ptr cinfo) {
return FALSE; // Buffer is managed manually
}
// Skip input data
static void skipInputData(j_decompress_ptr cinfo, long num_bytes) {
JMemorySourceManager* srcMgr = reinterpret_cast<JMemorySourceManager*>(cinfo->src);
if (num_bytes > 0) {
size_t bytes_to_skip = static_cast<size_t>(num_bytes);
while (bytes_to_skip > srcMgr->bytes_in_buffer) {
bytes_to_skip -= srcMgr->bytes_in_buffer;
if (srcMgr->fillInputBuffer(cinfo)) {
// Error: buffer ran out
srcMgr->extError = kOrtxErrorCorruptData;
}
}
srcMgr->next_input_byte += bytes_to_skip;
srcMgr->bytes_in_buffer -= bytes_to_skip;
}
}
// Terminate source (no-op)
static void termSource(j_decompress_ptr cinfo) {
// No cleanup needed
}
extError_t extError{kOrtxOK}; // Error handler
};
inline OrtxStatus image_decoder(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) {
const auto& dimensions = input.Shape();
if (dimensions.size() != 1ULL) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Only raw image formats are supported."};
}
// Get data & the length
const uint8_t* encoded_image_data = input.Data();
const int64_t encoded_image_data_len = input.NumberOfElement();
// check it's a PNG image or JPEG image
if (encoded_image_data_len < 8) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Invalid image data."};
}
OrtxStatus status{};
if (png_sig_cmp(encoded_image_data, 0, 8) == 0) {
// Decode the PNG image
png_image image;
std::memset(&image, 0, sizeof(image)); // Use std::memset for clarity
image.version = PNG_IMAGE_VERSION;
if (png_image_begin_read_from_memory(&image, encoded_image_data, static_cast<size_t>(encoded_image_data_len)) ==
0) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to read PNG image."};
}
image.format = PNG_FORMAT_RGB; // Ensure you have the appropriate format
const int height = image.height;
const int width = image.width;
const int channels = PNG_IMAGE_PIXEL_CHANNELS(image.format); // Calculates the number of channels based on format
std::vector<int64_t> output_dimensions{height, width, channels};
uint8_t* decoded_image_data = output.Allocate(output_dimensions);
if (decoded_image_data == nullptr) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to allocate memory for decoded image data."};
}
if (png_image_finish_read(&image, nullptr, decoded_image_data, 0, nullptr) == 0) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to decode PNG image."};
}
} else {
// Initialize JPEG decompression object
jpeg_decompress_struct cinfo;
jpeg_error_mgr jerr;
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
// Set up the custom memory source manager
JMemorySourceManager srcManager(encoded_image_data, encoded_image_data_len);
cinfo.src = &srcManager;
// Read the JPEG header to get image info
jpeg_read_header(&cinfo, TRUE);
// Start decompression
jpeg_start_decompress(&cinfo);
// Allocate memory for the image
std::vector<int64_t> output_dimensions{cinfo.output_height, cinfo.output_width, cinfo.output_components};
uint8_t* imageBuffer = output.Allocate(output_dimensions);
// Read the image data
int row_stride = cinfo.output_width * cinfo.output_components;
while (cinfo.output_scanline < cinfo.output_height) {
uint8_t* row_ptr = imageBuffer + (cinfo.output_scanline * row_stride);
jpeg_read_scanlines(&cinfo, &row_ptr, 1);
if (srcManager.extError != kOrtxOK) {
break;
}
}
if (srcManager.extError != kOrtxOK) {
status = {srcManager.extError, "[ImageDecoder]: Failed to decode JPEG image."};
}
// Finish decompression
jpeg_finish_decompress(&cinfo);
jpeg_destroy_decompress(&cinfo);
}
return status;
}

View file

@@ -8,7 +8,7 @@
#include "image_processor.h"
#include "c_api_utils.hpp"
#include "cv2/imgcodecs/imdecode.hpp"
#include "image_decoder.hpp"
#include "image_transforms.hpp"
#include "image_transforms_phi_3.hpp"
@@ -179,7 +179,7 @@ OrtxStatus ImageProcessor::PreProcess(ort_extensions::span<ImageRawData> image_d
operations_.back()->ResetTensors(allocator_);
if (status.IsOk()) {
r.SetTensors(std::move(img_result));
r.SetTensorTypes({kOrtxFloat, kOrtxInt64, kOrtxInt64});
// r.SetTensorTypes({kOrtxFloat, kOrtxInt64, kOrtxInt64});
}
return status;

View file

@@ -0,0 +1,41 @@
import os
import tempfile
from PIL import Image
from onnxruntime_extensions.pp_api import ImageProcessor
img_proc = ImageProcessor(R"""
{
"processor": {
"name": "image_processing",
"transforms": [
{
"operation": {
"name": "decode_image",
"type": "DecodeImage",
"attrs": {
"color_space": "BGR"
}
}
},
{
"operation": {
"name": "convert_to_rgb",
"type": "ConvertRGB"
}
}
]
}
}""")
img_name = "australia.jpg"
result = img_proc.pre_process(os.path.dirname(__file__) + "/" + img_name)
np_img = img_proc.to_numpy(result)
print(np_img.shape, np_img.dtype)
# can save the image back to disk
img_rgb = np_img[0]
img_bgr = img_rgb[..., ::-1]
output_name = tempfile.gettempdir() + "/" + img_name
Image.fromarray(img_bgr).save(output_name)
print(output_name)

View file

@@ -0,0 +1,76 @@
import os
import tempfile
from PIL import Image
from transformers import AutoProcessor
from onnxruntime_extensions.pp_api import create_processor, load_images, image_pre_process, tensor_result_get_at
import numpy as np
def regen_image(arr):
mean = np.array([0.48145466, 0.4578275, 0.40821073])
std = np.array([0.26862954, 0.26130258, 0.27577711])
# Reverse normalization
array = arr * std + mean
# Clip the values to [0, 1] range
array = np.clip(array, 0, 1)
# Convert to [0, 255] range and uint8 type
array = (array * 255).astype(np.uint8)
# Convert NumPy array to PIL Image
image = Image.fromarray(array)
return image
test_image = "test/data/processor/passport.png"
# test_image = "/temp/passport_s.png"
# test_image = "/temp/passport_s2.png"
model_id = "microsoft/Phi-3-vision-128k-instruct"
processor = create_processor("test/data/processor/phi_3_image.json")
images = load_images([test_image])
c_out = image_pre_process(processor, images)
# print(tensor_result_get_at(c_out, 0))
# print(tensor_result_get_at(c_out, 1))
image = Image.open(test_image)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
messages = [
{"role": "user", "content": "<|image_1|>\nWhat is shown in this image?"},
{"role": "assistant", "content": "The chart displays the percentage of respondents who agree with various statements about their preparedness for meetings. It shows five categories: 'Having clear and pre-defined goals for meetings', 'Knowing where to find the information I need for a meeting', 'Understanding my exact role and responsibilities when I'm invited', 'Having tools to manage admin tasks like note-taking or summarization', and 'Having more focus time to sufficiently prepare for meetings'. Each category has an associated bar indicating the level of agreement, measured on a scale from 0% to 100%."},
{"role": "user", "content": "Provide insightful questions to spark discussion."}
]
prompt = processor.tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True)
inputs = processor(prompt, [image], return_tensors="pt")
# print(inputs["pixel_values"].numpy())
# print(inputs["image_sizes"])
np.testing.assert_allclose(
inputs["image_sizes"].numpy(), tensor_result_get_at(c_out, 1))
# np.testing.assert_allclose(inputs["pixel_values"].numpy(), tensor_result_get_at(c_out, 0), rtol=1e-1)
if os.path.exists("/temp"):
temp_dir = "/temp"
else:
temp_dir = tempfile.mkdtemp()
print(f"Created temp dir: {temp_dir}")
for i in range(17):
expected = inputs["pixel_values"].numpy()[0, i]
actual = tensor_result_get_at(c_out, 0)[0, i]
e_image = regen_image(expected.transpose(1, 2, 0))
a_image = regen_image(actual.transpose(1, 2, 0))
e_image.save(f"{temp_dir}/e_{i}.png")
a_image.save(f"{temp_dir}/a_{i}.png")
try:
np.testing.assert_allclose(inputs["pixel_values"].numpy(
)[0, i], tensor_result_get_at(c_out, 0)[0, i], rtol=1e-2)
except AssertionError as e:
print(str(e))

View file

@@ -0,0 +1,81 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include <vector>
#include <tuple>
#include <fstream>
#include <filesystem>
#include "gtest/gtest.h"
#include "shared/api/c_api_utils.hpp"
#include "shared/api/image_decoder.hpp"
using namespace ort_extensions;
TEST(ImgDecoderTest, TestPngDecoder) {
std::vector<uint8_t> png_data;
std::filesystem::path png_path = "data/processor/exceltable.png";
std::ifstream png_file(png_path, std::ios::binary);
ASSERT_TRUE(png_file.is_open());
png_file.seekg(0, std::ios::end);
png_data.resize(png_file.tellg());
png_file.seekg(0, std::ios::beg);
png_file.read(reinterpret_cast<char*>(png_data.data()), png_data.size());
png_file.close();
ortc::Tensor<uint8_t> png_tensor({static_cast<int64_t>(png_data.size())}, png_data.data());
ortc::Tensor<uint8_t> out_tensor{&CppAllocator::Instance()};
auto status = image_decoder(png_tensor, out_tensor);
ASSERT_TRUE(status.IsOk());
ASSERT_EQ(out_tensor.Shape(), std::vector<int64_t>({206, 487, 3}));
auto out_range = out_tensor.Data() + 0;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}));
out_range = out_tensor.Data() + 477 * 3;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}));
out_range = out_tensor.Data() + 243 * 206 * 3;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217}));
out_range = out_tensor.Data() + 485 * 206 * 3;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}));
}
TEST(ImageDecoderTest, TestJpegDecoder) {
std::vector<uint8_t> jpeg_data;
std::filesystem::path jpeg_path = "data/processor/australia.jpg";
std::ifstream jpeg_file(jpeg_path, std::ios::binary);
ASSERT_TRUE(jpeg_file.is_open());
jpeg_file.seekg(0, std::ios::end);
jpeg_data.resize(jpeg_file.tellg());
jpeg_file.seekg(0, std::ios::beg);
jpeg_file.read(reinterpret_cast<char*>(jpeg_data.data()), jpeg_data.size());
jpeg_file.close();
ortc::Tensor<uint8_t> jpeg_tensor({static_cast<int64_t>(jpeg_data.size())}, jpeg_data.data());
ortc::Tensor<uint8_t> out_tensor{&CppAllocator::Instance()};
auto status = image_decoder(jpeg_tensor, out_tensor);
ASSERT_TRUE(status.IsOk());
ASSERT_EQ(out_tensor.Shape(), std::vector<int64_t>({876, 1300, 3}));
auto out_range = out_tensor.Data() + 0;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({48, 14, 5, 48, 14, 5, 48, 14, 5, 48, 14, 5}));
out_range = out_tensor.Data() + 1296 * 3;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({221, 237, 224, 225, 236, 219, 218, 222, 199, 203, 202, 174}));
out_range = out_tensor.Data() + 438 * 1300 * 3;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({84, 68, 55, 86, 70, 55, 92, 77, 58, 101, 86, 65}));
out_range = out_tensor.Data() + 875 * 1300 * 3 + 1296 * 3;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({208, 210, 197, 204, 206, 193, 198, 200, 187, 194, 196, 183}));
}