Merge branch 'main' into fix-transformers_and_bert-arguments

This commit is contained in:
Sebastian Gallese 2024-09-05 11:58:21 -07:00 committed by GitHub
Parents 383def2ab9 90d8f33172
Commit ed09a5a0ab
No key found matching this signature
GPG key ID: B5690EEEBB952194
18 changed files with 589 additions and 45 deletions

View file

@@ -197,7 +197,7 @@ stages:
# compiled as only one operator selected.
- bash: |
set -e -x -u
./build.sh -DOCOS_ENABLE_C_API=ON
./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
cd out/Linux/RelWithDebInfo
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests
@@ -281,7 +281,7 @@ stages:
# compiled as only one operator selected.
- bash: |
set -e -x -u
./build.sh -DOCOS_ENABLE_C_API=ON
./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
cd out/Darwin/RelWithDebInfo
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests
@@ -431,7 +431,7 @@ stages:
steps:
- script: |
call .\build.bat -DOCOS_ENABLE_C_API=ON
call .\build.bat -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
cd out\Windows
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests

View file

@@ -212,6 +212,9 @@ class CmdBuildCMakeExt(_build_ext):
'-DOCOS_ENABLE_VISION=OFF']
if self.pp_api:
if not self.no_opencv:
raise RuntimeError(
"Cannot enable PP C API Python Wrapper without disabling OpenCV.")
cmake_args += ['-DOCOS_ENABLE_C_API=ON']
if self.no_azure is not None:

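The guard above makes the pre-processing C API Python wrapper (pp_api) and the OpenCV-based operators mutually exclusive at build time. A minimal standalone sketch of that validation, written as a hypothetical helper (the option names pp_api/no_opencv come from the hunk; the exact flag list behind no_opencv is not fully visible here, so it is assumed from the CI flags elsewhere in this change):

# Hypothetical helper mirroring the guard added to CmdBuildCMakeExt above.
def pp_api_cmake_args(pp_api, no_opencv):
    cmake_args = []
    if no_opencv:
        # Assumed to match the existing no_opencv branch that turns the vision ops off.
        cmake_args += ['-DOCOS_ENABLE_CV2=OFF',
                       '-DOCOS_ENABLE_OPENCV_CODECS=OFF',
                       '-DOCOS_ENABLE_VISION=OFF']
    if pp_api:
        if not no_opencv:
            raise RuntimeError(
                "Cannot enable PP C API Python Wrapper without disabling OpenCV.")
        cmake_args += ['-DOCOS_ENABLE_C_API=ON']
    return cmake_args

# e.g. pp_api_cmake_args(True, True) -> OpenCV disabled plus '-DOCOS_ENABLE_C_API=ON'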
View file

@@ -206,7 +206,6 @@ if(NOT PROJECT_IS_TOP_LEVEL AND ONNXRUNTIME_ROOT)
set(_ONNXRUNTIME_EMBEDDED TRUE)
endif()
if (OCOS_ENABLE_SELECTED_OPLIST OR OCOS_BUILD_PRESET)
disable_all_operators()
if(OCOS_ENABLE_SELECTED_OPLIST)
@@ -737,9 +736,12 @@ if(OCOS_ENABLE_C_API)
file(GLOB audio_TARGET_SRC "shared/api/c_api_feature_extraction.*" "shared/api/speech_*")
list(APPEND _TARGET_LIB_SRC ${audio_TARGET_SRC})
endif()
if(OCOS_ENABLE_CV2)
if(OCOS_ENABLE_DLIB)
include(ext_imgcodecs)
file(GLOB cv2_TARGET_SRC "shared/api/c_api_processor.*" "shared/api/image_*.*")
list(APPEND _TARGET_LIB_SRC ${cv2_TARGET_SRC})
target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY} ${ZLIB_LIBRARY})
endif()
endif()
@@ -852,8 +854,8 @@ target_link_libraries(ortcustomops PUBLIC ocos_operators)
if(OCOS_BUILD_SHARED_LIB)
file(GLOB shared_TARGET_SRC "shared/*.cc" "shared/*.h")
if (OCOS_ENABLE_C_API)
if (NOT _HAS_TOKENIZER OR NOT OCOS_ENABLE_CV2 OR NOT OCOS_ENABLE_AUDIO)
message(FATAL_ERROR "Shared library build requires GPT2_TOKENIZER, CV2 and AUDIO to be enabled.")
if (NOT _HAS_TOKENIZER OR NOT OCOS_ENABLE_AUDIO)
message(FATAL_ERROR "Shared library build requires GPT2_TOKENIZER and AUDIO to be enabled.")
endif()
list(APPEND shared_TARGET_SRC "shared/extensions_c.def")
else()

View file

@@ -5,11 +5,3 @@ recursive-include include *.*
recursive-include operators *.*
recursive-include pyop *.*
recursive-include shared *.*
prune ci_build
prune docs
prune test
prune _subbuild
prune out
exclude *.bat
exclude *.yaml
exclude *.git*

View file

@@ -1,7 +1,7 @@
#!/bin/bash
# Example build script for building the source on Linux-like platforms
set -e -x -u
set -e -u
cuda_arch=''
if [[ $@ == *"DOCOS_USE_CUDA=ON"* && $@ != *"DCMAKE_CUDA_ARCHITECTURES"* ]]; then

cmake/ext_imgcodecs.cmake (new file, 131 lines)
View file

@@ -0,0 +1,131 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
set(_IMGCODEC_ROOT_DIR ${dlib_SOURCE_DIR}/dlib/external)
# ----------------------------------------------------------------------------
# project libpng
#
# ----------------------------------------------------------------------------
set (PNG_LIBRARY "libpng_static_c")
set (libPNG_SOURCE_DIR ${_IMGCODEC_ROOT_DIR}/libpng)
set (zlib_SOURCE_DIR ${_IMGCODEC_ROOT_DIR}/zlib)
if(NOT WIN32)
find_library(M_LIBRARY
NAMES m
PATHS /usr/lib /usr/local/lib
)
if(NOT M_LIBRARY)
message(STATUS "math lib 'libm' not found; floating point support disabled")
endif()
else()
# not needed on windows
set(M_LIBRARY "")
endif()
set(lib_srcs
${libPNG_SOURCE_DIR}/arm/arm_init.c
${libPNG_SOURCE_DIR}/arm/filter_neon_intrinsics.c
${libPNG_SOURCE_DIR}/arm/palette_neon_intrinsics.c
${libPNG_SOURCE_DIR}/png.c
${libPNG_SOURCE_DIR}/pngerror.c
${libPNG_SOURCE_DIR}/pngget.c
${libPNG_SOURCE_DIR}/pngmem.c
${libPNG_SOURCE_DIR}/pngpread.c
${libPNG_SOURCE_DIR}/pngread.c
${libPNG_SOURCE_DIR}/pngrio.c
${libPNG_SOURCE_DIR}/pngrtran.c
${libPNG_SOURCE_DIR}/pngrutil.c
${libPNG_SOURCE_DIR}/pngset.c
${libPNG_SOURCE_DIR}/pngtrans.c
${libPNG_SOURCE_DIR}/pngwio.c
${libPNG_SOURCE_DIR}/pngwrite.c
${libPNG_SOURCE_DIR}/pngwtran.c
${libPNG_SOURCE_DIR}/pngwutil.c
${zlib_SOURCE_DIR}/adler32.c
${zlib_SOURCE_DIR}/compress.c
${zlib_SOURCE_DIR}/crc32.c
${zlib_SOURCE_DIR}/deflate.c
${zlib_SOURCE_DIR}/gzclose.c
${zlib_SOURCE_DIR}/gzlib.c
${zlib_SOURCE_DIR}/gzread.c
${zlib_SOURCE_DIR}/gzwrite.c
${zlib_SOURCE_DIR}/infback.c
${zlib_SOURCE_DIR}/inffast.c
${zlib_SOURCE_DIR}/inflate.c
${zlib_SOURCE_DIR}/inftrees.c
${zlib_SOURCE_DIR}/trees.c
${zlib_SOURCE_DIR}/uncompr.c
${zlib_SOURCE_DIR}/zutil.c
)
add_library(${PNG_LIBRARY} STATIC EXCLUDE_FROM_ALL ${lib_srcs})
target_include_directories(${PNG_LIBRARY} BEFORE PRIVATE ${zlib_SOURCE_DIR})
if(MSVC)
target_compile_definitions(${PNG_LIBRARY} PRIVATE -D_CRT_SECURE_NO_DEPRECATE)
else()
target_compile_options(${PNG_LIBRARY} PRIVATE -Wno-deprecated-non-prototype)
endif()
# ----------------------------------------------------------------------------
# project libjpeg
#
# ----------------------------------------------------------------------------
set(JPEG_LIBRARY "libjpeg_static_c")
set(libJPEG_SOURCE_DIR ${_IMGCODEC_ROOT_DIR}/libjpeg)
set(lib_srcs
${libJPEG_SOURCE_DIR}/jaricom.c
${libJPEG_SOURCE_DIR}/jcapimin.c
${libJPEG_SOURCE_DIR}/jcapistd.c
${libJPEG_SOURCE_DIR}/jcarith.c
${libJPEG_SOURCE_DIR}/jccoefct.c
${libJPEG_SOURCE_DIR}/jccolor.c
${libJPEG_SOURCE_DIR}/jcdctmgr.c
${libJPEG_SOURCE_DIR}/jchuff.c
${libJPEG_SOURCE_DIR}/jcinit.c
${libJPEG_SOURCE_DIR}/jcmainct.c
${libJPEG_SOURCE_DIR}/jcmarker.c
${libJPEG_SOURCE_DIR}/jcmaster.c
${libJPEG_SOURCE_DIR}/jcomapi.c
${libJPEG_SOURCE_DIR}/jcparam.c
${libJPEG_SOURCE_DIR}/jcprepct.c
${libJPEG_SOURCE_DIR}/jcsample.c
${libJPEG_SOURCE_DIR}/jdapimin.c
${libJPEG_SOURCE_DIR}/jdapistd.c
${libJPEG_SOURCE_DIR}/jdarith.c
${libJPEG_SOURCE_DIR}/jdatadst.c
${libJPEG_SOURCE_DIR}/jdatasrc.c
${libJPEG_SOURCE_DIR}/jdcoefct.c
${libJPEG_SOURCE_DIR}/jdcolor.c
${libJPEG_SOURCE_DIR}/jddctmgr.c
${libJPEG_SOURCE_DIR}/jdhuff.c
${libJPEG_SOURCE_DIR}/jdinput.c
${libJPEG_SOURCE_DIR}/jdmainct.c
${libJPEG_SOURCE_DIR}/jdmarker.c
${libJPEG_SOURCE_DIR}/jdmaster.c
${libJPEG_SOURCE_DIR}/jdmerge.c
${libJPEG_SOURCE_DIR}/jdpostct.c
${libJPEG_SOURCE_DIR}/jdsample.c
${libJPEG_SOURCE_DIR}/jerror.c
${libJPEG_SOURCE_DIR}/jfdctflt.c
${libJPEG_SOURCE_DIR}/jfdctfst.c
${libJPEG_SOURCE_DIR}/jfdctint.c
${libJPEG_SOURCE_DIR}/jidctflt.c
${libJPEG_SOURCE_DIR}/jidctfst.c
${libJPEG_SOURCE_DIR}/jidctint.c
${libJPEG_SOURCE_DIR}/jmemmgr.c
${libJPEG_SOURCE_DIR}/jmemnobs.c
${libJPEG_SOURCE_DIR}/jquant1.c
${libJPEG_SOURCE_DIR}/jquant2.c
${libJPEG_SOURCE_DIR}/jutils.c
)
file(GLOB lib_hdrs ${libJPEG_SOURCE_DIR}/*.h)
add_library(${JPEG_LIBRARY} STATIC EXCLUDE_FROM_ALL ${lib_srcs} ${lib_hdrs})
if(NOT MSVC)
set_source_files_properties(jcdctmgr.c PROPERTIES COMPILE_FLAGS "-O1")
endif()
target_compile_definitions(${JPEG_LIBRARY} PRIVATE -DNO_MKTEMP)

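As the CMakeLists.txt hunk earlier in this change shows, this module is pulled in via include(ext_imgcodecs) when OCOS_ENABLE_DLIB is on, and consumers then link the two static targets it defines. A rough consumer sketch (my_ops is a placeholder target name; zlib is compiled directly into the PNG static library above, so no separate zlib target is defined by this file):

# Sketch of consuming ext_imgcodecs.cmake, mirroring the OCOS_ENABLE_DLIB branch above.
include(ext_imgcodecs)
target_include_directories(my_ops PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
target_link_libraries(my_ops PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})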
View file

@@ -189,6 +189,10 @@ if (OCOS_BUILD_SHARED_LIB)
list(APPEND extensions_test_libraries stdc++fs -pthread)
endif()
if (NOT MSVC)
list(APPEND extensions_test_libraries ${CMAKE_DL_LIBS})
endif()
add_test_target(TARGET extensions_test
TEST_SOURCES ${shared_TEST_SRC}
LIBRARIES ${extensions_test_libraries}

View file

@@ -3,8 +3,6 @@
set(OCOS_ENABLE_GPT2_TOKENIZER ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_C_API ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_CV2 ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_OPENCV_CODECS ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_DLIB ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_MATH ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_AUDIO ON CACHE INTERNAL "" FORCE)

View file

@@ -105,6 +105,17 @@ extError_t ORTX_API_CALL OrtxTensorResultGetAt(OrtxTensorResult* result, size_t
*/
extError_t ORTX_API_CALL OrtxGetTensorType(OrtxTensor* tensor, extDataType_t* type);
/**
* @brief Retrieves the size of each element in the given tensor.
*
* This function calculates the size of each element in the specified tensor and stores it in the provided size variable.
*
* @param tensor A pointer to the OrtxTensor object.
* @param size A pointer to a size_t variable to store the size of each element.
* @return An extError_t value indicating the success or failure of the operation.
*/
extError_t ORTX_API_CALL OrtxGetTensorSizeOfElement(OrtxTensor* tensor, size_t* size);
/** \brief Get the data from the tensor
*
* \param tensor The tensor object

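Together with the existing OrtxGetTensorData, the new OrtxGetTensorSizeOfElement lets a caller compute a tensor's total byte size without hard-coding per-type element sizes. A hedged C sketch (error handling omitted; how the OrtxTensor* is obtained and the header name are assumptions, not part of this hunk):

#include <stddef.h>
#include <stdint.h>
#include "ortx_utils.h"   /* assumption: the public header declaring the Ortx* tensor APIs */

/* Sketch: total byte size of a tensor via the new element-size query.
   `tensor` is assumed to have been obtained elsewhere (e.g. from a tensor result). */
static size_t tensor_byte_size(OrtxTensor* tensor) {
  size_t elem_size = 0;
  OrtxGetTensorSizeOfElement(tensor, &elem_size);   /* added in this change */

  const void* data = NULL;
  const int64_t* shape = NULL;
  size_t num_dims = 0;
  OrtxGetTensorData(tensor, &data, &shape, &num_dims);

  size_t total = elem_size;
  for (size_t i = 0; i < num_dims; ++i) {
    total *= (size_t)shape[i];
  }
  return total;
}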
View file

@@ -65,8 +65,16 @@ class ImageProcessor:
self.processor = create_processor(processor_json)
def pre_process(self, images):
if isinstance(images, str):
images = [images]
if isinstance(images, list):
images = load_images(images)
return image_pre_process(self.processor, images)
@staticmethod
def to_numpy(result):
return tensor_result_get_at(result, 0)
def __del__(self):
if delete_object and self.processor:
delete_object(self.processor)

View file

@@ -85,15 +85,12 @@ void AddGlobalMethodsCApi(pybind11::module& m) {
const int64_t* shape{};
size_t num_dims;
const void* data{};
size_t elem_size = 0;
if (tensor_type == extDataType_t::kOrtxInt64 || tensor_type == extDataType_t::kOrtxFloat) {
size_t elem_size = 1;
if (tensor_type == extDataType_t::kOrtxInt64 ||
tensor_type == extDataType_t::kOrtxFloat ||
tensor_type == extDataType_t::kOrtxUint8) {
OrtxGetTensorData(tensor, reinterpret_cast<const void**>(&data), &shape, &num_dims);
elem_size = 4;
if (tensor_type == extDataType_t::kOrtxInt64) {
elem_size = 8;
}
} else if (tensor_type == extDataType_t::kOrtxUnknownType) {
throw std::runtime_error("Failed to get tensor type");
OrtxGetTensorSizeOfElement(tensor, &elem_size);
} else if (tensor_type == extDataType_t::kOrtxUnknownType) {
throw std::runtime_error("unsupported tensor type");
}
@@ -108,6 +105,8 @@ void AddGlobalMethodsCApi(pybind11::module& m) {
obj = py::array_t<float>(npy_dims);
} else if (tensor_type == extDataType_t::kOrtxInt64) {
obj = py::array_t<int64_t>(npy_dims);
} else if (tensor_type == extDataType_t::kOrtxUint8) {
obj = py::array_t<uint8_t>(npy_dims);
}
void* out_ptr = obj.mutable_data();

View file

@@ -103,7 +103,6 @@ extError_t ORTX_API_CALL OrtxTensorResultGetAt(OrtxTensorResult* result, size_t
auto tensor_ptr = std::make_unique<TensorObject>();
tensor_ptr->SetTensor(ts);
tensor_ptr->SetTensorType(result_ptr->GetTensorType(index));
*tensor = static_cast<OrtxTensor*>(tensor_ptr.release());
return extError_t();
}
@@ -124,6 +123,24 @@ extError_t ORTX_API_CALL OrtxGetTensorType(OrtxTensor* tensor, extDataType_t* ty
return extError_t();
}
extError_t ORTX_API_CALL OrtxGetTensorSizeOfElement(OrtxTensor* tensor, size_t* size) {
if (tensor == nullptr || size == nullptr) {
ReturnableStatus::last_error_message_ = "Invalid argument";
return kOrtxErrorInvalidArgument;
}
auto tensor_impl = static_cast<TensorObject*>(tensor);
if (tensor_impl->ortx_kind() != extObjectKind_t::kOrtxKindTensor) {
ReturnableStatus::last_error_message_ = "Invalid argument";
return kOrtxErrorInvalidArgument;
}
auto tb = tensor_impl->GetTensor();
assert(tb != nullptr);
*size = tb->SizeInBytes() / tb->NumberOfElement();
return extError_t();
}
extError_t ORTX_API_CALL OrtxGetTensorData(OrtxTensor* tensor, const void** data, const int64_t** shape,
size_t* num_dims) {
if (tensor == nullptr) {
@@ -158,3 +175,11 @@ extError_t ORTX_API_CALL OrtxGetTensorDataFloat(OrtxTensor* tensor, const float*
*data = reinterpret_cast<const float*>(data_ptr); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
return err;
}
extError_t ORTX_API_CALL OrtxGetTensorDataUint8(OrtxTensor* tensor, const uint8_t** data, const int64_t** shape,
size_t* num_dims) {
const void* data_ptr{};
auto err = OrtxGetTensorData(tensor, &data_ptr, shape, num_dims);
*data = reinterpret_cast<const uint8_t*>(data_ptr); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)
return err;
}

View file

@@ -99,15 +99,56 @@ class TensorObject : public OrtxObjectImpl {
~TensorObject() override = default;
void SetTensor(ortc::TensorBase* tensor) { tensor_ = tensor; }
void SetTensorType(extDataType_t type) { tensor_type_ = type; }
[[nodiscard]] extDataType_t GetTensorType() const { return tensor_type_; }
static extDataType_t GetDataType(ONNXTensorElementDataType dt) {
if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
return extDataType_t::kOrtxFloat;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) {
return extDataType_t::kOrtxUint8;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8) {
return extDataType_t::kOrtxInt8;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16) {
return extDataType_t::kOrtxUint16;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16) {
return extDataType_t::kOrtxInt16;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
return extDataType_t::kOrtxInt32;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
return extDataType_t::kOrtxInt64;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) {
return extDataType_t::kOrtxString;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL) {
return extDataType_t::kOrtxBool;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16) {
return extDataType_t::kOrtxFloat16;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
return extDataType_t::kOrtxDouble;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32) {
return extDataType_t::kOrtxUint32;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64) {
return extDataType_t::kOrtxUint64;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64) {
return extDataType_t::kOrtxComplex64;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128) {
return extDataType_t::kOrtxComplex128;
} else if (dt == ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16) {
return extDataType_t::kOrtxBFloat16;
} else {
return extDataType_t::kOrtxUnknownType;
}
}
[[nodiscard]] extDataType_t GetTensorType() const {
if (tensor_ == nullptr) {
return extDataType_t::kOrtxUnknownType;
}
return GetDataType(tensor_->Type());
}
[[nodiscard]] ortc::TensorBase* GetTensor() const { return tensor_; }
private:
ortc::TensorBase* tensor_{};
extDataType_t tensor_type_{extDataType_t::kOrtxUnknownType};
};
class TensorResult : public OrtxObjectImpl {
@@ -116,13 +157,8 @@ class TensorResult : public OrtxObjectImpl {
~TensorResult() override = default;
void SetTensors(std::vector<std::unique_ptr<ortc::TensorBase>>&& tensors) { tensors_ = std::move(tensors); }
void SetTensorTypes(const std::vector<extDataType_t>& types) { tensor_types_ = types; }
[[nodiscard]] size_t NumTensors() const { return tensors_.size(); }
[[nodiscard]] const std::vector<extDataType_t>& tensor_types() const { return tensor_types_; }
[[nodiscard]] const std::vector<std::unique_ptr<ortc::TensorBase>>& tensors() const { return tensors_; }
[[nodiscard]] std::vector<ortc::TensorBase*> GetTensors() const {
std::vector<ortc::TensorBase*> ts;
ts.reserve(tensors_.size());
@@ -139,16 +175,8 @@ class TensorResult : public OrtxObjectImpl {
return nullptr;
}
extDataType_t GetTensorType(size_t i) const {
if (i < tensor_types_.size()) {
return tensor_types_[i];
}
return extDataType_t::kOrtxUnknownType;
}
private:
std::vector<std::unique_ptr<ortc::TensorBase>> tensors_;
std::vector<extDataType_t> tensor_types_;
};
struct ReturnableStatus {

View file

@@ -0,0 +1,145 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include <cstdint>
#include <cstring>
#include <vector>
#include "png.h"
#include "jpeglib.h"
#include "op_def_struct.h"
#include "ext_status.h"
class JMemorySourceManager : public jpeg_source_mgr {
public:
// Constructor
JMemorySourceManager(const uint8_t* encoded_image_data, const int64_t encoded_image_data_len) {
// Initialize source fields
next_input_byte = reinterpret_cast<const JOCTET*>(encoded_image_data);
bytes_in_buffer = static_cast<size_t>(encoded_image_data_len);
init_source = &JMemorySourceManager::initSource;
fill_input_buffer = &JMemorySourceManager::fillInputBuffer;
skip_input_data = &JMemorySourceManager::skipInputData;
resync_to_restart = jpeg_resync_to_restart;
term_source = &JMemorySourceManager::termSource;
}
// Initialize source (no-op)
static void initSource(j_decompress_ptr cinfo) {
// No initialization needed
}
// Fill input buffer (not used here, always return FALSE)
static boolean fillInputBuffer(j_decompress_ptr cinfo) {
return FALSE; // Buffer is managed manually
}
// Skip input data
static void skipInputData(j_decompress_ptr cinfo, long num_bytes) {
JMemorySourceManager* srcMgr = reinterpret_cast<JMemorySourceManager*>(cinfo->src);
if (num_bytes > 0) {
size_t bytes_to_skip = static_cast<size_t>(num_bytes);
while (bytes_to_skip > srcMgr->bytes_in_buffer) {
bytes_to_skip -= srcMgr->bytes_in_buffer;
if (srcMgr->fillInputBuffer(cinfo)) {
// Error: buffer ran out
srcMgr->extError = kOrtxErrorCorruptData;
}
}
srcMgr->next_input_byte += bytes_to_skip;
srcMgr->bytes_in_buffer -= bytes_to_skip;
}
}
// Terminate source (no-op)
static void termSource(j_decompress_ptr cinfo) {
// No cleanup needed
}
extError_t extError{kOrtxOK}; // Error handler
};
inline OrtxStatus image_decoder(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) {
const auto& dimensions = input.Shape();
if (dimensions.size() != 1ULL) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Only raw image formats are supported."};
}
// Get data & the length
const uint8_t* encoded_image_data = input.Data();
const int64_t encoded_image_data_len = input.NumberOfElement();
// check it's a PNG image or JPEG image
if (encoded_image_data_len < 8) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Invalid image data."};
}
OrtxStatus status{};
if (png_sig_cmp(encoded_image_data, 0, 8) == 0) {
// Decode the PNG image
png_image image;
std::memset(&image, 0, sizeof(image)); // Use std::memset for clarity
image.version = PNG_IMAGE_VERSION;
if (png_image_begin_read_from_memory(&image, encoded_image_data, static_cast<size_t>(encoded_image_data_len)) ==
0) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to read PNG image."};
}
image.format = PNG_FORMAT_RGB; // Ensure you have the appropriate format
const int height = image.height;
const int width = image.width;
const int channels = PNG_IMAGE_PIXEL_CHANNELS(image.format); // Calculates the number of channels based on format
std::vector<int64_t> output_dimensions{height, width, channels};
uint8_t* decoded_image_data = output.Allocate(output_dimensions);
if (decoded_image_data == nullptr) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to allocate memory for decoded image data."};
}
if (png_image_finish_read(&image, nullptr, decoded_image_data, 0, nullptr) == 0) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to decode PNG image."};
}
} else {
// Initialize JPEG decompression object
jpeg_decompress_struct cinfo;
jpeg_error_mgr jerr;
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
// Set up the custom memory source manager
JMemorySourceManager srcManager(encoded_image_data, encoded_image_data_len);
cinfo.src = &srcManager;
// Read the JPEG header to get image info
jpeg_read_header(&cinfo, TRUE);
// Start decompression
jpeg_start_decompress(&cinfo);
// Allocate memory for the image
std::vector<int64_t> output_dimensions{cinfo.output_height, cinfo.output_width, cinfo.output_components};
uint8_t* imageBuffer = output.Allocate(output_dimensions);
// Read the image data
int row_stride = cinfo.output_width * cinfo.output_components;
while (cinfo.output_scanline < cinfo.output_height) {
uint8_t* row_ptr = imageBuffer + (cinfo.output_scanline * row_stride);
jpeg_read_scanlines(&cinfo, &row_ptr, 1);
if (srcManager.extError != kOrtxOK) {
break;
}
}
if (srcManager.extError != kOrtxOK) {
status = {srcManager.extError, "[ImageDecoder]: Failed to decode JPEG image."};
}
// Finish decompression
jpeg_finish_decompress(&cinfo);
jpeg_destroy_decompress(&cinfo);
}
return status;
}

View file

@@ -8,7 +8,7 @@
#include "image_processor.h"
#include "c_api_utils.hpp"
#include "cv2/imgcodecs/imdecode.hpp"
#include "image_decoder.hpp"
#include "image_transforms.hpp"
#include "image_transforms_phi_3.hpp"
@@ -179,7 +179,7 @@ OrtxStatus ImageProcessor::PreProcess(ort_extensions::span<ImageRawData> image_d
operations_.back()->ResetTensors(allocator_);
if (status.IsOk()) {
r.SetTensors(std::move(img_result));
r.SetTensorTypes({kOrtxFloat, kOrtxInt64, kOrtxInt64});
// r.SetTensorTypes({kOrtxFloat, kOrtxInt64, kOrtxInt64});
}
return status;

View file

@@ -0,0 +1,41 @@
import os
import tempfile
from PIL import Image
from onnxruntime_extensions.pp_api import ImageProcessor
img_proc = ImageProcessor(R"""
{
"processor": {
"name": "image_processing",
"transforms": [
{
"operation": {
"name": "decode_image",
"type": "DecodeImage",
"attrs": {
"color_space": "BGR"
}
}
},
{
"operation": {
"name": "convert_to_rgb",
"type": "ConvertRGB"
}
}
]
}
}""")
img_name = "australia.jpg"
result = img_proc.pre_process(os.path.dirname(__file__) + "/" + img_name)
np_img = img_proc.to_numpy(result)
print(np_img.shape, np_img.dtype)
# can save the image back to disk
img_rgb = np_img[0]
img_bgr = img_rgb[..., ::-1]
output_name = tempfile.gettempdir() + "/" + img_name
Image.fromarray(img_bgr).save(output_name)
print(output_name)

View file

@@ -0,0 +1,76 @@
import os
import tempfile
from PIL import Image
from transformers import AutoProcessor
from onnxruntime_extensions.pp_api import create_processor, load_images, image_pre_process, tensor_result_get_at
import numpy as np
def regen_image(arr):
mean = np.array([0.48145466, 0.4578275, 0.40821073])
std = np.array([0.26862954, 0.26130258, 0.27577711])
# Reverse normalization
array = arr * std + mean
# Clip the values to [0, 1] range
array = np.clip(array, 0, 1)
# Convert to [0, 255] range and uint8 type
array = (array * 255).astype(np.uint8)
# Convert NumPy array to PIL Image
image = Image.fromarray(array)
return image
test_image = "test/data/processor/passport.png"
# test_image = "/temp/passport_s.png"
# test_image = "/temp/passport_s2.png"
model_id = "microsoft/Phi-3-vision-128k-instruct"
processor = create_processor("test/data/processor/phi_3_image.json")
images = load_images([test_image])
c_out = image_pre_process(processor, images)
# print(tensor_result_get_at(c_out, 0))
# print(tensor_result_get_at(c_out, 1))
image = Image.open(test_image)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
messages = [
{"role": "user", "content": "<|image_1|>\nWhat is shown in this image?"},
{"role": "assistant", "content": "The chart displays the percentage of respondents who agree with various statements about their preparedness for meetings. It shows five categories: 'Having clear and pre-defined goals for meetings', 'Knowing where to find the information I need for a meeting', 'Understanding my exact role and responsibilities when I'm invited', 'Having tools to manage admin tasks like note-taking or summarization', and 'Having more focus time to sufficiently prepare for meetings'. Each category has an associated bar indicating the level of agreement, measured on a scale from 0% to 100%."},
{"role": "user", "content": "Provide insightful questions to spark discussion."}
]
prompt = processor.tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True)
inputs = processor(prompt, [image], return_tensors="pt")
# print(inputs["pixel_values"].numpy())
# print(inputs["image_sizes"])
np.testing.assert_allclose(
inputs["image_sizes"].numpy(), tensor_result_get_at(c_out, 1))
# np.testing.assert_allclose(inputs["pixel_values"].numpy(), tensor_result_get_at(c_out, 0), rtol=1e-1)
if os.path.exists("/temp"):
temp_dir = "/temp"
else:
temp_dir = tempfile.mkdtemp()
print(f"Created temp dir: {temp_dir}")
for i in range(17):
expected = inputs["pixel_values"].numpy()[0, i]
actual = tensor_result_get_at(c_out, 0)[0, i]
e_image = regen_image(expected.transpose(1, 2, 0))
a_image = regen_image(actual.transpose(1, 2, 0))
e_image.save(f"{temp_dir}/e_{i}.png")
a_image.save(f"{temp_dir}/a_{i}.png")
try:
np.testing.assert_allclose(inputs["pixel_values"].numpy(
)[0, i], tensor_result_get_at(c_out, 0)[0, i], rtol=1e-2)
except AssertionError as e:
print(str(e))

View file

@@ -0,0 +1,81 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include <vector>
#include <tuple>
#include <fstream>
#include <filesystem>
#include "gtest/gtest.h"
#include "shared/api/c_api_utils.hpp"
#include "shared/api/image_decoder.hpp"
using namespace ort_extensions;
TEST(ImgDecoderTest, TestPngDecoder) {
std::vector<uint8_t> png_data;
std::filesystem::path png_path = "data/processor/exceltable.png";
std::ifstream png_file(png_path, std::ios::binary);
ASSERT_TRUE(png_file.is_open());
png_file.seekg(0, std::ios::end);
png_data.resize(png_file.tellg());
png_file.seekg(0, std::ios::beg);
png_file.read(reinterpret_cast<char*>(png_data.data()), png_data.size());
png_file.close();
ortc::Tensor<uint8_t> png_tensor({static_cast<int64_t>(png_data.size())}, png_data.data());
ortc::Tensor<uint8_t> out_tensor{&CppAllocator::Instance()};
auto status = image_decoder(png_tensor, out_tensor);
ASSERT_TRUE(status.IsOk());
ASSERT_EQ(out_tensor.Shape(), std::vector<int64_t>({206, 487, 3}));
auto out_range = out_tensor.Data() + 0;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}));
out_range = out_tensor.Data() + 477 * 3;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}));
out_range = out_tensor.Data() + 243 * 206 * 3;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217}));
out_range = out_tensor.Data() + 485 * 206 * 3;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}));
}
TEST(ImageDecoderTest, TestJpegDecoder) {
std::vector<uint8_t> jpeg_data;
std::filesystem::path jpeg_path = "data/processor/australia.jpg";
std::ifstream jpeg_file(jpeg_path, std::ios::binary);
ASSERT_TRUE(jpeg_file.is_open());
jpeg_file.seekg(0, std::ios::end);
jpeg_data.resize(jpeg_file.tellg());
jpeg_file.seekg(0, std::ios::beg);
jpeg_file.read(reinterpret_cast<char*>(jpeg_data.data()), jpeg_data.size());
jpeg_file.close();
ortc::Tensor<uint8_t> jpeg_tensor({static_cast<int64_t>(jpeg_data.size())}, jpeg_data.data());
ortc::Tensor<uint8_t> out_tensor{&CppAllocator::Instance()};
auto status = image_decoder(jpeg_tensor, out_tensor);
ASSERT_TRUE(status.IsOk());
ASSERT_EQ(out_tensor.Shape(), std::vector<int64_t>({876, 1300, 3}));
auto out_range = out_tensor.Data() + 0;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({48, 14, 5, 48, 14, 5, 48, 14, 5, 48, 14, 5}));
out_range = out_tensor.Data() + 1296 * 3;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({221, 237, 224, 225, 236, 219, 218, 222, 199, 203, 202, 174}));
out_range = out_tensor.Data() + 438 * 1300 * 3;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({84, 68, 55, 86, 70, 55, 92, 77, 58, 101, 86, 65}));
out_range = out_tensor.Data() + 875 * 1300 * 3 + 1296 * 3;
ASSERT_EQ(std::vector<uint8_t>(out_range, out_range + 12),
std::vector<uint8_t>({208, 210, 197, 204, 206, 193, 198, 200, 187, 194, 196, 183}));
}