Unify the image operations in extensions library (#831)

* Unify the image operations in extensions library

* fix the build configuration issue

* More build fixes

* Fix the native image codec

* fix encode_image

* Add bgr/rgb conversion for encoding image

* parity check

* build break

* update PNG encoding parameters

* build break on Linux

* using MSE to compare images

* fix the discrepancy between Linux and Windows

* final code refinement

* one more change

* fix the C++ warnings

---------

Co-authored-by: Sayan Shaw <52221015+sayanshaw24@users.noreply.github.com>
This commit is contained in:
Wenbing Li 2024-10-30 09:17:06 -07:00 коммит произвёл GitHub
Родитель 0e6bffa201
Коммит be5aa773e3
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
20 изменённых файлов: 459 добавлений и 308 удалений

Просмотреть файл

@ -197,7 +197,7 @@ stages:
# compiled as only one operator selected.
- bash: |
set -e -x -u
./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
./build.sh -DOCOS_ENABLE_C_API=ON
cd out/Linux/RelWithDebInfo
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests
@ -281,7 +281,7 @@ stages:
# compiled as only one operator selected.
- bash: |
set -e -x -u
./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
./build.sh -DOCOS_ENABLE_C_API=ON
cd out/Darwin/RelWithDebInfo
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests
@ -431,7 +431,7 @@ stages:
steps:
- script: |
call .\build.bat -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
call .\build.bat -DOCOS_ENABLE_C_API=ON
cd out\Windows
ctest -C RelWithDebInfo --output-on-failure
displayName: Build ort-extensions with API enabled and run tests

Просмотреть файл

@ -208,8 +208,7 @@ class CmdBuildCMakeExt(_build_ext):
# Disabling openCV can drastically reduce the build time.
cmake_args += [
'-DOCOS_ENABLE_OPENCV_CODECS=OFF',
'-DOCOS_ENABLE_CV2=OFF',
'-DOCOS_ENABLE_VISION=OFF']
'-DOCOS_ENABLE_CV2=OFF']
if self.pp_api:
if not self.no_opencv:

Просмотреть файл

@ -72,8 +72,8 @@ option(OCOS_ENABLE_BLINGFIRE "Enable operators depending on the Blingfire librar
option(OCOS_ENABLE_MATH "Enable math tensor operators building" ON)
option(OCOS_ENABLE_DLIB "Enable operators like Inverse depending on DLIB" ON)
option(OCOS_ENABLE_VENDOR_IMAGE_CODECS "Enable and use vendor image codecs if supported over libpng & libjpeg" OFF)
option(OCOS_ENABLE_OPENCV_CODECS "Enable cv2 and vision operators that require opencv imgcodecs." ON)
option(OCOS_ENABLE_CV2 "Enable the operators in `operators/cv2`" ON)
option(OCOS_ENABLE_OPENCV_CODECS "Enable cv2 and vision operators that require opencv imgcodecs." OFF)
option(OCOS_ENABLE_CV2 "Enable the operators in `operators/cv2`" OFF)
option(OCOS_ENABLE_VISION "Enable the operators in `operators/vision`" ON)
option(OCOS_ENABLE_AUDIO "Enable the operators for audio processing" ON)
option(OCOS_ENABLE_AZURE "Enable the operators for azure execution provider" OFF)
@ -383,7 +383,7 @@ if (OCOS_USE_CUDA)
endif()
# enable the opencv dependency if we have ops that require it
if(OCOS_ENABLE_CV2 OR OCOS_ENABLE_VISION)
if(OCOS_ENABLE_CV2)
set(_ENABLE_OPENCV ON)
message(STATUS "Fetch opencv")
include(opencv)
@ -402,10 +402,6 @@ if(OCOS_ENABLE_CV2)
endif()
if(OCOS_ENABLE_VISION)
if(NOT OCOS_ENABLE_OPENCV_CODECS)
message(FATAL_ERROR "OCOS_ENABLE_VISION requires OCOS_ENABLE_OPENCV_CODECS to be ON")
endif()
file(GLOB TARGET_SRC_VISION "operators/vision/*.cc" "operators/vision/*.h*")
list(APPEND TARGET_SRC ${TARGET_SRC_VISION})
endif()
@ -653,6 +649,25 @@ endif()
# Vision operators: publish the ENABLE_VISION definition and select the image
# codec backend that ocos_operators is built against.
if(OCOS_ENABLE_VISION)
list(APPEND OCOS_COMPILE_DEFINITIONS ENABLE_VISION)
# Start by assuming the bundled libpng/libjpeg codecs are needed; the vendor
# branches below switch this OFF on platforms that provide a native codec.
set(_DEFAULT_CODEC_ENABLE ON)
if(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
# Directory-scoped definition (not attached to a single target).
add_compile_definitions(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
if(WIN32)
# Use WIC on Windows. Nothing to be done
set(_DEFAULT_CODEC_ENABLE OFF)
elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
# Use ImageIO on Apple platforms
set(_DEFAULT_CODEC_ENABLE OFF)
target_link_libraries(ocos_operators PRIVATE "-framework CoreFoundation" "-framework CoreGraphics" "-framework ImageIO")
endif()
endif()
# NOTE: this unconditional assignment overrides the OFF values chosen in the
# vendor branches above, so ext_imgcodecs is currently always included and
# linked whenever OCOS_ENABLE_VISION is ON.
set(_DEFAULT_CODEC_ENABLE ON) # libpng and libjpeg can be optional after EncodeImage with native support too.
if(_DEFAULT_CODEC_ENABLE)
# Bring in the bundled libpng/libjpeg and expose their headers and libraries
# to consumers of ocos_operators (PUBLIC).
include(ext_imgcodecs)
target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})
endif()
endif()
if(OCOS_ENABLE_AZURE)
@ -740,24 +755,6 @@ if(OCOS_ENABLE_C_API)
if(OCOS_ENABLE_DLIB)
file(GLOB cv2_TARGET_SRC "shared/api/c_api_processor.*" "shared/api/image_*.*")
list(APPEND _TARGET_LIB_SRC ${cv2_TARGET_SRC})
if(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
add_compile_definitions(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
if(WIN32)
# Use WIC on Windows. Nothing to be done
elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
# Use ImageIO on Apple platforms
target_link_libraries(ocos_operators PRIVATE "-framework CoreFoundation" "-framework CoreGraphics" "-framework ImageIO")
else()
# Fallback to libpng & libjpeg on all other platforms
include(ext_imgcodecs)
target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})
endif()
else()
include(ext_imgcodecs)
target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})
endif()
endif()
endif()

Просмотреть файл

@ -61,7 +61,7 @@ set(lib_srcs
)
add_library(${PNG_LIBRARY} STATIC EXCLUDE_FROM_ALL ${lib_srcs})
target_include_directories(${PNG_LIBRARY} BEFORE PRIVATE ${zlib_SOURCE_DIR})
target_include_directories(${PNG_LIBRARY} BEFORE PUBLIC ${zlib_SOURCE_DIR})
if(MSVC)
target_compile_definitions(${PNG_LIBRARY} PRIVATE -D_CRT_SECURE_NO_DEPRECATE)

Просмотреть файл

@ -4,9 +4,7 @@
set(OCOS_ENABLE_GPT2_TOKENIZER ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_C_API ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_DLIB ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_OPENCV_CODECS OFF CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_CV2 OFF CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_VISION OFF CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_VISION ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_VENDOR_IMAGE_CODECS ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_MATH ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_AUDIO ON CACHE INTERNAL "" FORCE)

Просмотреть файл

@ -3,3 +3,4 @@
set(OCOS_ENABLE_GPT2_TOKENIZER ON CACHE INTERNAL "" FORCE)
set(OCOS_ENABLE_C_API ON CACHE INTERNAL "" FORCE)
set(OCOS_BUILD_SHARED_LIB OFF CACHE INTERNAL "" FORCE)

Просмотреть файл

@ -1,40 +0,0 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "decode_image.hpp"
#include <opencv2/imgcodecs.hpp>
#include "narrow.h"
namespace ort_extensions {
// Decodes an encoded image held in a 1-D uint8 tensor with OpenCV and writes the
// decoded pixels to `output` as an {height, width, channels} tensor.
// Throws ORT_INVALID_ARGUMENT when the input is not 1-D or cannot be decoded.
void KernelDecodeImage::Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const {
  // The encoded bytes must arrive as a flat buffer.
  const auto& input_shape = input.Shape();
  if (input_shape.size() != 1ULL) {
    ORTX_CXX_API_THROW("[DecodeImage]: Raw image bytes with 1D shape expected.", ORT_INVALID_ARGUMENT);
  }

  // Wrap the input bytes in a 1 x N single-channel matrix header; cv::Mat takes a
  // non-const void*, hence the const_cast.
  const int64_t byte_count = input.NumberOfElement();
  const void* raw_bytes = input.Data();
  const std::vector<int32_t> header_shape{1, static_cast<int32_t>(byte_count)};
  const cv::Mat encoded(header_shape, CV_8UC1, const_cast<void*>(raw_bytes));

  const cv::Mat decoded = cv::imdecode(encoded, cv::IMREAD_COLOR);
  if (decoded.data == nullptr) {
    ORTX_CXX_API_THROW("[DecodeImage] Invalid input. Failed to decode image.", ORT_INVALID_ARGUMENT);
  }

  // Output layout is {H, W, C}; C is the per-pixel element size (3 for BGR).
  const cv::Size extent = decoded.size();
  const int64_t rows = extent.height;
  const int64_t cols = extent.width;
  const int64_t channels = decoded.elemSize();

  const std::vector<int64_t> out_shape{rows, cols, channels};
  uint8_t* destination = output.Allocate(out_shape);
  memcpy(destination, decoded.data, narrow<size_t>(rows * cols * channels));
}
} // namespace ort_extensions

Просмотреть файл

@ -3,19 +3,81 @@
#pragma once
#include "ocos.h"
#include "string_utils.h"
#include <cstring>
#include <variant>
#include <unordered_map>
#include <cstdint>
#include "ext_status.h"
#include "op_def_struct.h"
#if OCOS_ENABLE_VENDOR_IMAGE_CODECS
#if WIN32
#include "image_decoder_win32.hpp"
#elif __APPLE__
#include "image_decoder_darwin.hpp"
#else
#include "image_decoder.hpp"
#endif
#else
#include "image_decoder.hpp"
#endif
namespace ort_extensions {
struct DecodeImage: public internal::DecodeImage {
void decode_image(const ortc::Tensor<uint8_t>& input,
ortc::Tensor<uint8_t>& output);
template <typename DictT>
OrtxStatus Init(const DictT& attrs) {
auto status = internal::DecodeImage::OnInit();
if (!status.IsOk()) {
return status;
}
struct KernelDecodeImage : BaseKernel {
KernelDecodeImage(const OrtApi& api, const OrtKernelInfo& info) : BaseKernel(api, info) {}
void Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const;
for (const auto& [key, value] : attrs) {
if (key == "color_space") {
auto color_space = std::get<std::string>(value);
if (color_space == "RGB") {
is_bgr_ = false;
} else if (color_space == "BGR") {
is_bgr_ = true;
} else {
return {kOrtxErrorInvalidArgument, "[DecodeImage]: Invalid color_space"};
}
} else {
return {kOrtxErrorInvalidArgument, "[Resize]: Invalid argument"};
}
}
return {};
}
OrtStatusPtr OnModelAttach(const OrtApi& api, const OrtKernelInfo& info) {
is_bgr_ = true;
return Init(std::unordered_map<std::string, std::variant<std::string>>());
}
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const{
auto status = internal::DecodeImage::Compute(input, output);
if (!status.IsOk()) {
return status;
}
if (is_bgr_) {
// need to convert rgb to bgr for backward compatibility
const auto& dimensions = output.Shape();
uint8_t* rgb_data = const_cast<uint8_t*>(output.Data());
// do an inplace swap of the channels
for (int y = 0; y < dimensions[0]; ++y) {
for (int x = 0; x < dimensions[1]; ++x) {
std::swap(rgb_data[(y * dimensions[1] + x) * 3 + 0], rgb_data[(y * dimensions[1] + x) * 3 + 2]);
}
}
}
return status;
}
private:
bool is_bgr_{}; // flag to indicate if the output is in BGR format
};
} // namespace ort_extensions

Просмотреть файл

@ -1,40 +1,139 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "zlib.h"
#if ZLIB_VERNUM != 0x12b0
// the following is a trick to show the invalid version number for the diagnosis.
#define STR_VERSION(x) STR_NUM(x)
#define STR_NUM(x) #x
#pragma message "Invalid zlib version: " STR_VERSION(ZLIB_VERNUM)
#error "stopped"
#endif
#include "png.h"
#include "jpeglib.h"
#include "op_def_struct.h"
#include "ext_status.h"
#include "encode_image.hpp"
#include <opencv2/imgcodecs.hpp>
namespace ort_extensions {
void KernelEncodeImage::Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const {
// Setup inputs
const auto dimensions_bgr = input.Shape();
const auto& dimensions_bgr = input.Shape();
if (dimensions_bgr.size() != 3 || dimensions_bgr[2] != 3) {
// expect {H, W, C} as that's the inverse of what decode_image produces.
// we have no way to check if it's BGR or RGB though
ORTX_CXX_API_THROW("[EncodeImage] requires rank 3 BGR input in channels last format.", ORT_INVALID_ARGUMENT);
}
// Get data & the length
std::vector<int32_t> height_x_width{static_cast<int32_t>(dimensions_bgr[0]), // H
static_cast<int32_t>(dimensions_bgr[1])}; // W
int32_t height = static_cast<int32_t>(dimensions_bgr[0]); // H
int32_t width = static_cast<int32_t>(dimensions_bgr[1]); // W
const int32_t color_space = 3;
const uint8_t* bgr_data = input.Data();
unsigned char* outbuffer = nullptr;
std::vector<uint8_t> png_buffer;
size_t outsize = 0;
// data is const uint8_t but opencv2 wants void*.
const void* bgr_data = input.Data();
const cv::Mat bgr_image(height_x_width, CV_8UC3, const_cast<void*>(bgr_data));
// don't know output size ahead of time so need to encode and then copy to output
std::vector<uint8_t> encoded_image;
if (!cv::imencode(extension_, bgr_image, encoded_image)) {
ORTX_CXX_API_THROW("[EncodeImage] Image encoding failed.", ORT_INVALID_ARGUMENT);
auto rgb_data = std::make_unique<uint8_t[]>(height * width * color_space);
for (int32_t y = 0; y < height; ++y) {
for (int32_t x = 0; x < width; ++x) {
rgb_data[(y * width + x) * color_space + 0] = bgr_data[(y * width + x) * color_space + 2];
rgb_data[(y * width + x) * color_space + 1] = bgr_data[(y * width + x) * color_space + 1];
rgb_data[(y * width + x) * color_space + 2] = bgr_data[(y * width + x) * color_space + 0];
}
}
// Setup output & copy to destination
std::vector<int64_t> output_dimensions{static_cast<int64_t>(encoded_image.size())};
if (extension_ == ".jpg") {
struct jpeg_compress_struct cinfo;
struct jpeg_error_mgr jerr;
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_compress(&cinfo);
jpeg_mem_dest(&cinfo, &outbuffer, &outsize);
cinfo.image_width = width;
cinfo.image_height = height;
cinfo.input_components = color_space;
cinfo.in_color_space = JCS_RGB;
// compression parameters is compatible with opencv
jpeg_set_defaults(&cinfo);
jpeg_set_quality(&cinfo, 95, TRUE);
cinfo.optimize_coding = FALSE;
cinfo.restart_interval = 0;
cinfo.q_scale_factor[0] = jpeg_quality_scaling(-1);
cinfo.q_scale_factor[1] = jpeg_quality_scaling(-1);
const int32_t sampling_factor = 0x221111; // 4:2:0 IMWRITE_JPEG_SAMPLING_FACTOR_420
cinfo.comp_info[0].v_samp_factor = (sampling_factor >> 16 ) & 0xF;
cinfo.comp_info[0].h_samp_factor = (sampling_factor >> 20 ) & 0xF;
cinfo.comp_info[1].v_samp_factor = 1;
cinfo.comp_info[1].h_samp_factor = 1;
// jpeg_default_qtables( &cinfo, TRUE );
jpeg_start_compress(&cinfo, TRUE);
JSAMPROW row_pointer[1];
while (cinfo.next_scanline < cinfo.image_height) {
row_pointer[0] = (JSAMPROW)&rgb_data[cinfo.next_scanline * cinfo.image_width * color_space];
jpeg_write_scanlines(&cinfo, row_pointer, 1);
}
jpeg_finish_compress(&cinfo);
jpeg_destroy_compress(&cinfo);
} else if (extension_ == ".png") {
png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
if (!png_ptr) {
ORTX_CXX_API_THROW("[EncodeImage] PNG create write struct failed.", ORT_INVALID_ARGUMENT);
}
png_infop info_ptr = png_create_info_struct(png_ptr);
if (!info_ptr) {
png_destroy_write_struct(&png_ptr, nullptr);
ORTX_CXX_API_THROW("[EncodeImage] PNG create info struct failed.", ORT_INVALID_ARGUMENT);
}
if (setjmp(png_jmpbuf(png_ptr))) {
png_destroy_write_struct(&png_ptr, &info_ptr);
ORTX_CXX_API_THROW("[EncodeImage] PNG encoding failed.", ORT_INVALID_ARGUMENT);
}
png_set_write_fn(png_ptr, &png_buffer, [](png_structp png_ptr, png_bytep data, png_size_t length) {
auto p = reinterpret_cast<std::vector<uint8_t>*>(png_get_io_ptr(png_ptr));
p->insert(p->end(), data, data + length);
}, nullptr);
// sync with openCV parameters
png_set_filter(png_ptr, PNG_FILTER_TYPE_BASE, PNG_FILTER_SUB);
png_set_compression_level(png_ptr, 1);
png_set_compression_strategy(png_ptr, 3);
png_set_IHDR(png_ptr, info_ptr, width, height, 8, PNG_COLOR_TYPE_RGB,
PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
png_write_info(png_ptr, info_ptr);
for (int32_t y = 0; y < height; ++y) {
png_write_row(png_ptr, (png_bytep)&rgb_data[y * width * color_space]);
}
png_write_flush(png_ptr);
png_write_end(png_ptr, info_ptr);
png_destroy_write_struct(&png_ptr, &info_ptr);
outbuffer = png_buffer.data();
outsize = png_buffer.size();
} else {
ORTX_CXX_API_THROW("[EncodeImage] Unsupported image format.", ORT_INVALID_ARGUMENT);
}
std::vector<int64_t> output_dimensions{static_cast<int64_t>(outsize)};
uint8_t* data = output.Allocate(output_dimensions);
memcpy(data, encoded_image.data(), encoded_image.size());
memcpy(data, outbuffer, outsize);
if (outbuffer != png_buffer.data() && outbuffer != nullptr) {
free(outbuffer);
}
}
} // namespace ort_extensions

Просмотреть файл

@ -0,0 +1,208 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include <csetjmp>
#include <cstdint>
#include <cstring>
#include <vector>
#include "png.h"
#include "jpeglib.h"
#include "op_def_struct.h"
#include "ext_status.h"
namespace ort_extensions::internal {
struct DecodeImage {
OrtxStatus OnInit() { return {}; }
OrtxStatus DecodePNG(const uint8_t* encoded_image_data, const int64_t encoded_image_data_len,
ortc::Tensor<uint8_t>& output) const {
// Decode the PNG image
png_structp png = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
if (!png) {
return {kOrtxErrorCorruptData, "[ImageDecoder]: Failed to create png read struct."};
}
png_infop info = png_create_info_struct(png);
if (!info) {
png_destroy_read_struct(&png, nullptr, nullptr);
return {kOrtxErrorCorruptData, "[ImageDecoder]: Failed to create png info struct."};
}
if (setjmp(png_jmpbuf(png))) {
png_destroy_read_struct(&png, &info, nullptr);
return {kOrtxErrorCorruptData, "[ImageDecoder]: Error during png creation."};
}
struct BufferState {
const uint8_t* ptr;
png_size_t size;
} bufferState = {encoded_image_data, static_cast<png_size_t>(encoded_image_data_len)};
png_set_read_fn(png, &bufferState, [](png_structp pngPtr, png_bytep data, png_size_t length) {
BufferState* state = static_cast<BufferState*>(png_get_io_ptr(pngPtr));
if (length > state->size) png_error(pngPtr, "Read Error: Exceeded buffer size");
memcpy(data, state->ptr, length);
state->ptr += length;
state->size -= length;
});
png_read_info(png, info);
auto width = png_get_image_width(png, info);
auto height = png_get_image_height(png, info);
png_byte color_type = png_get_color_type(png, info);
png_byte bit_depth = png_get_bit_depth(png, info);
if (bit_depth == 16) {
png_set_strip_16(png);
}
if (color_type == PNG_COLOR_TYPE_PALETTE) {
png_set_palette_to_rgb(png);
}
if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) {
png_set_expand_gray_1_2_4_to_8(png);
}
if (png_get_valid(png, info, PNG_INFO_tRNS)) {
png_set_tRNS_to_alpha(png);
}
if (color_type == PNG_COLOR_TYPE_RGB || color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_PALETTE) {
png_set_filler(png, 0xFF, PNG_FILLER_AFTER);
}
if (color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_GRAY_ALPHA) {
png_set_gray_to_rgb(png);
}
png_read_update_info(png, info);
std::vector<int64_t> output_dimensions{height, width, 3};
uint8_t* output_data = output.Allocate(output_dimensions);
// Read the image row by row
std::vector<uint8_t> row(width * 4);
for (uint32_t i = 0; i < height; ++i) {
png_read_row(png, row.data(), nullptr);
for (uint32_t j = 0; j < width; ++j) {
for (uint32_t k = 0; k < 3; ++k) {
output_data[i * width * 3 + j * 3 + k] = row[j * 4 + k];
}
}
}
png_destroy_read_struct(&png, &info, nullptr);
return {};
}
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const {
const auto& dimensions = input.Shape();
if (dimensions.size() != 1ULL) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Only raw image formats are supported."};
}
// Get data & the length
const uint8_t* encoded_image_data = input.Data();
const int64_t encoded_image_data_len = input.NumberOfElement();
// check it's a PNG image or JPEG image
if (encoded_image_data_len < 8) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Invalid image data."};
}
if (png_sig_cmp(encoded_image_data, 0, 8) == 0) {
return DecodePNG(encoded_image_data, encoded_image_data_len, output);
} else {
// Initialize JPEG decompression object
jpeg_decompress_struct cinfo;
jpeg_error_mgr jerr;
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
// Set up the custom memory source manager
JMemorySourceManager srcManager(encoded_image_data, encoded_image_data_len);
cinfo.src = &srcManager;
// Read the JPEG header to get image info
jpeg_read_header(&cinfo, TRUE);
// Start decompression
jpeg_start_decompress(&cinfo);
// Allocate memory for the image
std::vector<int64_t> output_dimensions{cinfo.output_height, cinfo.output_width, cinfo.output_components};
uint8_t* imageBuffer = output.Allocate(output_dimensions);
// Read the image data
int row_stride = cinfo.output_width * cinfo.output_components;
while (cinfo.output_scanline < cinfo.output_height) {
uint8_t* row_ptr = imageBuffer + (cinfo.output_scanline * row_stride);
jpeg_read_scanlines(&cinfo, &row_ptr, 1);
if (srcManager.extError != kOrtxOK) {
break;
}
}
if (srcManager.extError != kOrtxOK) {
return {kOrtxErrorInternal, "[ImageDecoder]: Failed to decode JPEG image."};
}
// Finish decompression
jpeg_finish_decompress(&cinfo);
jpeg_destroy_decompress(&cinfo);
}
return {};
}
class JMemorySourceManager : public jpeg_source_mgr {
public:
// Constructor
JMemorySourceManager(const uint8_t* encoded_image_data, const int64_t encoded_image_data_len) {
// Initialize source fields
next_input_byte = reinterpret_cast<const JOCTET*>(encoded_image_data);
bytes_in_buffer = static_cast<size_t>(encoded_image_data_len);
init_source = &JMemorySourceManager::initSource;
fill_input_buffer = &JMemorySourceManager::fillInputBuffer;
skip_input_data = &JMemorySourceManager::skipInputData;
resync_to_restart = jpeg_resync_to_restart;
term_source = &JMemorySourceManager::termSource;
}
// Initialize source (no-op)
static void initSource(j_decompress_ptr cinfo) {
// No initialization needed
}
// Fill input buffer (not used here, always return FALSE)
static boolean fillInputBuffer(j_decompress_ptr cinfo) {
return FALSE; // Buffer is managed manually
}
// Skip input data
static void skipInputData(j_decompress_ptr cinfo, long num_bytes) {
JMemorySourceManager* srcMgr = reinterpret_cast<JMemorySourceManager*>(cinfo->src);
if (num_bytes > 0) {
size_t bytes_to_skip = static_cast<size_t>(num_bytes);
while (bytes_to_skip > srcMgr->bytes_in_buffer) {
bytes_to_skip -= srcMgr->bytes_in_buffer;
if (srcMgr->fillInputBuffer(cinfo)) {
// Error: buffer ran out
srcMgr->extError = kOrtxErrorCorruptData;
}
}
srcMgr->next_input_byte += bytes_to_skip;
srcMgr->bytes_in_buffer -= bytes_to_skip;
}
}
// Terminate source (no-op)
static void termSource(j_decompress_ptr cinfo) {
// No cleanup needed
}
extError_t extError{kOrtxOK}; // Error handler
};
};
} // namespace ort_extensions::internal

Просмотреть файл

@ -8,9 +8,10 @@
#include "op_def_struct.h"
#include "ext_status.h"
namespace ort_extensions::internal {
struct DecodeImage {
template <typename DictT>
OrtxStatus Init(const DictT& attrs) {
OrtxStatus OnInit() {
CFStringRef optionKeys[2];
CFTypeRef optionValues[2];
optionKeys[0] = kCGImageSourceShouldCache;
@ -25,7 +26,7 @@ struct DecodeImage {
return {};
}
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) {
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const {
const auto& dimensions = input.Shape();
if (dimensions.size() != 1ULL) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Only raw image formats are supported."};
@ -114,3 +115,4 @@ struct DecodeImage {
private:
CFDictionaryRef imageSourceOptions_{NULL};
};
} // namespace ort_extensions::internal

Просмотреть файл

@ -12,10 +12,9 @@
#include "op_def_struct.h"
#include "ext_status.h"
namespace ort_extensions::internal {
struct DecodeImage {
template <typename DictT>
OrtxStatus Init(const DictT& attrs) {
OrtxStatus OnInit() {
HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
if (FAILED(hr)) {
return {kOrtxErrorInternal, "[ImageDecoder]: Failed when CoInitialize."};
@ -29,7 +28,7 @@ struct DecodeImage {
return {};
}
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) {
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const{
const auto& dimensions = input.Shape();
if (dimensions.size() != 1ULL) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Only raw image formats are supported."};
@ -147,3 +146,4 @@ struct DecodeImage {
private:
winrt::com_ptr<IWICImagingFactory> pIWICFactory_;
};
} // namespace ort_extensions::internal

Просмотреть файл

@ -1,16 +1,15 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "ocos.h"
#include "decode_image.hpp"
#include "encode_image.hpp"
#include "draw_bounding_box.hpp"
const std::vector<const OrtCustomOp*>& VisionLoader() {
static OrtOpLoader op_loader(CustomCpuStruct("EncodeImage", ort_extensions::KernelEncodeImage),
CustomCpuStruct("DecodeImage", ort_extensions::KernelDecodeImage),
static OrtOpLoader op_loader(CustomCpuStructV2("DecodeImage", ort_extensions::DecodeImage),
CustomCpuStruct("EncodeImage", ort_extensions::KernelEncodeImage),
CustomCpuStruct("DrawBoundingBoxes", ort_extensions::DrawBoundingBoxes));
return op_loader.GetCustomOps();
}
FxLoadCustomOpFactory LoadCustomOpClasses_Vision = VisionLoader;
FxLoadCustomOpFactory LoadCustomOpClasses_Vision = VisionLoader;

Просмотреть файл

@ -1,159 +0,0 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include <cstdint>
#include "png.h"
#include "jpeglib.h"
#include "op_def_struct.h"
#include "ext_status.h"
OrtxStatus image_decoder(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output);
// Thin kernel wrapper around the free function image_decoder().
struct DecodeImage {
  // No attributes are consumed by this decoder; initialisation always succeeds.
  template <typename DictT>
  OrtxStatus Init(const DictT& /*attrs*/) {
    return OrtxStatus{};
  }

  // Decodes the encoded bytes in `input` into `output` by delegating to image_decoder().
  OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) {
    OrtxStatus result = image_decoder(input, output);
    return result;
  }
};
// libjpeg data source that serves a single, fully loaded in-memory buffer.
class JMemorySourceManager : public jpeg_source_mgr {
 public:
  // Points libjpeg at [encoded_image_data, encoded_image_data + encoded_image_data_len)
  // and installs the callbacks below.
  JMemorySourceManager(const uint8_t* encoded_image_data, const int64_t encoded_image_data_len) {
    // Initialize source fields
    next_input_byte = reinterpret_cast<const JOCTET*>(encoded_image_data);
    bytes_in_buffer = static_cast<size_t>(encoded_image_data_len);
    init_source = &JMemorySourceManager::initSource;
    fill_input_buffer = &JMemorySourceManager::fillInputBuffer;
    skip_input_data = &JMemorySourceManager::skipInputData;
    resync_to_restart = jpeg_resync_to_restart;
    term_source = &JMemorySourceManager::termSource;
  }

  // Initialize source (no-op): the whole buffer is set up by the constructor.
  static void initSource(j_decompress_ptr cinfo) {
    // No initialization needed
  }

  // Called when libjpeg has consumed the whole buffer and wants more. There is no more
  // data, so record the condition in extError and return FALSE.
  static boolean fillInputBuffer(j_decompress_ptr cinfo) {
    static_cast<JMemorySourceManager*>(cinfo->src)->extError = kOrtxErrorCorruptData;
    return FALSE;
  }

  // Skips num_bytes of input. A request that runs past the end of the buffer consumes
  // what is left and flags the stream as corrupt. The previous loop never terminated
  // when the buffer was empty and otherwise wrapped the skip count around.
  static void skipInputData(j_decompress_ptr cinfo, long num_bytes) {
    if (num_bytes <= 0) {
      return;
    }
    JMemorySourceManager* srcMgr = static_cast<JMemorySourceManager*>(cinfo->src);
    const size_t bytes_to_skip = static_cast<size_t>(num_bytes);
    if (bytes_to_skip > srcMgr->bytes_in_buffer) {
      srcMgr->extError = kOrtxErrorCorruptData;
      srcMgr->next_input_byte += srcMgr->bytes_in_buffer;
      srcMgr->bytes_in_buffer = 0;
      return;
    }
    srcMgr->next_input_byte += bytes_to_skip;
    srcMgr->bytes_in_buffer -= bytes_to_skip;
  }

  // Terminate source (no-op): the buffer is owned by the caller.
  static void termSource(j_decompress_ptr cinfo) {
    // No cleanup needed
  }

  extError_t extError{kOrtxOK};  // Set when libjpeg asked for data beyond the buffer.
};
// Decodes a PNG or JPEG image held in a 1-D uint8 tensor and writes the pixels to `output`.
//
// input:  encoded image bytes; must be 1-D and contain at least 8 bytes.
// output: allocated here as {height, width, channels}.
// Returns kOrtxErrorInvalidArgument for malformed input or a PNG that cannot be read;
// the JPEG path reports the source manager's error code when one was recorded.
// Data that does not start with the PNG signature is treated as JPEG.
inline OrtxStatus image_decoder(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) {
const auto& dimensions = input.Shape();
if (dimensions.size() != 1ULL) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Only raw image formats are supported."};
}
// Get data & the length
const uint8_t* encoded_image_data = input.Data();
const int64_t encoded_image_data_len = input.NumberOfElement();
// The PNG signature check below reads the first 8 bytes, so shorter inputs are rejected.
if (encoded_image_data_len < 8) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Invalid image data."};
}
OrtxStatus status{};
if (png_sig_cmp(encoded_image_data, 0, 8) == 0) {
// Decode the PNG image through libpng's png_image interface.
png_image image;
std::memset(&image, 0, sizeof(image)); // zero the control structure before setting its version
image.version = PNG_IMAGE_VERSION;
if (png_image_begin_read_from_memory(&image, encoded_image_data, static_cast<size_t>(encoded_image_data_len)) ==
0) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to read PNG image."};
}
image.format = PNG_FORMAT_RGB; // request RGB output regardless of the file's own format
const int height = image.height;
const int width = image.width;
const int channels = PNG_IMAGE_PIXEL_CHANNELS(image.format); // Calculates the number of channels based on format
std::vector<int64_t> output_dimensions{height, width, channels};
uint8_t* decoded_image_data = output.Allocate(output_dimensions);
// NOTE(review): this early return leaves `image` without a png_image_free call — confirm
// whether libpng holds resources at this point.
if (decoded_image_data == nullptr) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to allocate memory for decoded image data."};
}
if (png_image_finish_read(&image, nullptr, decoded_image_data, 0, nullptr) == 0) {
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to decode PNG image."};
}
} else {
// Initialize JPEG decompression object
// NOTE(review): jpeg_std_error installs libjpeg's default error handling; presumably a
// fatal decoding error terminates the process rather than returning here — confirm.
jpeg_decompress_struct cinfo;
jpeg_error_mgr jerr;
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
// Set up the custom memory source manager
JMemorySourceManager srcManager(encoded_image_data, encoded_image_data_len);
cinfo.src = &srcManager;
// Read the JPEG header to get image info
// NOTE(review): the return values of jpeg_read_header, jpeg_start_decompress and
// jpeg_read_scanlines are not checked.
jpeg_read_header(&cinfo, TRUE);
// Start decompression
jpeg_start_decompress(&cinfo);
// Allocate memory for the image; the channel count is whatever libjpeg reports.
std::vector<int64_t> output_dimensions{cinfo.output_height, cinfo.output_width, cinfo.output_components};
uint8_t* imageBuffer = output.Allocate(output_dimensions);
// Read the image data one scanline at a time, directly into the output buffer.
int row_stride = cinfo.output_width * cinfo.output_components;
while (cinfo.output_scanline < cinfo.output_height) {
uint8_t* row_ptr = imageBuffer + (cinfo.output_scanline * row_stride);
jpeg_read_scanlines(&cinfo, &row_ptr, 1);
// Stop early once the source manager has recorded an error.
if (srcManager.extError != kOrtxOK) {
break;
}
}
if (srcManager.extError != kOrtxOK) {
status = {srcManager.extError, "[ImageDecoder]: Failed to decode JPEG image."};
}
// Finish decompression (also runs after an early break, so the decompressor is always destroyed)
jpeg_finish_decompress(&cinfo);
jpeg_destroy_decompress(&cinfo);
}
return status;
}

Просмотреть файл

@ -6,19 +6,9 @@
#include "nlohmann/json.hpp"
#include "file_sys.h"
#include "vision/decode_image.hpp"
#include "image_processor.h"
#include "c_api_utils.hpp"
#if OCOS_ENABLE_VENDOR_IMAGE_CODECS
#if WIN32
#include "image_decoder_win32.hpp"
#elif __APPLE__
#include "image_decoder_darwin.hpp"
#else
#include "image_decoder.hpp"
#endif
#else
#include "image_decoder.hpp"
#endif
#include "image_transforms.hpp"
#include "image_transforms_phi_3.hpp"
@ -40,7 +30,7 @@ using namespace ort_extensions;
using json = nlohmann::json;
Operation::KernelRegistry ImageProcessor::kernel_registry_ = {
{"DecodeImage", []() { return CreateKernelInstance(&DecodeImage::Compute); }},
{"DecodeImage", []() { return CreateKernelInstance(&ort_extensions::DecodeImage::Compute); }},
{"Resize", []() { return CreateKernelInstance(&Resize::Compute); }},
{"Rescale", []() { return CreateKernelInstance(&Rescale::Compute); }},
{"Normalize", []() { return CreateKernelInstance(&Normalize::Compute); }},

Просмотреть файл

@ -230,7 +230,7 @@ struct Llama3ImageTransform {
int64_t max_image_tiles, int64_t tile_size) {
{
auto possible_tile_arrangements = GetAllSupportedAspectRatios(max_image_tiles);
std::vector<std::pair<int, int>> possible_canvas_sizes;
std::vector<std::pair<int64_t, int64_t>> possible_canvas_sizes;
for (const auto& arrangement : possible_tile_arrangements) {
possible_canvas_sizes.emplace_back(arrangement.first * tile_size, arrangement.second * tile_size);
@ -263,7 +263,7 @@ struct Llama3ImageTransform {
selected_scale = *std::max_element(downscaling_options.begin(), downscaling_options.end());
}
std::vector<std::pair<int, int>> chosen_canvas;
std::vector<std::pair<int64_t, int64_t>> chosen_canvas;
for (size_t i = 0; i < scales.size(); ++i) {
if (std::abs(scales[i] - selected_scale) < 1e-9) {
chosen_canvas.push_back(possible_canvas_sizes[i]);
@ -272,7 +272,7 @@ struct Llama3ImageTransform {
if (chosen_canvas.size() > 1) {
auto optimal_canvas = std::min_element(chosen_canvas.begin(), chosen_canvas.end(),
[](const std::pair<int, int>& a, const std::pair<int, int>& b) {
[](const std::pair<int64_t, int64_t>& a, const std::pair<int64_t, int64_t>& b) {
return (a.first * a.second) < (b.first * b.second);
});
return *optimal_canvas;

Просмотреть файл

@ -209,6 +209,11 @@ std::unique_ptr<KernelDef> CreateKernelInstance(OrtxStatus (T::*method)(Args...)
return std::make_unique<KernelStruct<T, Args...>>(method);
}
// Overload for const-qualified kernel methods: the pointer is re-typed to the
// non-const member-function form that KernelStruct stores.
template <typename T, typename... Args>
std::unique_ptr<KernelDef> CreateKernelInstance(OrtxStatus (T::*method)(Args...) const) {
  using MutableMethod = OrtxStatus (T::*)(Args...);
  MutableMethod retyped = reinterpret_cast<MutableMethod>(method);
  return std::make_unique<KernelStruct<T, Args...>>(retyped);
}
class Operation {
public:
using KernelRegistry = std::unordered_map<std::string_view, std::function<std::unique_ptr<KernelDef>()>>;

Просмотреть файл

@ -9,23 +9,14 @@
#include "gtest/gtest.h"
#include "shared/api/c_api_utils.hpp"
#if OCOS_ENABLE_VENDOR_IMAGE_CODECS
#if WIN32
#include "shared/api/image_decoder_win32.hpp"
#elif __APPLE__
#include "shared/api/image_decoder_darwin.hpp"
#else
#include "shared/api/image_decoder.hpp"
#endif
#else
#include "shared/api/image_decoder.hpp"
#endif
#include "vision/decode_image.hpp"
using namespace ort_extensions;
TEST(ImgDecoderTest, TestPngDecoder) {
DecodeImage image_decoder;
image_decoder.Init(NULL);
ort_extensions::DecodeImage image_decoder;
image_decoder.Init(std::unordered_map<std::string, std::variant<std::string>>());
std::vector<uint8_t> png_data;
std::filesystem::path png_path = "data/processor/exceltable.png";
std::ifstream png_file(png_path, std::ios::binary);
@ -60,8 +51,8 @@ TEST(ImgDecoderTest, TestPngDecoder) {
}
TEST(ImageDecoderTest, TestJpegDecoder) {
DecodeImage image_decoder;
image_decoder.Init(NULL);
ort_extensions::DecodeImage image_decoder;
image_decoder.Init(std::unordered_map<std::string, std::variant<std::string>>());
std::vector<uint8_t> jpeg_data;
std::filesystem::path jpeg_path = "data/processor/australia.jpg";
std::ifstream jpeg_file(jpeg_path, std::ios::binary);
@ -139,8 +130,8 @@ TEST(ImageDecoderTest, TestJpegDecoder) {
#if OCOS_ENABLE_VENDOR_IMAGE_CODECS
#if defined(WIN32) || defined(__APPLE__)
TEST(ImageDecoderTest, TestTiffDecoder) {
DecodeImage image_decoder;
image_decoder.Init(NULL);
ort_extensions::DecodeImage image_decoder;
image_decoder.Init(std::unordered_map<std::string, std::variant<std::string>>());
std::vector<uint8_t> tiff_data;
std::filesystem::path tiff_path = "data/processor/canoe.tif";
std::ifstream tiff_file(tiff_path, std::ios::binary);
@ -174,4 +165,4 @@ TEST(ImageDecoderTest, TestTiffDecoder) {
std::vector<uint8_t>({82, 66, 49, 74, 66, 57, 74, 66, 49, 82, 74, 57}));
}
#endif
#endif
#endif

Просмотреть файл

@ -3,7 +3,8 @@ import numpy as np
from PIL import Image
from onnxruntime_extensions import OrtPyFunction, ONNXRuntimeError, util
@unittest.skip("The opencv based operators are not supported in the offical release any more"
"please build from source code to with OCOS_ENABLE_CV2 and OCOS_ENABLE_OPENCV_CODECS enabled.")
class TestOpenCV(unittest.TestCase):
@classmethod
def setUpClass(cls):

Просмотреть файл

@ -34,8 +34,6 @@ CMAKE_FLAG_TO_OPS = {
"SegmentExtraction",
],
"OCOS_ENABLE_OPENCV_CODECS": [
"DecodeImage",
"EncodeImage",
"ImageReader"
],
"OCOS_ENABLE_RE2_REGEX": [