Unify the image operations in extensions library (#831)
* Unify the image operations in extensions library * fix the build configuration issue * More build fixes * Fix the native image codec * fix encode_image * Add bgr/rgb conversion for encoding image * parity check * build break * update PNG encoding parameters * build break on Linux * using MSE to compare images * fix the discrepancy between Linux and Windows * final code refinement * one more change * fix the C++ warnings --------- Co-authored-by: Sayan Shaw <52221015+sayanshaw24@users.noreply.github.com>
This commit is contained in:
Родитель
0e6bffa201
Коммит
be5aa773e3
|
@ -197,7 +197,7 @@ stages:
|
|||
# compiled as only one operator selected.
|
||||
- bash: |
|
||||
set -e -x -u
|
||||
./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
|
||||
./build.sh -DOCOS_ENABLE_C_API=ON
|
||||
cd out/Linux/RelWithDebInfo
|
||||
ctest -C RelWithDebInfo --output-on-failure
|
||||
displayName: Build ort-extensions with API enabled and run tests
|
||||
|
@ -281,7 +281,7 @@ stages:
|
|||
# compiled as only one operator selected.
|
||||
- bash: |
|
||||
set -e -x -u
|
||||
./build.sh -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
|
||||
./build.sh -DOCOS_ENABLE_C_API=ON
|
||||
cd out/Darwin/RelWithDebInfo
|
||||
ctest -C RelWithDebInfo --output-on-failure
|
||||
displayName: Build ort-extensions with API enabled and run tests
|
||||
|
@ -431,7 +431,7 @@ stages:
|
|||
|
||||
steps:
|
||||
- script: |
|
||||
call .\build.bat -DOCOS_ENABLE_C_API=ON -DOCOS_ENABLE_CV2=OFF -DOCOS_ENABLE_VISION=OFF -DOCOS_ENABLE_OPENCV_CODECS=OFF
|
||||
call .\build.bat -DOCOS_ENABLE_C_API=ON
|
||||
cd out\Windows
|
||||
ctest -C RelWithDebInfo --output-on-failure
|
||||
displayName: Build ort-extensions with API enabled and run tests
|
||||
|
|
|
@ -208,8 +208,7 @@ class CmdBuildCMakeExt(_build_ext):
|
|||
# Disabling openCV can drastically reduce the build time.
|
||||
cmake_args += [
|
||||
'-DOCOS_ENABLE_OPENCV_CODECS=OFF',
|
||||
'-DOCOS_ENABLE_CV2=OFF',
|
||||
'-DOCOS_ENABLE_VISION=OFF']
|
||||
'-DOCOS_ENABLE_CV2=OFF']
|
||||
|
||||
if self.pp_api:
|
||||
if not self.no_opencv:
|
||||
|
|
|
@ -72,8 +72,8 @@ option(OCOS_ENABLE_BLINGFIRE "Enable operators depending on the Blingfire librar
|
|||
option(OCOS_ENABLE_MATH "Enable math tensor operators building" ON)
|
||||
option(OCOS_ENABLE_DLIB "Enable operators like Inverse depending on DLIB" ON)
|
||||
option(OCOS_ENABLE_VENDOR_IMAGE_CODECS "Enable and use vendor image codecs if supported over libpng & libjpeg" OFF)
|
||||
option(OCOS_ENABLE_OPENCV_CODECS "Enable cv2 and vision operators that require opencv imgcodecs." ON)
|
||||
option(OCOS_ENABLE_CV2 "Enable the operators in `operators/cv2`" ON)
|
||||
option(OCOS_ENABLE_OPENCV_CODECS "Enable cv2 and vision operators that require opencv imgcodecs." OFF)
|
||||
option(OCOS_ENABLE_CV2 "Enable the operators in `operators/cv2`" OFF)
|
||||
option(OCOS_ENABLE_VISION "Enable the operators in `operators/vision`" ON)
|
||||
option(OCOS_ENABLE_AUDIO "Enable the operators for audio processing" ON)
|
||||
option(OCOS_ENABLE_AZURE "Enable the operators for azure execution provider" OFF)
|
||||
|
@ -383,7 +383,7 @@ if (OCOS_USE_CUDA)
|
|||
endif()
|
||||
|
||||
# enable the opencv dependency if we have ops that require it
|
||||
if(OCOS_ENABLE_CV2 OR OCOS_ENABLE_VISION)
|
||||
if(OCOS_ENABLE_CV2)
|
||||
set(_ENABLE_OPENCV ON)
|
||||
message(STATUS "Fetch opencv")
|
||||
include(opencv)
|
||||
|
@ -402,10 +402,6 @@ if(OCOS_ENABLE_CV2)
|
|||
endif()
|
||||
|
||||
if(OCOS_ENABLE_VISION)
|
||||
if(NOT OCOS_ENABLE_OPENCV_CODECS)
|
||||
message(FATAL_ERROR "OCOS_ENABLE_VISION requires OCOS_ENABLE_OPENCV_CODECS to be ON")
|
||||
endif()
|
||||
|
||||
file(GLOB TARGET_SRC_VISION "operators/vision/*.cc" "operators/vision/*.h*")
|
||||
list(APPEND TARGET_SRC ${TARGET_SRC_VISION})
|
||||
endif()
|
||||
|
@ -653,6 +649,25 @@ endif()
|
|||
|
||||
if(OCOS_ENABLE_VISION)
|
||||
list(APPEND OCOS_COMPILE_DEFINITIONS ENABLE_VISION)
|
||||
set(_DEFAULT_CODEC_ENABLE ON)
|
||||
if(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
|
||||
add_compile_definitions(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
|
||||
if(WIN32)
|
||||
# Use WIC on Windows. Nothing to be done
|
||||
set(_DEFAULT_CODEC_ENABLE OFF)
|
||||
elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
|
||||
# Use ImageIO on Apple platforms
|
||||
set(_DEFAULT_CODEC_ENABLE OFF)
|
||||
target_link_libraries(ocos_operators PRIVATE "-framework CoreFoundation" "-framework CoreGraphics" "-framework ImageIO")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(_DEFAULT_CODEC_ENABLE ON) # libpng and libjpeg can be optional after EncodeImage with native support too.
|
||||
if(_DEFAULT_CODEC_ENABLE)
|
||||
include(ext_imgcodecs)
|
||||
target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
|
||||
target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(OCOS_ENABLE_AZURE)
|
||||
|
@ -740,24 +755,6 @@ if(OCOS_ENABLE_C_API)
|
|||
if(OCOS_ENABLE_DLIB)
|
||||
file(GLOB cv2_TARGET_SRC "shared/api/c_api_processor.*" "shared/api/image_*.*")
|
||||
list(APPEND _TARGET_LIB_SRC ${cv2_TARGET_SRC})
|
||||
if(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
|
||||
add_compile_definitions(OCOS_ENABLE_VENDOR_IMAGE_CODECS)
|
||||
if(WIN32)
|
||||
# Use WIC on Windows. Nothing to be done
|
||||
elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
|
||||
# Use ImageIO on Apple platforms
|
||||
target_link_libraries(ocos_operators PRIVATE "-framework CoreFoundation" "-framework CoreGraphics" "-framework ImageIO")
|
||||
else()
|
||||
# Fallback to libpng & libjpeg on all other platforms
|
||||
include(ext_imgcodecs)
|
||||
target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
|
||||
target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})
|
||||
endif()
|
||||
else()
|
||||
include(ext_imgcodecs)
|
||||
target_include_directories(ocos_operators PUBLIC ${libPNG_SOURCE_DIR} ${libJPEG_SOURCE_DIR})
|
||||
target_link_libraries(ocos_operators PUBLIC ${PNG_LIBRARY} ${JPEG_LIBRARY})
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ set(lib_srcs
|
|||
)
|
||||
|
||||
add_library(${PNG_LIBRARY} STATIC EXCLUDE_FROM_ALL ${lib_srcs})
|
||||
target_include_directories(${PNG_LIBRARY} BEFORE PRIVATE ${zlib_SOURCE_DIR})
|
||||
target_include_directories(${PNG_LIBRARY} BEFORE PUBLIC ${zlib_SOURCE_DIR})
|
||||
|
||||
if(MSVC)
|
||||
target_compile_definitions(${PNG_LIBRARY} PRIVATE -D_CRT_SECURE_NO_DEPRECATE)
|
||||
|
|
|
@ -4,9 +4,7 @@
|
|||
set(OCOS_ENABLE_GPT2_TOKENIZER ON CACHE INTERNAL "" FORCE)
|
||||
set(OCOS_ENABLE_C_API ON CACHE INTERNAL "" FORCE)
|
||||
set(OCOS_ENABLE_DLIB ON CACHE INTERNAL "" FORCE)
|
||||
set(OCOS_ENABLE_OPENCV_CODECS OFF CACHE INTERNAL "" FORCE)
|
||||
set(OCOS_ENABLE_CV2 OFF CACHE INTERNAL "" FORCE)
|
||||
set(OCOS_ENABLE_VISION OFF CACHE INTERNAL "" FORCE)
|
||||
set(OCOS_ENABLE_VISION ON CACHE INTERNAL "" FORCE)
|
||||
set(OCOS_ENABLE_VENDOR_IMAGE_CODECS ON CACHE INTERNAL "" FORCE)
|
||||
set(OCOS_ENABLE_MATH ON CACHE INTERNAL "" FORCE)
|
||||
set(OCOS_ENABLE_AUDIO ON CACHE INTERNAL "" FORCE)
|
||||
|
|
|
@ -3,3 +3,4 @@
|
|||
|
||||
set(OCOS_ENABLE_GPT2_TOKENIZER ON CACHE INTERNAL "" FORCE)
|
||||
set(OCOS_ENABLE_C_API ON CACHE INTERNAL "" FORCE)
|
||||
set(OCOS_BUILD_SHARED_LIB OFF CACHE INTERNAL "" FORCE)
|
|
@ -1,40 +0,0 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "decode_image.hpp"
|
||||
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
#include "narrow.h"
|
||||
|
||||
namespace ort_extensions {
|
||||
|
||||
// Decodes an encoded image (any format cv::imdecode understands) into an {H, W, C} uint8 tensor.
//
// input  - 1-D tensor holding the raw encoded bytes.
// output - allocated here as {height, width, elemSize}; the pixels are copied verbatim from the
//          decoded cv::Mat (cv::IMREAD_COLOR, i.e. 3-channel BGR).
// Throws ORT_INVALID_ARGUMENT (via ORTX_CXX_API_THROW) when the input is not 1-D or cannot be decoded.
void KernelDecodeImage::Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const {
  // Setup inputs
  const auto& dimensions = input.Shape();
  if (dimensions.size() != 1ULL) {
    ORTX_CXX_API_THROW("[DecodeImage]: Raw image bytes with 1D shape expected.", ORT_INVALID_ARGUMENT);
  }

  const int64_t encoded_image_data_len = input.NumberOfElement();

  // Decode the image.
  // cv::Mat sizes are 32-bit; use the checked narrow<> (already used below for the memcpy size)
  // so an oversized input is rejected instead of being silently truncated.
  const std::vector<int32_t> encoded_image_sizes{1, narrow<int32_t>(encoded_image_data_len)};
  // The Mat only wraps the tensor memory (no copy); OpenCV wants a non-const void*.
  const void* encoded_image_data = input.Data();
  const cv::Mat encoded_image(encoded_image_sizes, CV_8UC1, const_cast<void*>(encoded_image_data));
  const cv::Mat decoded_image = cv::imdecode(encoded_image, cv::IMREAD_COLOR);

  if (decoded_image.data == nullptr) {
    ORTX_CXX_API_THROW("[DecodeImage] Invalid input. Failed to decode image.", ORT_INVALID_ARGUMENT);
  }

  // Setup output & copy to destination
  const cv::Size decoded_image_size = decoded_image.size();
  const int64_t height = decoded_image_size.height;
  const int64_t width = decoded_image_size.width;
  const int64_t colors = decoded_image.elemSize();  // == 3 as it's BGR

  const std::vector<int64_t> output_dims{height, width, colors};
  uint8_t* decoded_image_data = output.Allocate(output_dims);
  memcpy(decoded_image_data, decoded_image.data, narrow<size_t>(height * width * colors));
}
|
||||
} // namespace ort_extensions
|
|
@ -3,19 +3,81 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "ocos.h"
|
||||
#include "string_utils.h"
|
||||
#include <cstring>
|
||||
#include <variant>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <cstdint>
|
||||
#include "ext_status.h"
|
||||
#include "op_def_struct.h"
|
||||
|
||||
#if OCOS_ENABLE_VENDOR_IMAGE_CODECS
|
||||
#if WIN32
|
||||
#include "image_decoder_win32.hpp"
|
||||
#elif __APPLE__
|
||||
#include "image_decoder_darwin.hpp"
|
||||
#else
|
||||
#include "image_decoder.hpp"
|
||||
#endif
|
||||
#else
|
||||
#include "image_decoder.hpp"
|
||||
#endif
|
||||
|
||||
namespace ort_extensions {
|
||||
struct DecodeImage: public internal::DecodeImage {
|
||||
|
||||
void decode_image(const ortc::Tensor<uint8_t>& input,
|
||||
ortc::Tensor<uint8_t>& output);
|
||||
template <typename DictT>
|
||||
OrtxStatus Init(const DictT& attrs) {
|
||||
auto status = internal::DecodeImage::OnInit();
|
||||
if (!status.IsOk()) {
|
||||
return status;
|
||||
}
|
||||
|
||||
struct KernelDecodeImage : BaseKernel {
|
||||
KernelDecodeImage(const OrtApi& api, const OrtKernelInfo& info) : BaseKernel(api, info) {}
|
||||
void Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const;
|
||||
for (const auto& [key, value] : attrs) {
|
||||
if (key == "color_space") {
|
||||
auto color_space = std::get<std::string>(value);
|
||||
if (color_space == "RGB") {
|
||||
is_bgr_ = false;
|
||||
} else if (color_space == "BGR") {
|
||||
is_bgr_ = true;
|
||||
} else {
|
||||
return {kOrtxErrorInvalidArgument, "[DecodeImage]: Invalid color_space"};
|
||||
}
|
||||
} else {
|
||||
return {kOrtxErrorInvalidArgument, "[Resize]: Invalid argument"};
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
// Kernel-creation hook (presumably invoked when the op is used from an ONNX model - confirm against
// the CustomCpuStructV2 contract). Selects BGR output and then runs Init() with no attributes.
OrtStatusPtr OnModelAttach(const OrtApi& api, const OrtKernelInfo& info) {
  using AttributeMap = std::unordered_map<std::string, std::variant<std::string>>;

  is_bgr_ = true;
  return Init(AttributeMap());
}
|
||||
|
||||
// Decodes `input` with the platform backend and, when BGR output is selected, swaps the first and
// third channel of every pixel in place.
//
// input  - 1-D tensor with the encoded image bytes (validated by the backend).
// output - allocated by the backend as {H, W, C}.
// Returns the backend status unchanged.
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const {
  auto status = internal::DecodeImage::Compute(input, output);
  if (!status.IsOk()) {
    return status;
  }

  if (is_bgr_) {
    // need to convert rgb to bgr for backward compatibility
    const auto& dimensions = output.Shape();
    // The backend decides the channel count (e.g. a JPEG decoder may report 1 for grayscale), so
    // step by the real pixel stride and leave images with fewer than three channels untouched
    // instead of assuming exactly 3 bytes per pixel.
    if (dimensions.size() == 3 && dimensions[2] >= 3) {
      const int64_t channels = dimensions[2];
      const int64_t pixel_count = dimensions[0] * dimensions[1];
      // The tensor only exposes a const pointer; the buffer itself was just allocated for writing.
      uint8_t* pixel = const_cast<uint8_t*>(output.Data());
      for (int64_t i = 0; i < pixel_count; ++i, pixel += channels) {
        std::swap(pixel[0], pixel[2]);
      }
    }
  }

  return status;
}
|
||||
|
||||
private:
|
||||
bool is_bgr_{}; // flag to indicate if the output is in BGR format
|
||||
};
|
||||
|
||||
} // namespace ort_extensions
|
||||
|
|
|
@ -1,40 +1,139 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "zlib.h"
|
||||
#if ZLIB_VERNUM != 0x12b0
|
||||
// the following is a trick to show the invalid version number for the diagnosis.
|
||||
#define STR_VERSION(x) STR_NUM(x)
|
||||
#define STR_NUM(x) #x
|
||||
#pragma message "Invalid zlib version: " STR_VERSION(ZLIB_VERNUM)
|
||||
#error "stopped"
|
||||
#endif
|
||||
|
||||
#include "png.h"
|
||||
#include "jpeglib.h"
|
||||
#include "op_def_struct.h"
|
||||
#include "ext_status.h"
|
||||
|
||||
#include "encode_image.hpp"
|
||||
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
|
||||
namespace ort_extensions {
|
||||
|
||||
void KernelEncodeImage::Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const {
|
||||
// Setup inputs
|
||||
const auto dimensions_bgr = input.Shape();
|
||||
|
||||
const auto& dimensions_bgr = input.Shape();
|
||||
if (dimensions_bgr.size() != 3 || dimensions_bgr[2] != 3) {
|
||||
// expect {H, W, C} as that's the inverse of what decode_image produces.
|
||||
// we have no way to check if it's BGR or RGB though
|
||||
ORTX_CXX_API_THROW("[EncodeImage] requires rank 3 BGR input in channels last format.", ORT_INVALID_ARGUMENT);
|
||||
}
|
||||
|
||||
// Get data & the length
|
||||
std::vector<int32_t> height_x_width{static_cast<int32_t>(dimensions_bgr[0]), // H
|
||||
static_cast<int32_t>(dimensions_bgr[1])}; // W
|
||||
int32_t height = static_cast<int32_t>(dimensions_bgr[0]); // H
|
||||
int32_t width = static_cast<int32_t>(dimensions_bgr[1]); // W
|
||||
const int32_t color_space = 3;
|
||||
const uint8_t* bgr_data = input.Data();
|
||||
unsigned char* outbuffer = nullptr;
|
||||
std::vector<uint8_t> png_buffer;
|
||||
size_t outsize = 0;
|
||||
|
||||
// data is const uint8_t but opencv2 wants void*.
|
||||
const void* bgr_data = input.Data();
|
||||
const cv::Mat bgr_image(height_x_width, CV_8UC3, const_cast<void*>(bgr_data));
|
||||
|
||||
// don't know output size ahead of time so need to encode and then copy to output
|
||||
std::vector<uint8_t> encoded_image;
|
||||
if (!cv::imencode(extension_, bgr_image, encoded_image)) {
|
||||
ORTX_CXX_API_THROW("[EncodeImage] Image encoding failed.", ORT_INVALID_ARGUMENT);
|
||||
auto rgb_data = std::make_unique<uint8_t[]>(height * width * color_space);
|
||||
for (int32_t y = 0; y < height; ++y) {
|
||||
for (int32_t x = 0; x < width; ++x) {
|
||||
rgb_data[(y * width + x) * color_space + 0] = bgr_data[(y * width + x) * color_space + 2];
|
||||
rgb_data[(y * width + x) * color_space + 1] = bgr_data[(y * width + x) * color_space + 1];
|
||||
rgb_data[(y * width + x) * color_space + 2] = bgr_data[(y * width + x) * color_space + 0];
|
||||
}
|
||||
}
|
||||
|
||||
// Setup output & copy to destination
|
||||
std::vector<int64_t> output_dimensions{static_cast<int64_t>(encoded_image.size())};
|
||||
if (extension_ == ".jpg") {
|
||||
struct jpeg_compress_struct cinfo;
|
||||
struct jpeg_error_mgr jerr;
|
||||
|
||||
cinfo.err = jpeg_std_error(&jerr);
|
||||
jpeg_create_compress(&cinfo);
|
||||
jpeg_mem_dest(&cinfo, &outbuffer, &outsize);
|
||||
|
||||
cinfo.image_width = width;
|
||||
cinfo.image_height = height;
|
||||
cinfo.input_components = color_space;
|
||||
cinfo.in_color_space = JCS_RGB;
|
||||
|
||||
// compression parameters is compatible with opencv
|
||||
jpeg_set_defaults(&cinfo);
|
||||
jpeg_set_quality(&cinfo, 95, TRUE);
|
||||
cinfo.optimize_coding = FALSE;
|
||||
cinfo.restart_interval = 0;
|
||||
cinfo.q_scale_factor[0] = jpeg_quality_scaling(-1);
|
||||
cinfo.q_scale_factor[1] = jpeg_quality_scaling(-1);
|
||||
|
||||
const int32_t sampling_factor = 0x221111; // 4:2:0 IMWRITE_JPEG_SAMPLING_FACTOR_420
|
||||
cinfo.comp_info[0].v_samp_factor = (sampling_factor >> 16 ) & 0xF;
|
||||
cinfo.comp_info[0].h_samp_factor = (sampling_factor >> 20 ) & 0xF;
|
||||
cinfo.comp_info[1].v_samp_factor = 1;
|
||||
cinfo.comp_info[1].h_samp_factor = 1;
|
||||
// jpeg_default_qtables( &cinfo, TRUE );
|
||||
|
||||
jpeg_start_compress(&cinfo, TRUE);
|
||||
|
||||
JSAMPROW row_pointer[1];
|
||||
while (cinfo.next_scanline < cinfo.image_height) {
|
||||
row_pointer[0] = (JSAMPROW)&rgb_data[cinfo.next_scanline * cinfo.image_width * color_space];
|
||||
jpeg_write_scanlines(&cinfo, row_pointer, 1);
|
||||
}
|
||||
|
||||
jpeg_finish_compress(&cinfo);
|
||||
jpeg_destroy_compress(&cinfo);
|
||||
} else if (extension_ == ".png") {
|
||||
png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
|
||||
if (!png_ptr) {
|
||||
ORTX_CXX_API_THROW("[EncodeImage] PNG create write struct failed.", ORT_INVALID_ARGUMENT);
|
||||
}
|
||||
|
||||
png_infop info_ptr = png_create_info_struct(png_ptr);
|
||||
if (!info_ptr) {
|
||||
png_destroy_write_struct(&png_ptr, nullptr);
|
||||
ORTX_CXX_API_THROW("[EncodeImage] PNG create info struct failed.", ORT_INVALID_ARGUMENT);
|
||||
}
|
||||
|
||||
if (setjmp(png_jmpbuf(png_ptr))) {
|
||||
png_destroy_write_struct(&png_ptr, &info_ptr);
|
||||
ORTX_CXX_API_THROW("[EncodeImage] PNG encoding failed.", ORT_INVALID_ARGUMENT);
|
||||
}
|
||||
|
||||
png_set_write_fn(png_ptr, &png_buffer, [](png_structp png_ptr, png_bytep data, png_size_t length) {
|
||||
auto p = reinterpret_cast<std::vector<uint8_t>*>(png_get_io_ptr(png_ptr));
|
||||
p->insert(p->end(), data, data + length);
|
||||
}, nullptr);
|
||||
|
||||
// sync with openCV parameters
|
||||
png_set_filter(png_ptr, PNG_FILTER_TYPE_BASE, PNG_FILTER_SUB);
|
||||
png_set_compression_level(png_ptr, 1);
|
||||
png_set_compression_strategy(png_ptr, 3);
|
||||
|
||||
png_set_IHDR(png_ptr, info_ptr, width, height, 8, PNG_COLOR_TYPE_RGB,
|
||||
PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
|
||||
|
||||
png_write_info(png_ptr, info_ptr);
|
||||
|
||||
for (int32_t y = 0; y < height; ++y) {
|
||||
png_write_row(png_ptr, (png_bytep)&rgb_data[y * width * color_space]);
|
||||
}
|
||||
|
||||
png_write_flush(png_ptr);
|
||||
png_write_end(png_ptr, info_ptr);
|
||||
png_destroy_write_struct(&png_ptr, &info_ptr);
|
||||
|
||||
outbuffer = png_buffer.data();
|
||||
outsize = png_buffer.size();
|
||||
} else {
|
||||
ORTX_CXX_API_THROW("[EncodeImage] Unsupported image format.", ORT_INVALID_ARGUMENT);
|
||||
}
|
||||
|
||||
std::vector<int64_t> output_dimensions{static_cast<int64_t>(outsize)};
|
||||
uint8_t* data = output.Allocate(output_dimensions);
|
||||
memcpy(data, encoded_image.data(), encoded_image.size());
|
||||
memcpy(data, outbuffer, outsize);
|
||||
|
||||
if (outbuffer != png_buffer.data() && outbuffer != nullptr) {
|
||||
free(outbuffer);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ort_extensions
|
||||
|
|
|
@ -0,0 +1,208 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "png.h"
|
||||
#include "jpeglib.h"
|
||||
#include "op_def_struct.h"
|
||||
#include "ext_status.h"
|
||||
|
||||
namespace ort_extensions::internal {
|
||||
struct DecodeImage {
|
||||
// Nothing to prepare for this backend: always reports success.
OrtxStatus OnInit() {
  return {};
}
|
||||
|
||||
// Decodes an in-memory PNG stream into `output` as an {height, width, 3} tensor of 8-bit RGB.
//
// encoded_image_data / encoded_image_data_len - the raw PNG bytes.
// libpng transforms normalise every colour type to 8-bit RGBA rows; the alpha byte is dropped
// while copying into the output tensor.
// Returns kOrtxErrorCorruptData when libpng objects cannot be created or libpng reports an error
// while reading, kOrtxErrorInternal if the transformed rows are not 4 bytes per pixel.
OrtxStatus DecodePNG(const uint8_t* encoded_image_data, const int64_t encoded_image_data_len,
                     ortc::Tensor<uint8_t>& output) const {
  // Scratch space for decoded RGBA rows. It is declared before setjmp() so that a longjmp out of
  // libpng never jumps over the construction of an object with a non-trivial destructor.
  std::vector<uint8_t> rgba_rows;

  png_structp png = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
  if (!png) {
    return {kOrtxErrorCorruptData, "[ImageDecoder]: Failed to create png read struct."};
  }

  png_infop info = png_create_info_struct(png);
  if (!info) {
    png_destroy_read_struct(&png, nullptr, nullptr);
    return {kOrtxErrorCorruptData, "[ImageDecoder]: Failed to create png info struct."};
  }

  // libpng reports errors by longjmp-ing back here.
  if (setjmp(png_jmpbuf(png))) {
    png_destroy_read_struct(&png, &info, nullptr);
    return {kOrtxErrorCorruptData, "[ImageDecoder]: Error during png creation."};
  }

  // Cursor over the encoded bytes, consumed by the read callback below.
  struct BufferState {
    const uint8_t* ptr;
    png_size_t size;
  } bufferState = {encoded_image_data, static_cast<png_size_t>(encoded_image_data_len)};

  png_set_read_fn(png, &bufferState, [](png_structp pngPtr, png_bytep data, png_size_t length) {
    BufferState* state = static_cast<BufferState*>(png_get_io_ptr(pngPtr));
    if (length > state->size) png_error(pngPtr, "Read Error: Exceeded buffer size");
    memcpy(data, state->ptr, length);
    state->ptr += length;
    state->size -= length;
  });

  png_read_info(png, info);

  const png_uint_32 width = png_get_image_width(png, info);
  const png_uint_32 height = png_get_image_height(png, info);
  const png_byte color_type = png_get_color_type(png, info);
  const png_byte bit_depth = png_get_bit_depth(png, info);

  if (bit_depth == 16) {
    png_set_strip_16(png);
  }

  if (color_type == PNG_COLOR_TYPE_PALETTE) {
    png_set_palette_to_rgb(png);
  }

  if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) {
    png_set_expand_gray_1_2_4_to_8(png);
  }

  if (png_get_valid(png, info, PNG_INFO_tRNS)) {
    png_set_tRNS_to_alpha(png);
  }

  // Pad colour types without alpha so that every row ends up with 4 bytes per pixel.
  if (color_type == PNG_COLOR_TYPE_RGB || color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_PALETTE) {
    png_set_filler(png, 0xFF, PNG_FILLER_AFTER);
  }

  if (color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_GRAY_ALPHA) {
    png_set_gray_to_rgb(png);
  }

  // Interlaced files are delivered in several passes; libpng only de-interlaces for png_read_row()
  // when this has been requested before png_read_update_info().
  const int passes = png_set_interlace_handling(png);

  png_read_update_info(png, info);

  const size_t row_bytes = png_get_rowbytes(png, info);
  if (png_get_channels(png, info) != 4 || row_bytes < static_cast<size_t>(width) * 4) {
    png_destroy_read_struct(&png, &info, nullptr);
    return {kOrtxErrorInternal, "[ImageDecoder]: Unexpected PNG row layout."};
  }

  uint8_t* output_data = nullptr;
  {
    // Scoped so the shape vector is gone again before any further libpng call can longjmp.
    const std::vector<int64_t> output_dimensions{static_cast<int64_t>(height), static_cast<int64_t>(width), 3};
    output_data = output.Allocate(output_dimensions);
  }

  // A single scratch row is enough for sequential images. Interlaced images need every row kept
  // between passes because each pass only fills in part of the pixels of a row.
  const bool interlaced = passes > 1;
  rgba_rows.resize(interlaced ? row_bytes * height : row_bytes);

  for (int pass = 0; pass < passes; ++pass) {
    const bool final_pass = (pass == passes - 1);
    for (png_uint_32 i = 0; i < height; ++i) {
      uint8_t* row = rgba_rows.data() + (interlaced ? row_bytes * i : 0);
      png_read_row(png, row, nullptr);
      if (!final_pass) {
        continue;
      }

      // The row is complete now: copy R, G, B and skip the 4th (alpha/filler) byte.
      uint8_t* dst = output_data + static_cast<size_t>(i) * width * 3;
      for (size_t j = 0; j < width; ++j) {
        dst[j * 3 + 0] = row[j * 4 + 0];
        dst[j * 3 + 1] = row[j * 4 + 1];
        dst[j * 3 + 2] = row[j * 4 + 2];
      }
    }
  }

  png_destroy_read_struct(&png, &info, nullptr);
  return {};
}
|
||||
|
||||
// Decodes a PNG or JPEG byte stream into an {H, W, C} uint8 tensor.
//
// input  - 1-D tensor with the encoded bytes (at least 8, the PNG signature length).
// output - allocated here. PNG input is delegated to DecodePNG(); anything else is handed to
//          libjpeg and allocated as {output_height, output_width, output_components}.
// Returns kOrtxErrorInvalidArgument for a non-1-D or too short input and kOrtxErrorInternal when
// the JPEG stream cannot be decoded completely.
//
// NOTE(review): jpeg_std_error() installs libjpeg's default error manager; as far as the libjpeg
// documentation goes its error_exit terminates the process on fatal stream errors - confirm and
// consider a custom error_exit.
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const {
  const auto& dimensions = input.Shape();
  if (dimensions.size() != 1ULL) {
    return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Only raw image formats are supported."};
  }

  // Get data & the length
  const uint8_t* encoded_image_data = input.Data();
  const int64_t encoded_image_data_len = input.NumberOfElement();

  // check it's a PNG image or JPEG image
  if (encoded_image_data_len < 8) {
    return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Invalid image data."};
  }

  if (png_sig_cmp(encoded_image_data, 0, 8) == 0) {
    return DecodePNG(encoded_image_data, encoded_image_data_len, output);
  }

  // Initialize JPEG decompression object
  jpeg_decompress_struct cinfo;
  jpeg_error_mgr jerr;
  cinfo.err = jpeg_std_error(&jerr);
  jpeg_create_decompress(&cinfo);

  // Set up the custom memory source manager
  JMemorySourceManager srcManager(encoded_image_data, encoded_image_data_len);
  cinfo.src = &srcManager;

  // The source manager never refills its buffer, so libjpeg reports a truncated stream by
  // "suspending": jpeg_read_header() does not return JPEG_HEADER_OK, jpeg_start_decompress()
  // returns FALSE and jpeg_read_scanlines() returns 0. Each of those is treated as a failure.
  bool ok = jpeg_read_header(&cinfo, TRUE) == JPEG_HEADER_OK && jpeg_start_decompress(&cinfo) != FALSE;

  if (ok) {
    // Allocate memory for the image
    std::vector<int64_t> output_dimensions{cinfo.output_height, cinfo.output_width, cinfo.output_components};
    uint8_t* imageBuffer = output.Allocate(output_dimensions);

    // Read the image data, one scanline per call, straight into the output tensor.
    const size_t row_stride = static_cast<size_t>(cinfo.output_width) * cinfo.output_components;
    while (cinfo.output_scanline < cinfo.output_height) {
      uint8_t* row_ptr = imageBuffer + cinfo.output_scanline * row_stride;
      const JDIMENSION rows_read = jpeg_read_scanlines(&cinfo, &row_ptr, 1);
      if (rows_read == 0 || srcManager.extError != kOrtxOK) {
        ok = false;
        break;
      }
    }

    if (srcManager.extError != kOrtxOK) {
      ok = false;
    }
  }

  // Finish decompression only after a complete read; the decompressor is released on every path.
  if (ok) {
    jpeg_finish_decompress(&cinfo);
  }
  jpeg_destroy_decompress(&cinfo);

  if (!ok) {
    return {kOrtxErrorInternal, "[ImageDecoder]: Failed to decode JPEG image."};
  }

  return {};
}
|
||||
|
||||
class JMemorySourceManager : public jpeg_source_mgr {
|
||||
public:
|
||||
// Constructor
|
||||
JMemorySourceManager(const uint8_t* encoded_image_data, const int64_t encoded_image_data_len) {
|
||||
// Initialize source fields
|
||||
next_input_byte = reinterpret_cast<const JOCTET*>(encoded_image_data);
|
||||
bytes_in_buffer = static_cast<size_t>(encoded_image_data_len);
|
||||
init_source = &JMemorySourceManager::initSource;
|
||||
fill_input_buffer = &JMemorySourceManager::fillInputBuffer;
|
||||
skip_input_data = &JMemorySourceManager::skipInputData;
|
||||
resync_to_restart = jpeg_resync_to_restart;
|
||||
term_source = &JMemorySourceManager::termSource;
|
||||
}
|
||||
|
||||
// Initialize source (no-op)
|
||||
static void initSource(j_decompress_ptr cinfo) {
|
||||
// No initialization needed
|
||||
}
|
||||
|
||||
// Fill input buffer (not used here, always return FALSE)
|
||||
static boolean fillInputBuffer(j_decompress_ptr cinfo) {
|
||||
return FALSE; // Buffer is managed manually
|
||||
}
|
||||
|
||||
// Skip input data
|
||||
static void skipInputData(j_decompress_ptr cinfo, long num_bytes) {
|
||||
JMemorySourceManager* srcMgr = reinterpret_cast<JMemorySourceManager*>(cinfo->src);
|
||||
if (num_bytes > 0) {
|
||||
size_t bytes_to_skip = static_cast<size_t>(num_bytes);
|
||||
while (bytes_to_skip > srcMgr->bytes_in_buffer) {
|
||||
bytes_to_skip -= srcMgr->bytes_in_buffer;
|
||||
if (srcMgr->fillInputBuffer(cinfo)) {
|
||||
// Error: buffer ran out
|
||||
srcMgr->extError = kOrtxErrorCorruptData;
|
||||
}
|
||||
}
|
||||
srcMgr->next_input_byte += bytes_to_skip;
|
||||
srcMgr->bytes_in_buffer -= bytes_to_skip;
|
||||
}
|
||||
}
|
||||
|
||||
// Terminate source (no-op)
|
||||
static void termSource(j_decompress_ptr cinfo) {
|
||||
// No cleanup needed
|
||||
}
|
||||
|
||||
extError_t extError{kOrtxOK}; // Error handler
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace ort_extensions::internal
|
|
@ -8,9 +8,10 @@
|
|||
#include "op_def_struct.h"
|
||||
#include "ext_status.h"
|
||||
|
||||
namespace ort_extensions::internal {
|
||||
|
||||
struct DecodeImage {
|
||||
template <typename DictT>
|
||||
OrtxStatus Init(const DictT& attrs) {
|
||||
OrtxStatus OnInit() {
|
||||
CFStringRef optionKeys[2];
|
||||
CFTypeRef optionValues[2];
|
||||
optionKeys[0] = kCGImageSourceShouldCache;
|
||||
|
@ -25,7 +26,7 @@ struct DecodeImage {
|
|||
return {};
|
||||
}
|
||||
|
||||
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) {
|
||||
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const {
|
||||
const auto& dimensions = input.Shape();
|
||||
if (dimensions.size() != 1ULL) {
|
||||
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Only raw image formats are supported."};
|
||||
|
@ -114,3 +115,4 @@ struct DecodeImage {
|
|||
private:
|
||||
CFDictionaryRef imageSourceOptions_{NULL};
|
||||
};
|
||||
} // namespace ort_extensions::internal
|
|
@ -12,10 +12,9 @@
|
|||
#include "op_def_struct.h"
|
||||
#include "ext_status.h"
|
||||
|
||||
|
||||
namespace ort_extensions::internal {
|
||||
struct DecodeImage {
|
||||
template <typename DictT>
|
||||
OrtxStatus Init(const DictT& attrs) {
|
||||
OrtxStatus OnInit() {
|
||||
HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
|
||||
if (FAILED(hr)) {
|
||||
return {kOrtxErrorInternal, "[ImageDecoder]: Failed when CoInitialize."};
|
||||
|
@ -29,7 +28,7 @@ struct DecodeImage {
|
|||
return {};
|
||||
}
|
||||
|
||||
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) {
|
||||
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) const{
|
||||
const auto& dimensions = input.Shape();
|
||||
if (dimensions.size() != 1ULL) {
|
||||
return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Only raw image formats are supported."};
|
||||
|
@ -147,3 +146,4 @@ struct DecodeImage {
|
|||
private:
|
||||
winrt::com_ptr<IWICImagingFactory> pIWICFactory_;
|
||||
};
|
||||
} // namespace ort_extensions::internal
|
|
@ -1,16 +1,15 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "ocos.h"
|
||||
#include "decode_image.hpp"
|
||||
#include "encode_image.hpp"
|
||||
#include "draw_bounding_box.hpp"
|
||||
|
||||
const std::vector<const OrtCustomOp*>& VisionLoader() {
|
||||
static OrtOpLoader op_loader(CustomCpuStruct("EncodeImage", ort_extensions::KernelEncodeImage),
|
||||
CustomCpuStruct("DecodeImage", ort_extensions::KernelDecodeImage),
|
||||
static OrtOpLoader op_loader(CustomCpuStructV2("DecodeImage", ort_extensions::DecodeImage),
|
||||
CustomCpuStruct("EncodeImage", ort_extensions::KernelEncodeImage),
|
||||
CustomCpuStruct("DrawBoundingBoxes", ort_extensions::DrawBoundingBoxes));
|
||||
return op_loader.GetCustomOps();
|
||||
}
|
||||
|
||||
FxLoadCustomOpFactory LoadCustomOpClasses_Vision = VisionLoader;
|
||||
FxLoadCustomOpFactory LoadCustomOpClasses_Vision = VisionLoader;
|
||||
|
|
|
@ -1,159 +0,0 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "png.h"
|
||||
#include "jpeglib.h"
|
||||
#include "op_def_struct.h"
|
||||
#include "ext_status.h"
|
||||
|
||||
|
||||
OrtxStatus image_decoder(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output);
|
||||
|
||||
struct DecodeImage {
|
||||
template <typename DictT>
|
||||
OrtxStatus Init(const DictT& attrs) {
|
||||
return {};
|
||||
}
|
||||
|
||||
OrtxStatus Compute(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) {
|
||||
return image_decoder(input, output);
|
||||
}
|
||||
};
|
||||
|
||||
class JMemorySourceManager : public jpeg_source_mgr {
|
||||
public:
|
||||
// Constructor
|
||||
JMemorySourceManager(const uint8_t* encoded_image_data, const int64_t encoded_image_data_len) {
|
||||
// Initialize source fields
|
||||
next_input_byte = reinterpret_cast<const JOCTET*>(encoded_image_data);
|
||||
bytes_in_buffer = static_cast<size_t>(encoded_image_data_len);
|
||||
init_source = &JMemorySourceManager::initSource;
|
||||
fill_input_buffer = &JMemorySourceManager::fillInputBuffer;
|
||||
skip_input_data = &JMemorySourceManager::skipInputData;
|
||||
resync_to_restart = jpeg_resync_to_restart;
|
||||
term_source = &JMemorySourceManager::termSource;
|
||||
}
|
||||
|
||||
// Initialize source (no-op)
|
||||
static void initSource(j_decompress_ptr cinfo) {
|
||||
// No initialization needed
|
||||
}
|
||||
|
||||
// Fill input buffer (not used here, always return FALSE)
|
||||
static boolean fillInputBuffer(j_decompress_ptr cinfo) {
|
||||
return FALSE; // Buffer is managed manually
|
||||
}
|
||||
|
||||
// Skip input data
|
||||
static void skipInputData(j_decompress_ptr cinfo, long num_bytes) {
|
||||
JMemorySourceManager* srcMgr = reinterpret_cast<JMemorySourceManager*>(cinfo->src);
|
||||
if (num_bytes > 0) {
|
||||
size_t bytes_to_skip = static_cast<size_t>(num_bytes);
|
||||
while (bytes_to_skip > srcMgr->bytes_in_buffer) {
|
||||
bytes_to_skip -= srcMgr->bytes_in_buffer;
|
||||
if (srcMgr->fillInputBuffer(cinfo)) {
|
||||
// Error: buffer ran out
|
||||
srcMgr->extError = kOrtxErrorCorruptData;
|
||||
}
|
||||
}
|
||||
srcMgr->next_input_byte += bytes_to_skip;
|
||||
srcMgr->bytes_in_buffer -= bytes_to_skip;
|
||||
}
|
||||
}
|
||||
|
||||
// Terminate source (no-op)
|
||||
static void termSource(j_decompress_ptr cinfo) {
|
||||
// No cleanup needed
|
||||
}
|
||||
|
||||
extError_t extError{kOrtxOK}; // Error handler
|
||||
};
|
||||
|
||||
// Decodes an encoded PNG or JPEG byte stream into an interleaved 8-bit image.
//
// input:  1-D tensor holding the encoded file bytes (at least 8 bytes).
// output: allocated here as {height, width, channels}. PNG input is always
//         requested as PNG_FORMAT_RGB; JPEG input uses whatever component
//         count libjpeg reports in output_components.
//
// Returns a default (OK) status on success. Returns kOrtxErrorInvalidArgument
// for malformed arguments, PNG failures and allocation failures, and
// kOrtxErrorCorruptData when the JPEG stream cannot be decoded completely.
//
// Anything that does not carry the PNG signature is treated as JPEG.
inline OrtxStatus image_decoder(const ortc::Tensor<uint8_t>& input, ortc::Tensor<uint8_t>& output) {
  const auto& dimensions = input.Shape();
  if (dimensions.size() != 1ULL) {
    return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Only raw image formats are supported."};
  }

  // Get data & the length
  const uint8_t* encoded_image_data = input.Data();
  const int64_t encoded_image_data_len = input.NumberOfElement();

  // The PNG signature check below reads 8 bytes, so shorter inputs are rejected.
  if (encoded_image_data_len < 8) {
    return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Invalid image data."};
  }

  if (png_sig_cmp(encoded_image_data, 0, 8) == 0) {
    // Decode the PNG image with libpng's simplified API.
    png_image image;
    std::memset(&image, 0, sizeof(image));
    image.version = PNG_IMAGE_VERSION;

    if (png_image_begin_read_from_memory(&image, encoded_image_data, static_cast<size_t>(encoded_image_data_len)) ==
        0) {
      return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to read PNG image."};
    }

    image.format = PNG_FORMAT_RGB;  // Ask libpng to convert every input to 8-bit RGB
    const int64_t height = static_cast<int64_t>(image.height);
    const int64_t width = static_cast<int64_t>(image.width);
    const int64_t channels = static_cast<int64_t>(PNG_IMAGE_PIXEL_CHANNELS(image.format));

    std::vector<int64_t> output_dimensions{height, width, channels};

    uint8_t* decoded_image_data = output.Allocate(output_dimensions);
    if (decoded_image_data == nullptr) {
      // png_image_finish_read will not run, so release the reader state here.
      png_image_free(&image);
      return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to allocate memory for decoded image data."};
    }

    if (png_image_finish_read(&image, nullptr, decoded_image_data, 0, nullptr) == 0) {
      return {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to decode PNG image."};
    }

    return {};
  }

  // Initialize JPEG decompression object
  // NOTE(review): jpeg_std_error installs libjpeg's default error handler; per
  // the libjpeg documentation its error_exit terminates the process on fatal
  // errors. Consider a custom error_exit if untrusted input is expected.
  jpeg_decompress_struct cinfo;
  jpeg_error_mgr jerr;
  cinfo.err = jpeg_std_error(&jerr);
  jpeg_create_decompress(&cinfo);

  // Set up the custom memory source manager
  JMemorySourceManager srcManager(encoded_image_data, encoded_image_data_len);
  cinfo.src = &srcManager;

  OrtxStatus status{};
  bool decoded = false;

  // The source manager cannot supply more data, so a "suspended" result from
  // any libjpeg call means the stream is truncated and is treated as an error.
  if (jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK || srcManager.extError != kOrtxOK) {
    status = {kOrtxErrorCorruptData, "[ImageDecoder]: Failed to decode JPEG image."};
  } else if (!jpeg_start_decompress(&cinfo) || srcManager.extError != kOrtxOK) {
    status = {kOrtxErrorCorruptData, "[ImageDecoder]: Failed to decode JPEG image."};
  } else {
    // Allocate memory for the image
    std::vector<int64_t> output_dimensions{static_cast<int64_t>(cinfo.output_height),
                                           static_cast<int64_t>(cinfo.output_width),
                                           static_cast<int64_t>(cinfo.output_components)};
    uint8_t* imageBuffer = output.Allocate(output_dimensions);

    if (imageBuffer == nullptr) {
      status = {kOrtxErrorInvalidArgument, "[ImageDecoder]: Failed to allocate memory for decoded image data."};
    } else {
      // Read the image data one scanline at a time. The stride is computed in
      // size_t so that large images do not overflow 32-bit arithmetic.
      const size_t row_stride =
          static_cast<size_t>(cinfo.output_width) * static_cast<size_t>(cinfo.output_components);
      decoded = true;
      while (cinfo.output_scanline < cinfo.output_height) {
        uint8_t* row_ptr = imageBuffer + (static_cast<size_t>(cinfo.output_scanline) * row_stride);
        // Zero lines read means libjpeg made no progress; looping again would
        // never terminate.
        if (jpeg_read_scanlines(&cinfo, &row_ptr, 1) == 0 || srcManager.extError != kOrtxOK) {
          decoded = false;
          break;
        }
      }

      if (!decoded) {
        status = {kOrtxErrorCorruptData, "[ImageDecoder]: Failed to decode JPEG image."};
      }
    }
  }

  // Finish decompression only after every scanline was produced; an aborted
  // decode is simply torn down by jpeg_destroy_decompress.
  if (decoded) {
    jpeg_finish_decompress(&cinfo);
  }
  jpeg_destroy_decompress(&cinfo);

  return status;
}
|
|
@ -6,19 +6,9 @@
|
|||
#include "nlohmann/json.hpp"
|
||||
#include "file_sys.h"
|
||||
|
||||
#include "vision/decode_image.hpp"
|
||||
#include "image_processor.h"
|
||||
#include "c_api_utils.hpp"
|
||||
#if OCOS_ENABLE_VENDOR_IMAGE_CODECS
|
||||
#if WIN32
|
||||
#include "image_decoder_win32.hpp"
|
||||
#elif __APPLE__
|
||||
#include "image_decoder_darwin.hpp"
|
||||
#else
|
||||
#include "image_decoder.hpp"
|
||||
#endif
|
||||
#else
|
||||
#include "image_decoder.hpp"
|
||||
#endif
|
||||
|
||||
#include "image_transforms.hpp"
|
||||
#include "image_transforms_phi_3.hpp"
|
||||
|
@ -40,7 +30,7 @@ using namespace ort_extensions;
|
|||
using json = nlohmann::json;
|
||||
|
||||
Operation::KernelRegistry ImageProcessor::kernel_registry_ = {
|
||||
{"DecodeImage", []() { return CreateKernelInstance(&DecodeImage::Compute); }},
|
||||
{"DecodeImage", []() { return CreateKernelInstance(&ort_extensions::DecodeImage::Compute); }},
|
||||
{"Resize", []() { return CreateKernelInstance(&Resize::Compute); }},
|
||||
{"Rescale", []() { return CreateKernelInstance(&Rescale::Compute); }},
|
||||
{"Normalize", []() { return CreateKernelInstance(&Normalize::Compute); }},
|
||||
|
|
|
@ -230,7 +230,7 @@ struct Llama3ImageTransform {
|
|||
int64_t max_image_tiles, int64_t tile_size) {
|
||||
{
|
||||
auto possible_tile_arrangements = GetAllSupportedAspectRatios(max_image_tiles);
|
||||
std::vector<std::pair<int, int>> possible_canvas_sizes;
|
||||
std::vector<std::pair<int64_t, int64_t>> possible_canvas_sizes;
|
||||
|
||||
for (const auto& arrangement : possible_tile_arrangements) {
|
||||
possible_canvas_sizes.emplace_back(arrangement.first * tile_size, arrangement.second * tile_size);
|
||||
|
@ -263,7 +263,7 @@ struct Llama3ImageTransform {
|
|||
selected_scale = *std::max_element(downscaling_options.begin(), downscaling_options.end());
|
||||
}
|
||||
|
||||
std::vector<std::pair<int, int>> chosen_canvas;
|
||||
std::vector<std::pair<int64_t, int64_t>> chosen_canvas;
|
||||
for (size_t i = 0; i < scales.size(); ++i) {
|
||||
if (std::abs(scales[i] - selected_scale) < 1e-9) {
|
||||
chosen_canvas.push_back(possible_canvas_sizes[i]);
|
||||
|
@ -272,7 +272,7 @@ struct Llama3ImageTransform {
|
|||
|
||||
if (chosen_canvas.size() > 1) {
|
||||
auto optimal_canvas = std::min_element(chosen_canvas.begin(), chosen_canvas.end(),
|
||||
[](const std::pair<int, int>& a, const std::pair<int, int>& b) {
|
||||
[](const std::pair<int64_t, int64_t>& a, const std::pair<int64_t, int64_t>& b) {
|
||||
return (a.first * a.second) < (b.first * b.second);
|
||||
});
|
||||
return *optimal_canvas;
|
||||
|
|
|
@ -209,6 +209,11 @@ std::unique_ptr<KernelDef> CreateKernelInstance(OrtxStatus (T::*method)(Args...)
|
|||
return std::make_unique<KernelStruct<T, Args...>>(method);
|
||||
}
|
||||
|
||||
template <typename T, typename... Args>
|
||||
std::unique_ptr<KernelDef> CreateKernelInstance(OrtxStatus (T::*method)(Args...) const) {
|
||||
return std::make_unique<KernelStruct<T, Args...>>(reinterpret_cast<OrtxStatus (T::*)(Args...)>(method));
|
||||
}
|
||||
|
||||
class Operation {
|
||||
public:
|
||||
using KernelRegistry = std::unordered_map<std::string_view, std::function<std::unique_ptr<KernelDef>()>>;
|
||||
|
|
|
@ -9,23 +9,14 @@
|
|||
#include "gtest/gtest.h"
|
||||
#include "shared/api/c_api_utils.hpp"
|
||||
|
||||
#if OCOS_ENABLE_VENDOR_IMAGE_CODECS
|
||||
#if WIN32
|
||||
#include "shared/api/image_decoder_win32.hpp"
|
||||
#elif __APPLE__
|
||||
#include "shared/api/image_decoder_darwin.hpp"
|
||||
#else
|
||||
#include "shared/api/image_decoder.hpp"
|
||||
#endif
|
||||
#else
|
||||
#include "shared/api/image_decoder.hpp"
|
||||
#endif
|
||||
#include "vision/decode_image.hpp"
|
||||
|
||||
|
||||
using namespace ort_extensions;
|
||||
|
||||
TEST(ImgDecoderTest, TestPngDecoder) {
|
||||
DecodeImage image_decoder;
|
||||
image_decoder.Init(NULL);
|
||||
ort_extensions::DecodeImage image_decoder;
|
||||
image_decoder.Init(std::unordered_map<std::string, std::variant<std::string>>());
|
||||
std::vector<uint8_t> png_data;
|
||||
std::filesystem::path png_path = "data/processor/exceltable.png";
|
||||
std::ifstream png_file(png_path, std::ios::binary);
|
||||
|
@ -60,8 +51,8 @@ TEST(ImgDecoderTest, TestPngDecoder) {
|
|||
}
|
||||
|
||||
TEST(ImageDecoderTest, TestJpegDecoder) {
|
||||
DecodeImage image_decoder;
|
||||
image_decoder.Init(NULL);
|
||||
ort_extensions::DecodeImage image_decoder;
|
||||
image_decoder.Init(std::unordered_map<std::string, std::variant<std::string>>());
|
||||
std::vector<uint8_t> jpeg_data;
|
||||
std::filesystem::path jpeg_path = "data/processor/australia.jpg";
|
||||
std::ifstream jpeg_file(jpeg_path, std::ios::binary);
|
||||
|
@ -139,8 +130,8 @@ TEST(ImageDecoderTest, TestJpegDecoder) {
|
|||
#if OCOS_ENABLE_VENDOR_IMAGE_CODECS
|
||||
#if defined(WIN32) || defined(__APPLE__)
|
||||
TEST(ImageDecoderTest, TestTiffDecoder) {
|
||||
DecodeImage image_decoder;
|
||||
image_decoder.Init(NULL);
|
||||
ort_extensions::DecodeImage image_decoder;
|
||||
image_decoder.Init(std::unordered_map<std::string, std::variant<std::string>>());
|
||||
std::vector<uint8_t> tiff_data;
|
||||
std::filesystem::path tiff_path = "data/processor/canoe.tif";
|
||||
std::ifstream tiff_file(tiff_path, std::ios::binary);
|
||||
|
@ -174,4 +165,4 @@ TEST(ImageDecoderTest, TestTiffDecoder) {
|
|||
std::vector<uint8_t>({82, 66, 49, 74, 66, 57, 74, 66, 49, 82, 74, 57}));
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -3,7 +3,8 @@ import numpy as np
|
|||
from PIL import Image
|
||||
from onnxruntime_extensions import OrtPyFunction, ONNXRuntimeError, util
|
||||
|
||||
|
||||
@unittest.skip("The opencv based operators are not supported in the offical release any more"
|
||||
"please build from source code to with OCOS_ENABLE_CV2 and OCOS_ENABLE_OPENCV_CODECS enabled.")
|
||||
class TestOpenCV(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
|
|
|
@ -34,8 +34,6 @@ CMAKE_FLAG_TO_OPS = {
|
|||
"SegmentExtraction",
|
||||
],
|
||||
"OCOS_ENABLE_OPENCV_CODECS": [
|
||||
"DecodeImage",
|
||||
"EncodeImage",
|
||||
"ImageReader"
|
||||
],
|
||||
"OCOS_ENABLE_RE2_REGEX": [
|
||||
|
|
Загрузка…
Ссылка в новой задаче