support tokenizers build only in C API mode (#783)
* support tokenizer build only in C API mode * fix the python build. * fix the selectedops build --------- Co-authored-by: Sayan Shaw <52221015+sayanshaw24@users.noreply.github.com>
This commit is contained in:
Родитель
7851b51ee3
Коммит
be29e28dd7
|
@ -202,6 +202,13 @@ stages:
|
|||
ctest -C RelWithDebInfo --output-on-failure
|
||||
displayName: Build ort-extensions with API enabled and run tests
|
||||
|
||||
- bash: |
|
||||
set -e -x -u
|
||||
./build.sh -DOCOS_BUILD_PRESET=token_api_only -DOCOS_BUILD_SHARED_LIB=OFF
|
||||
cd out/Linux
|
||||
ctest -C RelWithDebInfo --output-on-failure
|
||||
displayName: Build ort-extensions with tokenizer API only enabled and run tests
|
||||
|
||||
|
||||
- stage: MacOSBuilds
|
||||
dependsOn: []
|
||||
|
|
|
@ -81,6 +81,7 @@ option(OCOS_ENABLE_STATIC_LIB "Enable generating static library" OFF)
|
|||
option(OCOS_ENABLE_SELECTED_OPLIST "Enable including the selected_ops tool file" OFF)
|
||||
option(OCOS_ENABLE_C_API "Enable building the C API" OFF)
|
||||
|
||||
option(OCOS_BUILD_SHARED_LIB "Enable building the dynamic library" ON)
|
||||
option(OCOS_BUILD_PYTHON "Enable building the Python package" OFF)
|
||||
option(OCOS_BUILD_JAVA "Enable building the Java package" OFF)
|
||||
option(OCOS_BUILD_ANDROID "Enable building the Android package" OFF)
|
||||
|
@ -698,7 +699,7 @@ endif()
|
|||
|
||||
# If building a shared library we can't throw an internal exception type across the library boundary as the type
|
||||
# will be unknown. Set a compile definition so the code can adjust to the build type.
|
||||
if(NOT OCOS_ENABLE_STATIC_LIB)
|
||||
if(OCOS_BUILD_SHARED_LIB)
|
||||
list(APPEND OCOS_COMPILE_DEFINITIONS OCOS_SHARED_LIBRARY)
|
||||
endif()
|
||||
|
||||
|
@ -724,15 +725,32 @@ list(APPEND ocos_libraries noexcep_operators)
|
|||
target_compile_definitions(ocos_operators PRIVATE ${OCOS_COMPILE_DEFINITIONS})
|
||||
target_link_libraries(ocos_operators PRIVATE ${ocos_libraries})
|
||||
|
||||
set (file_patterns "shared/lib/*.cc")
|
||||
if (OCOS_ENABLE_C_API)
|
||||
list(APPEND file_patterns "shared/api/*.h*" "shared/api/*.c" "shared/api/*.cc")
|
||||
file(GLOB _TARGET_LIB_SRC "shared/lib/*.cc")
|
||||
if(OCOS_ENABLE_C_API)
|
||||
file(GLOB utils_TARGET_SRC "shared/api/c_api_utils.*" "shared/api/runner.hpp")
|
||||
list(APPEND _TARGET_LIB_SRC ${utils_TARGET_SRC})
|
||||
if(_HAS_TOKENIZER)
|
||||
file(GLOB tok_TARGET_SRC "shared/api/c_api_tokenizer.cc" "shared/api/token*")
|
||||
list(APPEND _TARGET_LIB_SRC ${tok_TARGET_SRC})
|
||||
endif()
|
||||
if(OCOS_ENABLE_AUDIO)
|
||||
file(GLOB audio_TARGET_SRC "shared/api/c_api_feature_extraction.*" "shared/api/speech_*")
|
||||
list(APPEND _TARGET_LIB_SRC ${audio_TARGET_SRC})
|
||||
endif()
|
||||
if(OCOS_ENABLE_CV2)
|
||||
file(GLOB cv2_TARGET_SRC "shared/api/c_api_processor.*" "shared/api/image_*.*")
|
||||
list(APPEND _TARGET_LIB_SRC ${cv2_TARGET_SRC})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
file(GLOB shared_TARGET_LIB_SRC ${file_patterns})
|
||||
|
||||
if(NOT OCOS_ENABLE_STATIC_LIB AND CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
|
||||
add_executable(ortcustomops ${shared_TARGET_LIB_SRC})
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
|
||||
if(OCOS_ENABLE_STATIC_LIB)
|
||||
message(FATAL_ERROR "Emscripten build does not support building a static library.")
|
||||
endif()
|
||||
# Emscripten does not support building a shared library with custom ops.
|
||||
# To remain backward compatible with previous versions, we silently turn off the shared library build.
|
||||
set(OCOS_BUILD_SHARED_LIB OFF CACHE INTERNAL "" FORCE)
|
||||
add_executable(ortcustomops ${_TARGET_LIB_SRC})
|
||||
set_target_properties(ortcustomops PROPERTIES LINK_FLAGS " \
|
||||
-s WASM=1 \
|
||||
-s NO_EXIT_RUNTIME=0 \
|
||||
|
@ -751,13 +769,12 @@ if(NOT OCOS_ENABLE_STATIC_LIB AND CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
|
|||
set_property(TARGET ortcustomops APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=0 -s DEMANGLE_SUPPORT=0")
|
||||
endif()
|
||||
else()
|
||||
add_library(ortcustomops STATIC ${shared_TARGET_LIB_SRC})
|
||||
add_library(ortcustomops STATIC ${_TARGET_LIB_SRC})
|
||||
if (HAS_SDL)
|
||||
target_compile_options(ortcustomops PRIVATE "/sdl")
|
||||
endif()
|
||||
add_library(onnxruntime_extensions ALIAS ortcustomops)
|
||||
standardize_output_folder(ortcustomops)
|
||||
set(_BUILD_SHARED_LIBRARY TRUE)
|
||||
endif()
|
||||
set_target_properties(ortcustomops PROPERTIES FOLDER "operators")
|
||||
|
||||
|
@ -832,9 +849,12 @@ target_include_directories(ortcustomops PUBLIC "$<TARGET_PROPERTY:ocos_operators
|
|||
|
||||
target_link_libraries(ortcustomops PUBLIC ocos_operators)
|
||||
|
||||
if(_BUILD_SHARED_LIBRARY)
|
||||
if(OCOS_BUILD_SHARED_LIB)
|
||||
file(GLOB shared_TARGET_SRC "shared/*.cc" "shared/*.h")
|
||||
if (OCOS_ENABLE_C_API)
|
||||
if (NOT _HAS_TOKENIZER OR NOT OCOS_ENABLE_CV2 OR NOT OCOS_ENABLE_AUDIO)
|
||||
message(FATAL_ERROR "Shared library build requires GPT2_TOKENIZER, CV2 and AUDIO to be enabled.")
|
||||
endif()
|
||||
list(APPEND shared_TARGET_SRC "shared/extensions_c.def")
|
||||
else()
|
||||
list(APPEND shared_TARGET_SRC "shared/ortcustomops.def")
|
||||
|
@ -885,6 +905,7 @@ endif()
|
|||
|
||||
if(OCOS_BUILD_PYTHON)
|
||||
message(STATUS "Python Build is enabled")
|
||||
set(shared_TARGET_LIB_SRC ${_TARGET_LIB_SRC}) # these library file are also needed for python build
|
||||
include(ext_python)
|
||||
endif()
|
||||
|
||||
|
|
|
@ -128,7 +128,7 @@ add_test_target(TARGET ocos_test
|
|||
LIBRARIES ortcustomops ${ocos_libraries})
|
||||
target_compile_definitions(ocos_test PRIVATE ${OCOS_COMPILE_DEFINITIONS})
|
||||
|
||||
if (OCOS_ENABLE_C_API)
|
||||
if (OCOS_ENABLE_C_API AND OCOS_BUILD_SHARED_LIB)
|
||||
file(GLOB pp_api_TEST_SRC
|
||||
"${TEST_SRC_DIR}/pp_api_test/*.c"
|
||||
"${TEST_SRC_DIR}/pp_api_test/*.cc"
|
||||
|
@ -163,73 +163,75 @@ else()
|
|||
find_library(ONNXRUNTIME onnxruntime HINTS "${ONNXRUNTIME_LIB_DIR}")
|
||||
endif()
|
||||
|
||||
if("${ONNXRUNTIME}" STREQUAL "ONNXRUNTIME-NOTFOUND")
|
||||
message(WARNING "The prebuilt onnxruntime library was not found, extensions_test will be skipped.")
|
||||
else()
|
||||
block()
|
||||
if(NOT IOS)
|
||||
set(use_extensions_shared_library 1)
|
||||
endif()
|
||||
if (OCOS_BUILD_SHARED_LIB)
|
||||
if("${ONNXRUNTIME}" STREQUAL "ONNXRUNTIME-NOTFOUND")
|
||||
message(WARNING "The prebuilt onnxruntime library was not found, extensions_test will be skipped.")
|
||||
else()
|
||||
block()
|
||||
if(NOT IOS)
|
||||
set(use_extensions_shared_library 1)
|
||||
endif()
|
||||
|
||||
set(extensions_target $<IF:$<BOOL:${use_extensions_shared_library}>,extensions_shared,ortcustomops>)
|
||||
set(extensions_target $<IF:$<BOOL:${use_extensions_shared_library}>,extensions_shared,ortcustomops>)
|
||||
|
||||
file(GLOB shared_TEST_SRC
|
||||
"${TEST_SRC_DIR}/shared_test/*.cc"
|
||||
"${TEST_SRC_DIR}/shared_test/*.hpp")
|
||||
file(GLOB shared_TEST_SRC
|
||||
"${TEST_SRC_DIR}/shared_test/*.cc"
|
||||
"${TEST_SRC_DIR}/shared_test/*.hpp")
|
||||
|
||||
set(extensions_test_libraries ${extensions_target} ${ONNXRUNTIME})
|
||||
set(extensions_test_libraries ${extensions_target} ${ONNXRUNTIME})
|
||||
|
||||
if(use_extensions_shared_library)
|
||||
list(APPEND extensions_test_libraries ${ocos_libraries})
|
||||
endif()
|
||||
if(use_extensions_shared_library)
|
||||
list(APPEND extensions_test_libraries ${ocos_libraries})
|
||||
endif()
|
||||
|
||||
# needs to link with stdc++fs in Linux
|
||||
if(LINUX)
|
||||
list(APPEND extensions_test_libraries stdc++fs -pthread)
|
||||
endif()
|
||||
# needs to link with stdc++fs in Linux
|
||||
if(LINUX)
|
||||
list(APPEND extensions_test_libraries stdc++fs -pthread)
|
||||
endif()
|
||||
|
||||
add_test_target(TARGET extensions_test
|
||||
TEST_SOURCES ${shared_TEST_SRC}
|
||||
LIBRARIES ${extensions_test_libraries}
|
||||
TEST_DATA_DIRECTORIES ${TEST_SRC_DIR}/data)
|
||||
add_test_target(TARGET extensions_test
|
||||
TEST_SOURCES ${shared_TEST_SRC}
|
||||
LIBRARIES ${extensions_test_libraries}
|
||||
TEST_DATA_DIRECTORIES ${TEST_SRC_DIR}/data)
|
||||
|
||||
target_include_directories(extensions_test PRIVATE ${spm_INCLUDE_DIRS})
|
||||
target_include_directories(extensions_test PRIVATE ${spm_INCLUDE_DIRS})
|
||||
|
||||
target_compile_definitions(extensions_test PUBLIC ${OCOS_COMPILE_DEFINITIONS})
|
||||
if(use_extensions_shared_library)
|
||||
target_compile_definitions(extensions_test PUBLIC ORT_EXTENSIONS_UNIT_TEST_USE_EXTENSIONS_SHARED_LIBRARY)
|
||||
endif()
|
||||
target_compile_definitions(extensions_test PUBLIC ${OCOS_COMPILE_DEFINITIONS})
|
||||
if(use_extensions_shared_library)
|
||||
target_compile_definitions(extensions_test PUBLIC ORT_EXTENSIONS_UNIT_TEST_USE_EXTENSIONS_SHARED_LIBRARY)
|
||||
endif()
|
||||
|
||||
# FUTURE: This is required to use the ORT C++ API with delayed init which must be done conditionally using
|
||||
# ifdef OCOS_BUILD_SHARED in RegisterCustomOps and where onnxruntime_cxx_api.h is included .
|
||||
# ---
|
||||
# We have to remove the OCOS_BUILD_SHARED when building the test code. It is used to delay population of the
|
||||
# ORT api pointer until RegisterCustomOps is called, but the test code needs to create an ORT env which requires
|
||||
# the pointer to exist.
|
||||
# set(test_compile_definitions ${OCOS_COMPILE_DEFINITIONS})
|
||||
# remove(test_compile_definitions "OCOS_SHARED_LIBRARY")
|
||||
# target_compile_definitions(extensions_test PUBLIC ${test_compile_definitions})
|
||||
# FUTURE: This is required to use the ORT C++ API with delayed init which must be done conditionally using
|
||||
# ifdef OCOS_BUILD_SHARED in RegisterCustomOps and where onnxruntime_cxx_api.h is included .
|
||||
# ---
|
||||
# We have to remove the OCOS_BUILD_SHARED when building the test code. It is used to delay population of the
|
||||
# ORT api pointer until RegisterCustomOps is called, but the test code needs to create an ORT env which requires
|
||||
# the pointer to exist.
|
||||
# set(test_compile_definitions ${OCOS_COMPILE_DEFINITIONS})
|
||||
# remove(test_compile_definitions "OCOS_SHARED_LIBRARY")
|
||||
# target_compile_definitions(extensions_test PUBLIC ${test_compile_definitions})
|
||||
|
||||
# Copy onnxruntime DLL files into the same directory as the test binary.
|
||||
if(WIN32)
|
||||
file(TO_CMAKE_PATH "${ONNXRUNTIME_LIB_DIR}/*" ONNXRUNTIME_LIB_FILEPATTERN)
|
||||
file(GLOB ONNXRUNTIME_LIB_FILES CONFIGURE_DEPENDS "${ONNXRUNTIME_LIB_FILEPATTERN}")
|
||||
add_custom_command(
|
||||
TARGET extensions_test POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ONNXRUNTIME_LIB_FILES} $<TARGET_FILE_DIR:extensions_test>)
|
||||
endif()
|
||||
# Copy onnxruntime DLL files into the same directory as the test binary.
|
||||
if(WIN32)
|
||||
file(TO_CMAKE_PATH "${ONNXRUNTIME_LIB_DIR}/*" ONNXRUNTIME_LIB_FILEPATTERN)
|
||||
file(GLOB ONNXRUNTIME_LIB_FILES CONFIGURE_DEPENDS "${ONNXRUNTIME_LIB_FILEPATTERN}")
|
||||
add_custom_command(
|
||||
TARGET extensions_test POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ONNXRUNTIME_LIB_FILES} $<TARGET_FILE_DIR:extensions_test>)
|
||||
endif()
|
||||
|
||||
# Copy onnxruntime shared library to known location for easy access, e.g., for adb push to emulator or device.
|
||||
if(ANDROID)
|
||||
add_custom_command(
|
||||
TARGET extensions_test POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ONNXRUNTIME} ${CMAKE_BINARY_DIR}/lib
|
||||
)
|
||||
endif()
|
||||
# Copy onnxruntime shared library to known location for easy access, e.g., for adb push to emulator or device.
|
||||
if(ANDROID)
|
||||
add_custom_command(
|
||||
TARGET extensions_test POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ONNXRUNTIME} ${CMAKE_BINARY_DIR}/lib
|
||||
)
|
||||
endif()
|
||||
|
||||
if (OCOS_ENABLE_C_API)
|
||||
# avoid copying the same data directory at the same time.
|
||||
add_dependencies(extensions_test pp_api_test)
|
||||
endif()
|
||||
endblock()
|
||||
if (OCOS_ENABLE_C_API)
|
||||
# avoid copying the same data directory at the same time.
|
||||
add_dependencies(extensions_test pp_api_test)
|
||||
endif()
|
||||
endblock()
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
@ -17,3 +17,5 @@ Most APIs accept raw data inputs such as audio, image compressed binary formats,
|
|||
**Image processing:** `OrtxCreateProcessor` can create an image processor object from a pre-defined workflow in JSON format to process image files into a tensor-like data type. An example code snippet can be found [here](../test/pp_api_test/test_processor.cc#L75).
|
||||
|
||||
**Audio feature extraction:** `OrtxCreateSpeechFeatureExtractor` creates a speech feature extractor to obtain log mel spectrum data as input for the Whisper model. An example code snippet can be found [here](../test/pp_api_test/test_feature_extraction.cc#L16).
|
||||
|
||||
NB: If onnxruntime-extensions is built as a shared library, the OCOS_ENABLE_AUDIO, OCOS_ENABLE_CV2, OCOS_ENABLE_OPENCV_CODECS and OCOS_ENABLE_GPT2_TOKENIZER build flags must all be ON so that the binary is fully functional. Only the onnxruntime-extensions static library can be used for a minimal build with selected operators; in that case, the shared library build can be switched off with `-DOCOS_BUILD_SHARED_LIB=OFF`.
|
||||
|
|
|
@ -32,6 +32,8 @@ class LocaleBaseTest : public testing::Test {
|
|||
std::string default_locale_;
|
||||
};
|
||||
|
||||
#if defined(ENABLE_WORDPIECE_TOKENIZER) && defined(ENABLE_BERT_TOKENIZER)
|
||||
|
||||
TEST(tokenizer, bert_word_split) {
|
||||
ustring ind("##");
|
||||
ustring text("A AAA B BB");
|
||||
|
@ -261,3 +263,5 @@ TEST(tokenizer, basic_tok_eager) {
|
|||
tokenizer.Compute(test_case, output);
|
||||
EXPECT_EQ(output.Data(), expect_result);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
Загрузка…
Ссылка в новой задаче