* new CI configuration

* Set up CI with Azure Pipelines

[skip ci]

* install numpy in cibuildwheel

* add pyproject.toml

* upgrade vmImage

* update the build python versions

* remove the pytest

* move the wheel build files

* enable sdist setup.py as well.

* use git command line

* Update wheels.yml for Azure Pipelines

* disable the pypy package for macos;

* fix the external repo code tag

* fix the ctest problem

* fix the unicode 8217.

* fix the locale base test
This commit is contained in:
Wenbing Li 2021-09-25 00:40:12 -07:00 коммит произвёл GitHub
Родитель 98c32dfe4a
Коммит 9f3abe20fd
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
14 изменённых файлов: 166 добавлений и 66 удалений

49
.az/wheels.yml Normal file
Просмотреть файл

@ -0,0 +1,49 @@
# Azure Pipelines jobs that build Python wheels with cibuildwheel and publish
# the resulting `wheelhouse` directory as a build artifact.
jobs:
# Linux wheels, built on the hosted ubuntu-latest image.
- job: linux
  pool: {vmImage: 'ubuntu-latest'}
  steps:
    - task: UsePythonVersion@0
    # pip is always invoked through the interpreter (python3 -m pip), matching
    # the macOS job, so packages land in the same python3 that was upgraded.
    - bash: |
        set -o errexit
        python3 -m pip install --upgrade pip
        python3 -m pip install cibuildwheel==2.1.2
      displayName: Install dependencies
    - bash: cibuildwheel --output-dir wheelhouse .
      displayName: Build wheels
    - task: PublishBuildArtifacts@1
      inputs: {pathtoPublish: 'wheelhouse'}
# macOS wheels, built on the hosted macOS-latest image.
- job: macos
  pool: {vmImage: 'macOS-latest'}
  variables:
    # Architectures cibuildwheel is asked to build on macOS.
    CIBW_ARCHS_MACOS: "x86_64 universal2 arm64"
    # Skip trying to test arm64 builds on Intel Macs
    # CIBW_TEST_SKIP: "*-macosx_arm64 *-macosx_universal2:arm64"
    # Disable building PyPy wheels
    CIBW_SKIP: pp*
  steps:
    - task: UsePythonVersion@0
    - bash: |
        set -o errexit
        python3 -m pip install --upgrade pip
        python3 -m pip install cibuildwheel==2.1.2
      displayName: Install dependencies
    - bash: cibuildwheel --output-dir wheelhouse .
      displayName: Build wheels
    - task: PublishBuildArtifacts@1
      inputs: {pathtoPublish: 'wheelhouse'}
# - job: windows
# pool: {vmImage: 'windows-latest'}
# steps:
# - task: UsePythonVersion@0
# - bash: |
# set -o errexit
# python -m pip install --upgrade pip
# pip install cibuildwheel==2.1.2
# displayName: Install dependencies
# - bash: cibuildwheel --output-dir wheelhouse .
# displayName: Build wheels
# - task: PublishBuildArtifacts@1
# inputs: {pathtoPublish: 'wheelhouse'}

Просмотреть файл

@ -264,7 +264,7 @@ endif()
if (OCOS_ENABLE_SPM_TOKENIZER)
# SentencePiece
target_include_directories(ocos_operators PUBLIC ${sentencepieceproject_INCLUDE_DIRS})
target_include_directories(ocos_operators PUBLIC ${spm_INCLUDE_DIRS})
list(APPEND OCOS_COMPILE_DEFINITIONS ENABLE_SPM_TOKENIZER)
list(APPEND ocos_libraries sentencepiece-static)
endif()

Просмотреть файл

@ -1,5 +1,15 @@
prune ci_build
prune docs
exclude *.bat
exclude *.yaml
exclude *.git*
include *.txt
global-include *.def
recursive-include cmake *.*
recursive-include includes *.*
recursive-include operators *.*
recursive-include pyop *.*
recursive-include shared *.*
prune ci_build
prune docs
prune test
prune _subbuild
prune out
exclude *.bat
exclude *.yaml
exclude *.git*

Просмотреть файл

@ -4,8 +4,8 @@
set -e -x -u
OSNAME=android
if [ -z "$NDK_ROOT" ]; then export NDK_ROOT=`ls -d $HOME/Android/ndk/* 2>/dev/null`; fi
if [ -z "$NDK_ROOT" ]
if [[ -z ${NDK_ROOT+x} ]]; then NDK_ROOT=`ls -d $HOME/Android/Sdk/ndk/* 2>/dev/null`; fi
if [[ -z "${NDK_ROOT}" ]]
then
echo "ERROR: cannot find where NDK was installed, using NDK_ROOT to specify it"
exit 7

Просмотреть файл

@ -44,7 +44,7 @@ jobs:
displayName: build the customop library with onnxruntime
- script: |
cd out/Linux
cd out/Linux/RelWithDebInfo
ctest -C RelWithDebInfo
displayName: Run C++ native tests
@ -119,7 +119,7 @@ jobs:
displayName: build the customop library with onnxruntime
- script: |
cd out/Darwin
cd out/Darwin/RelWithDebInfo
ctest -C RelWithDebInfo
displayName: Run C++ native tests

4
cmake/externals/blingfire.cmake поставляемый
Просмотреть файл

@ -1,7 +1,7 @@
FetchContent_Declare(
Blingfire
GIT_REPOSITORY https://github.com/microsoft/BlingFire.git
GIT_TAG master
GIT_TAG 0831265c1aca95ca02eca5bf1155e4251e545328
)
@ -12,4 +12,4 @@ if (NOT blingfire_POPULATED)
# enable size optimization build
add_subdirectory(${blingfire_SOURCE_DIR} ${blingfire_BINARY_DIR} EXCLUDE_FROM_ALL)
endif ()
endif ()

23
cmake/externals/sentencepieceproject.cmake поставляемый
Просмотреть файл

@ -1,18 +1,19 @@
FetchContent_Declare(
sentencepieceproject
spm
GIT_REPOSITORY https://github.com/google/sentencepiece.git
GIT_TAG v0.1.96
)
# "spm" is an abbreviation of sentencepiece; the short name keeps build paths within the Windows MAX_PATH limit
FetchContent_GetProperties(spm)
FetchContent_GetProperties(sentencepieceproject)
if(NOT sentencepieceproject_POPULATED)
FetchContent_Populate(sentencepieceproject)
add_subdirectory(${sentencepieceproject_SOURCE_DIR} ${sentencepieceproject_BINARY_DIR} EXCLUDE_FROM_ALL)
if(NOT spm_POPULATED)
FetchContent_Populate(spm)
add_subdirectory(${spm_SOURCE_DIR} ${spm_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
set(sentencepieceproject_INCLUDE_DIRS
${sentencepieceproject_SOURCE_DIR}/third_party/protobuf-lite
${sentencepieceproject_SOURCE_DIR}/src/builtin_pb
${sentencepieceproject_SOURCE_DIR}/third_party
${sentencepieceproject_SOURCE_DIR}/src
set(spm_INCLUDE_DIRS
${spm_SOURCE_DIR}/third_party/protobuf-lite
${spm_SOURCE_DIR}/src/builtin_pb
${spm_SOURCE_DIR}/third_party
${spm_SOURCE_DIR}/src
)

Просмотреть файл

@ -7,10 +7,9 @@
The entry point to onnxruntime custom op library
"""
__version__ = "0.3.2"
__author__ = "Microsoft"
from ._version import __version__
from ._ocos import get_library_path # noqa
from ._ocos import Opdef, PyCustomOpDef # noqa
from ._ocos import hash_64 # noqa

Просмотреть файл

@ -121,6 +121,6 @@ class EagerOp:
def optimize_model(model_or_file, output_file):
sess_options = EagerOp.get_ort_session_options()
sess_options.graph_optimization_level = _ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
sess_options.graph_optimization_level = _ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
sess_options.optimized_model_filepath = output_file
_ort.InferenceSession(model_or_file if isinstance(model_or_file, str) else model_or_file.SerializeToString(), sess_options)

Просмотреть файл

@ -0,0 +1,6 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
###############################################################################
__version__ = "0.4.0"

Просмотреть файл

@ -53,7 +53,9 @@ std::vector<ustring> BasicTokenizer::Tokenize(ustring text) {
continue;
}
if (tokenize_punctuation_ && ::iswpunct(c)) {
// U+2019 is not classified as punctuation on some Linux platforms;
// to be consistent, always treat it as punctuation.
if (tokenize_punctuation_ && (::iswpunct(c) || c == wint_t(0x2019))) {
push_current_token_and_clear();
push_single_char_and_clear(c);
continue;

7
pyproject.toml Normal file
Просмотреть файл

@ -0,0 +1,7 @@
[project]
# onnxruntime does not support Python 3.10 yet
requires-python = "<3.10"
[build-system]
# Minimum requirements for the build system to execute.
requires = ["setuptools", "wheel", "numpy>=1.18.5"] # PEP 508 specifications.

Просмотреть файл

@ -1,16 +1,11 @@
# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
###########################################################################
from setuptools.command.build_ext import build_ext as _build_ext
from setuptools.command.develop import develop as _develop
from setuptools.command.build_py import build_py as _build_py
from contextlib import contextmanager
from setuptools import setup, find_packages
from setuptools.command.build_ext import build_ext as _build_ext
import os
import sys
@ -18,26 +13,13 @@ import setuptools
import pathlib
import subprocess
from textwrap import dedent
TOP_DIR = os.path.dirname(__file__)
TOP_DIR = os.path.dirname(__file__) or os.getcwd()
PACKAGE_NAME = 'onnxruntime_extensions'
if '--nightly_build' in sys.argv:
PACKAGE_NAME = 'ortext_nightly'
sys.argv.remove('--nightly_build')
@contextmanager
def chdir(path):
orig_path = os.getcwd()
os.chdir(str(path))
try:
yield
finally:
os.chdir(orig_path)
def load_msvcvar():
if os.environ.get('vcvars'):
stdout, _ = subprocess.Popen([
@ -55,6 +37,18 @@ def load_msvcvar():
"please install one or specify the environement variable VCVARS to the path of VS vcvars64.bat.")
def read_git_refs(git_args):
    """Run ``git`` with ``git_args`` inside TOP_DIR and return its first non-blank output line.

    Args:
        git_args: list of command-line arguments appended after ``git``.

    Returns:
        The first line of git's standard output that is non-empty after
        ``dedent`` and stripping of ``\\n``/``\\r``; ``''`` when there is no
        such line. The exit status of git is not inspected and stderr is not
        captured.
    """
    git_process = subprocess.Popen(
        ['git'] + git_args,
        cwd=TOP_DIR,
        stdout=subprocess.PIPE, universal_newlines=True)
    captured, _ = git_process.communicate()
    # Normalise each output line lazily and stop at the first one with content.
    cleaned_lines = (dedent(raw).strip('\n\r') for raw in captured.splitlines())
    return next((text for text in cleaned_lines if text), '')
class BuildCMakeExt(_build_ext):
def run(self):
@ -94,10 +88,9 @@ class BuildCMakeExt(_build_ext):
'--parallel'
]
with chdir(build_temp):
self.spawn(['cmake', str(project_dir)] + cmake_args)
if not self.dry_run:
self.spawn(['cmake', '--build', '.'] + build_args)
self.spawn(['cmake', '-S', str(project_dir), '-B', str(build_temp)] + cmake_args)
if not self.dry_run:
self.spawn(['cmake', '--build', str(build_temp)] + build_args)
if sys.platform == "win32":
self.copy_file(build_temp / config / 'ortcustomops.dll',
@ -106,19 +99,23 @@ class BuildCMakeExt(_build_ext):
def read_requirements():
with open(os.path.join(TOP_DIR, "requirements.txt"), "r") as f:
requirements = [_ for _ in [_.strip("\r\n ")
for _ in f.readlines()] if _ is not None]
requirements = [_ for _ in [dedent(_) for _ in f.readlines()] if _ is not None]
return requirements
# read version from the package file.
def read_version():
version_str = '1.0.0'
with (open(os.path.join(TOP_DIR, 'onnxruntime_extensions/__init__.py'), "r")) as f:
line = [_ for _ in [_.strip("\r\n ")
for _ in f.readlines()] if _.startswith("__version__")]
with (open(os.path.join(TOP_DIR, 'onnxruntime_extensions/_version.py'), "r")) as f:
line = [_ for _ in [dedent(_) for _ in f.readlines()] if _.startswith("__version__")]
if len(line) > 0:
version_str = line[0].split('=')[1].strip('" ')
version_str = line[0].split('=')[1].strip('" \n\r')
# is it a nightly or dev build?
if os.path.isdir('.git') and \
not read_git_refs(['rev-parse', '--abbrev-ref', 'HEAD']).startswith('rel-'):
# append a git commit id from git remote repo, while the local change ids are skipped.
version_str += '+' + read_git_refs(['rev-parse', 'HEAD'])[:7]
return version_str

Просмотреть файл

@ -6,6 +6,32 @@
#include "wordpiece_tokenizer.hpp"
#include "bert_tokenizer.hpp"
#include <clocale>
// Test fixture that pins the C library's LC_CTYPE locale for the duration of a
// test and puts the previous setting back afterwards, so that locale-dependent
// character classification behaves the same for every test using this fixture.
class LocaleBaseTest : public testing::Test{
 public:
  // Remember that SetUp() is run immediately before a test starts.
  void SetUp() override {
    // Query (without changing) the LC_CTYPE locale currently in effect. The
    // returned buffer may be overwritten by later setlocale() calls, so it is
    // copied into a std::string right away.
    const char* current = std::setlocale(LC_CTYPE, nullptr);
    default_locale_ = (current != nullptr) ? current : "";
    // NOTE(review): && binds tighter than ||, so the !defined(__GNUC__) check
    // only applies to __WIN32__ here -- confirm that this is intended.
#if (defined(WIN32) || defined(_WIN32) || defined(__WIN32__) && !defined(__GNUC__))
    std::setlocale(LC_CTYPE, "C");
#else
    std::setlocale(LC_CTYPE, "en_US.UTF-8");
#endif
  }

  // TearDown() is invoked immediately after a test finishes.
  void TearDown() override {
    // Restore the locale recorded in SetUp(); nothing to do if none was recorded.
    if (!default_locale_.empty()) {
      std::setlocale(LC_CTYPE, default_locale_.c_str());
    }
  }

 private:
  // Name of the LC_CTYPE locale that was active before SetUp() changed it.
  std::string default_locale_;
};
TEST(tokenizer, bert_word_split) {
ustring ind("##");
ustring text("A AAA B BB");
@ -59,8 +85,8 @@ TEST(tokenizer, wordpiece_basic_tokenizer) {
std::vector<int32_t> indices;
std::vector<int64_t> rows;
KernelWordpieceTokenizer_Tokenizer(vocab, ustring("##"), ustring("[unk]"), text, tokens, indices, rows);
//EXPECT_EQ(indices, std::vector<int32_t>({9, 6, 7, 12, 10, 11}));
//EXPECT_EQ(rows, std::vector<int64_t>({0, 6}));
// EXPECT_EQ(indices, std::vector<int32_t>({9, 6, 7, 12, 10, 11}));
// EXPECT_EQ(rows, std::vector<int64_t>({0, 6}));
}
std::unordered_map<std::u32string, int32_t> get_vocabulary_wordpiece() {
@ -127,15 +153,18 @@ TEST(tokenizer, bert_wordpiece_tokenizer_rows) {
EXPECT_EQ(rows, std::vector<int64_t>({0, 5, 7}));
}
TEST(tokenizer, basic_tokenizer_chinese) {
TEST_F(LocaleBaseTest, basic_tokenizer_chinese) {
ustring test_case = ustring("ÀÁÂÃÄÅÇÈÉÊËÌÍÎÑÒÓÔÕÖÚÜ\t䗓𨖷虴𨀐辘𧄋脟𩑢𡗶镇伢𧎼䪱轚榶𢑌㺽𤨡!#$%&(Tom@microsoft.com)*+,-./:;<=>?@[\\]^_`{|}~");
std::vector<ustring> expect_result = ustring_vector_convertor({"aaaaaaceeeeiiinooooouu", "", "𨖷", "", "𨀐", "", "𧄋", "", "𩑢", "𡗶", "", "", "𧎼", "", "", "", "𢑌", "", "𤨡", "!", "#", "$", "%", "&", "(", "tom", "@", "microsoft", ".", "com", ")", "*", "+", ",", "-", ".", "/", ":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "_", "`", "{", "|", "}", "~"});
std::vector<ustring> expect_result = ustring_vector_convertor({"aaaaaaceeeeiiinooooouu",
"", "𨖷", "", "𨀐", "", "𧄋", "", "𩑢", "𡗶", "", "", "𧎼", "", "", "", "𢑌", "", "𤨡",
"!", "#", "$", "%", "&", "(", "tom", "@", "microsoft", ".", "com", ")", "*", "+", ",", "-", ".", "/", ":",
";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "_", "`", "{", "|", "}", "~"});
BasicTokenizer tokenizer(true, true, true, true, true);
auto result = tokenizer.Tokenize(test_case);
EXPECT_EQ(result, expect_result);
}
TEST(tokenizer, basic_tokenizer_russia) {
TEST_F(LocaleBaseTest, basic_tokenizer_russia) {
ustring test_case = ustring("A $100,000 price-tag@big>small на русском языке");
std::vector<ustring> expect_result = ustring_vector_convertor({"a", "$", "100", ",", "000", "price", "-", "tag", "@", "big", ">", "small", "на", "русском", "языке"});
BasicTokenizer tokenizer(true, true, true, true, true);
@ -143,7 +172,7 @@ TEST(tokenizer, basic_tokenizer_russia) {
EXPECT_EQ(result, expect_result);
}
TEST(tokenizer, basic_tokenizer) {
TEST_F(LocaleBaseTest, basic_tokenizer) {
ustring test_case = ustring("I mean, youll need something to talk about next Sunday, right?");
std::vector<ustring> expect_result = ustring_vector_convertor({"I", "mean", ",", "you", "", "ll", "need", "something", "to", "talk", "about", "next", "Sunday", ",", "right", "?"});
BasicTokenizer tokenizer(false, true, true, true, true);
@ -217,4 +246,4 @@ TEST(tokenizer, truncation_longest_first) {
truncate.Truncate(test_input1, test_input2, 12);
EXPECT_EQ(test_input1, std::vector<int64_t>({1, 2, 3, 4, 5}));
EXPECT_EQ(test_input2, std::vector<int64_t>({1, 2, 3, 4, 5, 6 ,7}));
}
}