64 строки
2.3 KiB
Python
64 строки
2.3 KiB
Python
# coding: utf-8
|
|
import unittest
|
|
import numpy as np
|
|
from onnxruntime_extensions import util
|
|
from onnxruntime_extensions import PyOrtFunction, BlingFireSentenceBreaker
|
|
|
|
|
|
def _run_blingfire_sentencebreaker(input, output, model_path):
    """Run the BlingFire sentence breaker on ``input`` and check the result.

    A ``PyOrtFunction`` is created from the ``BlingFireSentenceBreaker``
    custom op with ``model_path`` passed as its ``model`` argument. The
    function is called on ``input`` and its result must equal ``output``
    according to ``np.testing.assert_array_equal``, which raises
    ``AssertionError`` on a mismatch.
    """
    sentence_breaker = PyOrtFunction.from_customop(
        BlingFireSentenceBreaker,
        model=model_path,
    )
    np.testing.assert_array_equal(sentence_breaker(input), output)
|
|
|
|
|
|
class TestBlingFireSentenceBreaker(unittest.TestCase):
    """Sentence-breaking checks for the ``BlingFireSentenceBreaker`` custom op.

    Every case uses the ``default_sentence_break_model.bin`` test data file.
    """

    def _assert_breaks_into(self, text, expected):
        """Break ``text`` with the default model and compare with ``expected``."""
        model_file = util.get_test_data_file("data", "default_sentence_break_model.bin")
        _run_blingfire_sentencebreaker(
            input=text,
            output=expected,
            model_path=model_file,
        )

    def test_text_to_case1(self):
        # A single paragraph mixing English, Chinese and Russian text.
        paragraph = (
            "This is the Bling-Fire tokenizer. Autophobia, also called monophobia, isolophobia, "
            "or eremophobia, is the specific phobia of isolation."
            " 2007年9月日历表_2007年9月农历阳历一览表-万年历. "
            "I saw a girl with a telescope. Я увидел девушку с телескопом."
        )
        # The Chinese fragment is expected to stay attached to the second sentence.
        second_sentence = (
            "Autophobia, also called monophobia, isolophobia, or eremophobia, "
            "is the specific phobia of isolation."
            " 2007年9月日历表_2007年9月农历阳历一览表-万年历."
        )
        expected_sentences = [
            "This is the Bling-Fire tokenizer.",
            second_sentence,
            "I saw a girl with a telescope.",
            "Я увидел девушку с телескопом.",
        ]
        self._assert_breaks_into(np.array([paragraph]), np.array(expected_sentences))

    def test_text_to_case2(self):
        # Empty input is expected to yield a single empty string.
        empty_text = np.array([""])
        expected_sentences = np.array([""])
        self._assert_breaks_into(empty_text, expected_sentences)

    def test_text_to_case3(self):
        # Whitespace-only input.
        blank_text = np.array([" "])
        # Output of the blingfire sbd.bin model for this input.
        expected_sentences = np.array([""])
        self._assert_breaks_into(blank_text, expected_sentences)
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the test cases in this module when the file is executed directly.
    unittest.main()
|