Adding sequence models to CI and skipping failing ones

This commit is contained in:
Peyman Manikashani 2018-12-18 15:50:23 -08:00
Parent fc839bc371
Commit 4ef44170ab
3 changed files with 161 additions and 9 deletions

View file

@ -14,9 +14,10 @@ import tempfile
onnx = pytest.importorskip("onnx")
from onnx import numpy_helper
from .onnx_verify_helper import generate_sequence_data, generate_sequential_data, generate_sparse_data, verify_results_with_onnxruntime, generate_sparse_data_non_seq
from .onnx_test_helper import find_onnx_value_info_proto_with_matching_name, save_cntk_data_as_onnx_tensor, save_test_data, save_onnx_model_with_validation_data
from .onnx_test_helper import find_onnx_value_info_proto_with_matching_name, save_cntk_data_as_onnx_tensor
from .onnx_verify_helper import verify_model
from .onnx_op_test import verify_sequence_model
# To test models locally, create folder 'onnx_models' and put in model folders.
# For example.
@ -313,4 +314,94 @@ def test_cntk_model(model_name):
rtol=1e-3,
atol=1e-4)
verify_model(model_name, str(os.path.abspath(tmpdir)))
verify_results_with_onnxruntime(model_name, str(os.path.abspath(tmpdir)))
# Root folder that holds the exported RNN/sequence CNTK models exercised below.
rnn_base_dir = get_base_dir('rnn_models')

# Every '*.model' file sitting directly inside rnn_base_dir; empty list when the
# folder does not exist (e.g. the external test-data share is unavailable).
rnn_model_names = [dir for dir in os.listdir(rnn_base_dir)
                   if os.path.isfile(os.path.join(rnn_base_dir, dir)) and dir.rfind('.model') + len('.model') == len(dir)] if os.path.exists(rnn_base_dir) else []

# Models currently failing export/verification; skipped by test_cntk_rnn_models.
skip_rnn_model_names = [
    'SmartReply.Base_BiLSTM_Exported_input_replaced_with_gather_for_indice_input.cntk.model',
    'SmartReply.cvae_input_replaced_with_gather_for_indice_input.cntk.model',
    'SmartReply.SelfAtt.infer_model.cnt.model',
    'Speech.lstm_pit.cntk48.ElementTimes3117.model',
]

# Models routed through the local verify_model helper (round-trip/resave path)
# instead of save_onnx_model_with_validation_data.
verify_with_resave = [
    'SmartReply.Base_BiLSTM_Exported_input_replaced_with_gather_for_indice_input.cntk.model',
    'SmartReply.cvae_input_replaced_with_gather_for_indice_input.cntk.model',
    'SmartReply.3outputs.Trained.cnt_replaced_embedding_with_gather.model',
    'SmartReply.3outputs.Untrained.model'
]

# Models fed dense sequential tensors (see generate_sequential_data).
models_with_sequential_data = [
    'Speech.princeton.gather.flattened.model',
    'Speech.model.lstm.900.converted.LSTMoutputW.model',
    'Speech.cris.ff.model.dbn.HLast.model',
    'Speech.262.cntk.model'
]

# Sequence models fed one-hot sparse CSR input (see generate_sparse_data).
seq_models_with_sparse_data = [
    'Bing.Malta50.proto1_128_gru_normv3_ep3_z.model',
    'SmartReply.3outputs.Trained.cnt_replaced_embedding_with_gather.model',
    'SmartReply.3outputs.Untrained.model',
]

# Non-sequence models fed one-hot sparse input (see generate_sparse_data_non_seq).
non_seq_models_with_sparse_data = [
    'Speech.Polyphony.DNN.FinalModel.cmf.model'
]
def verify_model(cntk_model, node_name, tmpdir, model_name, image = None, skip_round_trip_test = True):
    """Export a sub-graph of cntk_model (or the whole model when node_name is
    None) to a fresh folder under tmpdir and validate it through
    verify_sequence_model.

    NOTE(review): relies on module-level C, np, os, shutil and
    verify_sequence_model being in scope.
    """
    if node_name is None:
        # No specific node requested: verify the complete model.
        node_name = "full"
        cntk_node_model = cntk_model
    else:
        found = cntk_model.find_by_name(node_name)
        if not found:
            # Fall back to a UID search when no node carries that name.
            found = C.logging.depth_first_search(cntk_model, lambda x: x.uid == node_name, depth = 10)[0]
        cntk_node_model = C.as_composite(found)

    sanitized_node_name = model_name + node_name.replace("/", ".")

    if image is None:
        # Random dense input shaped like the model's first argument.
        image = np.random.rand(*np.shape(cntk_model.arguments[0])).astype(np.float32)

    test_model_path = os.path.join(str(tmpdir), R'test_' + sanitized_node_name)
    print(test_model_path)
    # Make sure the test folder starts out clean.
    if os.path.exists(test_model_path):
        shutil.rmtree(test_model_path, ignore_errors=True)

    verify_sequence_model(cntk_node_model, image, tmpdir, sanitized_node_name, resave = not skip_round_trip_test)
@pytest.mark.parametrize('model_name',
                         [model_name for model_name in rnn_model_names],
                         ids=[model_name for model_name in rnn_model_names])
def test_cntk_rnn_models(model_name, tmpdir):
    """Load one exported RNN CNTK model, build model-specific input data, and
    verify its ONNX export against onnxruntime.

    Fix: the body used ``tmpdir`` but the signature never declared it, so every
    non-skipped model raised NameError; it is now taken as the standard pytest
    ``tmpdir`` fixture.
    """
    if model_name in skip_rnn_model_names:
        pytest.skip('Skip cntk rnn model test. ')
    model_dir = os.path.join(rnn_base_dir, model_name)
    model = C.Function.load(model_dir, format=C.ModelFormat.CNTKv2)

    # Generate model-specific data for each model argument.
    data = []
    np.random.seed(0)  # deterministic inputs across runs
    sequence_length = 10
    for arg in model.arguments:
        if model_name in models_with_sequential_data:
            data.append(generate_sequential_data((1, sequence_length) + arg.shape))
        elif model_name in seq_models_with_sparse_data:
            data.append(generate_sparse_data(1, sequence_length, arg.shape[0]))
        elif model_name in non_seq_models_with_sparse_data:
            data.append(generate_sparse_data_non_seq(1, arg.shape[0]))
        else:
            data.append(generate_sequence_data(1, sequence_length, arg.shape[0]))

    # Validate model results; resave-listed models go through verify_model.
    if model_name in verify_with_resave:
        verify_model(model, None, tmpdir, model_name, data[0] if len(data) == 1 else data, True)
    else:
        save_onnx_model_with_validation_data(tmpdir, model, data[0] if len(data) == 1 else data, model_name, device=None)
        verify_results_with_onnxruntime(model_name, str(os.path.abspath(tmpdir)))

View file

@ -10,7 +10,7 @@ import scipy
import cntk as C
import pytest
onnx = pytest.importorskip("onnx")
from .onnx_verify_helper import verify_model, get_onnx_test_runner_callscript
from .onnx_verify_helper import verify_results_with_onnxruntime, get_onnx_test_runner_callscript
CNTK_FREEDIM_AXIS_DENOTATION = -3
DIM_SIZE_FOR_NON_BATCH_OPS = 1
@ -264,5 +264,28 @@ def save_test_data(model, onnx_model, test_data_path, input_data, output_data, n
# print out command line for onnx test runner
print(get_onnx_test_runner_callscript(name, tmpdir))
failed_cases_count = verify_model(name, tmpdir)
assert failed_cases_count == 0, 'there are test failures.'
failed_cases_count = verify_results_with_onnxruntime(name, tmpdir)
assert failed_cases_count == 0
def create_or_purge_folder(test_onnx_path, create=True):
    """Delete test_onnx_path if it exists and, when create is True, recreate it
    empty. Paths without a 'test_' component are left untouched as a guard
    against wiping a folder outside the test area."""
    if 'test_' in test_onnx_path:
        if os.path.exists(test_onnx_path):
            # ignore_errors: a best-effort purge is good enough for test dirs.
            shutil.rmtree(test_onnx_path, ignore_errors=True)
        if create:
            os.mkdir(test_onnx_path)
def save_onnx_model_with_validation_data(tmpdir, model, data, name, device=None):
    """Evaluate `model` on `data`, export it to ONNX under tmpdir/test_<name>,
    and write the input/output pair as an onnx_test_runner test case."""
    test_case_dir = os.path.join(str(tmpdir), "test_" + name)
    create_or_purge_folder(test_case_dir)

    onnx_file = os.path.join(test_case_dir, name + ".onnx")

    # Reference outputs come from CNTK's own evaluation of the same inputs.
    expected = model.eval(dict(zip(model.arguments, data)), device=device)

    model.save(onnx_file, format = C.ModelFormat.ONNX)
    onnx_model = onnx.load(onnx_file)

    data_set_dir = os.path.join(test_case_dir, "test_data_set_0")
    create_or_purge_folder(data_set_dir)
    save_test_data(model, onnx_model, data_set_dir, data, np.array(expected), name, tmpdir)

View file

@ -4,7 +4,9 @@
# ==============================================================================
from __future__ import print_function
import os, re, sys, subprocess
import os, re, sys, subprocess, scipy, pytest, numpy as np
import cntk as C
onnx = pytest.importorskip("onnx")
windows = os.getenv("OS")=="Windows_NT"
@ -55,7 +57,7 @@ def parse_verify_out_str(content):
return total_failed_cases
def verify_model(model_name, model_dir):
def verify_results_with_onnxruntime(model_name, model_dir):
path_prefix = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], 'ONNXRuntime') if 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ else ''
onnx_test_runner_path_str = str(os.path.join(path_prefix, 'onnx_test_runner.exe'))
# run only on windows.
@ -66,4 +68,40 @@ def verify_model(model_name, model_dir):
return parse_verify_out_str(process.stdout.decode('utf-8'))
def get_onnx_test_runner_callscript(model_name, model_dir):
    """Return the command line that runs onnx_test_runner.exe on this test case."""
    return 'onnx_test_runner.exe -n {} {}'.format(model_name, str(model_dir))
def generate_sequence_data(batch_size, seq_len, feature_size, input_as_index = False):
    """Generate a deterministic one-hot sequence batch.

    Returns float32 data shaped (1, seq_len, feature_size) with exactly one
    1.0 per step, or (1, seq_len) holding the hot indices when
    input_as_index is True. Only batch_size == 1 is supported.
    """
    assert batch_size == 1
    np.random.seed(0)  # same pseudo-random indices on every call
    if input_as_index:
        data = np.zeros((batch_size, seq_len), dtype=np.float32)
    else:
        data = np.zeros((batch_size, seq_len, feature_size), dtype=np.float32)
    for i in range(seq_len):
        # randint's upper bound is exclusive; this replaces the deprecated
        # random_integers(0, feature_size - 1) and draws the same values
        # from the same RandomState stream.
        one_hot_index = np.random.randint(0, feature_size)
        if input_as_index:
            data[0, i] = one_hot_index
        else:
            data[0, i, one_hot_index] = 1
    return data
def generate_sequential_data(tensor_shape):
    """Return a float32 tensor of the given shape filled with 0..N-1 in order."""
    element_count = int(np.prod(tensor_shape))
    return np.arange(element_count, dtype=np.float32).reshape(tensor_shape)
def generate_sparse_data(batch_size, seq_len, feature_size):
    """Generate one-hot sparse sequence data.

    Returns a list of batch_size CSR matrices, each (seq_len, feature_size)
    with a single 1.0 per row.

    NOTE(review): the seed is reset inside the loop, so every batch entry is
    identical; kept as-is because downstream verification relies on
    deterministic data — confirm whether per-batch variation was intended.
    """
    sparse_data = []
    for _ in range(0, batch_size):
        data = np.zeros((seq_len, feature_size)).astype(np.float32)
        np.random.seed(0)
        for i in range(0, seq_len):
            # randint(0, n) replaces the deprecated random_integers(0, n - 1)
            # and draws the same values from the same RandomState stream.
            one_hot_index = np.random.randint(0, feature_size)
            data[i][one_hot_index] = 1.0
        sparse_data.append(scipy.sparse.csr_matrix(data))
    return sparse_data
def generate_sparse_data_non_seq(batch_size, feature_size):
    """Generate one-hot sparse data for non-sequence models.

    Returns a single (batch_size, feature_size) CSR matrix with one 1.0 per
    row; deterministic because the seed is fixed before drawing.
    """
    data = np.zeros((batch_size, feature_size)).astype(np.float32)
    np.random.seed(0)
    for i in range(0, batch_size):
        # randint(0, n) replaces the deprecated random_integers(0, n - 1)
        # and draws the same values from the same RandomState stream.
        one_hot_index = np.random.randint(0, feature_size)
        data[i][one_hot_index] = 1.0
    sparse_data = scipy.sparse.csr_matrix(data)
    return sparse_data