Mirror of https://github.com/microsoft/NimbusML.git
fix tests
This commit is contained in:
Parent
9f188d8438
Commit
7d659af568
@@ -347,3 +347,4 @@ _doc_report.txt
 data.csv
 data.txt
 
+/build/TestCoverageReport
@@ -299,9 +299,13 @@ set TestsPath1=%PackagePath%\tests
 set TestsPath2=%__currentScriptDir%src\python\tests
 set ReportPath=%__currentScriptDir%build\TestCoverageReport
 call "%PythonExe%" -m pytest --verbose --maxfail=1000 --capture=sys "%TestsPath1%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
+if errorlevel 1 (
+    goto :Exit_Error
+)
 call "%PythonExe%" -m pytest --verbose --maxfail=1000 --capture=sys "%TestsPath2%" --cov="%PackagePath%" --cov-report term-missing --cov-report html:"%ReportPath%"
-goto :Exit_Success
-
+if errorlevel 1 (
+    goto :Exit_Error
+)
 
 :Exit_Success
 endlocal
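The updated build.cmd runs both pytest suites under coverage and jumps to :Exit_Error as soon as either run reports a failure, instead of unconditionally reaching :Exit_Success after the first suite. Below is a minimal sketch of the same control flow in Python; the directory names and the --cov target are placeholders, not taken from this commit.

import subprocess
import sys

TEST_DIRS = ["python_package/tests", "src/python/tests"]  # placeholder paths
REPORT_DIR = "build/TestCoverageReport"

for tests in TEST_DIRS:
    cmd = [
        sys.executable, "-m", "pytest",
        "--verbose", "--maxfail=1000", "--capture=sys", tests,
        "--cov=nimbusml",                      # placeholder coverage target
        "--cov-report", "term-missing",
        "--cov-report", "html:" + REPORT_DIR,
    ]
    # Mirrors `if errorlevel 1 ( goto :Exit_Error )` in the batch script:
    # stop with a non-zero exit code as soon as one suite fails.
    if subprocess.run(cmd).returncode != 0:
        sys.exit(1)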
@@ -9,9 +9,9 @@ import pandas as pd
 from nimbusml import Pipeline
 from nimbusml.ensemble import LightGbmClassifier
 from nimbusml.feature_extraction.text import NGramFeaturizer
-from nimbusml.internal.entrypoints._ngramextractor_ngram import n_gram
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_array_equal
+from sklearn.utils.testing import assert_array_almost_equal


 def transform_data(data=None, datatype=None):
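A side note on these imports: sklearn.utils.testing was deprecated and later removed in newer scikit-learn releases. If the tests are run against such a release (an assumption, not something this commit addresses), the same assertion helpers can be imported directly from numpy.testing:

from numpy.testing import assert_almost_equal
from numpy.testing import assert_array_equal
from numpy.testing import assert_array_almost_equal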
@@ -34,7 +34,7 @@ def train_data_type_single(
         "Talk about second",
         "Thrid one",
         "Final example."]
-    model = NGramFeaturizer(word_feature_extractor=n_gram())
+    model = NGramFeaturizer()
    data_with_new_type = transform_data(data, fit_X_type)
    model.fit(data_with_new_type)
    test_data_with_new_type = transform_data(data, predict_X_type)
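The body of transform_data is not shown in this diff. As a purely illustrative sketch (not the repository's code), a helper like it would convert the raw list of strings into the container type named by datatype, which is how these tests exercise list, numpy array, pandas Series, and DataFrame inputs:

import numpy as np
import pandas as pd


def transform_data(data=None, datatype=None):
    # Hypothetical illustration only; the real helper is defined earlier in the file.
    if datatype == "list":
        return list(data)
    if datatype == "array":
        return np.array(data)
    if datatype == "series":
        return pd.Series(data)
    if datatype == "dataframe":
        return pd.DataFrame({"review": data})  # column name is a guess
    return data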
@@ -49,7 +49,7 @@ def train_data_type_ppl(fit_X_type=None, fit_Y_type=None, predict_X_type=None):
         "Final example."]
     label = [1, 0, 1, 1]
     model = Pipeline([
-        NGramFeaturizer(word_feature_extractor=n_gram()),
+        NGramFeaturizer(),
         LightGbmClassifier(min_data_per_leaf=1, n_thread=1)
     ])
     data_with_new_type = transform_data(data, fit_X_type)
@@ -66,127 +66,127 @@ class TestTextDataType(unittest.TestCase):
     def test_check_text_datatype_single_list_list_series(self):
         result = train_data_type_single("list", "list", "series")
         assert len(result) == 4
-        assert len(result.columns) == 11
+        assert len(result.columns) == 66
         assert all([col.startswith('F0') for col in result.columns])

     def test_check_text_datatype_single_series_list_series(self):
         result = train_data_type_single("series", "list", "series")
         assert len(result) == 4
-        assert len(result.columns) == 11
+        assert len(result.columns) == 66
         assert all([col.startswith('F0') for col in result.columns])

     def test_check_text_datatype_single_series_list_list(self):
         result = train_data_type_single("series", "list", "list")
         assert len(result) == 4
-        assert len(result.columns) == 11
+        assert len(result.columns) == 66
         assert all([col.startswith('F0') for col in result.columns])

     def test_check_text_datatype_single_array_list_series(self):
         result = train_data_type_single("array", "list", "series")
         assert len(result) == 4
-        assert len(result.columns) == 11
+        assert len(result.columns) == 66
         assert all([col.startswith('F0') for col in result.columns])

     def test_check_text_datatype_single_series_array_dataframe(self):
         result = train_data_type_single("series", "array", "dataframe")
         assert len(result) == 4
-        assert len(result.columns) == 11
+        assert len(result.columns) == 66
         assert all([col.startswith('F0') for col in result.columns])

     def test_check_text_datatype_single_array_series_series(self):
         result = train_data_type_single("array", "series", "series")
         assert len(result) == 4
-        assert len(result.columns) == 11
+        assert len(result.columns) == 66
         assert all([col.startswith('F0') for col in result.columns])

     def test_check_text_datatype_single_dataframe_list_series(self):
         result = train_data_type_single("dataframe", "list", "series")
         assert len(result) == 4
-        assert len(result.columns) == 11
+        assert len(result.columns) == 66
         assert all([col.startswith('F0') for col in result.columns])

     def test_check_text_datatype_single_series_series_dataframe(self):
         result = train_data_type_single("series", "series", "dataframe")
         assert len(result) == 4
-        assert len(result.columns) == 11
+        assert len(result.columns) == 66
         assert all([col.startswith('F0') for col in result.columns])

     def test_check_text_datatype_single_dataframe_series_list(self):
         result = train_data_type_single("dataframe", "series", "list")
         assert len(result) == 4
-        assert len(result.columns) == 11
+        assert len(result.columns) == 66
         assert all([col.startswith('F0') for col in result.columns])

     def test_check_text_datatype_ppl_series_list_array(self):
         result, scores, metrics = train_data_type_ppl(
             "series", "list", "array")
         assert len(result) == 4
-        assert_almost_equal(metrics['Log-loss'].item(), 0.69314718)
-        assert_array_equal(scores['Score.0'].values, scores['Score.1'].values)
-        assert_array_equal(scores['Score.0'].values, [0.5, 0.5, 0.5, 0.5])
+        assert_almost_equal(metrics['Log-loss'].item(), 0.4402459)
+        assert_array_equal(scores['Score.0'].values, result['Score.0'].values)
+        assert_array_almost_equal(scores['Score.0'].values, [0.359195, 0.528997, 0.214895, 0.354186])

     def test_check_text_datatype_ppl_list_series_dataframe(self):
         result, scores, metrics = train_data_type_ppl(
             "list", "series", "dataframe")
         assert len(result) == 4
-        assert_almost_equal(metrics['Log-loss'].item(), 0.69314718)
-        assert_array_equal(scores['Score.0'].values, scores['Score.1'].values)
-        assert_array_equal(scores['Score.0'].values, [0.5, 0.5, 0.5, 0.5])
+        assert_almost_equal(metrics['Log-loss'].item(), 0.4402459)
+        assert_array_equal(scores['Score.0'].values, result['Score.0'].values)
+        assert_array_almost_equal(scores['Score.0'].values, [0.359195, 0.528997, 0.214895, 0.354186])

     def test_check_text_datatype_ppl_list_list_series(self):
         result, scores, metrics = train_data_type_ppl("list", "list", "series")
         assert len(result) == 4
-        assert_almost_equal(metrics['Log-loss'].item(), 0.69314718)
-        assert_array_equal(scores['Score.0'].values, scores['Score.1'].values)
-        assert_array_equal(scores['Score.0'].values, [0.5, 0.5, 0.5, 0.5])
+        assert_almost_equal(metrics['Log-loss'].item(), 0.4402459)
+        assert_array_equal(scores['Score.0'].values, result['Score.0'].values)
+        assert_array_almost_equal(scores['Score.0'].values, [0.359195, 0.528997, 0.214895, 0.354186])

     def test_check_text_datatype_ppl_array_series_array(self):
         result, scores, metrics = train_data_type_ppl(
             "array", "series", "array")
         assert len(result) == 4
-        assert_almost_equal(metrics['Log-loss'].item(), 0.69314718)
-        assert_array_equal(scores['Score.0'].values, scores['Score.1'].values)
-        assert_array_equal(scores['Score.0'].values, [0.5, 0.5, 0.5, 0.5])
+        assert_almost_equal(metrics['Log-loss'].item(), 0.4402459)
+        assert_array_equal(scores['Score.0'].values, result['Score.0'].values)
+        assert_array_almost_equal(scores['Score.0'].values, [0.359195, 0.528997, 0.214895, 0.354186])

     def test_check_text_datatype_ppl_series_array_dataframe(self):
         result, scores, metrics = train_data_type_ppl(
             "series", "array", "dataframe")
         assert len(result) == 4
-        assert_almost_equal(metrics['Log-loss'].item(), 0.69314718)
-        assert_array_equal(scores['Score.0'].values, scores['Score.1'].values)
-        assert_array_equal(scores['Score.0'].values, [0.5, 0.5, 0.5, 0.5])
+        assert_almost_equal(metrics['Log-loss'].item(), 0.4402459)
+        assert_array_equal(scores['Score.0'].values, result['Score.0'].values)
+        assert_array_almost_equal(scores['Score.0'].values, [0.359195, 0.528997, 0.214895, 0.354186])

     def test_check_text_datatype_ppl_array_series_list(self):
         result, scores, metrics = train_data_type_ppl(
             "array", "series", "list")
         assert len(result) == 4
-        assert_almost_equal(metrics['Log-loss'].item(), 0.69314718)
-        assert_array_equal(scores['Score.0'].values, scores['Score.1'].values)
-        assert_array_equal(scores['Score.0'].values, [0.5, 0.5, 0.5, 0.5])
+        assert_almost_equal(metrics['Log-loss'].item(), 0.4402459)
+        assert_array_equal(scores['Score.0'].values, result['Score.0'].values)
+        assert_array_almost_equal(scores['Score.0'].values, [0.359195, 0.528997, 0.214895, 0.354186])

     def test_check_text_datatype_ppl_dataframe_list_series(self):
         result, scores, metrics = train_data_type_ppl(
             "dataframe", "list", "series")
         assert len(result) == 4
-        assert_almost_equal(metrics['Log-loss'].item(), 0.69314718)
-        assert_array_equal(scores['Score.0'].values, scores['Score.1'].values)
-        assert_array_equal(scores['Score.0'].values, [0.5, 0.5, 0.5, 0.5])
+        assert_almost_equal(metrics['Log-loss'].item(), 0.4402459)
+        assert_array_equal(scores['Score.0'].values, result['Score.0'].values)
+        assert_array_almost_equal(scores['Score.0'].values, [0.359195, 0.528997, 0.214895, 0.354186])

     def test_check_text_datatype_ppl_series_series_dataframe(self):
         result, scores, metrics = train_data_type_ppl(
             "series", "series", "dataframe")
         assert len(result) == 4
-        assert_almost_equal(metrics['Log-loss'].item(), 0.69314718)
-        assert_array_equal(scores['Score.0'].values, scores['Score.1'].values)
-        assert_array_equal(scores['Score.0'].values, [0.5, 0.5, 0.5, 0.5])
+        assert_almost_equal(metrics['Log-loss'].item(), 0.4402459)
+        assert_array_equal(scores['Score.0'].values, result['Score.0'].values)
+        assert_array_almost_equal(scores['Score.0'].values, [0.359195, 0.528997, 0.214895, 0.354186])

     def test_check_text_datatype_ppl_dataframe_series_series(self):
         result, scores, metrics = train_data_type_ppl(
             "dataframe", "series", "series")
         assert len(result) == 4
-        assert_almost_equal(metrics['Log-loss'].item(), 0.69314718)
-        assert_array_equal(scores['Score.0'].values, scores['Score.1'].values)
-        assert_array_equal(scores['Score.0'].values, [0.5, 0.5, 0.5, 0.5])
+        assert_almost_equal(metrics['Log-loss'].item(), 0.4402459)
+        assert_array_equal(scores['Score.0'].values, result['Score.0'].values)
+        assert_array_almost_equal(scores['Score.0'].values, [0.359195, 0.528997, 0.214895, 0.354186])


 if __name__ == '__main__':
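One way to read the changed expectations above: the old values (all scores equal to 0.5 and a Log-loss of 0.69314718, i.e. ln 2) are exactly what a binary classifier with no usable signal produces, while the new per-example scores vary, consistent with the pipeline now receiving informative features. A quick check of that baseline:

import math

# Log-loss of always predicting probability 0.5 on a binary label
print(-math.log(0.5))  # 0.6931471805599453, matching the old expected 0.69314718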
@@ -38,7 +38,7 @@ class TestNGramFeaturizer(unittest.TestCase):
         X_train = texttransform.fit_transform(X_train[:100])
         sum = X_train.iloc[:].sum().sum()
         print(sum)
-        assert_equal(sum, 4594, "sum of all features is incorrect!")
+        assert_equal(sum, 30513, "sum of all features is incorrect!")


 if __name__ == '__main__':
@@ -91,7 +91,7 @@ class TestNGramFeaturizer(unittest.TestCase):
         textt = NGramFeaturizer(word_feature_extractor=n_gram()) << 'review'
         X = textt.fit_transform(X)

-        assert X.shape == (25, 21)
+        assert X.shape == (25, 116)

         mymodel = LogisticRegressionBinaryClassifier().fit(X, y, verbose=0)
         X_test = textt.transform(test_reviews)
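For context, the << operator used in this test is NimbusML's column-selection syntax: it tells the transform which input column(s) to read. A small usage sketch in the same pattern; the DataFrame below is illustrative, not the test's actual data:

import pandas as pd
from nimbusml.feature_extraction.text import NGramFeaturizer
from nimbusml.internal.entrypoints._ngramextractor_ngram import n_gram

df = pd.DataFrame({"review": ["great product", "not good", "works fine"]})
textt = NGramFeaturizer(word_feature_extractor=n_gram()) << 'review'
features = textt.fit_transform(df)
# The number of output columns depends on the featurizer's settings and the
# vocabulary it builds, which is why the expected shapes in these tests changed.
print(features.shape)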
@@ -180,7 +180,7 @@ class TestNGramFeaturizer(unittest.TestCase):
             'outg': ['review']}
         X = textt.fit_transform(X)

-        assert X.shape == (25, 22)
+        assert X.shape == (25, 117)
         # columns ordering changed between 0.22 and 0.23
         assert 'review' in (X.columns[0], X.columns[-1])
         X = X.drop('review', axis=1)
@@ -204,7 +204,7 @@ class TestNGramFeaturizer(unittest.TestCase):
             columns={'features': ['id', 'education']})

         features = xf.fit_transform(data)
-        assert features.shape == (248, 259)
+        assert features.shape == (248, 652)

     def test_ngramfeaturizer_multi(self):

@@ -146,4 +146,4 @@ class TestSyntaxOneHotVectorizer(unittest.TestCase):
         ng4 = NGramFeaturizer(word_feature_extractor=n_gram()) << {
             'out1': ['education1', 'education2']}
         output4 = ng4.fit_transform(X)
-        assert output4.shape == (5, 7)
+        assert output4.shape == (5, 13)