nlp-recipes/tests/integration/test_notebooks_sentence_sim...

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import pytest
import papermill as pm
import scrapbook as sb
from tests.notebooks_common import OUTPUT_NOTEBOOK, KERNEL_NAME


# Absolute tolerance handed to pytest.approx when the tests in this module
# compare scores recorded by a notebook against their expected values.
ABS_TOL = 0.2
# Tighter absolute tolerance, used by the gensen_local result check.
ABS_TOL_PEARSONS = 0.05


@pytest.fixture(scope="module")
def baseline_results():
    """Expected scores for the similarity_embeddings_baseline notebook.

    Returns:
        dict: maps each result name recorded by the notebook to the reference
        score it is compared against (within ``ABS_TOL``).
    """
    # NOTE(review): "Word2vec WMD" and "Word2vec WMD with Stop Words" carry
    # the same value -- confirm this is not a copy/paste slip.
    expected_scores = {
        # Word2vec
        "Word2vec Cosine": 0.6476606845766778,
        "Word2vec Cosine with Stop Words": 0.6683808069062863,
        "Word2vec WMD": 0.6574175839579567,
        "Word2vec WMD with Stop Words": 0.6574175839579567,
        # GLoVe
        "GLoVe Cosine": 0.6688056947022161,
        "GLoVe Cosine with Stop Words": 0.6049380247374541,
        "GLoVe WMD": 0.6267300417407605,
        "GLoVe WMD with Stop Words": 0.48470008225931194,
        # fastText
        "fastText Cosine": 0.6707510007525627,
        "fastText Cosine with Stop Words": 0.6771300330824099,
        "fastText WMD": 0.6394958913339955,
        "fastText WMD with Stop Words": 0.5177829727556036,
        # TF-IDF
        "TF-IDF Cosine": 0.6749213786510483,
        "TF-IDF Cosine with Stop Words": 0.7118087132257667,
        # Doc2vec
        "Doc2vec Cosine": 0.5337078384749167,
        "Doc2vec Cosine with Stop Words": 0.4498543211602068,
    }
    return expected_scores


@pytest.mark.integration
def test_similarity_embeddings_baseline_runs(notebooks, baseline_results):
    """Run the similarity_embeddings_baseline notebook and compare its scores.

    The notebook is executed with papermill, then every entry of
    ``baseline_results`` is checked against the "results" scrap of the
    output notebook within ``ABS_TOL``.

    NOTE(review): this module defines a second function with exactly this
    name further down (additionally marked ``azureml``). The later ``def``
    rebinds the module-level name, so only that copy is visible to pytest --
    confirm which variant is meant to exist.
    """
    pm.execute_notebook(
        notebooks["similarity_embeddings_baseline"],
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
    )
    scraps = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict
    actual = scraps["results"]
    for metric, expected in baseline_results.items():
        assert actual[metric] == pytest.approx(expected, abs=ABS_TOL)


@pytest.mark.gpu
@pytest.mark.integration
@pytest.mark.skip(
    reason="push for release, no horovod installation automation or documentation yet"
)
def test_gensen_local(notebooks):
    """Run the gensen_local notebook for one epoch and check its results.

    Currently skipped (see the skip reason). When enabled, the notebook's
    "results" scrap is compared entry by entry against a nested mapping of
    expected values within ``ABS_TOL_PEARSONS``.
    """
    notebook_params = {
        "max_epoch": 1,
        "config_filepath": "examples/sentence_similarity/gensen_config.json",
        "base_data_path": "data",
    }
    pm.execute_notebook(
        notebooks["gensen_local"],
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=notebook_params,
    )

    # Presumably a 2x2 similarity/correlation matrix serialised with string
    # keys -- TODO confirm against the notebook.
    expected = {"0": {"0": 1, "1": 0.95}, "1": {"0": 0.95, "1": 1}}
    actual = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict["results"]

    for row, columns in expected.items():
        for col, score in columns.items():
            assert actual[row][col] == pytest.approx(score, abs=ABS_TOL_PEARSONS)


@pytest.mark.gpu
@pytest.mark.integration
def test_bert_encoder(notebooks, tmp):
    """Run the bert_encoder notebook and check the recorded embedding size.

    Args:
        notebooks: mapping from notebook name to notebook path (fixture).
        tmp: value forwarded to the notebook as ``CACHE_DIR`` (fixture).
    """
    notebook_params = {"NUM_GPUS": 1, "MAX_SEQ_LENGTH": 128, "CACHE_DIR": tmp}
    pm.execute_notebook(
        notebooks["bert_encoder"],
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=notebook_params,
    )
    scraps = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict
    assert scraps["size_emb"] == 768


@pytest.mark.integration
@pytest.mark.azureml
def test_bert_senteval(
    notebooks, subscription_id, resource_group, workspace_name, workspace_region, tmp
):
    """Run the bert_senteval notebook and check its recorded metrics.

    Args:
        notebooks: mapping from notebook name to notebook path (fixture).
        subscription_id: forwarded to the notebook parameter of the same name.
        resource_group: forwarded to the notebook parameter of the same name.
        workspace_name: forwarded to the notebook parameter of the same name.
        workspace_region: forwarded to the notebook parameter of the same name.
        tmp: value forwarded to the notebook as ``CACHE_DIR`` (fixture).

    The test passes when the "pearson" scrap is within ``ABS_TOL`` of 0.6
    and the "mse" scrap is below 1.8.
    """
    notebook_path = notebooks["bert_senteval"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            subscription_id=subscription_id,
            resource_group=resource_group,
            workspace_name=workspace_name,
            workspace_region=workspace_region,
            CACHE_DIR=tmp,
            LOCAL_UTILS="utils_nlp",
            LOCAL_SENTEVAL="utils_nlp/eval/SentEval",
            EXPERIMENT_NAME="test-nlp-ss-bert",
            CLUSTER_NAME="eval-gpu",
            MAX_NODES=1,
        ),
    )
    # Read the executed notebook once and reuse its scraps for both metrics
    # instead of re-reading the file for each lookup.
    scraps = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict
    pearson = scraps["pearson"]
    mse = scraps["mse"]
    assert pearson == pytest.approx(0.6, abs=ABS_TOL)
    assert mse < 1.8


@pytest.mark.integration
@pytest.mark.azureml
def test_similarity_embeddings_baseline_runs(notebooks, baseline_results):
    """Run the similarity_embeddings_baseline notebook and compare its scores.

    Every entry of ``baseline_results`` is checked against the "results"
    scrap of the executed notebook within ``ABS_TOL``.

    NOTE(review): a function with this exact name is already defined earlier
    in this module (without the ``azureml`` mark). This definition rebinds
    the name, so the earlier copy is not visible to pytest -- confirm which
    variant is meant to exist.
    """
    pm.execute_notebook(
        notebooks["similarity_embeddings_baseline"],
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
    )
    scraps = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict
    actual = scraps["results"]
    for metric, expected in baseline_results.items():
        assert actual[metric] == pytest.approx(expected, abs=ABS_TOL)


@pytest.mark.usefixtures("teardown_service")
@pytest.mark.integration
@pytest.mark.azureml
def test_automl_local_deployment_aci(
    notebooks, subscription_id, resource_group, workspace_name, workspace_region
):
    """Run the automl_local_deployment_aci notebook and check its correlation.

    The workspace arguments are forwarded unchanged to the notebook
    parameters of the same names. The test passes when the
    "pearson_correlation" scrap is within ``ABS_TOL`` of 0.5.
    """
    # NOTE(review): no kernel_name is passed to papermill here, unlike the
    # tests above in this module -- confirm whether that is intentional.
    notebook_params = dict(
        automl_iterations=1,
        automl_iteration_timeout=7,
        config_path=None,
        webservice_name="aci-test-service",
        subscription_id=subscription_id,
        resource_group=resource_group,
        workspace_name=workspace_name,
        workspace_region=workspace_region,
    )
    pm.execute_notebook(
        notebooks["automl_local_deployment_aci"],
        OUTPUT_NOTEBOOK,
        parameters=notebook_params,
    )
    scraps = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict
    pearson_correlation = scraps["pearson_correlation"]
    assert pearson_correlation == pytest.approx(0.5, abs=ABS_TOL)


@pytest.mark.integration
@pytest.mark.azureml
@pytest.mark.skip(
    reason="push for release, no horovod installation automation or documentation yet"
)
def test_gensen_aml_deep_dive(notebooks):
    """Run the gensen_aml_deep_dive notebook with a single epoch and run.

    Currently skipped (see the skip reason). When enabled, it asserts that
    the recorded "min_val_loss" is greater than 5 and that the recorded
    "learning_rate" lies in the closed range [0.0001, 0.001].
    """
    notebook_params = {
        "CACHE_DIR": "./tests/integration/temp",
        "AZUREML_CONFIG_PATH": "./tests/integration/.azureml",
        "UTIL_NLP_PATH": "./utils_nlp",
        "MAX_EPOCH": 1,
        "TRAIN_SCRIPT": "./examples/sentence_similarity/gensen_train.py",
        "CONFIG_PATH": "./examples/sentence_similarity/gensen_config.json",
        "MAX_TOTAL_RUNS": 1,
        "MAX_CONCURRENT_RUNS": 1,
    }
    pm.execute_notebook(
        notebooks["gensen_aml_deep_dive"],
        OUTPUT_NOTEBOOK,
        parameters=notebook_params,
    )
    scraps = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict
    assert scraps["min_val_loss"] > 5
    learning_rate = scraps["learning_rate"]
    assert 0.0001 <= learning_rate <= 0.001


@pytest.mark.integration
@pytest.mark.azureml
@pytest.mark.skip(
    reason=(
        "can't run programmatically, AKS cluster takes ~20 minutes to create "
        "and there is no blocking call in the notebook to tell that the cluster "
        "creation is in progress"
    )
)
def test_automl_with_pipelines_deployment_aks(notebooks):
    """Execute the automl_with_pipelines_deployment_aks notebook.

    Currently skipped (see the skip reason). No scraps are inspected; the
    test only runs the notebook through papermill.
    """
    pm.execute_notebook(
        notebooks["automl_with_pipelines_deployment_aks"], OUTPUT_NOTEBOOK
    )
adjusted results in integration tests and refactor fixtures 2019-07-17 15:19:38 +03:00			`# Copyright (c) Microsoft Corporation. All rights reserved.`
			`# Licensed under the MIT License.`

			`import pytest`
			`import papermill as pm`
			`import scrapbook as sb`
:bug: 2019-07-26 16:41:48 +03:00			`from tests.notebooks_common import OUTPUT_NOTEBOOK, KERNEL_NAME`
adjusted results in integration tests and refactor fixtures 2019-07-17 15:19:38 +03:00
fix bugs on duplicate code 2019-07-30 01:27:52 +03:00
Fixed some messed conflict resolution 2019-07-20 01:31:13 +03:00			`ABS_TOL = 0.2`
Changes for code review comments. 1. Added compute_correlation_coeff method to utils and separated it from the predict method to ensure single responsibility. 2. Added tests accordingly. 3. In the notebook added a scrap to track predictions and assert it in tests. 4. Also added extra documentation to explain what the predict method is doing. 5. Minor fix to stop train at max_epoch. 2019-07-25 23:49:48 +03:00			`ABS_TOL_PEARSONS = 0.05`
Fixed some messed conflict resolution 2019-07-20 01:31:13 +03:00

			`@pytest.fixture(scope="module")`
			`def baseline_results():`
			`return {`
			`"Word2vec Cosine": 0.6476606845766778,`
			`"Word2vec Cosine with Stop Words": 0.6683808069062863,`
			`"Word2vec WMD": 0.6574175839579567,`
			`"Word2vec WMD with Stop Words": 0.6574175839579567,`
			`"GLoVe Cosine": 0.6688056947022161,`
			`"GLoVe Cosine with Stop Words": 0.6049380247374541,`
			`"GLoVe WMD": 0.6267300417407605,`
			`"GLoVe WMD with Stop Words": 0.48470008225931194,`
			`"fastText Cosine": 0.6707510007525627,`
			`"fastText Cosine with Stop Words": 0.6771300330824099,`
			`"fastText WMD": 0.6394958913339955,`
			`"fastText WMD with Stop Words": 0.5177829727556036,`
			`"TF-IDF Cosine": 0.6749213786510483,`
			`"TF-IDF Cosine with Stop Words": 0.7118087132257667,`
			`"Doc2vec Cosine": 0.5337078384749167,`
			`"Doc2vec Cosine with Stop Words": 0.4498543211602068,`
			`}`
adjusted results in integration tests and refactor fixtures 2019-07-17 15:19:38 +03:00
add gensen aml test 2019-07-26 01:38:03 +03:00
:bug: in configuration 2019-08-08 19:03:55 +03:00			`@pytest.mark.integration`
			`def test_similarity_embeddings_baseline_runs(notebooks, baseline_results):`
			`notebook_path = notebooks["similarity_embeddings_baseline"]`
			`pm.execute_notebook(notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME)`
			`results = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict["results"]`
			`for key, value in baseline_results.items():`
			`assert results[key] == pytest.approx(value, abs=ABS_TOL)`

rename tests to be the same as the notebook file 2019-09-27 18:52:13 +03:00
:bug: 2019-08-07 16:33:34 +03:00			`@pytest.mark.gpu`
			`@pytest.mark.integration`
rename tests to be the same as the notebook file 2019-09-27 18:52:13 +03:00			`@pytest.mark.skip(`
			`reason="push for release, no horovod installation automation or documentation yet"`
			`)`
:bug: 2019-08-07 16:33:34 +03:00			`def test_gensen_local(notebooks):`
			`notebook_path = notebooks["gensen_local"]`
			`pm.execute_notebook(`
			`notebook_path,`
			`OUTPUT_NOTEBOOK,`
			`kernel_name=KERNEL_NAME,`
			`parameters=dict(`
			`max_epoch=1,`
update examples folder refs 2019-08-19 17:51:35 +03:00			`config_filepath="examples/sentence_similarity/gensen_config.json",`
:bug: 2019-08-07 16:33:34 +03:00			`base_data_path="data",`
			`),`
			`)`

			`results = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict["results"]`
			`expected = {"0": {"0": 1, "1": 0.95}, "1": {"0": 0.95, "1": 1}}`

			`for key, value in expected.items():`
			`for k, v in value.items():`
			`assert results[key][k] == pytest.approx(v, abs=ABS_TOL_PEARSONS)`
bert_senteval integration test 2019-07-30 20:48:05 +03:00
:bug: 2019-08-07 16:33:34 +03:00
added test for bert_encoder notebook 2019-08-07 17:59:15 +03:00			`@pytest.mark.gpu`
			`@pytest.mark.integration`
			`def test_bert_encoder(notebooks, tmp):`
			`notebook_path = notebooks["bert_encoder"]`
			`pm.execute_notebook(`
			`notebook_path,`
			`OUTPUT_NOTEBOOK,`
			`kernel_name=KERNEL_NAME,`
bert_senteval integration test 2019-07-30 20:48:05 +03:00			`parameters=dict(NUM_GPUS=1, MAX_SEQ_LENGTH=128, CACHE_DIR=tmp),`
added test for bert_encoder notebook 2019-08-07 17:59:15 +03:00			`)`
			`size_emb = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict["size_emb"]`
			`assert size_emb == 768`
bert_senteval integration test 2019-07-30 20:48:05 +03:00

			`@pytest.mark.integration`
			`@pytest.mark.azureml`
			`def test_bert_senteval(`
			`notebooks, subscription_id, resource_group, workspace_name, workspace_region, tmp`
			`):`
			`notebook_path = notebooks["bert_senteval"]`
			`pm.execute_notebook(`
			`notebook_path,`
			`OUTPUT_NOTEBOOK,`
			`kernel_name=KERNEL_NAME,`
			`parameters=dict(`
			`subscription_id=subscription_id,`
			`resource_group=resource_group,`
			`workspace_name=workspace_name,`
			`workspace_region=workspace_region,`
			`CACHE_DIR=tmp,`
			`LOCAL_UTILS="utils_nlp",`
			`LOCAL_SENTEVAL="utils_nlp/eval/SentEval",`
			`EXPERIMENT_NAME="test-nlp-ss-bert",`
			`CLUSTER_NAME="eval-gpu",`
			`MAX_NODES=1,`
			`),`
			`)`
fixed :bug: in bert_senteval and added integration test asserts 2019-09-27 19:07:47 +03:00			`pearson = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict["pearson"]`
:bug: 2019-09-27 19:13:50 +03:00			`mse = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict["mse"]`
fixed :bug: in bert_senteval and added integration test asserts 2019-09-27 19:07:47 +03:00			`assert pearson == pytest.approx(0.6, abs=ABS_TOL)`
			`assert mse < 1.8`
bert_senteval integration test 2019-07-30 20:48:05 +03:00

Change pytest.mark.notebooks 2019-07-30 18:21:32 +03:00			`@pytest.mark.integration`
			`@pytest.mark.azureml`
adjusted results in integration tests and refactor fixtures 2019-07-17 15:19:38 +03:00			`def test_similarity_embeddings_baseline_runs(notebooks, baseline_results):`
			`notebook_path = notebooks["similarity_embeddings_baseline"]`
:bug: 2019-07-26 16:41:48 +03:00			`pm.execute_notebook(notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME)`
adjusted results in integration tests and refactor fixtures 2019-07-17 15:19:38 +03:00			`results = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict["results"]`
			`for key, value in baseline_results.items():`
			`assert results[key] == pytest.approx(value, abs=ABS_TOL)`

add gensen aml test 2019-07-26 01:38:03 +03:00
Tests and update to use CLI auth so that tests run smoothly 2019-07-30 02:47:08 +03:00			`@pytest.mark.usefixtures("teardown_service")`
Change pytest.mark.notebooks 2019-07-30 18:21:32 +03:00			`@pytest.mark.integration`
			`@pytest.mark.azureml`
rename tests to be the same as the notebook file 2019-09-27 18:52:13 +03:00			`def test_automl_local_deployment_aci(`
bert_senteval integration test 2019-07-30 20:48:05 +03:00			`notebooks, subscription_id, resource_group, workspace_name, workspace_region`
			`):`
rename tests to be the same as the notebook file 2019-09-27 18:52:13 +03:00			`notebook_path = notebooks["automl_local_deployment_aci"]`
bert_senteval integration test 2019-07-30 20:48:05 +03:00			`pm.execute_notebook(`
			`notebook_path,`
			`OUTPUT_NOTEBOOK,`
			`parameters={`
fixed :bug: with merge 2019-09-25 13:19:28 +03:00			`"automl_iterations": 1,`
bert_senteval integration test 2019-07-30 20:48:05 +03:00			`"automl_iteration_timeout": 7,`
fixed :bug: with merge 2019-09-25 13:19:28 +03:00			`"config_path": None,`
bert_senteval integration test 2019-07-30 20:48:05 +03:00			`"webservice_name": "aci-test-service",`
			`"subscription_id": subscription_id,`
			`"resource_group": resource_group,`
			`"workspace_name": workspace_name,`
			`"workspace_region": workspace_region,`
			`},`
			`)`
Add tests for sentence similarity 2019-07-23 06:55:18 +03:00			`result = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict["pearson_correlation"]`
			`assert result == pytest.approx(0.5, abs=ABS_TOL)`

add gensen aml test 2019-07-26 01:38:03 +03:00
merged with staging and added mark of azureml 2019-07-26 14:23:25 +03:00			`@pytest.mark.integration`
add gensen aml test 2019-07-26 01:38:03 +03:00			`@pytest.mark.azureml`
rename tests to be the same as the notebook file 2019-09-27 18:52:13 +03:00			`@pytest.mark.skip(`
			`reason="push for release, no horovod installation automation or documentation yet"`
			`)`
			`def test_gensen_aml_deep_dive(notebooks):`
			`notebook_path = notebooks["gensen_aml_deep_dive"]`
add gensen aml test 2019-07-26 01:38:03 +03:00			`pm.execute_notebook(`
			`notebook_path,`
			`OUTPUT_NOTEBOOK,`
			`parameters=dict(`
			`CACHE_DIR="./tests/integration/temp",`
			`AZUREML_CONFIG_PATH="./tests/integration/.azureml",`
fix bugs on duplicate code 2019-07-30 01:27:52 +03:00			`UTIL_NLP_PATH="./utils_nlp",`
add gensen aml test 2019-07-26 01:38:03 +03:00			`MAX_EPOCH=1,`
update examples folder refs 2019-08-19 17:51:35 +03:00			`TRAIN_SCRIPT="./examples/sentence_similarity/gensen_train.py",`
			`CONFIG_PATH="./examples/sentence_similarity/gensen_config.json",`
add gensen aml test 2019-07-26 01:38:03 +03:00			`MAX_TOTAL_RUNS=1,`
			`MAX_CONCURRENT_RUNS=1,`
			`),`
			`)`
			`result = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict`
fix bugs on duplicate code 2019-07-30 01:27:52 +03:00			`assert result["min_val_loss"] > 5`
add gensen aml test 2019-07-26 01:38:03 +03:00			`assert result["learning_rate"] >= 0.0001`
			`assert result["learning_rate"] <= 0.001`

skipping test 2019-08-07 16:38:52 +03:00
			`@pytest.mark.integration`
			`@pytest.mark.azureml`
bert_senteval integration test 2019-07-30 20:48:05 +03:00			`@pytest.mark.skip(`
			`reason="can't run programmatically, AKS cluster takes ~20 minutes to create and there is no blocking call in the notebook to tell that the cluster creation is in progress"`
			`)`
skipping test 2019-08-07 16:38:52 +03:00			`def test_automl_with_pipelines_deployment_aks(notebooks):`
			`notebook_path = notebooks["automl_with_pipelines_deployment_aks"]`
bert_senteval integration test 2019-07-30 20:48:05 +03:00			`pm.execute_notebook(notebook_path, OUTPUT_NOTEBOOK)`