From 35423b3674b9c5c89d447a90f1b982c552fefb5f Mon Sep 17 00:00:00 2001
From: Shruthi42 <13177030+Shruthi42@users.noreply.github.com>
Date: Tue, 4 May 2021 11:09:35 +0100
Subject: [PATCH] Add accuracy at threshold 0.5 to classification report (#450)

Adds the metric "Accuracy at threshold 0.5" to the classification report
(`classification_crossval_report.ipynb`). Also deletes the unused
`classification_report.ipynb`.
---
 CHANGELOG.md                                  |   2 +
 .../ML/reports/classification_report.ipynb    | 287 ------------------
 InnerEye/ML/reports/classification_report.py  |   9 +-
 InnerEye/ML/run_ml.py                         |   2 +-
 .../ML/reports/test_classification_report.py  |  36 ++-
 setup.py                                      |   7 +-
 6 files changed, 33 insertions(+), 310 deletions(-)
 delete mode 100644 InnerEye/ML/reports/classification_report.ipynb

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 61e46ce3..1e6807bd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -60,6 +60,7 @@ with only minimum code changes required. See [the MD documentation](docs/bring_y
 model configs with custom behaviour while leveraging the existing InnerEye workflows.
 - ([#445](https://github.com/microsoft/InnerEye-DeepLearning/pull/445)) Adding test coverage for the
   `HelloContainer` model with multiple GPUs
+- ([#450](https://github.com/microsoft/InnerEye-DeepLearning/pull/450)) Adds the metric "Accuracy at threshold 0.5" to the classification report (`classification_crossval_report.ipynb`).
 
 ### Changed
 
@@ -93,6 +94,7 @@ with only minimum code changes required. See [the MD documentation](docs/bring_y
 
 ### Removed
 
 - ([#439](https://github.com/microsoft/InnerEye-DeepLearning/pull/439)) Deprecated `start_epoch` config argument.
+- ([#450](https://github.com/microsoft/InnerEye-DeepLearning/pull/450)) Delete unused `classification_report.ipynb`.
 
 ### Deprecated
 
diff --git a/InnerEye/ML/reports/classification_report.ipynb b/InnerEye/ML/reports/classification_report.ipynb
deleted file mode 100644
index 49f612fe..00000000
--- a/InnerEye/ML/reports/classification_report.ipynb
+++ /dev/null
@@ -1,287 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%javascript\n",
-    "IPython.OutputArea.prototype._should_scroll = function(lines) {\n",
-    "    return false;\n",
-    "}\n",
-    "// Stops auto-scrolling so entire output is visible: see https://stackoverflow.com/a/41646403"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2",
-   "metadata": {
-    "pycharm": {
-     "name": "#%%\n"
-    },
-    "tags": [
-     "parameters"
-    ]
-   },
-   "outputs": [],
-   "source": [
-    "# Default parameter values. They will be overwritten by papermill notebook parameters.\n",
-    "# This cell must carry the tag \"parameters\" in its metadata.\n",
-    "from pathlib import Path\n",
-    "import pickle\n",
-    "import codecs\n",
-    "\n",
-    "innereye_path = Path.cwd().parent.parent.parent\n",
-    "train_metrics_csv = \"\"\n",
-    "val_metrics_csv = innereye_path / 'Tests' / 'ML' / 'reports' / 'val_metrics_classification.csv'\n",
-    "test_metrics_csv = innereye_path / 'Tests' / 'ML' / 'reports' / 'test_metrics_classification.csv'\n",
-    "number_best_and_worst_performing = 20\n",
-    "config= \"\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3",
-   "metadata": {
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "import sys\n",
-    "print(f\"Adding to path: {innereye_path}\")\n",
-    "if str(innereye_path) not in sys.path:\n",
-    "    sys.path.append(str(innereye_path))\n",
-    "\n",
-    "%matplotlib inline\n",
-    "import matplotlib.pyplot as plt\n",
-    "\n",
-    "config = pickle.loads(codecs.decode(config.encode(), \"base64\"))\n",
-    "\n",
-    "from InnerEye.ML.reports.notebook_report import print_header\n",
-    "from InnerEye.ML.reports.classification_report import plot_pr_and_roc_curves_from_csv, \\\n",
-    "print_k_best_and_worst_performing, print_metrics_for_all_prediction_targets, \\\n",
-    "plot_k_best_and_worst_performing, get_labels_and_predictions\n",
-    "\n",
-    "import warnings\n",
-    "warnings.filterwarnings(\"ignore\")\n",
-    "plt.rcParams['figure.figsize'] = (20, 10)\n",
-    "\n",
-    "#convert params to Path\n",
-    "train_metrics_csv = Path(train_metrics_csv)\n",
-    "val_metrics_csv = Path(val_metrics_csv)\n",
-    "test_metrics_csv = Path(test_metrics_csv)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "4",
-   "metadata": {},
-   "source": [
-    "# Train Metrics"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if train_metrics_csv.is_file():\n",
-    "    print_metrics_for_all_prediction_targets(val_metrics_csv=train_metrics_csv, test_metrics_csv=train_metrics_csv,\n",
-    "                                             config=config, is_thresholded=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "6",
-   "metadata": {},
-   "source": [
-    "# Validation Metrics"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if val_metrics_csv.is_file():\n",
-    "    print_metrics_for_all_prediction_targets(val_metrics_csv=val_metrics_csv, test_metrics_csv=val_metrics_csv,\n",
-    "                                             config=config, is_thresholded=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "8",
-   "metadata": {},
-   "source": [
-    "# Test Metrics"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if val_metrics_csv.is_file() and test_metrics_csv.is_file():\n",
-    "    print_metrics_for_all_prediction_targets(val_metrics_csv=val_metrics_csv, test_metrics_csv=test_metrics_csv,\n",
-    "                                             config=config, is_thresholded=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "10",
-   "metadata": {
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   },
-   "source": [
-    "# AUC and PR curves\n",
-    "## Train Set"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "11",
-   "metadata": {
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "if train_metrics_csv.is_file():\n",
-    "    plot_pr_and_roc_curves_from_csv(metrics_csv=train_metrics_csv, config=config)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "12",
-   "metadata": {},
-   "source": [
-    "## Validation set"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "13",
-   "metadata": {
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "if val_metrics_csv.is_file():\n",
-    "    plot_pr_and_roc_curves_from_csv(metrics_csv=val_metrics_csv, config=config)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "14",
-   "metadata": {
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   },
-   "source": [
-    "## Test set"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "15",
-   "metadata": {
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "if test_metrics_csv.is_file():\n",
-    "    plot_pr_and_roc_curves_from_csv(metrics_csv=test_metrics_csv, config=config)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "16",
-   "metadata": {},
-   "source": [
-    "# Best and worst samples by ID"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "17",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if val_metrics_csv.is_file() and test_metrics_csv.is_file():\n",
-    "    for prediction_target in config.target_names:\n",
-    "        print_header(f\"Class {prediction_target}\", level=3)\n",
-    "        print_k_best_and_worst_performing(val_metrics_csv=val_metrics_csv, test_metrics_csv=test_metrics_csv,\n",
-    "                                          k=number_best_and_worst_performing,\n",
-    "                                          prediction_target=prediction_target)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "18",
-   "metadata": {},
-   "source": [
-    "# Plot best and worst sample images"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "19",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if val_metrics_csv.is_file() and test_metrics_csv.is_file():\n",
-    "    for prediction_target in config.target_names:\n",
-    "        print_header(f\"Class {prediction_target}\", level=3)\n",
-    "        plot_k_best_and_worst_performing(val_metrics_csv=val_metrics_csv, test_metrics_csv=test_metrics_csv,\n",
-    "                                         k=number_best_and_worst_performing, prediction_target=prediction_target, config=config)"
-   ]
-  }
- ],
- "metadata": {
-  "celltoolbar": "Tags",
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
\ No newline at end of file
diff --git a/InnerEye/ML/reports/classification_report.py b/InnerEye/ML/reports/classification_report.py
index 86000843..4eb33f0e 100644
--- a/InnerEye/ML/reports/classification_report.py
+++ b/InnerEye/ML/reports/classification_report.py
@@ -49,7 +49,8 @@ class ReportedScalarMetrics(Enum):
     AUC_PR = "Area under PR Curve", False
     AUC_ROC = "Area under ROC Curve", False
     OptimalThreshold = "Optimal threshold", False
-    Accuracy = "Accuracy at optimal threshold", True
+    AccuracyAtOptimalThreshold = "Accuracy at optimal threshold", True
+    AccuracyAtThreshold05 = "Accuracy at threshold 0.5", True
     Sensitivity = "Sensitivity at optimal threshold", True
     Specificity = "Specificity at optimal threshold", True
 
@@ -326,10 +327,14 @@ def get_metric(predictions_to_set_optimal_threshold: LabelsAndPredictions,
         precision, recall, _ = precision_recall_curve(predictions_to_compute_metrics.labels,
                                                       predictions_to_compute_metrics.model_outputs)
         return auc(recall, precision)
-    elif metric is ReportedScalarMetrics.Accuracy:
+    elif metric is ReportedScalarMetrics.AccuracyAtOptimalThreshold:
         return binary_classification_accuracy(model_output=predictions_to_compute_metrics.model_outputs,
                                               label=predictions_to_compute_metrics.labels,
                                               threshold=optimal_threshold)
+    elif metric is ReportedScalarMetrics.AccuracyAtThreshold05:
+        return binary_classification_accuracy(model_output=predictions_to_compute_metrics.model_outputs,
+                                              label=predictions_to_compute_metrics.labels,
+                                              threshold=0.5)
     elif metric is ReportedScalarMetrics.Specificity:
         return recall_score(predictions_to_compute_metrics.labels,
                             predictions_to_compute_metrics.model_outputs >= optimal_threshold, pos_label=0)
diff --git a/InnerEye/ML/run_ml.py b/InnerEye/ML/run_ml.py
index 48ee4131..f30580a6 100644
--- a/InnerEye/ML/run_ml.py
+++ b/InnerEye/ML/run_ml.py
@@ -894,5 +894,5 @@ class MLRunner:
                                            val_metrics=path_to_best_epoch_val,
                                            test_metrics=path_to_best_epoch_test)
         except Exception as ex:
-            print_exception(ex, "Failed to generated reporting notebook.")
+            print_exception(ex, "Failed to generate reporting notebook.")
             raise
diff --git a/Tests/ML/reports/test_classification_report.py b/Tests/ML/reports/test_classification_report.py
index 2fef2456..370ddf85 100644
--- a/Tests/ML/reports/test_classification_report.py
+++ b/Tests/ML/reports/test_classification_report.py
@@ -196,17 +196,23 @@ def test_get_metric() -> None:
 
     accuracy = get_metric(predictions_to_compute_metrics=test_metrics,
                           predictions_to_set_optimal_threshold=val_metrics,
-                          metric=ReportedScalarMetrics.Accuracy)
+                          metric=ReportedScalarMetrics.AccuracyAtOptimalThreshold)
 
     assert accuracy == 0.5
 
     accuracy = get_metric(predictions_to_compute_metrics=test_metrics,
                           predictions_to_set_optimal_threshold=val_metrics,
-                          metric=ReportedScalarMetrics.Accuracy,
+                          metric=ReportedScalarMetrics.AccuracyAtOptimalThreshold,
                           optimal_threshold=0.1)
 
     assert accuracy == 0.5
 
+    accuracy = get_metric(predictions_to_compute_metrics=test_metrics,
+                          predictions_to_set_optimal_threshold=val_metrics,
+                          metric=ReportedScalarMetrics.AccuracyAtThreshold05)
+
+    assert accuracy == 0.5
+
     specificity = get_metric(predictions_to_compute_metrics=test_metrics,
                              predictions_to_set_optimal_threshold=val_metrics,
                              metric=ReportedScalarMetrics.Specificity)
@@ -257,12 +263,13 @@ def test_get_metrics_table_single_run() -> None:
                                     is_thresholded=False, is_crossval_report=False)
     expected_header = "Metric\tValue".split('\t')
     expected_rows = [
-        "Area under PR Curve\t0.5417".split('\t'),
-        "Area under ROC Curve\t0.5000".split('\t'),
-        "Optimal threshold\t0.6000".split('\t'),
-        "Accuracy at optimal threshold\t0.5000".split('\t'),
-        "Sensitivity at optimal threshold\t0.5000".split('\t'),
-        "Specificity at optimal threshold\t0.5000".split('\t'),
+        f"{ReportedScalarMetrics.AUC_PR.value[0]}\t0.5417".split('\t'),
+        f"{ReportedScalarMetrics.AUC_ROC.value[0]}\t0.5000".split('\t'),
+        f"{ReportedScalarMetrics.OptimalThreshold.value[0]}\t0.6000".split('\t'),
+        f"{ReportedScalarMetrics.AccuracyAtOptimalThreshold.value[0]}\t0.5000".split('\t'),
+        f"{ReportedScalarMetrics.AccuracyAtThreshold05.value[0]}\t0.5000".split('\t'),
+        f"{ReportedScalarMetrics.Sensitivity.value[0]}\t0.5000".split('\t'),
+        f"{ReportedScalarMetrics.Specificity.value[0]}\t0.5000".split('\t'),
     ]
     check_table_equality(header, rows, expected_header, expected_rows)
 
@@ -283,12 +290,13 @@ def test_get_metrics_table_crossval() -> None:
                                     is_thresholded=False, is_crossval_report=True)
     expected_header = "Metric\tSplit 0\tSplit 1\tSplit 2\tMean (std)".split('\t')
     expected_rows = [
-        "Area under PR Curve\t0.5417\t0.4481\t0.6889\t0.5595 (0.0991)".split('\t'),
-        "Area under ROC Curve\t0.5000\t0.2778\t0.7222\t0.5000 (0.1814)".split('\t'),
-        "Optimal threshold\t0.6000\t0.6000\t0.6000\t0.6000 (0.0000)".split('\t'),
-        "Accuracy at optimal threshold\t0.5000\t0.2500\t0.7500\t0.5000 (0.2041)".split('\t'),
-        "Sensitivity at optimal threshold\t0.5000\t0.1667\t0.8333\t0.5000 (0.2722)".split('\t'),
-        "Specificity at optimal threshold\t0.5000\t0.1667\t0.8333\t0.5000 (0.2722)".split('\t')
+        f"{ReportedScalarMetrics.AUC_PR.value[0]}\t0.5417\t0.4481\t0.6889\t0.5595 (0.0991)".split('\t'),
+        f"{ReportedScalarMetrics.AUC_ROC.value[0]}\t0.5000\t0.2778\t0.7222\t0.5000 (0.1814)".split('\t'),
+        f"{ReportedScalarMetrics.OptimalThreshold.value[0]}\t0.6000\t0.6000\t0.6000\t0.6000 (0.0000)".split('\t'),
+        f"{ReportedScalarMetrics.AccuracyAtOptimalThreshold.value[0]}\t0.5000\t0.2500\t0.7500\t0.5000 (0.2041)".split('\t'),
+        f"{ReportedScalarMetrics.AccuracyAtThreshold05.value[0]}\t0.5000\t0.1667\t0.8333\t0.5000 (0.2722)".split('\t'),
+        f"{ReportedScalarMetrics.Sensitivity.value[0]}\t0.5000\t0.1667\t0.8333\t0.5000 (0.2722)".split('\t'),
+        f"{ReportedScalarMetrics.Specificity.value[0]}\t0.5000\t0.1667\t0.8333\t0.5000 (0.2722)".split('\t')
     ]
     check_table_equality(header, rows, expected_header, expected_rows)
 
diff --git a/setup.py b/setup.py
index bd88875f..9ea5d21c 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@ from ruamel.yaml.comments import CommentedMap
 
 from InnerEye.Common import fixed_paths
 from InnerEye.Common.common_util import namespace_to_path
-from InnerEye.Common.fixed_paths import INNEREYE_PACKAGE_NAME, INNEREYE_PACKAGE_ROOT
+from InnerEye.Common.fixed_paths import INNEREYE_PACKAGE_NAME
 
 ML_NAMESPACE = "InnerEye.ML"
 
@@ -118,11 +118,6 @@ if is_dev_package:
     print("\n ***** NOTE: This package is built for development purpose only. DO NOT RELEASE THIS! *****")
     print(f"\n ***** Will install dev package data: {package_data} *****\n")
 
-package_data[INNEREYE_PACKAGE_NAME] += [
-    str(INNEREYE_PACKAGE_ROOT / r"ML/reports/segmentation_report.ipynb"),
-    str(INNEREYE_PACKAGE_ROOT / r"ML/reports/classification_report.ipynb")
-]
-
 pre_processed_packages = _pre_process_packages()
 try:
     setuptools.setup(
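
Note on the new metric: "Accuracy at threshold 0.5" is plain binary accuracy with the decision threshold pinned at 0.5, in contrast to "Accuracy at optimal threshold", which first derives a threshold from the validation-set ROC curve. The patch computes it by calling `binary_classification_accuracy(..., threshold=0.5)`; that function's body is not shown in this diff, so the sketch below is an illustrative stand-in (hypothetical helper name, model outputs assumed to be probabilities in [0, 1]), not the repository's implementation.

```python
import numpy as np

def accuracy_at_threshold(model_outputs: np.ndarray,
                          labels: np.ndarray,
                          threshold: float = 0.5) -> float:
    # Binarize the posterior probabilities at the given threshold,
    # then count the fraction of samples that match the ground truth.
    predictions = (model_outputs >= threshold).astype(int)
    return float(np.mean(predictions == labels))

# Example: three of the four thresholded predictions match the labels.
outputs = np.array([0.1, 0.4, 0.35, 0.8])
labels = np.array([0, 0, 1, 1])
print(accuracy_at_threshold(outputs, labels))  # 0.75
```

Because the threshold is a constant here, this metric, unlike its optimal-threshold counterpart, does not depend on the validation set that `get_metric` otherwise uses to tune the threshold.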
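Relatedly, the `.value[0]` lookups in the rewritten test expectations work because each `ReportedScalarMetrics` member is declared with a two-element tuple whose first element is the display name used in the report tables (the second element is the boolean flag visible in the enum declarations above). A minimal self-contained sketch of that pattern, reduced to two members from the diff:

```python
from enum import Enum

class ReportedScalarMetrics(Enum):
    # Member values are (display name, flag) tuples, as in the diff above.
    AUC_PR = "Area under PR Curve", False
    AccuracyAtThreshold05 = "Accuracy at threshold 0.5", True

# .value is the whole tuple; .value[0] is the human-readable metric name.
print(ReportedScalarMetrics.AccuracyAtThreshold05.value[0])  # Accuracy at threshold 0.5
```

Writing the expected table rows in terms of `.value[0]` keeps the tests in sync with the enum's display names automatically, instead of repeating the strings by hand.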