Add evaluations for IC and OD (#1)

TK 2020-11-24 10:17:44 -08:00 committed by GitHub
Parent 11e804704e
Commit 2a41819dbb
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 725 additions and 114 deletions

131
.gitignore vendored Normal file
View file

@@ -0,0 +1,131 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
.idea/*

34
LICENSE
View file

@@ -1,21 +1,21 @@
MIT License

-Copyright (c) Microsoft Corporation.
+Copyright (c) 2020 Microsoft

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE
+SOFTWARE.

README.md
View file

@@ -1,33 +1,14 @@
-# Project
-
-> This repo has been populated by an initial template to help get you started. Please
-> make sure to update the content to build a great experience for community-building.
-
-As the maintainer of this project, please make a few updates:
-
-- Improving this README.MD file to provide a great experience
-- Updating SUPPORT.MD with content about this project's support experience
-- Understanding the security reporting process in SECURITY.MD
-- Remove this section from the README
-
-## Contributing
-
-This project welcomes contributions and suggestions. Most contributions require you to agree to a
-Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
-the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
-
-When you submit a pull request, a CLA bot will automatically determine whether you need to provide
-a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
-provided by the bot. You will only need to do this once across all repos using our CLA.
-
-This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
-For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
-contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
-
-## Trademarks
-
-This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
-trademarks or logos is subject to and must follow
-[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
-Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
-Any use of third-party trademarks or logos are subject to those third-party's policies.
+# Introduction
+
+This repo contains evaluation metric code used in Microsoft Cognitive Services Computer Vision for tasks such as image classification and object detection.
+
+# Functionalities
+
+This repo currently offers evaluation metrics for two vision tasks:
+- Image classification:
+  - `evaluators.TopKAccuracyEvaluator`: computes the top-k accuracy, i.e., the accuracy of the k predictions with the highest confidence.
+  - `evaluators.AveragePrecisionEvaluator`: computes the average precision, i.e., precision averaged across different confidence thresholds.
+  - `evaluators.ThresholdAccuracyEvaluator`: computes the threshold-based accuracy, i.e., the accuracy of the predictions with confidence over a certain threshold.
+  - `evaluators.EceLossEvaluator`: computes the [ECE loss](https://arxiv.org/pdf/1706.04599.pdf), i.e., the expected calibration error, given the model confidence and true labels for a set of data points.
+- Object detection:
+  - `evaluators.MeanAveragePrecisionEvaluatorForSingleIOU`, `evaluators.MeanAveragePrecisionEvaluatorForMultipleIOUs`: compute the mean average precision (mAP), i.e., the average precision (AP) averaged across classes, at a single [IoU](https://en.wikipedia.org/wiki/Jaccard_index) threshold or at multiple IoU thresholds.
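A minimal usage sketch for the image-classification evaluators listed above (illustrative only, not part of the commit; the prediction and target arrays are invented example data, and the import path is the `vision_evaluation` package added later in this diff):

```python
import numpy as np

from vision_evaluation.evaluators import TopKAccuracyEvaluator, EceLossEvaluator

# Model scores of shape (N, num_class) and integer class targets of shape (N,).
predictions = np.array([[0.9, 0.1],
                        [0.4, 0.6],
                        [0.2, 0.8]])
targets = np.array([0, 1, 0])

top1 = TopKAccuracyEvaluator(1)
top1.add_predictions(predictions, targets)
print(top1.get_report(average='micro'))  # {'top1_accuracy': ...}, 2/3 for this toy batch

ece = EceLossEvaluator()
ece.add_predictions(predictions, targets)
print(ece.get_report())  # {'calibration_ece': ...}
```

The evaluators are stateful: `add_predictions` can be called once per batch, `get_report` aggregates everything seen so far, and `reset()` clears the accumulated state.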

SECURITY.md
View file

@ -1,41 +0,0 @@
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.5 BLOCK -->
## Security
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.
## Reporting Security Issues
**Please do not report security vulnerabilities through public GitHub issues.**
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue
This information will help us triage your report more quickly.
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.
## Preferred Languages
We prefer all communications to be in English.
## Policy
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).
<!-- END MICROSOFT SECURITY.MD BLOCK -->

SUPPORT.md
View file

@@ -1,25 +0,0 @@
# TODO: The maintainer of this repo has not yet edited this file
**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?
- **No CSS support:** Fill out this template with information about how to file issues and get help.
- **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport).
- **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide.
*Then remove this first heading from this SUPPORT.MD file before publishing your repo.*
# Support
## How to file issues and get help
This project uses GitHub Issues to track bugs and feature requests. Please search the existing
issues before filing new issues to avoid duplicates. For new issues, file your bug or
feature request as a new Issue.
For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE
FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
CHANNEL. WHERE WILL YOU HELP PEOPLE?**.
## Microsoft Support Policy
Support for this **PROJECT or PRODUCT** is limited to the resources listed above.

34
setup.py Normal file
View file

@@ -0,0 +1,34 @@
import setuptools
from os import path
VERSION = '0.1.0'
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with open(path.join(here, 'README.md'), 'r') as f:
long_description = f.read()
setuptools.setup(name='vision-evaluation',
author='Ping Jin, Shohei Ono, I-Ting Fang',
description="Evaluation code for vision tasks.",
long_description=long_description,
long_description_content_type="text/markdown",
url='https://github.com/pj-ms/vision-evaluation',
version=VERSION,
license='MIT',
python_requires='>=3.6',
packages=setuptools.find_packages(),
keywords='vision metric evaluation classification detection',
classifiers=[
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
],
install_requires=[
'numpy~=1.18.3',
'sklearn',
])

186
test/test_evaluators.py Normal file
View file

@@ -0,0 +1,186 @@
import unittest
import numpy as np
from vision_evaluation.evaluators import AveragePrecisionEvaluator, TopKAccuracyEvaluator, ThresholdAccuracyEvaluator, MeanAveragePrecisionEvaluatorForSingleIOU, EceLossEvaluator
class TestClassificationEvaluator(unittest.TestCase):
TARGETS = np.array([1, 0, 0, 0, 1, 1, 0, 0, 0, 1])
PREDICTIONS = np.array([[1, 0],
[0, 1],
[0.5, 0.5],
[0.1, 0.9],
[0.44, 0.56],
[0.09, 0.91],
[0.91, 0.09],
[0.37, 0.63],
[0.34, 0.66],
[0.89, 0.11]])
def test_top_k_accuracy_evaluator(self):
top1_acc_evaluator = TopKAccuracyEvaluator(1)
top1_acc_evaluator.add_predictions(self.PREDICTIONS, self.TARGETS)
top5_acc_evaluator = TopKAccuracyEvaluator(5)
top5_acc_evaluator.add_predictions(self.PREDICTIONS, self.TARGETS)
self.assertEqual(top1_acc_evaluator.get_report(average='micro')["top1_accuracy"], 0.4)
self.assertEqual(top5_acc_evaluator.get_report(average='micro')["top5_accuracy"], 1.0)
self.assertEqual(top1_acc_evaluator.get_report(average='macro')["top1_accuracy"], 0.4)
self.assertEqual(top5_acc_evaluator.get_report(average='macro')["top5_accuracy"], 1.0)
def test_average_precision_evaluator(self):
evaluator = AveragePrecisionEvaluator()
evaluator.add_predictions(self.PREDICTIONS, self.TARGETS)
self.assertEqual(evaluator.get_report(average='micro')["average_precision"], 0.4476823176823177)
self.assertEqual(evaluator.get_report(average='macro')["average_precision"], 0.47574404761904765)
def test_ece_loss_evaluator(self):
evaluator = EceLossEvaluator()
evaluator.add_predictions(self.PREDICTIONS, self.TARGETS)
self.assertEqual(0.584, evaluator.get_report()["calibration_ece"])
def test_threshold_accuracy_evaluator(self):
thresh03_evaluator = ThresholdAccuracyEvaluator(0.3)
thresh03_evaluator.add_predictions(self.PREDICTIONS, self.TARGETS)
self.assertEqual(0.4, thresh03_evaluator.get_report()["accuracy_0.3"])
thresh05_evaluator = ThresholdAccuracyEvaluator(0.5)
thresh05_evaluator.add_predictions(self.PREDICTIONS, self.TARGETS)
self.assertEqual(0.3, thresh05_evaluator.get_report()["accuracy_0.5"])
class TestMeanAveragePrecisionEvaluatorForSingleIOU(unittest.TestCase):
def test_perfect_one_image(self):
evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)
predictions = [[[0, 1.0, 0, 0, 1, 1],
[1, 1.0, 0.5, 0.5, 1, 1],
[2, 1.0, 0.1, 0.1, 0.5, 0.5]]]
targets = [[[0, 0, 0, 1, 1],
[1, 0.5, 0.5, 1, 1],
[2, 0.1, 0.1, 0.5, 0.5]]]
evaluator.add_predictions(predictions, targets)
report = evaluator.get_report()
self.assertEqual(report["mAP_50"], 1.0)
self.assertTrue(isinstance(report["mAP_50"], float))
def test_wrong_one_image(self):
evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)
predictions = [[[0, 1.0, 0, 0, 1, 1],
[0, 1.0, 0.5, 0.5, 1, 1],
[1, 1.0, 0.5, 0.5, 1, 1]]]
targets = [[[0, 0, 0, 1, 1],
[1, 0.5, 0.5, 1, 1]]]
evaluator.add_predictions(predictions, targets)
report = evaluator.get_report()
self.assertEqual(report["mAP_50"], 0.75)
self.assertTrue(isinstance(report["mAP_50"], float))
def test_perfect_two_images(self):
evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)
predictions = [[[0, 1.0, 0, 0, 1, 1],
[1, 1.0, 0.5, 0.5, 1, 1]],
[[2, 1.0, 0.1, 0.1, 0.5, 0.5]]]
targets = [[[0, 0, 0, 1, 1],
[1, 0.5, 0.5, 1, 1]],
[[2, 0.1, 0.1, 0.5, 0.5]]]
evaluator.add_predictions(predictions, targets)
report = evaluator.get_report()
self.assertEqual(report["mAP_50"], 1.0)
self.assertTrue(isinstance(report["mAP_50"], float))
def test_two_batches(self):
evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)
predictions = [[[0, 1.0, 0, 0, 1, 1],
[1, 1.0, 0.5, 0.5, 1, 1]],
[[2, 1.0, 0.1, 0.1, 0.5, 0.5]]]
targets = [[[0, 0, 0, 1, 1],
[1, 0.5, 0.5, 1, 1]],
[[2, 0.1, 0.1, 0.5, 0.5]]]
evaluator.add_predictions(predictions, targets)
predictions = [[[0, 1.0, 0.9, 0.9, 1, 1], # Wrong
[1, 1.0, 0.5, 0.5, 1, 1]],
[[2, 1.0, 0.1, 0.1, 0.5, 0.5]]]
targets = [[[0, 0, 0, 1, 1],
[1, 0.5, 0.5, 1, 1]],
[[2, 0.1, 0.1, 0.5, 0.5]]]
evaluator.add_predictions(predictions, targets)
report = evaluator.get_report()
self.assertEqual(report["mAP_50"], 0.75)
self.assertTrue(isinstance(report["mAP_50"], float))
def test_iou_threshold(self):
evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)
predictions = [[[0, 1.0, 0.5, 0.5, 1, 1], # IOU 0.25
[1, 1.0, 0.5, 0.5, 1, 1]]]
targets = [[[0, 0, 0, 1, 1],
[1, 0.5, 0.5, 1, 1]]]
evaluator.add_predictions(predictions, targets)
report = evaluator.get_report()
self.assertEqual(report["mAP_50"], 0.5)
self.assertTrue(isinstance(report["mAP_50"], float))
evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.2)
predictions = [[[0, 1.0, 0.5, 0.5, 1, 1], # IOU 0.25
[1, 1.0, 0.5, 0.5, 1, 1]]]
targets = [[[0, 0, 0, 1, 1],
[1, 0.5, 0.5, 1, 1]]]
evaluator.add_predictions(predictions, targets)
report = evaluator.get_report()
self.assertEqual(report["mAP_20"], 1.0)
self.assertTrue(isinstance(report["mAP_20"], float))
def test_no_predictions(self):
evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)
predictions = [[]]
targets = [[[0, 0, 0, 1, 1],
[1, 0.5, 0.5, 1, 1],
[2, 0.1, 0.1, 0.5, 0.5]]]
evaluator.add_predictions(predictions, targets)
report = evaluator.get_report()
self.assertEqual(report["mAP_50"], 0.0)
self.assertTrue(isinstance(report["mAP_50"], float))
def test_no_targets(self):
evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)
predictions = [[[0, 1.0, 0, 0, 1, 1],
[1, 1.0, 0.5, 0.5, 1, 1],
[2, 1.0, 0.1, 0.1, 0.5, 0.5]]]
targets = [[]]
evaluator.add_predictions(predictions, targets)
report = evaluator.get_report()
self.assertEqual(report["mAP_50"], 0.0)
self.assertTrue(isinstance(report["mAP_50"], float))
def test_empty_result(self):
evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)
report = evaluator.get_report()
self.assertIn('mAP_50', report)
self.assertEqual(report["mAP_50"], 0.0)
self.assertTrue(isinstance(report["mAP_50"], float))
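The test module above has no `unittest.main()` entry point; a hedged sketch of driving it with the standard library's discovery API (assumes it is run from the repository root with the `vision_evaluation` package importable, e.g. after an editable install):

```python
import unittest

# Discover every test_*.py module under test/ and run it with a text runner.
suite = unittest.defaultTestLoader.discover("test")
unittest.TextTestRunner(verbosity=2).run(suite)
```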

3
tox.ini Normal file
View file

@@ -0,0 +1,3 @@
[flake8]
exclude = .git,build,dist
max-line-length = 200

vision_evaluation/__init__.py
View file

@@ -0,0 +1,3 @@
from .evaluators import MeanAveragePrecisionEvaluatorForMultipleIOUs, TopKAccuracyEvaluator, ThresholdAccuracyEvaluator, AveragePrecisionEvaluator, EceLossEvaluator
__all__ = ['MeanAveragePrecisionEvaluatorForMultipleIOUs', 'TopKAccuracyEvaluator', 'ThresholdAccuracyEvaluator', "AveragePrecisionEvaluator", "EceLossEvaluator"]

vision_evaluation/evaluators.py
View file

@@ -0,0 +1,339 @@
import collections
import statistics
import sklearn.metrics
import numpy as np
from abc import ABC
def _top_k_prediction_indices(prediction, k):
top_k_preds = np.argsort(-prediction, axis=1)[:, :k]
return top_k_preds
def _targets_to_mat(targets, n_class):
if len(targets.shape) == 1:
target_mat = np.zeros((len(targets), n_class), dtype=int)
for i, t in enumerate(targets):
target_mat[i, t] = 1
else:
target_mat = targets
return target_mat
class Evaluator(ABC):
"""Class to evaluate model outputs and report the result.
"""
def __init__(self):
self.reset()
def add_predictions(self, predictions, targets):
raise NotImplementedError
def get_report(self, **kwargs):
raise NotImplementedError
def add_custom_field(self, name, value):
self.custom_fields[name] = str(value)
def reset(self):
self.custom_fields = {}
class TopKAccuracyEvaluator(Evaluator):
def __init__(self, k):
self.k = k
super(TopKAccuracyEvaluator, self).__init__()
def reset(self):
super(TopKAccuracyEvaluator, self).reset()
self.total_num = 0
self.topk_correct_num = 0
def add_predictions(self, predictions, targets):
""" Evaluate a batch of predictions.
Args:
predictions: the model output numpy array. Shape (N, num_class)
targets: the ground-truth labels. Shape (N,)
"""
assert len(predictions) == len(targets)
assert len(targets.shape) == 1
n_sample = len(predictions)
n_class = predictions.shape[1]
k = min(self.k, n_class)
top_k_predictions = _top_k_prediction_indices(predictions, k)
self.topk_correct_num += len([1 for sample_idx in range(n_sample) if targets[sample_idx] in top_k_predictions[sample_idx]])
self.total_num += len(predictions)
def get_report(self, **kwargs):
return {f'top{self.k}_accuracy': float(self.topk_correct_num) / self.total_num if self.total_num else 0.0}
class AveragePrecisionEvaluator(Evaluator, ABC):
def reset(self):
super(AveragePrecisionEvaluator, self).reset()
self.all_targets = np.array([])
self.all_predictions = np.array([])
def add_predictions(self, predictions, targets):
target_mat = _targets_to_mat(targets, predictions.shape[1])
if self.all_predictions.size != 0:
self.all_predictions = np.append(self.all_predictions, predictions, axis=0)
else:
self.all_predictions = np.copy(predictions)
if self.all_targets.size != 0:
self.all_targets = np.append(self.all_targets, target_mat, axis=0)
else:
self.all_targets = np.copy(target_mat)
def calculate_average_precision_score(self, average='macro'):
"""
average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']
If ``None``, the scores for each class are returned. Otherwise,
this determines the type of averaging performed on the data:
``'micro'``:
Calculate metrics globally by considering each element of the label
indicator matrix as a label.
``'macro'``:
Calculate metrics for each label, and find their unweighted
mean. This does not take label imbalance into account.
``'weighted'``:
Calculate metrics for each label, and find their average, weighted
by support (the number of true instances for each label).
``'samples'``:
Calculate metrics for each instance, and find their average.
"""
assert self.all_targets.size == self.all_predictions.size
ap = 0.0
if self.all_targets.size > 0:
non_empty_idx = np.where(np.invert(np.all(self.all_targets == 0, axis=0)))[0]
if non_empty_idx.size != 0:
ap = sklearn.metrics.average_precision_score(self.all_targets[:, non_empty_idx], self.all_predictions[:, non_empty_idx], average=average)
return ap
def get_report(self, **kwargs):
return {'average_precision': self.calculate_average_precision_score(kwargs['average'])}
class EceLossEvaluator(Evaluator):
"""
Computes the expected calibration error (ECE) given the model confidence and true labels for a set of data points.
https://arxiv.org/pdf/1706.04599.pdf
"""
def __init__(self, n_bins=15):
# Calibration ECE: divide the confidence range [0, 1] into n_bins equal-width bins.
self.n_bins = n_bins
bins = np.linspace(0, 1, self.n_bins + 1)
self.bin_lower_bounds = bins[:-1]
self.bin_upper_bounds = bins[1:]
super(EceLossEvaluator, self).__init__()
def add_predictions(self, predictions, targets):
""" Evaluate a batch of predictions.
Args:
predictions: the model output numpy array. Shape (N, num_class)
targets: the ground-truth labels. Shape (N,)
"""
# calibration_ece
self.total_num += len(predictions)
indices = _top_k_prediction_indices(predictions, 1).flatten()
confidence = predictions[np.arange(len(predictions)), indices]
correct = (indices == targets)
for bin_i in range(self.n_bins):
bin_lower_bound, bin_upper_bound = self.bin_lower_bounds[bin_i], self.bin_upper_bounds[bin_i]
in_bin = np.logical_and(confidence > bin_lower_bound, confidence <= bin_upper_bound)
self.total_correct_in_bin[bin_i] += correct[in_bin].astype(int).sum()
self.sum_confidence_in_bin[bin_i] += confidence[in_bin].astype(float).sum()
def get_report(self, **kwargs):
return {'calibration_ece': float(np.sum(np.abs(self.total_correct_in_bin - self.sum_confidence_in_bin)) / self.total_num) if self.total_num else 0.0}
def reset(self):
super(EceLossEvaluator, self).reset()
self.total_num = 0
self.total_correct_in_bin = np.zeros(self.n_bins)
self.sum_confidence_in_bin = np.zeros(self.n_bins)
class ThresholdAccuracyEvaluator(Evaluator):
def __init__(self, threshold):
super(ThresholdAccuracyEvaluator, self).__init__()
self._threshold = threshold
def add_predictions(self, predictions, targets):
""" Evaluate a batch of predictions.
Args:
predictions: the model output array. Shape (N, num_class)
targets: the ground truths. Shape (N, num_class)
"""
assert len(predictions) == len(targets)
target_mat = _targets_to_mat(targets, predictions.shape[1])
prediction_over_thres = predictions > self._threshold
num = np.multiply(prediction_over_thres, target_mat).sum(1) # shape (N,)
den = (np.add(prediction_over_thres, target_mat) >= 1).sum(1) # shape (N,)
den[den == 0] = 1 # To avoid zero-division. If den==0, num should be zero as well.
self.correct_num += (num / den).sum()
self.total_num += len(predictions)
def get_report(self, average='macro'):
return {f'accuracy_{self._threshold}': float(self.correct_num) / self.total_num if self.total_num else 0.0}
def reset(self):
super(ThresholdAccuracyEvaluator, self).reset()
self.correct_num = 0
self.total_num = 0
class MeanAveragePrecisionEvaluatorForSingleIOU(Evaluator):
def __init__(self, iou=0.5):
super(MeanAveragePrecisionEvaluatorForSingleIOU, self).__init__()
self.iou = iou
def add_predictions(self, predictions, targets):
""" Evaluate list of image with object detection results using single IOU evaluation.
Args:
predictions: list of predictions [[[label_idx, probability, L, T, R, B], ...], [...], ...]
targets: list of image targets [[[label_idx, L, T, R, B], ...], ...]
"""
assert len(predictions) == len(targets)
eval_predictions = collections.defaultdict(list)
eval_ground_truths = collections.defaultdict(dict)
for img_idx, prediction in enumerate(predictions):
for bbox in prediction:
label = int(bbox[0])
eval_predictions[label].append([img_idx, float(bbox[1]), float(bbox[2]), float(bbox[3]), float(bbox[4]), float(bbox[5])])
for img_idx, target in enumerate(targets):
for bbox in target:
label = int(bbox[0])
if img_idx not in eval_ground_truths[label]:
eval_ground_truths[label][img_idx] = []
eval_ground_truths[label][img_idx].append([float(bbox[1]), float(bbox[2]), float(bbox[3]), float(bbox[4])])
class_indices = set(list(eval_predictions.keys()) + list(eval_ground_truths.keys()))
for class_index in class_indices:
is_correct, probabilities = self._evaluate_predictions(eval_ground_truths[class_index], eval_predictions[class_index], self.iou)
true_num = sum([len(t) for t in eval_ground_truths[class_index].values()])
self.is_correct[class_index].extend(is_correct)
self.probabilities[class_index].extend(probabilities)
self.true_num[class_index] += true_num
@staticmethod
def _calculate_area(rect):
w = rect[2] - rect[0] + 1e-5
h = rect[3] - rect[1] + 1e-5
return float(w * h) if w > 0 and h > 0 else 0.0
@staticmethod
def _calculate_iou(rect0, rect1):
rect_intersect = [max(rect0[0], rect1[0]),
max(rect0[1], rect1[1]),
min(rect0[2], rect1[2]),
min(rect0[3], rect1[3])]
calc_area = MeanAveragePrecisionEvaluatorForSingleIOU._calculate_area
area_intersect = calc_area(rect_intersect)
return area_intersect / (calc_area(rect0) + calc_area(rect1) - area_intersect)
def _is_true_positive(self, prediction, ground_truth, already_detected, iou_threshold):
image_id = prediction[0]
prediction_rect = prediction[2:6]
if image_id not in ground_truth:
return False, already_detected
ious = np.array([self._calculate_iou(prediction_rect, g) for g in ground_truth[image_id]])
best_bb = np.argmax(ious)
best_iou = ious[best_bb]
if best_iou < iou_threshold or (image_id, best_bb) in already_detected:
return False, already_detected
already_detected.add((image_id, best_bb))
return True, already_detected
def _evaluate_predictions(self, ground_truths, predictions, iou_threshold):
""" Evaluate the correctness of the given predictions.
Args:
ground_truths: Ground truths for the class, as a dict {image_id: [[left, top, right, bottom], ...], ...}
predictions: List of predictions for the class. [[image_id, probability, left, top, right, bottom], [...], ...]
iou_threshold: Minimum IOU for a prediction to be counted as a match for a ground-truth box.
"""
# Sort the predictions by the probability
sorted_predictions = sorted(predictions, key=lambda x: -x[1])
already_detected = set()
is_correct = []
for prediction in sorted_predictions:
correct, already_detected = self._is_true_positive(prediction, ground_truths, already_detected,
iou_threshold)
is_correct.append(correct)
is_correct = np.array(is_correct)
probabilities = np.array([p[1] for p in sorted_predictions])
return is_correct, probabilities
@staticmethod
def _calculate_average_precision(is_correct, probabilities, true_num, average='macro'):
if true_num == 0:
return 0
if not is_correct or not any(is_correct):
return 0
recall = float(np.sum(is_correct)) / true_num
return sklearn.metrics.average_precision_score(is_correct, probabilities, average=average) * recall
def get_report(self, average='macro'):
all_aps = []
for class_index in self.is_correct:
ap = MeanAveragePrecisionEvaluatorForSingleIOU._calculate_average_precision(self.is_correct[class_index], self.probabilities[class_index], self.true_num[class_index], average)
all_aps.append(ap)
mean_ap = float(statistics.mean(all_aps)) if all_aps else 0.0
return {"mAP_{}".format(int(self.iou * 100)): mean_ap}
def reset(self):
self.is_correct = collections.defaultdict(list)
self.probabilities = collections.defaultdict(list)
self.true_num = collections.defaultdict(int)
super(MeanAveragePrecisionEvaluatorForSingleIOU, self).reset()
class MeanAveragePrecisionEvaluatorForMultipleIOUs(Evaluator):
DEFAULT_IOU_VALUES = [0.3, 0.5, 0.75, 0.9]
def __init__(self, ious=DEFAULT_IOU_VALUES):
self.evaluators = [MeanAveragePrecisionEvaluatorForSingleIOU(iou)
for iou in ious]
super(MeanAveragePrecisionEvaluatorForMultipleIOUs, self).__init__()
def add_predictions(self, predictions, targets):
for evaluator in self.evaluators:
evaluator.add_predictions(predictions, targets)
def get_report(self, **kwargs):
report = {}
for evaluator in self.evaluators:
report.update(evaluator.get_report(kwargs['average']))
return report
def reset(self):
for evaluator in self.evaluators:
evaluator.reset()
super(MeanAveragePrecisionEvaluatorForMultipleIOUs, self).reset()
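Finally, a hedged end-to-end sketch of the detection evaluators defined above, using the `[label_idx, probability, left, top, right, bottom]` prediction format and `[label_idx, left, top, right, bottom]` target format documented in `add_predictions`; the boxes are invented example data:

```python
from vision_evaluation.evaluators import (
    MeanAveragePrecisionEvaluatorForSingleIOU,
    MeanAveragePrecisionEvaluatorForMultipleIOUs,
)

# One image with two predicted boxes: [label_idx, probability, left, top, right, bottom].
predictions = [[[0, 0.95, 0.0, 0.0, 1.0, 1.0],
                [1, 0.80, 0.5, 0.5, 1.0, 1.0]]]
# Ground truth for the same image: [label_idx, left, top, right, bottom].
targets = [[[0, 0.0, 0.0, 1.0, 1.0],
            [1, 0.5, 0.5, 1.0, 1.0]]]

single = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)
single.add_predictions(predictions, targets)
print(single.get_report())  # {'mAP_50': 1.0} for this perfect match

multi = MeanAveragePrecisionEvaluatorForMultipleIOUs()  # default IoU thresholds 0.3, 0.5, 0.75, 0.9
multi.add_predictions(predictions, targets)
print(multi.get_report(average='macro'))  # one mAP_<iou> key per threshold
```

The report keys encode the IoU threshold as a percentage (e.g. `mAP_50`), matching the assertions in `test/test_evaluators.py`.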