Add evaluations for IC and OD (#1)
Parent: 11e804704e
Commit: 2a41819dbb
@@ -0,0 +1,131 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

.idea/*
34 LICENSE
@@ -1,21 +1,21 @@
MIT License

-Copyright (c) Microsoft Corporation.
+Copyright (c) 2020 Microsoft

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE
+SOFTWARE.
43 README.md
@@ -1,33 +1,14 @@
-# Project
+# Introduction
+This repo contains evaluation metric code used in Microsoft Cognitive Services Computer Vision for tasks such as classification and object detection.

-> This repo has been populated by an initial template to help get you started. Please
-> make sure to update the content to build a great experience for community-building.
+# Functionalities
+This repo currently offers evaluation metrics for two vision tasks:

-As the maintainer of this project, please make a few updates:
-
-- Improving this README.MD file to provide a great experience
-- Updating SUPPORT.MD with content about this project's support experience
-- Understanding the security reporting process in SECURITY.MD
-- Remove this section from the README
-
-## Contributing
-
-This project welcomes contributions and suggestions. Most contributions require you to agree to a
-Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
-the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
-
-When you submit a pull request, a CLA bot will automatically determine whether you need to provide
-a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
-provided by the bot. You will only need to do this once across all repos using our CLA.
-
-This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
-For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
-contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
-
-## Trademarks
-
-This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
-trademarks or logos is subject to and must follow
-[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
-Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
-Any use of third-party trademarks or logos are subject to those third-party's policies.
+- Image classification:
+  - `evaluators.TopKAccuracyEvaluator`: computes the top-k accuracy, i.e., the fraction of samples whose true label is among the k predictions with the highest confidence.
+  - `evaluators.AveragePrecisionEvaluator`: computes the average precision, i.e., precision averaged across different confidence thresholds.
+  - `evaluators.ThresholdAccuracyEvaluator`: computes the threshold-based accuracy, i.e., the accuracy of the predictions with confidence over a certain threshold.
+  - `evaluators.EceLossEvaluator`: computes the [ECE loss](https://arxiv.org/pdf/1706.04599.pdf), i.e., the expected calibration error, given the model confidence and true labels for a set of data points.
+- Object detection:
+  - `evaluators.MeanAveragePrecisionEvaluatorForSingleIOU`, `evaluators.MeanAveragePrecisionEvaluatorForMultipleIOUs`: computes the mean average precision (mAP), i.e., average precision (AP) averaged across classes, under a single IoU threshold or multiple [IoU](https://en.wikipedia.org/wiki/Jaccard_index) thresholds.
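The functionality list above names the evaluator classes but gives no usage example. The following minimal sketch is an editorial illustration, not part of the commit: the prediction scores and labels are made up, while the `add_predictions`/`get_report` calls follow the evaluator code and tests later in this diff.

```python
import numpy as np
from vision_evaluation.evaluators import TopKAccuracyEvaluator, EceLossEvaluator

# Illustrative scores for 4 samples over 2 classes, plus their true labels.
predictions = np.array([[0.9, 0.1],
                        [0.2, 0.8],
                        [0.6, 0.4],
                        [0.3, 0.7]])
targets = np.array([0, 1, 1, 0])

# Top-1 accuracy: samples 0 and 1 are predicted correctly, samples 2 and 3 are not.
top1 = TopKAccuracyEvaluator(1)
top1.add_predictions(predictions, targets)
print(top1.get_report())  # {'top1_accuracy': 0.5}

# Expected calibration error over the same batch.
ece = EceLossEvaluator()
ece.add_predictions(predictions, targets)
print(ece.get_report())  # {'calibration_ece': ...}
```

The object detection evaluators follow the same two-call pattern; a corresponding sketch appears after the evaluator source at the end of this page.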
41 SECURITY.md
@@ -1,41 +0,0 @@
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.5 BLOCK -->

## Security

Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).

If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.

## Reporting Security Issues

**Please do not report security vulnerabilities through public GitHub issues.**

Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).

If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).

You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).

Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:

* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue

This information will help us triage your report more quickly.

If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.

## Preferred Languages

We prefer all communications to be in English.

## Policy

Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).

<!-- END MICROSOFT SECURITY.MD BLOCK -->
25 SUPPORT.md
@@ -1,25 +0,0 @@
# TODO: The maintainer of this repo has not yet edited this file

**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?

- **No CSS support:** Fill out this template with information about how to file issues and get help.
- **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport).
- **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide.

*Then remove this first heading from this SUPPORT.MD file before publishing your repo.*

# Support

## How to file issues and get help

This project uses GitHub Issues to track bugs and feature requests. Please search the existing
issues before filing new issues to avoid duplicates. For new issues, file your bug or
feature request as a new Issue.

For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE
FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
CHANNEL. WHERE WILL YOU HELP PEOPLE?**.

## Microsoft Support Policy

Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
@@ -0,0 +1,34 @@
import setuptools
from os import path

VERSION = '0.1.0'

here = path.abspath(path.dirname(__file__))

# Get the long description from the README file
with open(path.join(here, 'README.md'), 'r') as f:
    long_description = f.read()

setuptools.setup(name='vision-evaluation',
                 author='Ping Jin, Shohei Ono, I-Ting Fang',
                 description="Evaluation code for vision tasks.",
                 long_description=long_description,
                 long_description_content_type="text/markdown",
                 url='https://github.com/pj-ms/vision-evaluation',
                 version=VERSION,
                 license='MIT',
                 python_requires='>=3.6',
                 packages=setuptools.find_packages(),
                 keywords='vision metric evaluation classification detection',
                 classifiers=[
                     'Development Status :: 4 - Beta',
                     'Intended Audience :: Developers',
                     'License :: OSI Approved :: MIT License',
                     'Programming Language :: Python :: 3.6',
                     'Programming Language :: Python :: 3.7',
                     'Programming Language :: Python :: 3.8',
                 ],
                 install_requires=[
                     'numpy~=1.18.3',
                     'sklearn',
                 ])
@@ -0,0 +1,186 @@
import unittest
import numpy as np
from vision_evaluation.evaluators import AveragePrecisionEvaluator, TopKAccuracyEvaluator, ThresholdAccuracyEvaluator, MeanAveragePrecisionEvaluatorForSingleIOU, EceLossEvaluator


class TestClassificationEvaluator(unittest.TestCase):
    TARGETS = np.array([1, 0, 0, 0, 1, 1, 0, 0, 0, 1])
    PREDICTIONS = np.array([[1, 0],
                            [0, 1],
                            [0.5, 0.5],
                            [0.1, 0.9],
                            [0.44, 0.56],
                            [0.09, 0.91],
                            [0.91, 0.09],
                            [0.37, 0.63],
                            [0.34, 0.66],
                            [0.89, 0.11]])

    def test_top_k_accuracy_evaluator(self):
        top1_acc_evaluator = TopKAccuracyEvaluator(1)
        top1_acc_evaluator.add_predictions(self.PREDICTIONS, self.TARGETS)

        top5_acc_evaluator = TopKAccuracyEvaluator(5)
        top5_acc_evaluator.add_predictions(self.PREDICTIONS, self.TARGETS)

        self.assertEqual(top1_acc_evaluator.get_report(average='micro')["top1_accuracy"], 0.4)
        self.assertEqual(top5_acc_evaluator.get_report(average='micro')["top5_accuracy"], 1.0)

        self.assertEqual(top1_acc_evaluator.get_report(average='macro')["top1_accuracy"], 0.4)
        self.assertEqual(top5_acc_evaluator.get_report(average='macro')["top5_accuracy"], 1.0)

    def test_average_precision_evaluator(self):
        evaluator = AveragePrecisionEvaluator()
        evaluator.add_predictions(self.PREDICTIONS, self.TARGETS)
        self.assertEqual(evaluator.get_report(average='micro')["average_precision"], 0.4476823176823177)
        self.assertEqual(evaluator.get_report(average='macro')["average_precision"], 0.47574404761904765)

    def test_ece_loss_evaluator(self):
        evaluator = EceLossEvaluator()
        evaluator.add_predictions(self.PREDICTIONS, self.TARGETS)
        self.assertEqual(0.584, evaluator.get_report()["calibration_ece"])

    def test_threshold_accuracy_evaluator(self):
        thresh03_evaluator = ThresholdAccuracyEvaluator(0.3)
        thresh03_evaluator.add_predictions(self.PREDICTIONS, self.TARGETS)
        self.assertEqual(0.4, thresh03_evaluator.get_report()["accuracy_0.3"])

        thresh05_evaluator = ThresholdAccuracyEvaluator(0.5)
        thresh05_evaluator.add_predictions(self.PREDICTIONS, self.TARGETS)
        self.assertEqual(0.3, thresh05_evaluator.get_report()["accuracy_0.5"])


class TestMeanAveragePrecisionEvaluatorForSingleIOU(unittest.TestCase):
    def test_perfect_one_image(self):
        evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)

        predictions = [[[0, 1.0, 0, 0, 1, 1],
                        [1, 1.0, 0.5, 0.5, 1, 1],
                        [2, 1.0, 0.1, 0.1, 0.5, 0.5]]]

        targets = [[[0, 0, 0, 1, 1],
                    [1, 0.5, 0.5, 1, 1],
                    [2, 0.1, 0.1, 0.5, 0.5]]]

        evaluator.add_predictions(predictions, targets)
        report = evaluator.get_report()
        self.assertEqual(report["mAP_50"], 1.0)
        self.assertTrue(isinstance(report["mAP_50"], float))

    def test_wrong_one_image(self):
        evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)

        predictions = [[[0, 1.0, 0, 0, 1, 1],
                        [0, 1.0, 0.5, 0.5, 1, 1],
                        [1, 1.0, 0.5, 0.5, 1, 1]]]

        targets = [[[0, 0, 0, 1, 1],
                    [1, 0.5, 0.5, 1, 1]]]

        evaluator.add_predictions(predictions, targets)
        report = evaluator.get_report()
        self.assertEqual(report["mAP_50"], 0.75)
        self.assertTrue(isinstance(report["mAP_50"], float))

    def test_perfect_two_images(self):
        evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)

        predictions = [[[0, 1.0, 0, 0, 1, 1],
                        [1, 1.0, 0.5, 0.5, 1, 1]],
                       [[2, 1.0, 0.1, 0.1, 0.5, 0.5]]]

        targets = [[[0, 0, 0, 1, 1],
                    [1, 0.5, 0.5, 1, 1]],
                   [[2, 0.1, 0.1, 0.5, 0.5]]]

        evaluator.add_predictions(predictions, targets)
        report = evaluator.get_report()
        self.assertEqual(report["mAP_50"], 1.0)
        self.assertTrue(isinstance(report["mAP_50"], float))

    def test_two_batches(self):
        evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)

        predictions = [[[0, 1.0, 0, 0, 1, 1],
                        [1, 1.0, 0.5, 0.5, 1, 1]],
                       [[2, 1.0, 0.1, 0.1, 0.5, 0.5]]]

        targets = [[[0, 0, 0, 1, 1],
                    [1, 0.5, 0.5, 1, 1]],
                   [[2, 0.1, 0.1, 0.5, 0.5]]]

        evaluator.add_predictions(predictions, targets)

        predictions = [[[0, 1.0, 0.9, 0.9, 1, 1],  # Wrong
                        [1, 1.0, 0.5, 0.5, 1, 1]],
                       [[2, 1.0, 0.1, 0.1, 0.5, 0.5]]]

        targets = [[[0, 0, 0, 1, 1],
                    [1, 0.5, 0.5, 1, 1]],
                   [[2, 0.1, 0.1, 0.5, 0.5]]]

        evaluator.add_predictions(predictions, targets)
        report = evaluator.get_report()
        self.assertEqual(report["mAP_50"], 0.75)
        self.assertTrue(isinstance(report["mAP_50"], float))

    def test_iou_threshold(self):
        evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)

        predictions = [[[0, 1.0, 0.5, 0.5, 1, 1],  # IOU 0.25
                        [1, 1.0, 0.5, 0.5, 1, 1]]]

        targets = [[[0, 0, 0, 1, 1],
                    [1, 0.5, 0.5, 1, 1]]]

        evaluator.add_predictions(predictions, targets)
        report = evaluator.get_report()
        self.assertEqual(report["mAP_50"], 0.5)
        self.assertTrue(isinstance(report["mAP_50"], float))

        evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.2)

        predictions = [[[0, 1.0, 0.5, 0.5, 1, 1],  # IOU 0.25
                        [1, 1.0, 0.5, 0.5, 1, 1]]]

        targets = [[[0, 0, 0, 1, 1],
                    [1, 0.5, 0.5, 1, 1]]]

        evaluator.add_predictions(predictions, targets)
        report = evaluator.get_report()
        self.assertEqual(report["mAP_20"], 1.0)
        self.assertTrue(isinstance(report["mAP_20"], float))

    def test_no_predictions(self):
        evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)

        predictions = [[]]
        targets = [[[0, 0, 0, 1, 1],
                    [1, 0.5, 0.5, 1, 1],
                    [2, 0.1, 0.1, 0.5, 0.5]]]

        evaluator.add_predictions(predictions, targets)
        report = evaluator.get_report()
        self.assertEqual(report["mAP_50"], 0.0)
        self.assertTrue(isinstance(report["mAP_50"], float))

    def test_no_targets(self):
        evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)

        predictions = [[[0, 1.0, 0, 0, 1, 1],
                        [1, 1.0, 0.5, 0.5, 1, 1],
                        [2, 1.0, 0.1, 0.1, 0.5, 0.5]]]

        targets = [[]]

        evaluator.add_predictions(predictions, targets)
        report = evaluator.get_report()
        self.assertEqual(report["mAP_50"], 0.0)
        self.assertTrue(isinstance(report["mAP_50"], float))

    def test_empty_result(self):
        evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)
        report = evaluator.get_report()
        self.assertIn('mAP_50', report)
        self.assertEqual(report["mAP_50"], 0.0)
        self.assertTrue(isinstance(report["mAP_50"], float))
@@ -0,0 +1,3 @@
[flake8]
exclude = .git,build,dist
max-line-length = 200
@@ -0,0 +1,3 @@
from .evaluators import MeanAveragePrecisionEvaluatorForMultipleIOUs, TopKAccuracyEvaluator, ThresholdAccuracyEvaluator, AveragePrecisionEvaluator, EceLossEvaluator

__all__ = ['MeanAveragePrecisionEvaluatorForMultipleIOUs', 'TopKAccuracyEvaluator', 'ThresholdAccuracyEvaluator', 'AveragePrecisionEvaluator', 'EceLossEvaluator']
@@ -0,0 +1,339 @@
import collections
import statistics
import sklearn.metrics
import numpy as np
from abc import ABC


def _top_k_prediction_indices(prediction, k):
    top_k_preds = np.argsort(-prediction, axis=1)[:, :k]
    return top_k_preds


def _targets_to_mat(targets, n_class):
    if len(targets.shape) == 1:
        target_mat = np.zeros((len(targets), n_class), dtype=int)
        for i, t in enumerate(targets):
            target_mat[i, t] = 1
    else:
        target_mat = targets

    return target_mat


class Evaluator(ABC):
    """Class to evaluate model outputs and report the result.
    """

    def __init__(self):
        self.reset()

    def add_predictions(self, predictions, targets):
        raise NotImplementedError

    def get_report(self, **kwargs):
        raise NotImplementedError

    def add_custom_field(self, name, value):
        self.custom_fields[name] = str(value)

    def reset(self):
        self.custom_fields = {}


class TopKAccuracyEvaluator(Evaluator):
    def __init__(self, k):
        self.k = k
        super(TopKAccuracyEvaluator, self).__init__()

    def reset(self):
        super(TopKAccuracyEvaluator, self).reset()
        self.total_num = 0
        self.topk_correct_num = 0

    def add_predictions(self, predictions, targets):
        """ Evaluate a batch of predictions.
        Args:
            predictions: the model output numpy array. Shape (N, num_class)
            targets: the ground truths. Shape (N,)
        """
        assert len(predictions) == len(targets)
        assert len(targets.shape) == 1

        n_sample = len(predictions)
        n_class = predictions.shape[1]

        k = min(self.k, n_class)
        top_k_predictions = _top_k_prediction_indices(predictions, k)
        self.topk_correct_num += len([1 for sample_idx in range(n_sample) if targets[sample_idx] in top_k_predictions[sample_idx]])

        self.total_num += len(predictions)

    def get_report(self, **kwargs):
        return {f'top{self.k}_accuracy': float(self.topk_correct_num) / self.total_num if self.total_num else 0.0}


class AveragePrecisionEvaluator(Evaluator, ABC):
    def reset(self):
        super(AveragePrecisionEvaluator, self).reset()
        self.all_targets = np.array([])
        self.all_predictions = np.array([])

    def add_predictions(self, predictions, targets):
        target_mat = _targets_to_mat(targets, predictions.shape[1])

        if self.all_predictions.size != 0:
            self.all_predictions = np.append(self.all_predictions, predictions, axis=0)
        else:
            self.all_predictions = np.copy(predictions)

        if self.all_targets.size != 0:
            self.all_targets = np.append(self.all_targets, target_mat, axis=0)
        else:
            self.all_targets = np.copy(target_mat)

    def calculate_average_precision_score(self, average='macro'):
        """
        average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']
            If ``None``, the scores for each class are returned. Otherwise,
            this determines the type of averaging performed on the data:

            ``'micro'``:
                Calculate metrics globally by considering each element of the label
                indicator matrix as a label.
            ``'macro'``:
                Calculate metrics for each label, and find their unweighted
                mean. This does not take label imbalance into account.
            ``'weighted'``:
                Calculate metrics for each label, and find their average, weighted
                by support (the number of true instances for each label).
            ``'samples'``:
                Calculate metrics for each instance, and find their average.
        """
        assert self.all_targets.size == self.all_predictions.size
        ap = 0.0
        if self.all_targets.size > 0:
            non_empty_idx = np.where(np.invert(np.all(self.all_targets == 0, axis=0)))[0]
            if non_empty_idx.size != 0:
                ap = sklearn.metrics.average_precision_score(self.all_targets[:, non_empty_idx], self.all_predictions[:, non_empty_idx], average=average)

        return ap

    def get_report(self, **kwargs):
        return {'average_precision': self.calculate_average_precision_score(kwargs['average'])}


class EceLossEvaluator(Evaluator):
    """
    Computes the expected calibration error (ECE) given the model confidence and true labels for a set of data points.

    https://arxiv.org/pdf/1706.04599.pdf
    """

    def __init__(self, n_bins=15):
        # Calibration ECE. Divide the probability range [0, 1] into n_bins bins.
        self.n_bins = n_bins
        bins = np.linspace(0, 1, self.n_bins + 1)
        self.bin_lower_bounds = bins[:-1]
        self.bin_upper_bounds = bins[1:]
        super(EceLossEvaluator, self).__init__()

    def add_predictions(self, predictions, targets):
        """ Evaluate a batch of predictions.
        Args:
            predictions: the model output numpy array. Shape (N, num_class)
            targets: the ground truths. Shape (N,)
        """

        # calibration_ece

        self.total_num += len(predictions)

        indices = _top_k_prediction_indices(predictions, 1).flatten()
        confidence = predictions[np.arange(len(predictions)), indices]
        correct = (indices == targets)
        for bin_i in range(self.n_bins):
            bin_lower_bound, bin_upper_bound = self.bin_lower_bounds[bin_i], self.bin_upper_bounds[bin_i]
            in_bin = np.logical_and(confidence > bin_lower_bound, confidence <= bin_upper_bound)
            self.total_correct_in_bin[bin_i] += correct[in_bin].astype(int).sum()
            self.sum_confidence_in_bin[bin_i] += confidence[in_bin].astype(float).sum()

    def get_report(self, **kwargs):
        return {'calibration_ece': float(np.sum(np.abs(self.total_correct_in_bin - self.sum_confidence_in_bin)) / self.total_num) if self.total_num else 0.0}

    def reset(self):
        super(EceLossEvaluator, self).reset()
        self.total_num = 0
        self.total_correct_in_bin = np.zeros(self.n_bins)
        self.sum_confidence_in_bin = np.zeros(self.n_bins)


class ThresholdAccuracyEvaluator(Evaluator):
    def __init__(self, threshold):
        super(ThresholdAccuracyEvaluator, self).__init__()
        self._threshold = threshold

    def add_predictions(self, predictions, targets):
        """ Evaluate a batch of predictions.
        Args:
            predictions: the model output array. Shape (N, num_class)
            targets: the ground truths. Shape (N, num_class)
        """
        assert len(predictions) == len(targets)

        target_mat = _targets_to_mat(targets, predictions.shape[1])

        prediction_over_thres = predictions > self._threshold
        num = np.multiply(prediction_over_thres, target_mat).sum(1)  # shape (N,)
        den = (np.add(prediction_over_thres, target_mat) >= 1).sum(1)  # shape (N,)
        den[den == 0] = 1  # To avoid zero-division. If den==0, num should be zero as well.
        self.correct_num += (num / den).sum()
        self.total_num += len(predictions)

    def get_report(self, average='macro'):
        return {f'accuracy_{self._threshold}': float(self.correct_num) / self.total_num if self.total_num else 0.0}

    def reset(self):
        super(ThresholdAccuracyEvaluator, self).reset()
        self.correct_num = 0
        self.total_num = 0


class MeanAveragePrecisionEvaluatorForSingleIOU(Evaluator):
    def __init__(self, iou=0.5):
        super(MeanAveragePrecisionEvaluatorForSingleIOU, self).__init__()
        self.iou = iou

    def add_predictions(self, predictions, targets):
        """ Evaluate a list of images with object detection results using a single IOU threshold.
        Args:
            predictions: list of predictions [[[label_idx, probability, L, T, R, B], ...], [...], ...]
            targets: list of image targets [[[label_idx, L, T, R, B], ...], ...]
        """

        assert len(predictions) == len(targets)

        eval_predictions = collections.defaultdict(list)
        eval_ground_truths = collections.defaultdict(dict)
        for img_idx, prediction in enumerate(predictions):
            for bbox in prediction:
                label = int(bbox[0])
                eval_predictions[label].append([img_idx, float(bbox[1]), float(bbox[2]), float(bbox[3]), float(bbox[4]), float(bbox[5])])

        for img_idx, target in enumerate(targets):
            for bbox in target:
                label = int(bbox[0])
                if img_idx not in eval_ground_truths[label]:
                    eval_ground_truths[label][img_idx] = []
                eval_ground_truths[label][img_idx].append([float(bbox[1]), float(bbox[2]), float(bbox[3]), float(bbox[4])])

        class_indices = set(list(eval_predictions.keys()) + list(eval_ground_truths.keys()))
        for class_index in class_indices:
            is_correct, probabilities = self._evaluate_predictions(eval_ground_truths[class_index], eval_predictions[class_index], self.iou)
            true_num = sum([len(t) for t in eval_ground_truths[class_index].values()])

            self.is_correct[class_index].extend(is_correct)
            self.probabilities[class_index].extend(probabilities)
            self.true_num[class_index] += true_num

    @staticmethod
    def _calculate_area(rect):
        w = rect[2] - rect[0] + 1e-5
        h = rect[3] - rect[1] + 1e-5
        return float(w * h) if w > 0 and h > 0 else 0.0

    @staticmethod
    def _calculate_iou(rect0, rect1):
        rect_intersect = [max(rect0[0], rect1[0]),
                          max(rect0[1], rect1[1]),
                          min(rect0[2], rect1[2]),
                          min(rect0[3], rect1[3])]
        calc_area = MeanAveragePrecisionEvaluatorForSingleIOU._calculate_area
        area_intersect = calc_area(rect_intersect)
        return area_intersect / (calc_area(rect0) + calc_area(rect1) - area_intersect)

    def _is_true_positive(self, prediction, ground_truth, already_detected, iou_threshold):
        image_id = prediction[0]
        prediction_rect = prediction[2:6]
        if image_id not in ground_truth:
            return False, already_detected

        ious = np.array([self._calculate_iou(prediction_rect, g) for g in ground_truth[image_id]])
        best_bb = np.argmax(ious)
        best_iou = ious[best_bb]

        if best_iou < iou_threshold or (image_id, best_bb) in already_detected:
            return False, already_detected

        already_detected.add((image_id, best_bb))
        return True, already_detected

    def _evaluate_predictions(self, ground_truths, predictions, iou_threshold):
        """ Evaluate the correctness of the given predictions.
        Args:
            ground_truths: Ground truths for the class. {image_id: [[left, top, right, bottom], [...]], ...}
            predictions: List of predictions for the class. [[image_id, probability, left, top, right, bottom], [...], ...]
            iou_threshold: Minimum IOU threshold to be considered the same bounding box.
        """

        # Sort the predictions by the probability
        sorted_predictions = sorted(predictions, key=lambda x: -x[1])
        already_detected = set()
        is_correct = []
        for prediction in sorted_predictions:
            correct, already_detected = self._is_true_positive(prediction, ground_truths, already_detected,
                                                               iou_threshold)
            is_correct.append(correct)

        is_correct = np.array(is_correct)
        probabilities = np.array([p[1] for p in sorted_predictions])

        return is_correct, probabilities

    @staticmethod
    def _calculate_average_precision(is_correct, probabilities, true_num, average='macro'):
        if true_num == 0:
            return 0
        if not is_correct or not any(is_correct):
            return 0
        recall = float(np.sum(is_correct)) / true_num
        return sklearn.metrics.average_precision_score(is_correct, probabilities, average=average) * recall

    def get_report(self, average='macro'):
        all_aps = []
        for class_index in self.is_correct:
            ap = MeanAveragePrecisionEvaluatorForSingleIOU._calculate_average_precision(self.is_correct[class_index], self.probabilities[class_index], self.true_num[class_index], average)
            all_aps.append(ap)

        mean_ap = float(statistics.mean(all_aps)) if all_aps else 0.0
        return {"mAP_{}".format(int(self.iou * 100)): mean_ap}

    def reset(self):
        self.is_correct = collections.defaultdict(list)
        self.probabilities = collections.defaultdict(list)
        self.true_num = collections.defaultdict(int)
        super(MeanAveragePrecisionEvaluatorForSingleIOU, self).reset()


class MeanAveragePrecisionEvaluatorForMultipleIOUs(Evaluator):
    DEFAULT_IOU_VALUES = [0.3, 0.5, 0.75, 0.9]

    def __init__(self, ious=DEFAULT_IOU_VALUES):
        self.evaluators = [MeanAveragePrecisionEvaluatorForSingleIOU(iou)
                           for iou in ious]
        super(MeanAveragePrecisionEvaluatorForMultipleIOUs, self).__init__()

    def add_predictions(self, predictions, targets):
        for evaluator in self.evaluators:
            evaluator.add_predictions(predictions, targets)

    def get_report(self, **kwargs):
        report = {}
        for evaluator in self.evaluators:
            report.update(evaluator.get_report(kwargs['average']))
        return report

    def reset(self):
        for evaluator in self.evaluators:
            evaluator.reset()
        super(MeanAveragePrecisionEvaluatorForMultipleIOUs, self).reset()
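As promised after the README section, here is the corresponding detection sketch. It is an editorial illustration, not part of the commit: the boxes and scores are made up, while the box layout follows the `add_predictions` docstring above ([label_idx, probability, left, top, right, bottom] for predictions, [label_idx, left, top, right, bottom] for targets).

```python
from vision_evaluation.evaluators import MeanAveragePrecisionEvaluatorForSingleIOU

# One image with two predicted boxes, each matching its ground-truth box exactly.
predictions = [[[0, 0.9, 0.0, 0.0, 1.0, 1.0],
                [1, 0.8, 0.5, 0.5, 1.0, 1.0]]]
targets = [[[0, 0.0, 0.0, 1.0, 1.0],
            [1, 0.5, 0.5, 1.0, 1.0]]]

evaluator = MeanAveragePrecisionEvaluatorForSingleIOU(iou=0.5)
evaluator.add_predictions(predictions, targets)
print(evaluator.get_report())  # {'mAP_50': 1.0}
```

Note that, as written, `MeanAveragePrecisionEvaluatorForMultipleIOUs.get_report` reads `kwargs['average']`, so it must be called with an explicit `average` keyword argument (e.g., `get_report(average='macro')`).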