PatrickBue 2020-03-24 22:21:58 +00:00 committed by GitHub
Parent 735f7ff474
Commit 198b985581
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 1238 additions and 54 deletions

View File

@@ -36,6 +36,7 @@ dependencies:
- pre-commit>=1.14.4
- pyyaml>=5.1.2
- requests>=2.22.0
- cytoolz
- pip:
- nvidia-ml-py3
- nteract-scrapbook

View File

@@ -239,6 +239,14 @@
"You'll notice that inside the annotation xml file, we can see which image the file references `<path>`, the number of `<objects>` in the image, that the image is of (`<name>`) and the bounding box of that object (`<bndbox>`)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### COCO format \n",
"A second popular annotation standard uses the COCO format introduced by the [COCO](http://cocodataset.org) challenge. Annotations in this format need to be converted first to Pascal VOC syntax in order to run this notebook. The function `coco2voc` does exactly that. See the notebook [04_coco_accuracy_vs_speed.ipynb](04_coco_accuracy_vs_speed.ipynb) for a code example."
]
},
{
"cell_type": "markdown",
"metadata": {},

File diff suppressed because one or more lines are too long

View File

@@ -20,6 +20,8 @@ from fastai.vision import cnn_learner, DatasetType, models
from fastai.vision.data import ImageList, imagenet_stats
from typing import List, Tuple
from tempfile import TemporaryDirectory
from .resources import coco_sample
from utils_cv.common.data import unzip_url
from utils_cv.common.gpu import db_num_workers
from utils_cv.classification.data import Urls as ic_urls
@@ -148,6 +150,7 @@ def detection_notebooks():
"01": os.path.join(folder_notebooks, "01_training_introduction.ipynb"),
"02": os.path.join(folder_notebooks, "02_mask_rcnn.ipynb"),
"03": os.path.join(folder_notebooks, "03_keypoint_rcnn.ipynb"),
"04": os.path.join(folder_notebooks, "04_coco_accuracy_vs_speed.ipynb"),
"11": os.path.join(
folder_notebooks, "11_exploring_hyperparameters_on_azureml.ipynb"
),
@@ -684,6 +687,14 @@ def od_detections(od_detection_dataset):
return learner.predict_dl(od_detection_dataset.test_dl, threshold=0)
@pytest.fixture(scope="session")
def coco_sample_path(tmpdir_factory) -> str:
""" Returns the path to a coco-formatted annotation. """
path = tmpdir_factory.mktemp("data").join("coco_sample.json")
path.write_text(coco_sample, encoding=None)
return path
# ----- AML Settings ----------------------------------------------------------

tests/resources.py (new file, 131 lines)
View File

@@ -0,0 +1,131 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# NOTE: This file holds shared string resources (e.g. a sample COCO-formatted
# annotation) that are consumed by the test suite, typically through fixtures
# defined in conftest.py.
coco_sample = """
{
"info": {
"year": "2020",
"version": "1",
"description": "",
"contributor": "",
"url": "https://url",
"date_created": "2020-01-01T00:00:00+00:00"
},
"licenses": [
{
"id": 1,
"url": "",
"name": "name"
}
],
"categories": [
{
"id": 0,
"name": "bottle",
"supercategory": "none"
},
{
"id": 1,
"name": "can",
"supercategory": "cells"
},
{
"id": 2,
"name": "carton",
"supercategory": "cells"
}
],
"images": [
{
"id": 0,
"license": 1,
"file_name": "image0.jpg",
"height": 480,
"width": 640,
"date_captured": "2020-01-01T00:00:00+00:00",
"coco_url": "http://image0.jpg"
},
{
"id": 1,
"license": 1,
"file_name": "image1.jpg",
"height": 480,
"width": 640,
"date_captured": "2020-01-01T00:00:00+00:00",
"url": "http://image1.jpg"
},
{
"id": 2,
"license": 1,
"file_name": "image2.jpg",
"height": 480,
"width": 640,
"date_captured": "2020-01-01T00:00:00+00:00"
}
],
"annotations": [
{
"id": 0,
"image_id": 0,
"category_id": 2,
"bbox": [
100,
200,
300,
400
],
"area": 10000,
"segmentation": [],
"iscrowd": 0
},
{
"id": 1,
"image_id": 0,
"category_id": 1,
"bbox": [
100,
200,
300,
400
],
"area": 10000,
"segmentation": [],
"iscrowd": 0
},
{
"id": 2,
"image_id": 1,
"category_id": 1,
"bbox": [
100,
200,
300,
400
],
"area": 10000,
"segmentation": [],
"iscrowd": 0
},
{
"id": 3,
"image_id": 2,
"category_id": 1,
"bbox": [
100,
200,
300,
400
],
"area": 10000,
"segmentation": [],
"iscrowd": 0
}
]
}
"""

View File

@@ -3,6 +3,7 @@
import hashlib
import numpy as np
import os
import pytest
import requests
@@ -12,12 +13,50 @@ import xml.etree.ElementTree as ET
from utils_cv.detection.data import (
coco_labels,
coco2voc,
Urls,
extract_keypoints_from_labelbox_json,
extract_masks_from_labelbox_json,
)
def test_urls():
# Test if all urls are valid
all_urls = Urls.all()
for url in all_urls:
with requests.get(url):
pass
def test_coco_labels():
# Compare first five labels for quick check
COCO_LABELS_FIRST_FIVE = (
"__background__",
"person",
"bicycle",
"car",
"motorcycle",
)
labels = coco_labels()
for i in range(5):
assert labels[i] == COCO_LABELS_FIRST_FIVE[i]
# Check total number of labels
assert len(labels) == 91
def test_coco2voc(coco_sample_path):
output_dir = "coco2voc_output"
coco2voc(
anno_path=coco_sample_path,
output_dir=output_dir,
download_images=False,
)
filenames = os.listdir(os.path.join(output_dir, "annotations"))
assert len(filenames) == 3
@pytest.fixture(scope="session")
def labelbox_export_data(tmp_session):
tmp_session = Path(tmp_session)
@@ -146,32 +185,6 @@ def labelbox_export_data(tmp_session):
return data_dir, mask_json_path, keypoint_json_path, keypoint_truth_dict
def test_urls():
# Test if all urls are valid
all_urls = Urls.all()
for url in all_urls:
with requests.get(url):
pass
def test_coco_labels():
# Compare first five labels for quick check
COCO_LABELS_FIRST_FIVE = (
"__background__",
"person",
"bicycle",
"car",
"motorcycle",
)
labels = coco_labels()
for i in range(5):
assert labels[i] == COCO_LABELS_FIRST_FIVE[i]
# Check total number of labels
assert len(labels) == 91
def test_extract_keypoints_from_labelbox_json(
labelbox_export_data, tmp_session
):

View File

@@ -112,6 +112,26 @@ def test_03_notebook_run(
)
@pytest.mark.gpu
@pytest.mark.notebooks
def test_04_notebook_run(detection_notebooks, tiny_od_data_path):
notebook_path = detection_notebooks["04"]
pm.execute_notebook(
notebook_path,
OUTPUT_NOTEBOOK,
parameters=dict(
PM_VERSION=pm.__version__,
DATA_PATH=tiny_od_data_path,
LABELS=["can", "carton", "milk_bottle", "water_bottle"]*21 #coco model was pre-trained on 80 classes
),
kernel_name=KERNEL_NAME,
)
nb_output = sb.read_notebook(OUTPUT_NOTEBOOK)
assert len(nb_output.scraps["aps"].data) == 2
assert nb_output.scraps["num_test_images"].data == 38
@pytest.mark.gpu
@pytest.mark.notebooks
def test_12_notebook_run(

View File

@@ -12,6 +12,7 @@ import shutil
import urllib.request
import xml.etree.ElementTree as ET
from .references.anno_coco2voc import coco2voc_main
class Urls:
# for now hardcoding base url into Urls class
@@ -158,6 +159,24 @@ def coco_labels() -> List[str]:
]
def coco2voc(
anno_path: str,
output_dir: str,
anno_type: str = "instance",
download_images: bool = False
) -> None:
""" Convert COCO annotation (single .json file) to Pascal VOC annotations
(multiple .xml files).
Args:
anno_path: path to coco-formatted .json annotation file
output_dir: root output directory
anno_type: "instance" for rectangle annotation, or "keypoint" for keypoint annotation.
download_images: if true then download images from their urls.
"""
coco2voc_main(anno_path, output_dir, anno_type, download_images)
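# A minimal usage sketch (paths are hypothetical): convert a COCO .json file
# into per-image Pascal VOC .xml files under <output_dir>/annotations,
# without downloading the referenced images:
#
#     coco2voc(
#         anno_path="data/coco_sample.json",
#         output_dir="data/voc_output",
#         anno_type="instance",
#         download_images=False,
#     )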
def extract_masks_from_labelbox_json(
labelbox_json_path: Union[str, Path],
data_dir: Union[str, Path],

View File

@@ -233,6 +233,7 @@ class DetectionDataset:
seed: int = None,
allow_negatives: bool = False,
labels: List[str] = None,
max_num_images: int = None,
):
""" initialize dataset
@@ -270,6 +271,7 @@
self.seed = seed
self.keypoint_meta = keypoint_meta
self.labels = labels
self.max_num_images = max_num_images
# read annotations
self._read_annos()
@@ -305,6 +307,7 @@
# it's assumed that the annotation filenames end with .xml.
# If im_dir is not provided, then the image paths are read from inside
# the .xml annotations.
im_paths = None
if self.im_dir is None:
anno_filenames = sorted(os.listdir(self.root / self.anno_dir))
else:
@@ -316,6 +319,13 @@
os.path.splitext(s)[0] + ".xml" for s in im_filenames
]
# Reduce number of images if max_num_images is set
if self.max_num_images and len(anno_filenames) > self.max_num_images:
indices = np.unique(np.floor(np.linspace(0, len(anno_filenames) - 1, self.max_num_images)).astype(int))
anno_filenames = [anno_filenames[i] for i in indices]
if im_paths:
im_paths = [im_paths[i] for i in indices]
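# Illustration: with 10 annotation files and max_num_images=4,
# np.linspace(0, 9, 4) yields [0., 3., 6., 9.], so the files at indices
# [0, 3, 6, 9] are kept, i.e. the subsample is spread evenly over the dataset.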
# Read all annotations
self.im_paths = []
self.anno_paths = []

View File

@@ -27,6 +27,7 @@ import matplotlib.pyplot as plt
from .references.engine import train_one_epoch, evaluate
from .references.coco_eval import CocoEvaluator
from .references.pycocotools_cocoeval import compute_ap
from .bbox import bboxes_iou, DetectionBbox
from ..common.gpu import torch_device
@@ -256,36 +257,24 @@
def _calculate_ap(
e: CocoEvaluator, iou_threshold_idx: Union[int, slice] = slice(0, None)
e: CocoEvaluator,
iou_thres: float = None,
area_range: str = 'all',
max_detections: int = 100,
mode: int = 1,
) -> Dict[str, float]:
""" Calculate the Average Precision (AP) by averaging all iou
thresholds across all labels.
""" Calculate the average precision/recall for differnt IoU ranges.
coco_eval.eval['precision'] is a 5-dimensional array. Each dimension
represents the following:
1. [T] 10 evenly distributed thresholds for IoU, from 0.5 to 0.95. By
default, we use slice(0, None) which is the average from 0.5 to 0.95.
2. [R] 101 recall thresholds, from 0 to 1 in steps of 0.01
3. [K] label, set to slice(0, None) to get precision over all the labels in
the dataset. Then take the mean over all labels.
4. [A] area size range of the target (all-0, small-1, medium-2, large-3)
5. [M] The maximum number of detection frames in a single image where index
0 represents max_det=1, 1 represents max_det=10, 2 represents max_det=100
Therefore, coco_eval.eval['precision'][0, :, 0, 0, 2] represents the 101
precision values corresponding to the 101 recall thresholds from 0 to 1 when IoU=0.5.
Args:
iou_thres: IoU threshold (options: value in [0.5, 0.55, 0.6, ..., 0.95] or None to average over that range)
area_range: area size range of the target (options: ['all', 'small', 'medium', 'large'])
max_detections: maximum number of detection frames in a single image (options: [1, 10, 100])
mode: set to 1 for average precision; otherwise average recall is returned
"""
precision_settings = (
iou_threshold_idx,
slice(0, None),
slice(0, None),
0,
2,
)
ap = {
k: np.mean(np.mean(v.eval["precision"][precision_settings]))
for k, v in e.coco_eval.items()
}
ap = {}
for key in e.coco_eval:
ap[key] = compute_ap(e.coco_eval[key], iouThr=iou_thres, areaRng=area_range, maxDets=max_detections, ap=mode)
return ap
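# Usage sketch, assuming `e` is the CocoEvaluator returned by evaluate():
#     _calculate_ap(e)                  # AP averaged over IoU 0.5:0.95
#     _calculate_ap(e, iou_thres=0.5)   # AP at IoU=0.5 (Pascal VOC style)
#     _calculate_ap(e, mode=0)          # average recall instead of precision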
@@ -554,7 +543,7 @@ class DetectionLearner:
e = self.evaluate(dl=self.dataset.test_dl)
self.ap.append(_calculate_ap(e))
self.ap_iou_point_5.append(
_calculate_ap(e, iou_threshold_idx=0)
_calculate_ap(e, iou_thres=0.5)
)
def plot_precision_loss_curves(

View File

@@ -0,0 +1,214 @@
# Code copied and slightly modified from:
# https://github.com/CasiaFan/Dataset_to_VOC_converter/blob/master/anno_coco2voc.py
#
# Most modifications are highlighted by the keyword "EDITED".
import argparse, json
import cytoolz
from lxml import etree, objectify
import os, re
import urllib.request
from urllib.parse import urlparse
def instance2xml_base(anno, download_images):
# EDITED - make coco_url optional since only used when downloading the images
if 'coco_url' not in anno:
if 'url' in anno:
anno['coco_url'] = anno['url']
elif not download_images:
anno['coco_url'] = "not used anywhere in code"
else:
raise Exception("Annotation has to contain a 'url' or 'coco_url' field to download the image.")
E = objectify.ElementMaker(annotate=False)
anno_tree = E.annotation(
E.folder('VOC2014_instance/{}'.format(anno['category_id'])),
E.filename(anno['file_name']),
E.source(
E.database('MS COCO 2014'),
E.annotation('MS COCO 2014'),
E.image('Flickr'),
E.url(anno['coco_url'])
),
E.size(
E.width(anno['width']),
E.height(anno['height']),
E.depth(3)
),
E.segmented(0),
)
return anno_tree
def instance2xml_bbox(anno, bbox_type='xyxy'):
"""bbox_type: xyxy (xmin, ymin, xmax, ymax); xywh (xmin, ymin, width, height)"""
assert bbox_type in ['xyxy', 'xywh']
if bbox_type == 'xyxy':
xmin, ymin, w, h = anno['bbox']
xmax = xmin+w
ymax = ymin+h
else:
xmin, ymin, xmax, ymax = anno['bbox']
E = objectify.ElementMaker(annotate=False)
anno_tree = E.object(
E.name(anno['category_id']),
E.bndbox(
E.xmin(xmin),
E.ymin(ymin),
E.xmax(xmax),
E.ymax(ymax)
),
E.difficult(anno['iscrowd'])
)
return anno_tree
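# Worked example: a COCO bbox of [100, 200, 300, 400], read as
# (xmin, ymin, width, height), becomes xmin=100, ymin=200, xmax=400, ymax=600
# under bbox_type='xyxy'.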
def parse_instance(content, outdir, download_images=False):
categories = {d['id']: d['name'] for d in content['categories']}
# EDITED - make sure image_id is of type int (and not of type string)
for i in range(len(content['annotations'])):
content['annotations'][i]['image_id'] = int(content['annotations'][i]['image_id'])
# EDITED - save all annotation .xml files into same sub-directory
anno_dir = os.path.join(outdir, "annotations")
if not os.path.exists(anno_dir):
os.makedirs(anno_dir)
# EDITED - download images
if download_images:
im_dir = os.path.join(outdir, "images")
if not os.path.exists(im_dir):
os.makedirs(im_dir)
for index, obj in enumerate(content['images']):
print(f"Downloading image {index} of {len(content['images'])} from: {obj['coco_url']}")
# Update 'filename' field to be a (local) filename and not a url
im_local_filename = os.path.splitext(os.path.basename(obj['file_name']))[0] + ".jpg"
obj['file_name'] = im_local_filename
# download image
dst_path = os.path.join(im_dir, im_local_filename)
urllib.request.urlretrieve(obj['coco_url'], dst_path)
# merge images and annotations: id in images vs image_id in annotations
merged_info_list = list(map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations'])))
# convert category id to name
for instance in merged_info_list:
assert 'category_id' in instance, f"WARNING: annotation error: image {instance['file_name']} has a rectangle without a 'category_id' field."
instance['category_id'] = categories[instance['category_id']]
# group by filename to pool all bbox in same file
img_filenames = {}
names_groups = cytoolz.groupby('file_name', merged_info_list).items()
for index, (name, groups) in enumerate(names_groups):
print(f"Converting annotations for image {index} of {len(names_groups)}: {name}")
assert not name.lower().startswith(("http:", "https:")), "Image file_name seems to be a URL rather than a local path. Set 'download_images' = True so that images are downloaded and their file names localized."
anno_tree = instance2xml_base(groups[0], download_images)
# if one file has multiple different objects, save it in each category sub-directory
filenames = []
for group in groups:
filename = os.path.splitext(name)[0] + ".xml"
# EDITED - save all annotations in single folder, rather than separate folders for each object
#filenames.append(os.path.join(outdir, re.sub(" ", "_", group['category_id']), filename))
filenames.append(os.path.join(anno_dir, filename))
anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
for filename in filenames:
etree.ElementTree(anno_tree).write(filename, pretty_print=True)
def keypoints2xml_base(anno):
annotation = etree.Element("annotation")
etree.SubElement(annotation, "folder").text = "VOC2014_keypoints"
etree.SubElement(annotation, "filename").text = anno['file_name']
source = etree.SubElement(annotation, "source")
etree.SubElement(source, "database").text = "MS COCO 2014"
etree.SubElement(source, "annotation").text = "MS COCO 2014"
etree.SubElement(source, "image").text = "Flickr"
etree.SubElement(source, "url").text = anno['coco_url']
size = etree.SubElement(annotation, "size")
etree.SubElement(size, "width").text = str(anno["width"])
etree.SubElement(size, "height").text = str(anno["height"])
etree.SubElement(size, "depth").text = '3'
etree.SubElement(annotation, "segmented").text = '0'
return annotation
def keypoints2xml_object(anno, xmltree, keypoints_dict, bbox_type='xyxy'):
assert bbox_type in ['xyxy', 'xywh']
if bbox_type == 'xyxy':
xmin, ymin, w, h = anno['bbox']
xmax = xmin+w
ymax = ymin+h
else:
xmin, ymin, xmax, ymax = anno['bbox']
key_object = etree.SubElement(xmltree, "object")
etree.SubElement(key_object, "name").text = anno['category_id']
bndbox = etree.SubElement(key_object, "bndbox")
etree.SubElement(bndbox, "xmin").text = str(xmin)
etree.SubElement(bndbox, "ymin").text = str(ymin)
etree.SubElement(bndbox, "xmax").text = str(xmax)
etree.SubElement(bndbox, "ymax").text = str(ymax)
etree.SubElement(key_object, "difficult").text = '0'
keypoints = etree.SubElement(key_object, "keypoints")
for i in range(0, len(keypoints_dict)):
keypoint = etree.SubElement(keypoints, keypoints_dict[i+1])
etree.SubElement(keypoint, "x").text = str(anno['keypoints'][i*3])
etree.SubElement(keypoint, "y").text = str(anno['keypoints'][i*3+1])
etree.SubElement(keypoint, "v").text = str(anno['keypoints'][i*3+2])
return xmltree
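# Note: COCO stores keypoints as a flat list of (x, y, v) triplets, i.e.
# anno['keypoints'] = [x1, y1, v1, x2, y2, v2, ...], where v is the
# visibility flag, which explains the i*3 indexing above.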
def parse_keypoints(content, outdir):
keypoints = dict(zip(range(1, len(content['categories'][0]['keypoints'])+1), content['categories'][0]['keypoints']))
# merge images and annotations: id in images vs image_id in annotations
merged_info_list = map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations']))
# convert category name to person
for keypoint in merged_info_list:
keypoint['category_id'] = "person"
# group by filename to pool all bbox and keypoint in same file
for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
filename = os.path.join(outdir, os.path.splitext(name)[0]+".xml")
anno_tree = keypoints2xml_base(groups[0])
for group in groups:
anno_tree = keypoints2xml_object(group, anno_tree, keypoints, bbox_type="xyxy")
doc = etree.ElementTree(anno_tree)
doc.write(filename, pretty_print=True)
print("Formating keypoints xml file {} done!".format(name))
def coco2voc_main(anno_file, output_dir, anno_type, download_images=False):
if not os.path.exists(output_dir):
os.makedirs(output_dir)
content = json.load(open(anno_file, 'r'))
if anno_type == 'instance':
# EDITED - save all annotations in single folder, rather than separate folders for each object
# make subdirectories
# sub_dirs = [re.sub(" ", "_", cate['name']) for cate in content['categories']] #EDITED
# for sub_dir in sub_dirs:
# sub_dir = os.path.join(output_dir, str(sub_dir))
# if not os.path.exists(sub_dir):
# os.makedirs(sub_dir)
parse_instance(content, output_dir, download_images)
elif anno_type == 'keypoint':
parse_keypoints(content, output_dir)
else:
raise ValueError("anno_type must be 'instance' or 'keypoint', got '{}'".format(anno_type))
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--anno_file", help="annotation file for object instance/keypoint")
parser.add_argument("--type", type=str, help="object instance or keypoint", choices=['instance', 'keypoint'])
parser.add_argument("--output_dir", help="output directory for voc annotation xml file")
args = parser.parse_args()
coco2voc_main(args.anno_file, args.output_dir, args.type)

View File

@@ -0,0 +1,41 @@
# Code copied and slightly re-arranged from:
# https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py#L422
import numpy as np
from pycocotools.cocoeval import Params
def compute_ap(v, ap=1, iouThr=None, areaRng='all', maxDets=100, verbose=False):
p = Params()
aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
if ap == 1:
# dimension of precision: [TxRxKxAxM]
s = v.eval['precision']
# IoU
if iouThr is not None:
t = np.where(iouThr == p.iouThrs)[0]
s = s[t]
s = s[:,:,:,aind,mind]
else:
# dimension of recall: [TxKxAxM]
s = v.eval['recall']
if iouThr is not None:
t = np.where(iouThr == p.iouThrs)[0]
s = s[t]
s = s[:,:,aind,mind]
if len(s[s>-1])==0:
mean_s = -1
else:
mean_s = np.mean(s[s>-1])
if verbose:
iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
typeStr = '(AP)' if ap==1 else '(AR)'
iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
if iouThr is None else '{:0.2f}'.format(iouThr)
resStr = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
print(resStr)
return mean_s
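# Usage sketch, assuming `coco_eval` is a pycocotools COCOeval instance on
# which evaluate() and accumulate() have already been run:
#     ap_50_95 = compute_ap(coco_eval)            # mAP over IoU 0.5:0.95
#     ap_50 = compute_ap(coco_eval, iouThr=0.5)   # AP at IoU=0.5
#     ar_100 = compute_ap(coco_eval, ap=0)        # average recall, maxDets=100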