Exif Nested Metadata support for Image Dashboards (#2542)

* exif metadata extension * requirement update for latest pillow attributes * gate update to avoid py 3.7 * python lint fixes * isort lint fixes * test fixes * python lint fixes * modified gate for matplotlib on windows * lint fixes * gate fix * gate fix * reverted gate * test fix based on OS * isort fix * auto lintfix * adding matplotlib install * tweaking matplotlib install * gate cleanup * removed byte decoding * multiple click support * IC test fixes * test fixes * test fixes * test fixes * test fix * test fix * test changes * auto lint fixes * comment fixes * comment fixes * auto lint fixes
2024-03-05 13:53:14 -08:00 · 2024-03-05 13:53:14 -08:00 · e2c7dd0a95
--- a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeImageClassificationModelDebugging.ts
+++ b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeImageClassificationModelDebugging.ts
@ -23,7 +23,7 @@ export const FridgeImageClassificationModelDebugging = {
  modelOverviewData: {
    featureCohortView: {
      firstFeatureToSelect: "mean_pixel_value",
-      multiFeatureCohorts: 3,
+      multiFeatureCohorts: 6,
      secondFeatureToSelect: "Make",
      singleFeatureCohorts: 3
    },
--- a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeMultilabelModelDebugging.ts
+++ b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeMultilabelModelDebugging.ts
@ -23,7 +23,7 @@ export const FridgeMultilabelModelDebugging = {
  modelOverviewData: {
    featureCohortView: {
      firstFeatureToSelect: "mean_pixel_value",
-      multiFeatureCohorts: 3,
+      multiFeatureCohorts: 6,
      secondFeatureToSelect: "Make",
      singleFeatureCohorts: 3
    },
--- a/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeObjectDetectionModelDebugging.ts
+++ b/libs/e2e/src/lib/describer/modelAssessment/datasets/FridgeObjectDetectionModelDebugging.ts
@ -3,7 +3,7 @@

 import { getOS } from "../../../../util/getOS";

-const FeatureCohorts = getOS() === "Linux" ? [2, 3] : 3;
+const FeatureCohorts = getOS() === "Linux" ? [3, 6] : 6;

 export const FridgeObjectDetectionModelDebugging = {
  causalAnalysisData: {
--- a/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent.ts
+++ b/libs/e2e/src/lib/describer/modelAssessment/modelOverview/ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionArePresent.ts
@ -40,7 +40,8 @@ export function ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionAr
    assertNumberOfChartRowsEqual(
      datasetShape,
      selectedFeatures,
-      defaultVisibleChart
+      defaultVisibleChart,
+      isVision
    );
  }
 }
@ -48,20 +49,21 @@ export function ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionAr
 function assertNumberOfChartRowsEqual(
  datasetShape: IModelAssessmentData,
  selectedFeatures: number,
-  chartIdentifier: Locators
+  chartIdentifier: Locators,
+  isVision: boolean
 ): void {
  const featureCohortView = datasetShape.modelOverviewData?.featureCohortView;
  let expectedNumberOfCohorts = featureCohortView?.singleFeatureCohorts;
  if (selectedFeatures > 1) {
    expectedNumberOfCohorts = featureCohortView?.multiFeatureCohorts;
  }
-  console.log(selectedFeatures);
-  console.log(expectedNumberOfCohorts);
  if (Array.isArray(expectedNumberOfCohorts)) {
    cy.get(getChartItems(chartIdentifier))
      .its("length")
      .should("be.gte", expectedNumberOfCohorts[0])
      .and("be.lte", expectedNumberOfCohorts[1]);
+  } else if (isVision) {
+    cy.get(getChartItems(chartIdentifier)).its("length").should("be.gt", 2);
  } else {
    cy.get(getChartItems(chartIdentifier)).should(
      "have.length",
--- a/libs/e2e/src/util/comboBox.ts
+++ b/libs/e2e/src/util/comboBox.ts
@ -38,5 +38,5 @@ export function multiSelectComboBox(
        item
      )}`
    )
-    .click();
+    .click({ multiple: true });
 }
--- a/responsibleai_vision/responsibleai_vision/utils/feature_extractors.py
+++ b/responsibleai_vision/responsibleai_vision/utils/feature_extractors.py
@ -7,15 +7,16 @@ import warnings
 from typing import Optional

 import pandas as pd
-from PIL import Image
+from PIL import ExifTags, Image
 from PIL.ExifTags import TAGS
+from PIL.TiffImagePlugin import IFDRational
 from tqdm import tqdm

 from responsibleai.feature_metadata import FeatureMetadata
 from responsibleai_vision.common.constants import (ExtractedFeatures,
                                                   ImageColumns)
 from responsibleai_vision.utils.image_reader import (
-    get_all_exif_feature_names, get_image_from_path,
+    IFD_CODE_LOOKUP, get_all_exif_feature_names, get_image_from_path,
    get_image_pointer_from_path)

 MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
@ -91,35 +92,48 @@ def extract_features(image_dataset: pd.DataFrame,
    return results, feature_names


-def append_exif_features(image, row_feature_values, feature_names,
-                         blacklisted_tags, feature_metadata):
+def process_data(data,
+                 tag,
+                 feature_names,
+                 feature_metadata,
+                 row_feature_values,
+                 blacklisted_tags):
+    if isinstance(data, IFDRational):
+        data = data.numerator / data.denominator
+    if isinstance(data, (str, int, float)):
+        if tag in feature_names:
+            if tag not in feature_metadata.categorical_features:
+                feature_metadata.categorical_features.append(tag)
+            row_feature_values[feature_names.index(tag)] = data
+        elif tag not in blacklisted_tags:
+            blacklisted_tags.add(tag)
+            warnings.warn(
+                f'Exif tag {tag} could not be found '
+                'in the feature names. Ignoring tag '
+                'from extracted metadata.')
+
+
+def append_exif_features(image,
+                         row_feature_values,
+                         feature_names,
+                         blacklisted_tags,
+                         feature_metadata):
    if isinstance(image, str):
        image_pointer_path = get_image_pointer_from_path(image)
        with Image.open(image_pointer_path) as im:
            exifdata = im.getexif()
            for tag_id in exifdata:
-                # get the tag name, instead of human unreadable tag id
-                tag = str(TAGS.get(tag_id, tag_id))
-                data = exifdata.get(tag_id)
-                # decode bytes
-                if isinstance(data, bytes):
-                    data = data.decode()
-                    if len(data) > MAX_CUSTOM_LEN:
-                        data = data[:MAX_CUSTOM_LEN] + '...'
-                if isinstance(data, str):
-                    if tag in feature_names:
-                        if tag not in feature_metadata.categorical_features:
-                            feature_metadata.categorical_features.append(tag)
-                        tag_index = feature_names.index(tag)
-                        row_feature_values[tag_index] = data
-                    else:
-                        # in theory this should now never happen with
-                        # latest code, but adding this check for safety
-                        if tag not in blacklisted_tags:
-                            blacklisted_tags.add(tag)
-                            warnings.warn(
-                                f'Exif tag {tag} could not be found '
-                                'in the feature names. Ignoring tag '
-                                'from extracted metadata.')
-                elif isinstance(data, int) or isinstance(data, float):
-                    row_feature_values[feature_names.index(tag)] = data
+                if tag_id in IFD_CODE_LOOKUP:
+                    ifd_data = exifdata.get_ifd(tag_id)
+                    for nested_tag_id, data in ifd_data.items():
+                        tag = ExifTags.GPSTAGS.get(nested_tag_id, None) \
+                            or ExifTags.TAGS.get(nested_tag_id, None) \
+                            or nested_tag_id
+                        process_data(data, tag, feature_names,
+                                     feature_metadata, row_feature_values,
+                                     blacklisted_tags)
+                else:
+                    tag = str(TAGS.get(tag_id, tag_id))
+                    data = exifdata.get(tag_id)
+                    process_data(data, tag, feature_names, feature_metadata,
+                                 row_feature_values, blacklisted_tags)
--- a/responsibleai_vision/responsibleai_vision/utils/image_reader.py
+++ b/responsibleai_vision/responsibleai_vision/utils/image_reader.py
@ -10,7 +10,7 @@ from urllib.parse import urlparse

 import requests
 from numpy import asarray
-from PIL import Image
+from PIL import ExifTags, Image
 from PIL.ExifTags import TAGS
 from requests.adapters import HTTPAdapter, Retry

@ -20,6 +20,8 @@ from responsibleai_vision.common.constants import (AutoMLImagesModelIdentifier,
 # domain mapped session for reuse
 _requests_sessions = {}

+IFD_CODE_LOOKUP = {t.value: t.name for t in ExifTags.IFD}
+

 def _get_retry_session(url):
    domain = urlparse(url.lower()).netloc
@ -88,15 +90,22 @@ def get_all_exif_feature_names(image_dataset):
            with Image.open(image_pointer_path) as im:
                exifdata = im.getexif()
                for tag_id in exifdata:
-                    # get the tag name, instead of human unreadable tag id
-                    tag = TAGS.get(tag_id, tag_id)
-                    if tag not in image_dataset.columns:
-                        data = exifdata.get(tag_id)
-                        if isinstance(data, str) or \
-                           isinstance(data, int) or \
-                           isinstance(data, float) or \
-                           isinstance(data, bytes):
+                    # nesting for IFD block tags
+                    if tag_id in IFD_CODE_LOOKUP:
+                        ifd_data = exifdata.get_ifd(tag_id)
+
+                        for nested_tag_id in ifd_data:
+                            nested_tag = ExifTags.GPSTAGS.get(nested_tag_id,
+                                                              None) \
+                                or ExifTags.TAGS.get(nested_tag_id, None) \
+                                or nested_tag_id
+                            exif_feature_names.add(nested_tag)
+                    else:
+                        # get the tag name, instead of human unreadable tag id
+                        tag = TAGS.get(tag_id, tag_id)
+                        if tag not in image_dataset.columns:
                            exif_feature_names.add(tag)
+
    return list(exif_feature_names)


--- a/responsibleai_vision/tests/test_feature_extractors.py
+++ b/responsibleai_vision/tests/test_feature_extractors.py
@ -13,10 +13,22 @@ from responsibleai_vision.utils.feature_extractors import extract_features

 MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
 FRIDGE_METADATA_FEATURES = [
-    'Make', 'ResolutionUnit', 'ImageLength', 'ExifOffset', 'Model',
-    'GPSInfo', 'ImageWidth', 'DateTime', 'YCbCrPositioning',
-    'Software', 'Orientation'
-]
+    'SensingMethod', 'GPSVersionID', 'ISOSpeedRatings', 'SceneType',
+    'SceneCaptureType', 'SubjectDistance', 'CustomRendered',
+    'SubjectDistanceRange', 'DigitalZoomRatio', 'ApertureValue',
+    'ImageWidth', 'GPSDOP', 'MaxApertureValue', 'ColorSpace',
+    'FocalLengthIn35mmFilm', 'ExposureMode', 'Saturation', 'ExposureTime',
+    'ExifImageHeight', 'FNumber', 'YCbCrPositioning', 'Make', 'MeteringMode',
+    'ExposureBiasValue', 'ExposureProgram', 'ComponentsConfiguration',
+    'ExifImageWidth', 'ExifInteroperabilityOffset', 'BrightnessValue',
+    'ImageLength', 'FlashPixVersion', 'SubsecTimeOriginal', 'Model',
+    'SubsecTimeDigitized', 'ResolutionUnit', 'DateTimeOriginal', 'XResolution',
+    'FocalLength', 'Sharpness', 'GPSLongitude', 'Contrast', 'Software',
+    'GPSLatitude', 'MakerNote', 'GPSDateStamp', 'GPSAltitude',
+    'GPSProcessingMethod', 'GPSTimeStamp', 'GPSLatitudeRef', 'WhiteBalance',
+    'GPSLongitudeRef', 'Flash', 'SubsecTime', 'YResolution',
+    'DateTimeDigitized', 'DateTime', 'GPSAltitudeRef', 'Orientation',
+    'ShutterSpeedValue', 'ExifVersion']


 def validate_extracted_features(extracted_features, feature_names,
--- a/responsibleai_vision/tests/test_image_utils.py
+++ b/responsibleai_vision/tests/test_image_utils.py
@ -96,8 +96,8 @@ class TestImageUtils(object):
    def test_get_all_exif_feature_names(self):
        image_dataset = load_fridge_object_detection_dataset().head(2)
        exif_feature_names = get_all_exif_feature_names(image_dataset)
-        assert len(exif_feature_names) == 10 if platform.system() == "Linux" \
-            else 11
+        num_features = 49 if platform.system() == "Linux" else 60
+        assert len(exif_feature_names) == num_features

    def test_generate_od_error_labels(self):
        true_y = np.array([[[3, 142, 257, 395, 463, 0]],