Exif Nested Metadata support for Image Dashboards (#2542)

* exif metadata extension

* requirement update for latest pillow attributes

* gate update to avoid py 3.7

* python lint fixes

* isort lint fixes

* test fixes

* python lint fixes

* modified gate for matplotlib on windows

* lint fixes

* gate fix

* gate fix

* reverted gate

* test fix based on OS

* isort fix

* auto lintfix

* adding matplotlib install

* tweaking matplotlib install

* gate cleanup

* removed byte decoding

* multiple click support

* IC test fixes

* test fixes

* test fixes

* test fixes

* test fix

* test fix

* test changes

* auto lint fixes

* comment fixes

* comment fixes

* auto lint fixes
This commit is contained in:
Advitya Gemawat 2024-03-05 13:53:14 -08:00 коммит произвёл GitHub
Родитель 7292ba3f6c
Коммит e2c7dd0a95
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
9 изменённых файлов: 89 добавлений и 52 удалений

Просмотреть файл

@ -23,7 +23,7 @@ export const FridgeImageClassificationModelDebugging = {
modelOverviewData: {
featureCohortView: {
firstFeatureToSelect: "mean_pixel_value",
multiFeatureCohorts: 3,
multiFeatureCohorts: 6,
secondFeatureToSelect: "Make",
singleFeatureCohorts: 3
},

Просмотреть файл

@ -23,7 +23,7 @@ export const FridgeMultilabelModelDebugging = {
modelOverviewData: {
featureCohortView: {
firstFeatureToSelect: "mean_pixel_value",
multiFeatureCohorts: 3,
multiFeatureCohorts: 6,
secondFeatureToSelect: "Make",
singleFeatureCohorts: 3
},

Просмотреть файл

@ -3,7 +3,7 @@
import { getOS } from "../../../../util/getOS";
const FeatureCohorts = getOS() === "Linux" ? [2, 3] : 3;
const FeatureCohorts = getOS() === "Linux" ? [3, 6] : 6;
export const FridgeObjectDetectionModelDebugging = {
causalAnalysisData: {

Просмотреть файл

@ -40,7 +40,8 @@ export function ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionAr
assertNumberOfChartRowsEqual(
datasetShape,
selectedFeatures,
defaultVisibleChart
defaultVisibleChart,
isVision
);
}
}
@ -48,20 +49,21 @@ export function ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionAr
function assertNumberOfChartRowsEqual(
datasetShape: IModelAssessmentData,
selectedFeatures: number,
chartIdentifier: Locators
chartIdentifier: Locators,
isVision: boolean
): void {
const featureCohortView = datasetShape.modelOverviewData?.featureCohortView;
let expectedNumberOfCohorts = featureCohortView?.singleFeatureCohorts;
if (selectedFeatures > 1) {
expectedNumberOfCohorts = featureCohortView?.multiFeatureCohorts;
}
console.log(selectedFeatures);
console.log(expectedNumberOfCohorts);
if (Array.isArray(expectedNumberOfCohorts)) {
cy.get(getChartItems(chartIdentifier))
.its("length")
.should("be.gte", expectedNumberOfCohorts[0])
.and("be.lte", expectedNumberOfCohorts[1]);
} else if (isVision) {
cy.get(getChartItems(chartIdentifier)).its("length").should("be.gt", 2);
} else {
cy.get(getChartItems(chartIdentifier)).should(
"have.length",

Просмотреть файл

@ -38,5 +38,5 @@ export function multiSelectComboBox(
item
)}`
)
.click();
.click({ multiple: true });
}

Просмотреть файл

@ -7,15 +7,16 @@ import warnings
from typing import Optional
import pandas as pd
from PIL import Image
from PIL import ExifTags, Image
from PIL.ExifTags import TAGS
from PIL.TiffImagePlugin import IFDRational
from tqdm import tqdm
from responsibleai.feature_metadata import FeatureMetadata
from responsibleai_vision.common.constants import (ExtractedFeatures,
ImageColumns)
from responsibleai_vision.utils.image_reader import (
get_all_exif_feature_names, get_image_from_path,
IFD_CODE_LOOKUP, get_all_exif_feature_names, get_image_from_path,
get_image_pointer_from_path)
MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
@ -91,35 +92,48 @@ def extract_features(image_dataset: pd.DataFrame,
return results, feature_names
def append_exif_features(image, row_feature_values, feature_names,
blacklisted_tags, feature_metadata):
def process_data(data,
                 tag,
                 feature_names,
                 feature_metadata,
                 row_feature_values,
                 blacklisted_tags):
    """Store a single EXIF value in the feature row of the current image.

    ``IFDRational`` values are converted to a plain float first. Only
    ``str``, ``int`` and ``float`` values are stored; values of any other
    type are ignored.

    :param data: The value read from the EXIF data for ``tag``.
    :param tag: The name (or raw id) of the EXIF tag.
    :param feature_names: The list of known feature names; the position
        of ``tag`` in this list is the position written to in
        ``row_feature_values``.
    :param feature_metadata: The feature metadata whose
        ``categorical_features`` list gets ``tag`` appended when the
        value is stored and the tag is not already listed.
    :param row_feature_values: The mutable row of feature values for the
        current image, updated in place.
    :param blacklisted_tags: The set of tags already warned about; used
        so the warning for an unknown tag is only emitted once.
    """
    if isinstance(data, IFDRational):
        # A rational with a zero denominator cannot be divided; treat it
        # as a missing value instead of raising ZeroDivisionError and
        # aborting feature extraction.
        if data.denominator == 0:
            return
        data = data.numerator / data.denominator
    if not isinstance(data, (str, int, float)):
        return
    if tag in feature_names:
        if tag not in feature_metadata.categorical_features:
            feature_metadata.categorical_features.append(tag)
        row_feature_values[feature_names.index(tag)] = data
    elif tag not in blacklisted_tags:
        # Remember the tag so the warning is not repeated for every image.
        blacklisted_tags.add(tag)
        warnings.warn(
            f'Exif tag {tag} could not be found '
            'in the feature names. Ignoring tag '
            'from extracted metadata.')
def append_exif_features(image,
row_feature_values,
feature_names,
blacklisted_tags,
feature_metadata):
if isinstance(image, str):
image_pointer_path = get_image_pointer_from_path(image)
with Image.open(image_pointer_path) as im:
exifdata = im.getexif()
for tag_id in exifdata:
# get the tag name, instead of human unreadable tag id
tag = str(TAGS.get(tag_id, tag_id))
data = exifdata.get(tag_id)
# decode bytes
if isinstance(data, bytes):
data = data.decode()
if len(data) > MAX_CUSTOM_LEN:
data = data[:MAX_CUSTOM_LEN] + '...'
if isinstance(data, str):
if tag in feature_names:
if tag not in feature_metadata.categorical_features:
feature_metadata.categorical_features.append(tag)
tag_index = feature_names.index(tag)
row_feature_values[tag_index] = data
else:
# in theory this should now never happen with
# latest code, but adding this check for safety
if tag not in blacklisted_tags:
blacklisted_tags.add(tag)
warnings.warn(
f'Exif tag {tag} could not be found '
'in the feature names. Ignoring tag '
'from extracted metadata.')
elif isinstance(data, int) or isinstance(data, float):
row_feature_values[feature_names.index(tag)] = data
if tag_id in IFD_CODE_LOOKUP:
ifd_data = exifdata.get_ifd(tag_id)
for nested_tag_id, data in ifd_data.items():
tag = ExifTags.GPSTAGS.get(nested_tag_id, None) \
or ExifTags.TAGS.get(nested_tag_id, None) \
or nested_tag_id
process_data(data, tag, feature_names,
feature_metadata, row_feature_values,
blacklisted_tags)
else:
tag = str(TAGS.get(tag_id, tag_id))
data = exifdata.get(tag_id)
process_data(data, tag, feature_names, feature_metadata,
row_feature_values, blacklisted_tags)

Просмотреть файл

@ -10,7 +10,7 @@ from urllib.parse import urlparse
import requests
from numpy import asarray
from PIL import Image
from PIL import ExifTags, Image
from PIL.ExifTags import TAGS
from requests.adapters import HTTPAdapter, Retry
@ -20,6 +20,8 @@ from responsibleai_vision.common.constants import (AutoMLImagesModelIdentifier,
# domain mapped session for reuse
_requests_sessions = {}
# Lookup from the value of each member of Pillow's ``ExifTags.IFD`` enum
# to that member's name. A tag id found in this mapping is read through
# ``exifdata.get_ifd`` as a nested block rather than as a plain value.
IFD_CODE_LOOKUP = {t.value: t.name for t in ExifTags.IFD}
def _get_retry_session(url):
domain = urlparse(url.lower()).netloc
@ -88,15 +90,22 @@ def get_all_exif_feature_names(image_dataset):
with Image.open(image_pointer_path) as im:
exifdata = im.getexif()
for tag_id in exifdata:
# get the tag name, instead of human unreadable tag id
tag = TAGS.get(tag_id, tag_id)
if tag not in image_dataset.columns:
data = exifdata.get(tag_id)
if isinstance(data, str) or \
isinstance(data, int) or \
isinstance(data, float) or \
isinstance(data, bytes):
# nesting for IFD block tags
if tag_id in IFD_CODE_LOOKUP:
ifd_data = exifdata.get_ifd(tag_id)
for nested_tag_id in ifd_data:
nested_tag = ExifTags.GPSTAGS.get(nested_tag_id,
None) \
or ExifTags.TAGS.get(nested_tag_id, None) \
or nested_tag_id
exif_feature_names.add(nested_tag)
else:
# get the tag name, instead of human unreadable tag id
tag = TAGS.get(tag_id, tag_id)
if tag not in image_dataset.columns:
exif_feature_names.add(tag)
return list(exif_feature_names)

Просмотреть файл

@ -13,10 +13,22 @@ from responsibleai_vision.utils.feature_extractors import extract_features
MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
FRIDGE_METADATA_FEATURES = [
'Make', 'ResolutionUnit', 'ImageLength', 'ExifOffset', 'Model',
'GPSInfo', 'ImageWidth', 'DateTime', 'YCbCrPositioning',
'Software', 'Orientation'
]
'SensingMethod', 'GPSVersionID', 'ISOSpeedRatings', 'SceneType',
'SceneCaptureType', 'SubjectDistance', 'CustomRendered',
'SubjectDistanceRange', 'DigitalZoomRatio', 'ApertureValue',
'ImageWidth', 'GPSDOP', 'MaxApertureValue', 'ColorSpace',
'FocalLengthIn35mmFilm', 'ExposureMode', 'Saturation', 'ExposureTime',
'ExifImageHeight', 'FNumber', 'YCbCrPositioning', 'Make', 'MeteringMode',
'ExposureBiasValue', 'ExposureProgram', 'ComponentsConfiguration',
'ExifImageWidth', 'ExifInteroperabilityOffset', 'BrightnessValue',
'ImageLength', 'FlashPixVersion', 'SubsecTimeOriginal', 'Model',
'SubsecTimeDigitized', 'ResolutionUnit', 'DateTimeOriginal', 'XResolution',
'FocalLength', 'Sharpness', 'GPSLongitude', 'Contrast', 'Software',
'GPSLatitude', 'MakerNote', 'GPSDateStamp', 'GPSAltitude',
'GPSProcessingMethod', 'GPSTimeStamp', 'GPSLatitudeRef', 'WhiteBalance',
'GPSLongitudeRef', 'Flash', 'SubsecTime', 'YResolution',
'DateTimeDigitized', 'DateTime', 'GPSAltitudeRef', 'Orientation',
'ShutterSpeedValue', 'ExifVersion']
def validate_extracted_features(extracted_features, feature_names,

Просмотреть файл

@ -96,8 +96,8 @@ class TestImageUtils(object):
def test_get_all_exif_feature_names(self):
image_dataset = load_fridge_object_detection_dataset().head(2)
exif_feature_names = get_all_exif_feature_names(image_dataset)
assert len(exif_feature_names) == 10 if platform.system() == "Linux" \
else 11
num_features = 49 if platform.system() == "Linux" else 60
assert len(exif_feature_names) == num_features
def test_generate_od_error_labels(self):
true_y = np.array([[[3, 142, 257, 395, 463, 0]],