Exif Nested Metadata support for Image Dashboards (#2542)
* exif metadata extension
* requirement update for latest pillow attributes
* gate update to avoid py 3.7
* python lint fixes
* isort lint fixes
* test fixes
* python lint fixes
* modified gate for matplotlib on windows
* lint fixes
* gate fix
* gate fix
* reverted gate
* test fix based on OS
* isort fix
* auto lintfix
* adding matplotlib install
* tweaking matplotlib install
* gate cleanup
* removed byte decoding
* multiple click support
* IC test fixes
* test fixes
* test fixes
* test fixes
* test fix
* test fix
* test changes
* auto lint fixes
* comment fixes
* comment fixes
* auto lint fixes
This commit is contained in:
Родитель
7292ba3f6c
Коммит
e2c7dd0a95
|
@ -23,7 +23,7 @@ export const FridgeImageClassificationModelDebugging = {
|
|||
modelOverviewData: {
|
||||
featureCohortView: {
|
||||
firstFeatureToSelect: "mean_pixel_value",
|
||||
multiFeatureCohorts: 3,
|
||||
multiFeatureCohorts: 6,
|
||||
secondFeatureToSelect: "Make",
|
||||
singleFeatureCohorts: 3
|
||||
},
|
||||
|
|
|
@ -23,7 +23,7 @@ export const FridgeMultilabelModelDebugging = {
|
|||
modelOverviewData: {
|
||||
featureCohortView: {
|
||||
firstFeatureToSelect: "mean_pixel_value",
|
||||
multiFeatureCohorts: 3,
|
||||
multiFeatureCohorts: 6,
|
||||
secondFeatureToSelect: "Make",
|
||||
singleFeatureCohorts: 3
|
||||
},
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
import { getOS } from "../../../../util/getOS";
|
||||
|
||||
const FeatureCohorts = getOS() === "Linux" ? [2, 3] : 3;
|
||||
const FeatureCohorts = getOS() === "Linux" ? [3, 6] : 6;
|
||||
|
||||
export const FridgeObjectDetectionModelDebugging = {
|
||||
causalAnalysisData: {
|
||||
|
|
|
@ -40,7 +40,8 @@ export function ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionAr
|
|||
assertNumberOfChartRowsEqual(
|
||||
datasetShape,
|
||||
selectedFeatures,
|
||||
defaultVisibleChart
|
||||
defaultVisibleChart,
|
||||
isVision
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -48,20 +49,21 @@ export function ensureAllModelOverviewFeatureCohortsViewElementsAfterSelectionAr
|
|||
function assertNumberOfChartRowsEqual(
|
||||
datasetShape: IModelAssessmentData,
|
||||
selectedFeatures: number,
|
||||
chartIdentifier: Locators
|
||||
chartIdentifier: Locators,
|
||||
isVision: boolean
|
||||
): void {
|
||||
const featureCohortView = datasetShape.modelOverviewData?.featureCohortView;
|
||||
let expectedNumberOfCohorts = featureCohortView?.singleFeatureCohorts;
|
||||
if (selectedFeatures > 1) {
|
||||
expectedNumberOfCohorts = featureCohortView?.multiFeatureCohorts;
|
||||
}
|
||||
console.log(selectedFeatures);
|
||||
console.log(expectedNumberOfCohorts);
|
||||
if (Array.isArray(expectedNumberOfCohorts)) {
|
||||
cy.get(getChartItems(chartIdentifier))
|
||||
.its("length")
|
||||
.should("be.gte", expectedNumberOfCohorts[0])
|
||||
.and("be.lte", expectedNumberOfCohorts[1]);
|
||||
} else if (isVision) {
|
||||
cy.get(getChartItems(chartIdentifier)).its("length").should("be.gt", 2);
|
||||
} else {
|
||||
cy.get(getChartItems(chartIdentifier)).should(
|
||||
"have.length",
|
||||
|
|
|
@ -38,5 +38,5 @@ export function multiSelectComboBox(
|
|||
item
|
||||
)}`
|
||||
)
|
||||
.click();
|
||||
.click({ multiple: true });
|
||||
}
|
||||
|
|
|
@ -7,15 +7,16 @@ import warnings
|
|||
from typing import Optional
|
||||
|
||||
import pandas as pd
|
||||
from PIL import Image
|
||||
from PIL import ExifTags, Image
|
||||
from PIL.ExifTags import TAGS
|
||||
from PIL.TiffImagePlugin import IFDRational
|
||||
from tqdm import tqdm
|
||||
|
||||
from responsibleai.feature_metadata import FeatureMetadata
|
||||
from responsibleai_vision.common.constants import (ExtractedFeatures,
|
||||
ImageColumns)
|
||||
from responsibleai_vision.utils.image_reader import (
|
||||
get_all_exif_feature_names, get_image_from_path,
|
||||
IFD_CODE_LOOKUP, get_all_exif_feature_names, get_image_from_path,
|
||||
get_image_pointer_from_path)
|
||||
|
||||
MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
|
||||
|
@ -91,35 +92,48 @@ def extract_features(image_dataset: pd.DataFrame,
|
|||
return results, feature_names
|
||||
|
||||
|
||||
def append_exif_features(image, row_feature_values, feature_names,
|
||||
blacklisted_tags, feature_metadata):
|
||||
def process_data(data,
                 tag,
                 feature_names,
                 feature_metadata,
                 row_feature_values,
                 blacklisted_tags):
    """Store a single EXIF value in the feature row of the current image.

    Rational values are converted to a plain float first. Only scalar
    values (str, int, float) are stored; any other type is ignored.

    :param data: Raw value read from the EXIF data for ``tag``.
    :param tag: Identifier of the EXIF tag the value belongs to.
    :param feature_names: Ordered feature names; the position of ``tag``
        in this list selects the slot written in ``row_feature_values``.
    :param feature_metadata: Object whose ``categorical_features`` list
        gets ``tag`` appended the first time the tag is stored.
    :param row_feature_values: Mutable sequence of feature values for the
        current image, updated in place.
    :param blacklisted_tags: Set of tags that were already reported as
        missing from ``feature_names``; used to warn once per tag.
    """
    if isinstance(data, IFDRational):
        if data.denominator == 0:
            # EXIF files in the wild contain rationals such as 0/0 to mean
            # "no value". Dividing would raise ZeroDivisionError and abort
            # feature extraction, so treat it like an absent tag instead.
            return
        data = data.numerator / data.denominator

    if not isinstance(data, (str, int, float)):
        # Non-scalar payloads (bytes, tuples, nested dicts, ...) are skipped.
        return

    if tag in feature_names:
        if tag not in feature_metadata.categorical_features:
            feature_metadata.categorical_features.append(tag)
        row_feature_values[feature_names.index(tag)] = data
    elif tag not in blacklisted_tags:
        # Remember the tag so the warning is emitted only once.
        blacklisted_tags.add(tag)
        warnings.warn(
            f'Exif tag {tag} could not be found '
            'in the feature names. Ignoring tag '
            'from extracted metadata.')
|
||||
|
||||
|
||||
def append_exif_features(image,
|
||||
row_feature_values,
|
||||
feature_names,
|
||||
blacklisted_tags,
|
||||
feature_metadata):
|
||||
if isinstance(image, str):
|
||||
image_pointer_path = get_image_pointer_from_path(image)
|
||||
with Image.open(image_pointer_path) as im:
|
||||
exifdata = im.getexif()
|
||||
for tag_id in exifdata:
|
||||
# get the tag name, instead of human unreadable tag id
|
||||
tag = str(TAGS.get(tag_id, tag_id))
|
||||
data = exifdata.get(tag_id)
|
||||
# decode bytes
|
||||
if isinstance(data, bytes):
|
||||
data = data.decode()
|
||||
if len(data) > MAX_CUSTOM_LEN:
|
||||
data = data[:MAX_CUSTOM_LEN] + '...'
|
||||
if isinstance(data, str):
|
||||
if tag in feature_names:
|
||||
if tag not in feature_metadata.categorical_features:
|
||||
feature_metadata.categorical_features.append(tag)
|
||||
tag_index = feature_names.index(tag)
|
||||
row_feature_values[tag_index] = data
|
||||
else:
|
||||
# in theory this should now never happen with
|
||||
# latest code, but adding this check for safety
|
||||
if tag not in blacklisted_tags:
|
||||
blacklisted_tags.add(tag)
|
||||
warnings.warn(
|
||||
f'Exif tag {tag} could not be found '
|
||||
'in the feature names. Ignoring tag '
|
||||
'from extracted metadata.')
|
||||
elif isinstance(data, int) or isinstance(data, float):
|
||||
row_feature_values[feature_names.index(tag)] = data
|
||||
if tag_id in IFD_CODE_LOOKUP:
|
||||
ifd_data = exifdata.get_ifd(tag_id)
|
||||
for nested_tag_id, data in ifd_data.items():
|
||||
tag = ExifTags.GPSTAGS.get(nested_tag_id, None) \
|
||||
or ExifTags.TAGS.get(nested_tag_id, None) \
|
||||
or nested_tag_id
|
||||
process_data(data, tag, feature_names,
|
||||
feature_metadata, row_feature_values,
|
||||
blacklisted_tags)
|
||||
else:
|
||||
tag = str(TAGS.get(tag_id, tag_id))
|
||||
data = exifdata.get(tag_id)
|
||||
process_data(data, tag, feature_names, feature_metadata,
|
||||
row_feature_values, blacklisted_tags)
|
||||
|
|
|
@ -10,7 +10,7 @@ from urllib.parse import urlparse
|
|||
|
||||
import requests
|
||||
from numpy import asarray
|
||||
from PIL import Image
|
||||
from PIL import ExifTags, Image
|
||||
from PIL.ExifTags import TAGS
|
||||
from requests.adapters import HTTPAdapter, Retry
|
||||
|
||||
|
@ -20,6 +20,8 @@ from responsibleai_vision.common.constants import (AutoMLImagesModelIdentifier,
|
|||
# domain mapped session for reuse
|
||||
_requests_sessions = {}
|
||||
|
||||
# Lookup from the value of each ExifTags.IFD member to that member's name.
# Membership of a tag id in this dict is how the EXIF helpers decide that
# the tag refers to a nested IFD block that must be read with get_ifd().
IFD_CODE_LOOKUP = {t.value: t.name for t in ExifTags.IFD}
|
||||
|
||||
|
||||
def _get_retry_session(url):
|
||||
domain = urlparse(url.lower()).netloc
|
||||
|
@ -88,15 +90,22 @@ def get_all_exif_feature_names(image_dataset):
|
|||
with Image.open(image_pointer_path) as im:
|
||||
exifdata = im.getexif()
|
||||
for tag_id in exifdata:
|
||||
# get the tag name, instead of human unreadable tag id
|
||||
tag = TAGS.get(tag_id, tag_id)
|
||||
if tag not in image_dataset.columns:
|
||||
data = exifdata.get(tag_id)
|
||||
if isinstance(data, str) or \
|
||||
isinstance(data, int) or \
|
||||
isinstance(data, float) or \
|
||||
isinstance(data, bytes):
|
||||
# nesting for IFD block tags
|
||||
if tag_id in IFD_CODE_LOOKUP:
|
||||
ifd_data = exifdata.get_ifd(tag_id)
|
||||
|
||||
for nested_tag_id in ifd_data:
|
||||
nested_tag = ExifTags.GPSTAGS.get(nested_tag_id,
|
||||
None) \
|
||||
or ExifTags.TAGS.get(nested_tag_id, None) \
|
||||
or nested_tag_id
|
||||
exif_feature_names.add(nested_tag)
|
||||
else:
|
||||
# get the tag name, instead of human unreadable tag id
|
||||
tag = TAGS.get(tag_id, tag_id)
|
||||
if tag not in image_dataset.columns:
|
||||
exif_feature_names.add(tag)
|
||||
|
||||
return list(exif_feature_names)
|
||||
|
||||
|
||||
|
|
|
@ -13,10 +13,22 @@ from responsibleai_vision.utils.feature_extractors import extract_features
|
|||
|
||||
MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
|
||||
FRIDGE_METADATA_FEATURES = [
|
||||
'Make', 'ResolutionUnit', 'ImageLength', 'ExifOffset', 'Model',
|
||||
'GPSInfo', 'ImageWidth', 'DateTime', 'YCbCrPositioning',
|
||||
'Software', 'Orientation'
|
||||
]
|
||||
'SensingMethod', 'GPSVersionID', 'ISOSpeedRatings', 'SceneType',
|
||||
'SceneCaptureType', 'SubjectDistance', 'CustomRendered',
|
||||
'SubjectDistanceRange', 'DigitalZoomRatio', 'ApertureValue',
|
||||
'ImageWidth', 'GPSDOP', 'MaxApertureValue', 'ColorSpace',
|
||||
'FocalLengthIn35mmFilm', 'ExposureMode', 'Saturation', 'ExposureTime',
|
||||
'ExifImageHeight', 'FNumber', 'YCbCrPositioning', 'Make', 'MeteringMode',
|
||||
'ExposureBiasValue', 'ExposureProgram', 'ComponentsConfiguration',
|
||||
'ExifImageWidth', 'ExifInteroperabilityOffset', 'BrightnessValue',
|
||||
'ImageLength', 'FlashPixVersion', 'SubsecTimeOriginal', 'Model',
|
||||
'SubsecTimeDigitized', 'ResolutionUnit', 'DateTimeOriginal', 'XResolution',
|
||||
'FocalLength', 'Sharpness', 'GPSLongitude', 'Contrast', 'Software',
|
||||
'GPSLatitude', 'MakerNote', 'GPSDateStamp', 'GPSAltitude',
|
||||
'GPSProcessingMethod', 'GPSTimeStamp', 'GPSLatitudeRef', 'WhiteBalance',
|
||||
'GPSLongitudeRef', 'Flash', 'SubsecTime', 'YResolution',
|
||||
'DateTimeDigitized', 'DateTime', 'GPSAltitudeRef', 'Orientation',
|
||||
'ShutterSpeedValue', 'ExifVersion']
|
||||
|
||||
|
||||
def validate_extracted_features(extracted_features, feature_names,
|
||||
|
|
|
@ -96,8 +96,8 @@ class TestImageUtils(object):
|
|||
def test_get_all_exif_feature_names(self):
|
||||
image_dataset = load_fridge_object_detection_dataset().head(2)
|
||||
exif_feature_names = get_all_exif_feature_names(image_dataset)
|
||||
assert len(exif_feature_names) == 10 if platform.system() == "Linux" \
|
||||
else 11
|
||||
num_features = 49 if platform.system() == "Linux" else 60
|
||||
assert len(exif_feature_names) == num_features
|
||||
|
||||
def test_generate_od_error_labels(self):
|
||||
true_y = np.array([[[3, 142, 257, 395, 463, 0]],
|
||||
|
|
Загрузка…
Ссылка в новой задаче