Fix error when extracting EXIF metadata features from images in the RAI Vision dashboard (#2461)
Parent: 0700cdcbb6
Commit: 6cb95c0dcc
@@ -15,7 +15,6 @@ from common_text_utils import (COVID19_EVENTS_LABELS, EMOTION,
                                create_text_classification_pipeline,
                                load_covid19_emergency_event_dataset,
                                load_emotion_dataset)
-from huggingface_hub.utils._validators import HFValidationError
 from rai_text_insights_validator import validate_rai_text_insights
 
 from responsibleai._internal.constants import ManagerNames
@@ -134,8 +133,8 @@ class TestRAITextInsightsSaveAndLoadScenarios(object):
             match_msg = 'Can\'t load the configuration'
             expected_error = OSError
         else:
-            match_msg = 'Repo id must'
-            expected_error = HFValidationError
+            match_msg = 'local folder'
+            expected_error = OSError
         with pytest.raises(expected_error, match=match_msg):
             without_model_rai_insights = RAITextInsights.load(save_path)
             assert without_model_rai_insights.model is None
@@ -28,4 +28,5 @@ opencv-python
 
 fastai
 mlflow
 pydantic<2.0.0
+piexif
@@ -4,6 +4,12 @@
 from enum import Enum
 
 
+class ExtractedFeatures(str, Enum):
+    """Provide constants related to the extracted image features."""
+
+    MEAN_PIXEL_VALUE = 'mean_pixel_value'
+
+
 class ModelTask(str, Enum):
     """Provide model task constants.
@@ -28,6 +34,12 @@ class ImageColumns(str, Enum):
     IMAGE_DETAILS = 'image_details'
 
 
+class ImageModes(str, Enum):
+    """Provide constants related to the image modes."""
+
+    RGB = 'RGB'
+
+
 class ExplainabilityLiterals:
     """Parameters for explainability method names."""
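
Both new enums mix in str, so each member compares equal to the plain string it wraps and can be passed anywhere a string is expected; that is what lets ImageModes.RGB stand in for the "RGB" literal in the hunks below. A small standalone check (illustrative only; the enum here is a local copy, not an import from responsibleai_vision):

from enum import Enum

from PIL import Image


class ImageModes(str, Enum):
    """Local mirror of the new constant, for illustration."""
    RGB = 'RGB'


assert ImageModes.RGB == 'RGB'           # str mixin: compares as a string
assert isinstance(ImageModes.RGB, str)   # and is a str instance
img = Image.new(ImageModes.RGB, (4, 4))  # PIL accepts the member as a mode
print(img.mode)                          # RGB
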
@@ -37,7 +37,7 @@ from responsibleai.rai_insights.rai_base_insights import RAIBaseInsights
 from responsibleai.serialization_utilities import serialize_json_safe
 from responsibleai_vision.common.constants import (CommonTags,
                                                     ExplainabilityDefaults,
-                                                    ImageColumns,
+                                                    ImageColumns, ImageModes,
                                                     MLFlowSchemaLiterals,
                                                     ModelTask)
 from responsibleai_vision.managers.error_analysis_manager import \
@@ -135,7 +135,7 @@ class RAIVisionInsights(RAIBaseInsights):
                  classes: Optional[np.ndarray] = None,
                  serializer: Optional[Any] = None,
                  maximum_rows_for_test: int = 5000,
-                 image_mode: str = "RGB",
+                 image_mode: str = ImageModes.RGB,
                  test_data_path: Optional[str] = None,
                  transformations: Optional[Any] = None,
                  image_downloader: Optional[Any] = None,
@@ -267,7 +267,7 @@ class RAIVisionInsights(RAIBaseInsights):
             serializer)
 
         ext_test, ext_features = extract_features(
-            self.test, self.target_column, self.task_type,
+            self.test, self.target_column,
             self.image_mode,
             self._feature_metadata)
         self._ext_test = ext_test
@@ -3,6 +3,7 @@
 
 """Defines the feature extractors."""
 
+import warnings
 from typing import Optional
 
 import pandas as pd
@@ -11,13 +12,17 @@ from PIL.ExifTags import TAGS
 from tqdm import tqdm
 
 from responsibleai.feature_metadata import FeatureMetadata
+from responsibleai_vision.common.constants import ExtractedFeatures
 from responsibleai_vision.utils.image_reader import (
-    get_all_exif_feature_names, get_image_from_path)
+    get_all_exif_feature_names, get_image_from_path,
+    get_image_pointer_from_path)
 
+MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
+MAX_CUSTOM_LEN = 100
 
 
 def extract_features(image_dataset: pd.DataFrame,
-                     target_column: str, task_type: str,
+                     target_column: str,
                      image_mode: str = None,
                      feature_metadata: Optional[FeatureMetadata] = None):
     '''Extract tabular data features from the image dataset.
@@ -27,8 +32,6 @@ def extract_features(image_dataset: pd.DataFrame,
     :param target_column: The name of the label column or list of columns.
         This is a list of columns for multilabel models.
     :type target_column: str or list[str]
-    :param task_type: The type of task to be performed.
-    :type task_type: str
     :param image_mode: The mode to open the image in.
         See pillow documentation for all modes:
        https://pillow.readthedocs.io/en/stable/handbook/concepts.html
@@ -45,7 +48,7 @@ def extract_features(image_dataset: pd.DataFrame,
     if feature_metadata and feature_metadata.categorical_features is None:
         feature_metadata.categorical_features = []
     exif_feature_names = get_all_exif_feature_names(image_dataset)
-    feature_names = ["mean_pixel_value"] + exif_feature_names
+    feature_names = [MEAN_PIXEL_VALUE] + exif_feature_names
 
     # append all feature names other than target column and label
     column_names = image_dataset.columns
@@ -58,6 +61,7 @@ def extract_features(image_dataset: pd.DataFrame,
             continue
         feature_names.append(column_names[j])
 
+    blacklisted_tags = set()
     # append all features
     for i in tqdm(range(image_dataset.shape[0])):
         image = image_dataset.iloc[i, 0]
@@ -81,9 +85,26 @@ def extract_features(image_dataset: pd.DataFrame,
                     # decode bytes
                     if isinstance(data, bytes):
                         data = data.decode()
+                        if len(data) > MAX_CUSTOM_LEN:
+                            data = data[:MAX_CUSTOM_LEN] + '...'
                     if isinstance(data, str):
-                        feature_metadata.categorical_features.append(str(tag))
-                        row_feature_values[feature_names.index(tag)] = data
+                        if not feature_metadata:
+                            feature_metadata = FeatureMetadata()
+                            feature_metadata.categorical_features = []
+                        if tag in feature_names:
+                            feature_metadata.categorical_features.append(
+                                str(tag))
+                            tag_index = feature_names.index(tag)
+                            row_feature_values[tag_index] = data
+                        else:
+                            # in theory this should now never happen with
+                            # latest code, but adding this check for safety
+                            if tag not in blacklisted_tags:
+                                blacklisted_tags.add(tag)
+                                warnings.warn(
+                                    f'Exif tag {tag} could not be found '
+                                    'in the feature names. Ignoring tag '
+                                    'from extracted metadata.')
                     elif isinstance(data, int) or isinstance(data, float):
                         row_feature_values[feature_names.index(tag)] = data
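
The branch added above decodes bytes-typed EXIF values, truncates anything longer than MAX_CUSTOM_LEN, and only records tags that are known feature names, warning once per unknown tag. A standalone sketch of that logic with a made-up tag value (not the library code itself):

import warnings

MAX_CUSTOM_LEN = 100  # same cap as in the diff

feature_names = ['mean_pixel_value', 'XPComment']
row_feature_values = [None] * len(feature_names)
categorical_features = []
blacklisted_tags = set()

tag, data = 'XPComment', b'Extra metadata for 1.jpg'  # made-up EXIF entry

if isinstance(data, bytes):
    data = data.decode()
    if len(data) > MAX_CUSTOM_LEN:
        data = data[:MAX_CUSTOM_LEN] + '...'
if isinstance(data, str):
    if tag in feature_names:
        categorical_features.append(str(tag))
        row_feature_values[feature_names.index(tag)] = data
    elif tag not in blacklisted_tags:
        blacklisted_tags.add(tag)
        warnings.warn(f'Exif tag {tag} could not be found in the '
                      'feature names. Ignoring tag from extracted metadata.')

print(row_feature_values)  # [None, 'Extra metadata for 1.jpg']
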
@@ -94,7 +94,8 @@ def get_all_exif_feature_names(image_dataset):
             data = exifdata.get(tag_id)
             if isinstance(data, str) or \
                     isinstance(data, int) or \
-                    isinstance(data, float):
+                    isinstance(data, float) or \
+                    isinstance(data, bytes):
                 exif_feature_names.add(tag)
     return list(exif_feature_names)
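
With bytes now accepted, tags such as the Windows XP* comment fields become extractable feature names. A standalone sketch of the same scan done with plain Pillow (this is an illustration, not the library's get_all_exif_feature_names):

import os
import tempfile

from PIL import Image
from PIL.ExifTags import TAGS


def exif_feature_names_for(image_path):
    names = set()
    with Image.open(image_path) as im:
        exifdata = im.getexif()
        for tag_id in exifdata:
            tag = TAGS.get(tag_id, tag_id)
            data = exifdata.get(tag_id)
            # bytes values (e.g. Windows XPComment) now count as features
            if isinstance(data, (str, int, float, bytes)):
                names.add(str(tag))
    return sorted(names)


path = os.path.join(tempfile.gettempdir(), 'no_exif_demo.jpg')
Image.new('RGB', (8, 8)).save(path, 'JPEG')
print(exif_feature_names_for(path))  # [] for a freshly generated JPEG
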
@@ -12,6 +12,7 @@ from zipfile import ZipFile
 
 import numpy as np
 import pandas as pd
+import piexif
 import shap
 import torch
 import torch.nn as nn
@@ -172,7 +173,7 @@ def retrieve_unzip_file(download_url, data_file):
     os.remove(data_file)
 
 
-def load_fridge_dataset():
+def load_fridge_dataset(add_extra_mixed_metadata=False):
     # create data folder if it doesnt exist.
     os.makedirs("data", exist_ok=True)
@@ -186,6 +187,14 @@ def load_fridge_dataset():
     for folder in os.listdir("./data/fridgeObjects"):
         for file in os.listdir("./data/fridgeObjects/" + folder):
             image_path = "./data/fridgeObjects/" + folder + "/" + file
+            if add_extra_mixed_metadata and file.endswith("1.jpg"):
+                with Image.open(image_path) as im:
+                    exif_dict = piexif.load(im.info['exif'])
+                    comment = 'Extra metadata for {}'.format(file).encode()
+                    exif_dict['0th'][piexif.ImageIFD.XPComment] = comment
+                    exif_dict['1st'][piexif.ImageIFD.XPComment] = comment
+                    exif_bytes = piexif.dump(exif_dict)
+                    im.save(image_path, exif=exif_bytes)
             data = data.append({IMAGE: image_path, LABEL: folder},
                                ignore_index=True)
     return data
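
The helper above uses piexif to stamp a Windows XPComment tag onto selected fridge images so the tests see a bytes-valued EXIF field. A throwaway round-trip of the same idea on a generated JPEG (the path and comment text are illustrative, not part of the repo):

import os
import tempfile

import piexif
from PIL import Image

# generate a scratch JPEG to stamp (hypothetical demo path)
path = os.path.join(tempfile.gettempdir(), 'xpcomment_demo.jpg')
Image.new('RGB', (8, 8), color='red').save(path, 'JPEG')

comment = 'Extra metadata for demo'.encode()
exif_dict = {'0th': {piexif.ImageIFD.XPComment: comment},
             'Exif': {}, 'GPS': {}, '1st': {}, 'thumbnail': None}
piexif.insert(piexif.dump(exif_dict), path)

with Image.open(path) as im:
    data = im.getexif().get(piexif.ImageIFD.XPComment)
    print(type(data), data)  # typically bytes, which the extractor decodes
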
@@ -0,0 +1,69 @@
+# Copyright (c) Microsoft Corporation
+# Licensed under the MIT License.
+
+from common_vision_utils import (load_flowers_dataset, load_fridge_dataset,
+                                 load_fridge_object_detection_dataset,
+                                 load_imagenet_dataset)
+
+from responsibleai_vision.common.constants import (ExtractedFeatures,
+                                                    ImageColumns, ImageModes)
+from responsibleai_vision.utils.feature_extractors import extract_features
+
+MEAN_PIXEL_VALUE = ExtractedFeatures.MEAN_PIXEL_VALUE.value
+FRIDGE_METADATA_FEATURES = [
+    'Make', 'ResolutionUnit', 'ImageLength', 'ExifOffset', 'Model',
+    'GPSInfo', 'ImageWidth', 'DateTime', 'YCbCrPositioning',
+    'Software', 'Orientation'
+]
+
+
+def validate_extracted_features(extracted_features, feature_names,
+                                expected_feature_names, data):
+    assert len(extracted_features) == len(data)
+    assert feature_names[0] == expected_feature_names[0]
+    for i in range(1, len(feature_names)):
+        assert feature_names[i] in expected_feature_names
+    assert len(feature_names) == len(expected_feature_names)
+    assert len(extracted_features[0]) == len(feature_names)
+
+
+def extract_dataset_features(data):
+    return extract_features(data, ImageColumns.LABEL, ImageModes.RGB, None)
+
+
+class TestFeatureExtractors(object):
+    def test_extract_features_fridge_object_detection(self):
+        data = load_fridge_object_detection_dataset(automl_format=False)
+        extracted_features, feature_names = extract_dataset_features(data)
+        expected_feature_names = [MEAN_PIXEL_VALUE] + FRIDGE_METADATA_FEATURES
+        validate_extracted_features(extracted_features, feature_names,
+                                    expected_feature_names, data)
+
+    def test_extract_features_fridge_metadata(self):
+        data = load_fridge_dataset()
+        extracted_features, feature_names = extract_dataset_features(data)
+        expected_feature_names = [MEAN_PIXEL_VALUE] + FRIDGE_METADATA_FEATURES
+        validate_extracted_features(extracted_features, feature_names,
+                                    expected_feature_names, data)
+
+    def test_extract_features_imagenet_metadata(self):
+        data = load_imagenet_dataset()
+        extracted_features, feature_names = extract_dataset_features(data)
+        expected_feature_names = [MEAN_PIXEL_VALUE]
+        validate_extracted_features(extracted_features, feature_names,
+                                    expected_feature_names, data)
+
+    def test_extract_features_flowers_metadata(self):
+        data = load_flowers_dataset(upscale=False)
+        extracted_features, feature_names = extract_dataset_features(data)
+        expected_feature_names = [MEAN_PIXEL_VALUE]
+        validate_extracted_features(extracted_features, feature_names,
+                                    expected_feature_names, data)
+
+    def test_extract_features_mixed_exif_XPComment_metadata(self):
+        data = load_fridge_dataset(add_extra_mixed_metadata=True)
+        extracted_features, feature_names = extract_dataset_features(data)
+        expected_feature_names = [MEAN_PIXEL_VALUE, 'XPComment']
+        expected_feature_names += FRIDGE_METADATA_FEATURES
+        validate_extracted_features(extracted_features, feature_names,
+                                    expected_feature_names, data)
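
Outside the test harness, the updated extract_features can be exercised on any DataFrame whose first column holds image paths. A minimal end-to-end sketch on a generated JPEG (the toy columns and scratch path are assumptions; responsibleai_vision must be installed):

import os
import tempfile

import pandas as pd
from PIL import Image

from responsibleai_vision.utils.feature_extractors import extract_features

# one-image toy dataset: first column is the image path, second the label
image_path = os.path.join(tempfile.gettempdir(), 'toy_fridge.jpg')
Image.new('RGB', (16, 16), color='blue').save(image_path, 'JPEG')
data = pd.DataFrame([{'image': image_path, 'label': 'carton'}])

# new signature: (dataset, target_column, image_mode, feature_metadata)
extracted, names = extract_features(data, 'label', 'RGB', None)
print(names)         # typically just ['mean_pixel_value'] for a bare JPEG
print(extracted[0])  # per-image feature row, mean pixel value first
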
@@ -63,6 +63,7 @@ class TestImageUtils(object):
             assert label_j[5] == o_label_j[IS_CROWD]
 
     def test_retry_sessions_match_domain_count(self):
+        sessions_before_test = len(image_reader_requests_sessions)
         urls = [f"https://{i}.com/image.png" for i in range(10)]
         duplicates = urls.copy()
         urls.extend(duplicates)
@@ -72,7 +73,9 @@ class TestImageUtils(object):
         for url in urls:
            image_reader_get_retry_session(url)
 
-        assert len(image_reader_requests_sessions) == domain_unique_count
+        new_session_count = len(image_reader_requests_sessions)
+        new_session_count -= sessions_before_test
+        assert new_session_count == domain_unique_count
 
     @patch("urllib3.connectionpool.HTTPConnectionPool._make_request")
     def test_retry_sessions_retries_on_conn_failure(self, request_mock):
@@ -25,7 +25,7 @@ from rai_vision_insights_validator import validate_rai_vision_insights
 from responsibleai.feature_metadata import FeatureMetadata
 from responsibleai_vision import ModelTask, RAIVisionInsights
 from responsibleai_vision.common.constants import (ExplainabilityDefaults,
-                                                    ImageColumns)
+                                                    ImageColumns, ImageModes)
 
 DEFAULT_MAX_EVALS = ExplainabilityDefaults.DEFAULT_MAX_EVALS
 DEFAULT_NUM_MASKS = ExplainabilityDefaults.DEFAULT_NUM_MASKS
@@ -40,7 +40,7 @@ class TestRAIVisionInsights(object):
         task_type = ModelTask.IMAGE_CLASSIFICATION
         class_names = load_imagenet_labels()
         run_rai_insights(pred, data[:3], ImageColumns.LABEL,
-                         task_type, class_names, image_mode='RGB')
+                         task_type, class_names, image_mode=ImageModes.RGB)
 
     @pytest.mark.parametrize('max_evals', [None, 10, 200])
     def test_rai_insights_image_classification_max_evals(self, max_evals):
@@ -51,7 +51,7 @@ class TestRAIVisionInsights(object):
         # run on a single image to avoid running out of memory on
         # test machines
         run_rai_insights(pred, data[:1], ImageColumns.LABEL,
-                         task_type, class_names, image_mode='RGB',
+                         task_type, class_names, image_mode=ImageModes.RGB,
                          test_explainer=True, max_evals=max_evals)
 
     @pytest.mark.parametrize('max_evals', [-100, -1, 0])
@@ -63,7 +63,8 @@ class TestRAIVisionInsights(object):
         with pytest.raises(ValueError,
                            match="max_evals must be greater than 0"):
             run_rai_insights(pred, data[:1], ImageColumns.LABEL,
-                             task_type, class_names, image_mode='RGB',
+                             task_type, class_names,
+                             image_mode=ImageModes.RGB,
                              test_explainer=True, max_evals=max_evals)
 
     def test_rai_insights_image_classification_fridge(self):