ENH: Add diagnosis for Tiff Conversion (#862)

This commit is contained in:
Kenza Bouzid 2023-03-31 15:10:16 +01:00 коммит произвёл GitHub
Родитель f3ea7173d7
Коммит 5c09c280a8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 34 добавлений и 2 удалений

Просмотреть файл

@ -55,6 +55,8 @@ class ConvertWSIToTiffd(MapTransform):
replace_ampersand_by: str = UNDERSCORE,
compression: COMPRESSION = COMPRESSION.ADOBE_DEFLATE,
tile_size: int = 512,
min_file_size: int = 0,
verbose: bool = False,
) -> None:
"""
:param output_folder: The directory where the tiff file will be saved.
@ -76,6 +78,9 @@ class ConvertWSIToTiffd(MapTransform):
aka ZLIB that is lossless compression. Make sure to use one of these options (RAW, LZW, JPEG, JPEG2000) so
that the converted files are readable by cucim.
:param tile_size: The size of the tiles that are used to write the tiff file, defaults to 512.
:param min_file_size: The minimum size of the tiff file in bytes. If the tiff file size is smaller than or equal to
this value, it will get overwritten. Defaults to 0, in which case only empty files are overwritten.
:param verbose: A flag to enable verbose logging, defaults to False.
"""
self.output_folder = output_folder
self.image_key = image_key
@ -88,6 +93,8 @@ class ConvertWSIToTiffd(MapTransform):
self.wsi_reader = WSIReader(backend=WSIBackend.OPENSLIDE)
self.compression = compression
self.tile_size = tile_size
self.min_file_size = min_file_size
self.verbose = verbose
def get_tiff_path(self, src_path: Path) -> Path:
"""Returns the path to the tiff file that will be created from the src file. The tiff file is saved in the
@ -242,6 +249,10 @@ class ConvertWSIToTiffd(MapTransform):
src_path = Path(data[self.image_key])
tiff_path = self.get_tiff_path(src_path)
# if the tiff file does not exist, or if it exists but its size is at most min_file_size bytes, we convert the wsi to tiff
if not tiff_path.exists() or (tiff_path.exists() and tiff_path.stat().st_size == 0):
if not tiff_path.exists() or (tiff_path.exists() and tiff_path.stat().st_size <= self.min_file_size):
self.convert_wsi(src_path, tiff_path)
if self.verbose:
logging.info(f"Converted {src_path} to {tiff_path}")
logging.info(f"Source file size {src_path.stat().st_size / 1e6:.2f} MB")
logging.info(f"Tiff file size {tiff_path.stat().st_size / 1e6:.2f} MB")
return data

Просмотреть файл

@ -62,6 +62,16 @@ class TiffConversionConfig(param.Parameterized):
doc="The name of the new dataset csv file that will be created for the converted data. If None, the default "
"name of the original dataset will be used.",
)
min_file_size: int = param.Integer(
default=0,
doc="The minimum size of the tiff file in bytes. If the tiff file is smaller than this size, it will get "
"overwritten. Defaults to 0.",
)
verbose: bool = param.Boolean(
default=False,
doc="If True, the progress of the conversion will be logged including src and tiff file sizes. "
"Defaults to False.",
)
def get_transform(self, output_folder: Path) -> ConvertWSIToTiffd:
"""Get the transform that will be used to convert the src files to tiff files."""
@ -74,6 +84,8 @@ class TiffConversionConfig(param.Parameterized):
replace_ampersand_by=self.replace_ampersand_by,
compression=self.compression,
tile_size=self.tile_size,
min_file_size=self.min_file_size,
verbose=self.verbose,
)
def create_dataset_csv_for_converted_data(self, output_folder: Path) -> None:

Просмотреть файл

@ -7,6 +7,7 @@ import numpy as np
import pytest
from pathlib import Path
from pytest import LogCaptureFixture
from monai.data.wsi_reader import WSIReader
from health_cpath.datasets.panda_dataset import PandaDataset
from health_cpath.preprocessing.loading import WSIBackend
@ -213,13 +214,17 @@ def test_convert_wsi_to_tiff(add_low_mag: bool, wsi_samples: WSISamplesType, tmp
@pytest.mark.gpu
@skipif_no_gpu() # cucim is not available on cpu
def test_convert_wsi_to_tiff_existing_empty_file(wsi_samples: WSISamplesType, tmp_path: Path) -> None:
def test_convert_wsi_to_tiff_existing_empty_file(
wsi_samples: WSISamplesType, tmp_path: Path, caplog: LogCaptureFixture
) -> None:
target_mag = 2.5
transform = ConvertWSIToTiffd(
output_folder=tmp_path,
target_magnifications=[target_mag],
default_base_objective_power=target_mag,
tile_size=16,
min_file_size=0,
verbose=True,
)
tiff_path = transform.get_tiff_path(wsi_samples[0][SlideKey.IMAGE])
# Create an empty file
@ -230,6 +235,10 @@ def test_convert_wsi_to_tiff_existing_empty_file(wsi_samples: WSISamplesType, tm
for sample in wsi_samples:
transform(sample)
assert tiff_path.stat().st_size > 0
messages = caplog.messages
assert "Converted" in messages[0]
assert "Source file size 0.02 MB" in messages[1]
assert "Tiff file size 0.01 MB" in messages[2]
def test_tiff_conversion_config(mock_panda_slides_root_dir: Path, tmp_path: Path) -> None: