This commit is contained in:
Adam J. Stewart 2021-06-17 15:30:50 +00:00
Родитель ed5e83abb0
Коммит 01f42ca3e7
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: C66C0675661156FC
5 изменённых файлов: 62 добавлений и 31 удалений

Просмотреть файл

@@ -75,6 +75,7 @@ class _COWC(VisionDataset, abc.ABC):
split: str = "train",
transforms: Optional[Callable[[Dict[str, Tensor]], Dict[str, Tensor]]] = None,
download: bool = False,
checksum: bool = False,
) -> None:
"""Initialize a new COWC dataset instance.
@@ -84,6 +85,7 @@ class _COWC(VisionDataset, abc.ABC):
transforms: a function/transform that takes input sample and its target as
entry and returns a transformed version
download: if True, download dataset and store it in the root directory
checksum: if True, check the MD5 of the downloaded files (may be slow)
Raises:
AssertionError: if ``split`` argument is invalid
@@ -93,7 +95,9 @@ class _COWC(VisionDataset, abc.ABC):
assert split in ["train", "test"]
self.root = root
self.split = split
self.transforms = transforms
self.checksum = checksum
if download:
self.download()
@@ -176,11 +180,11 @@ class _COWC(VisionDataset, abc.ABC):
"""Check integrity of dataset.
Returns:
True if dataset MD5s match, else False
True if dataset files are found and/or MD5s match, else False
"""
for filename, md5 in zip(self.filenames, self.md5s):
filepath = os.path.join(self.root, self.base_folder, filename)
if not check_integrity(filepath, md5):
if not check_integrity(filepath, md5 if self.checksum else None):
return False
return True
@@ -196,7 +200,7 @@ class _COWC(VisionDataset, abc.ABC):
self.base_url + filename,
os.path.join(self.root, self.base_folder),
filename=filename,
md5=md5,
md5=md5 if self.checksum else None,
)
if filename.endswith(".tbz"):
filepath = os.path.join(self.root, self.base_folder, filename)

Просмотреть файл

@@ -109,6 +109,7 @@ class CV4AKenyaCropType(VisionDataset):
transforms: Optional[Callable[[Dict[str, Tensor]], Dict[str, Tensor]]] = None,
download: bool = False,
api_key: Optional[str] = None,
checksum: bool = False,
verbose: bool = False,
) -> None:
"""Initialize a new CV4A Kenya Crop Type Dataset instance.
@@ -123,16 +124,21 @@ class CV4AKenyaCropType(VisionDataset):
entry and returns a transformed version
download: if True, download dataset and store it in the root directory
api_key: a RadiantEarth MLHub API key to use for downloading the dataset
checksum: if True, check the MD5 of the downloaded files (may be slow)
verbose: if True, print messages when new tiles are loaded
Raises:
RuntimeError: if download=True but api_key=None, or download=False but
dataset is missing or checksum fails
RuntimeError: if ``download=True`` but ``api_key=None``, or
``download=False`` but dataset is missing or checksum fails
"""
self._validate_bands(bands)
self.root = root
self.chip_size = chip_size
self.stride = stride
self.bands = bands
self.transforms = transforms
self.checksum = checksum
self.verbose = verbose
if download:
@@ -151,8 +157,6 @@ class CV4AKenyaCropType(VisionDataset):
)
# Calculate the indices that we will use over all tiles
self.bands = bands
self.chip_size = chip_size
self.chips_metadata = []
for tile_index in range(len(self.tile_names)):
for y in list(range(0, self.tile_height - self.chip_size, stride)) + [
@@ -215,7 +219,7 @@ class CV4AKenyaCropType(VisionDataset):
tuple of labels and field ids
Raises:
AssertionError: if tile_name is invalid
AssertionError: if ``tile_name`` is invalid
"""
assert tile_name in self.tile_names
@@ -246,7 +250,7 @@ class CV4AKenyaCropType(VisionDataset):
bands: user-provided tuple of bands to load
Raises:
AssertionError: if bands is not a tuple
AssertionError: if ``bands`` is not a tuple
ValueError: if an invalid band name is provided
"""
@@ -271,7 +275,7 @@ class CV4AKenyaCropType(VisionDataset):
points in time, 3035 is the tile height, and 2016 is the tile width
Raises:
AssertionError: if tile_name is invalid
AssertionError: if ``tile_name`` is invalid
"""
assert tile_name in self.tile_names
@@ -307,7 +311,7 @@ class CV4AKenyaCropType(VisionDataset):
array containing a single image tile
Raises:
AssertionError: if tile_name or date is invalid
AssertionError: if ``tile_name`` or ``date`` is invalid
"""
assert tile_name in self.tile_names
assert date in self.dates
@@ -339,16 +343,16 @@ class CV4AKenyaCropType(VisionDataset):
"""Check integrity of dataset.
Returns:
True if the MD5s of the dataset's archives match, else False
True if dataset files are found and/or MD5s match, else False
"""
images: bool = check_integrity(
os.path.join(self.root, self.base_folder, self.image_meta["filename"]),
self.image_meta["md5"],
self.image_meta["md5"] if self.checksum else None,
)
targets: bool = check_integrity(
os.path.join(self.root, self.base_folder, self.target_meta["filename"]),
self.target_meta["md5"],
self.target_meta["md5"] if self.checksum else None,
)
return images and targets

Просмотреть файл

@@ -59,6 +59,7 @@ class LandCoverAI(VisionDataset):
split: str = "train",
transforms: Optional[Callable[[Dict[str, Tensor]], Dict[str, Tensor]]] = None,
download: bool = False,
checksum: bool = False,
) -> None:
"""Initialize a new LandCover.ai dataset instance.
@@ -68,6 +69,7 @@ class LandCoverAI(VisionDataset):
transforms: a function/transform that takes input sample and its target as
entry and returns a transformed version
download: if True, download dataset and store it in the root directory
checksum: if True, check the MD5 of the downloaded files (may be slow)
Raises:
AssertionError: if ``split`` argument is invalid
@@ -77,7 +79,9 @@ class LandCoverAI(VisionDataset):
assert split in ["train", "val", "test"]
self.root = root
self.split = split
self.transforms = transforms
self.checksum = checksum
if download:
self.download()
@@ -155,11 +159,11 @@ class LandCoverAI(VisionDataset):
"""Check integrity of dataset.
Returns:
True if dataset MD5s match, else False
True if dataset files are found and/or MD5s match, else False
"""
integrity: bool = check_integrity(
os.path.join(self.root, self.base_folder, self.filename),
self.md5,
self.md5 if self.checksum else None,
)
return integrity
@@ -175,7 +179,7 @@ class LandCoverAI(VisionDataset):
self.url,
os.path.join(self.root, self.base_folder),
filename=self.filename,
md5=self.md5,
md5=self.md5 if self.checksum else None,
)
# Generate train/val/test splits

Просмотреть файл

@@ -80,6 +80,7 @@ class VHR10(VisionDataset):
split: str = "positive",
transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
download: bool = False,
checksum: bool = False,
) -> None:
"""Initialize a new VHR-10 dataset instance.
@@ -89,6 +90,7 @@ class VHR10(VisionDataset):
transforms: a function/transform that takes input sample and its target as
entry and returns a transformed version
download: if True, download dataset and store it in the root directory
checksum: if True, check the MD5 of the downloaded files (may be slow)
Raises:
AssertionError: if ``split`` argument is invalid
@@ -100,6 +102,7 @@ class VHR10(VisionDataset):
self.root = root
self.split = split
self.transforms = transforms
self.checksum = checksum
if download:
self.download()
@@ -199,11 +202,11 @@ class VHR10(VisionDataset):
"""Check integrity of dataset.
Returns:
True if dataset MD5s match, else False
True if dataset files are found and/or MD5s match, else False
"""
image: bool = check_integrity(
os.path.join(self.root, self.base_folder, self.image_meta["filename"]),
self.image_meta["md5"],
self.image_meta["md5"] if self.checksum else None,
)
# Annotations only needed for "positive" image set
@@ -216,7 +219,7 @@ class VHR10(VisionDataset):
"NWPU VHR-10 dataset",
self.target_meta["filename"],
),
self.target_meta["md5"],
self.target_meta["md5"] if self.checksum else None,
)
return image and target
@@ -233,7 +236,7 @@ class VHR10(VisionDataset):
self.image_meta["file_id"],
os.path.join(self.root, self.base_folder),
self.image_meta["filename"],
self.image_meta["md5"],
self.image_meta["md5"] if self.checksum else None,
)
# Must be installed to extract RAR file
@@ -251,5 +254,5 @@ class VHR10(VisionDataset):
self.target_meta["url"],
os.path.join(self.root, self.base_folder, "NWPU VHR-10 dataset"),
self.target_meta["filename"],
self.target_meta["md5"],
self.target_meta["md5"] if self.checksum else None,
)

Просмотреть файл

@@ -36,14 +36,11 @@ class SEN12MS(GeoDataset):
.. code-block: bash
wget "ftp://m1474000:m1474000@dataserv.ub.tum.de/checksum.sha512"
for season in 1158_spring 1868_summer 1970_fall 2017_winter
do
for source in lc s1 s2
do
wget "ftp://m1474000:m1474000@dataserv.ub.tum.de/ROIs${season}_${source}.tar.gz"
shasum -a 512 "ROIs${season}_${source}.tar.gz"
tar xvzf "ROIs${season}_${source}.tar.gz"
done
done
@@ -55,8 +52,7 @@ class SEN12MS(GeoDataset):
or manually downloaded from https://dataserv.ub.tum.de/s/m1474000
and https://github.com/schmitt-muc/SEN12MS/tree/master/splits.
This download will likely take several hours. The checksums.sha512
file should be used to confirm the integrity of the downloads.
This download will likely take several hours.
""" # noqa: E501
base_folder = "sen12ms"
@@ -76,12 +72,29 @@ class SEN12MS(GeoDataset):
"train_list.txt",
"test_list.txt",
]
md5s = [
"6e2e8fa8b8cba77ddab49fd20ff5c37b",
"fba019bb27a08c1db96b31f718c34d79",
"d58af2c15a16f376eb3308dc9b685af2",
"2c5bd80244440b6f9d54957c6b1f23d4",
"01044b7f58d33570c6b57fec28a3d449",
"4dbaf72ecb704a4794036fe691427ff3",
"9b126a68b0e3af260071b3139cb57cee",
"19132e0aab9d4d6862fd42e8e6760847",
"b8f117818878da86b5f5e06400eb1866",
"0fa0420ef7bcfe4387c7e6fe226dc728",
"bb8cbfc16b95a4f054a3d5380e0130ed",
"3807545661288dcca312c9c538537b63",
"0a68d4e1eb24f128fccdb930000b2546",
"c7faad064001e646445c4c634169484d",
]
def __init__(
self,
root: str = "data",
split: str = "train",
transforms: Optional[Callable[[Dict[str, Tensor]], Dict[str, Tensor]]] = None,
checksum: bool = False,
) -> None:
"""Initialize a new SEN12MS dataset instance.
@@ -90,6 +103,7 @@ class SEN12MS(GeoDataset):
split: one of "train" or "test"
transforms: a function/transform that takes input sample and its target as
entry and returns a transformed version
checksum: if True, check the MD5 of the downloaded files (may be slow)
Raises:
AssertionError: if ``split`` argument is invalid
@@ -98,7 +112,9 @@ class SEN12MS(GeoDataset):
assert split in ["train", "test"]
self.root = root
self.split = split
self.transforms = transforms
self.checksum = checksum
if not self._check_integrity():
raise RuntimeError("Dataset not found.")
@@ -169,10 +185,10 @@ class SEN12MS(GeoDataset):
"""Check integrity of dataset.
Returns:
True if files exist, else False
True if dataset files are found and/or MD5s match, else False
"""
# We could also check md5s, but it would take ~30 min to compute
for filename in self.filenames:
if not check_integrity(os.path.join(self.root, self.base_folder, filename)):
for filename, md5 in zip(self.filenames, self.md5s):
filepath = os.path.join(self.root, self.base_folder, filename)
if not check_integrity(filepath, md5 if self.checksum else None):
return False
return True