зеркало из https://github.com/microsoft/torchgeo.git
Datasets: support os.PathLike (#2273)
This commit is contained in:
Родитель
d4a7b7286f
Коммит
891f192637
|
@@ -38,7 +38,7 @@ class CustomGeoDataset(GeoDataset):
|
|||
bounds: BoundingBox = BoundingBox(0, 1, 2, 3, 4, 5),
|
||||
crs: CRS = CRS.from_epsg(4087),
|
||||
res: float = 1,
|
||||
paths: str | Path | Iterable[str | Path] | None = None,
|
||||
paths: str | os.PathLike[str] | Iterable[str | os.PathLike[str]] | None = None,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
self.index.insert(0, tuple(bounds))
|
||||
|
|
|
@@ -5,7 +5,6 @@
|
|||
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
from collections.abc import Callable, Iterable
|
||||
from typing import Any
|
||||
|
||||
|
@@ -106,7 +105,7 @@ class AbovegroundLiveWoodyBiomassDensity(RasterDataset):
|
|||
|
||||
def _download(self) -> None:
|
||||
"""Download the dataset."""
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
download_url(self.url, self.paths, self.base_filename)
|
||||
|
||||
with open(os.path.join(self.paths, self.base_filename)) as f:
|
||||
|
|
|
@@ -4,7 +4,6 @@
|
|||
"""AgriFieldNet India Challenge dataset."""
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
from collections.abc import Callable, Iterable, Sequence
|
||||
from typing import Any, ClassVar, cast
|
||||
|
@@ -181,10 +180,10 @@ class AgriFieldNet(RasterDataset):
|
|||
Returns:
|
||||
data, label, and field ids at that index
|
||||
"""
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
|
||||
hits = self.index.intersection(tuple(query), objects=True)
|
||||
filepaths = cast(list[Path], [hit.object for hit in hits])
|
||||
filepaths = cast(list[str], [hit.object for hit in hits])
|
||||
|
||||
if not filepaths:
|
||||
raise IndexError(
|
||||
|
@@ -246,7 +245,7 @@ class AgriFieldNet(RasterDataset):
|
|||
|
||||
def _download(self) -> None:
|
||||
"""Download the dataset."""
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
os.makedirs(self.paths, exist_ok=True)
|
||||
azcopy = which('azcopy')
|
||||
azcopy('sync', f'{self.url}', self.paths, '--recursive=true')
|
||||
|
|
|
@@ -4,7 +4,6 @@
|
|||
"""Canadian Building Footprints dataset."""
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
from collections.abc import Callable, Iterable
|
||||
from typing import Any
|
||||
|
||||
|
@@ -105,7 +104,7 @@ class CanadianBuildingFootprints(VectorDataset):
|
|||
Returns:
|
||||
True if dataset files are found and/or MD5s match, else False
|
||||
"""
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
for prov_terr, md5 in zip(self.provinces_territories, self.md5s):
|
||||
filepath = os.path.join(self.paths, prov_terr + '.zip')
|
||||
if not check_integrity(filepath, md5 if self.checksum else None):
|
||||
|
@@ -117,7 +116,7 @@ class CanadianBuildingFootprints(VectorDataset):
|
|||
if self._check_integrity():
|
||||
print('Files already downloaded and verified')
|
||||
return
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
for prov_terr, md5 in zip(self.provinces_territories, self.md5s):
|
||||
download_and_extract_archive(
|
||||
self.url + prov_terr + '.zip',
|
||||
|
|
|
@@ -4,7 +4,6 @@
|
|||
"""CDL dataset."""
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
from collections.abc import Callable, Iterable
|
||||
from typing import Any, ClassVar
|
||||
|
||||
|
@@ -295,7 +294,7 @@ class CDL(RasterDataset):
|
|||
|
||||
# Check if the zip files have already been downloaded
|
||||
exists = []
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
for year in self.years:
|
||||
pathname = os.path.join(
|
||||
self.paths, self.zipfile_glob.replace('*', str(year))
|
||||
|
@@ -328,7 +327,7 @@ class CDL(RasterDataset):
|
|||
|
||||
def _extract(self) -> None:
|
||||
"""Extract the dataset."""
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
for year in self.years:
|
||||
zipfile_name = self.zipfile_glob.replace('*', str(year))
|
||||
pathname = os.path.join(self.paths, zipfile_name)
|
||||
|
|
|
@@ -5,7 +5,6 @@
|
|||
|
||||
import glob
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable, Iterable, Sequence
|
||||
|
@@ -173,7 +172,7 @@ class Chesapeake(RasterDataset, ABC):
|
|||
return
|
||||
|
||||
# Check if the zip file has already been downloaded
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
if glob.glob(os.path.join(self.paths, '**', '*.zip'), recursive=True):
|
||||
self._extract()
|
||||
return
|
||||
|
@@ -195,7 +194,7 @@ class Chesapeake(RasterDataset, ABC):
|
|||
|
||||
def _extract(self) -> None:
|
||||
"""Extract the dataset."""
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
for file in glob.iglob(os.path.join(self.paths, '**', '*.zip'), recursive=True):
|
||||
extract_archive(file)
|
||||
|
||||
|
|
|
@@ -4,7 +4,6 @@
|
|||
"""CMS Global Mangrove Canopy dataset."""
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
|
@@ -229,7 +228,7 @@ class CMSGlobalMangroveCanopy(RasterDataset):
|
|||
return
|
||||
|
||||
# Check if the zip file has already been downloaded
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
pathname = os.path.join(self.paths, self.zipfile)
|
||||
if os.path.exists(pathname):
|
||||
if self.checksum and not check_integrity(pathname, self.md5):
|
||||
|
@@ -241,7 +240,7 @@ class CMSGlobalMangroveCanopy(RasterDataset):
|
|||
|
||||
def _extract(self) -> None:
|
||||
"""Extract the dataset."""
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
pathname = os.path.join(self.paths, self.zipfile)
|
||||
extract_archive(pathname)
|
||||
|
||||
|
|
|
@@ -5,7 +5,6 @@
|
|||
|
||||
import glob
|
||||
import os
|
||||
import pathlib
|
||||
from collections.abc import Callable, Iterable
|
||||
from typing import Any
|
||||
|
||||
|
@@ -113,7 +112,7 @@ class Esri2020(RasterDataset):
|
|||
return
|
||||
|
||||
# Check if the zip files have already been downloaded
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
pathname = os.path.join(self.paths, self.zipfile)
|
||||
if glob.glob(pathname):
|
||||
self._extract()
|
||||
|
@@ -133,7 +132,7 @@ class Esri2020(RasterDataset):
|
|||
|
||||
def _extract(self) -> None:
|
||||
"""Extract the dataset."""
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
extract_archive(os.path.join(self.paths, self.zipfile))
|
||||
|
||||
def plot(
|
||||
|
|
|
@@ -5,7 +5,6 @@
|
|||
|
||||
import glob
|
||||
import os
|
||||
import pathlib
|
||||
from collections.abc import Callable, Iterable
|
||||
from typing import Any, ClassVar
|
||||
|
||||
|
@@ -117,7 +116,7 @@ class EUDEM(RasterDataset):
|
|||
return
|
||||
|
||||
# Check if the zip files have already been downloaded
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
pathname = os.path.join(self.paths, self.zipfile_glob)
|
||||
if glob.glob(pathname):
|
||||
for zipfile in glob.iglob(pathname):
|
||||
|
|
|
@@ -5,7 +5,6 @@
|
|||
|
||||
import csv
|
||||
import os
|
||||
import pathlib
|
||||
from collections.abc import Callable, Iterable
|
||||
from typing import Any
|
||||
|
||||
|
@@ -140,7 +139,7 @@ class EuroCrops(VectorDataset):
|
|||
if self.files and not self.checksum:
|
||||
return True
|
||||
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
|
||||
filepath = os.path.join(self.paths, self.hcat_fname)
|
||||
if not check_integrity(filepath, self.hcat_md5 if self.checksum else None):
|
||||
|
@@ -157,7 +156,7 @@ class EuroCrops(VectorDataset):
|
|||
if self._check_integrity():
|
||||
print('Files already downloaded and verified')
|
||||
return
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
download_url(
|
||||
self.base_url + self.hcat_fname,
|
||||
self.paths,
|
||||
|
@@ -179,7 +178,7 @@ class EuroCrops(VectorDataset):
|
|||
(defaults to all classes)
|
||||
"""
|
||||
if not classes:
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
classes = []
|
||||
filepath = os.path.join(self.paths, self.hcat_fname)
|
||||
with open(filepath) as f:
|
||||
|
|
|
@@ -8,7 +8,6 @@ import fnmatch
|
|||
import functools
|
||||
import glob
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import sys
|
||||
import warnings
|
||||
|
@@ -300,7 +299,7 @@ class GeoDataset(Dataset[dict[str, Any]], abc.ABC):
|
|||
.. versionadded:: 0.5
|
||||
"""
|
||||
# Make iterable
|
||||
if isinstance(self.paths, str | pathlib.Path):
|
||||
if isinstance(self.paths, str | os.PathLike):
|
||||
paths: Iterable[Path] = [self.paths]
|
||||
else:
|
||||
paths = self.paths
|
||||
|
@@ -521,7 +520,7 @@ class RasterDataset(GeoDataset):
|
|||
IndexError: if query is not found in the index
|
||||
"""
|
||||
hits = self.index.intersection(tuple(query), objects=True)
|
||||
filepaths = cast(list[Path], [hit.object for hit in hits])
|
||||
filepaths = cast(list[str], [hit.object for hit in hits])
|
||||
|
||||
if not filepaths:
|
||||
raise IndexError(
|
||||
|
@@ -564,7 +563,7 @@ class RasterDataset(GeoDataset):
|
|||
|
||||
def _merge_files(
|
||||
self,
|
||||
filepaths: Sequence[Path],
|
||||
filepaths: Sequence[str],
|
||||
query: BoundingBox,
|
||||
band_indexes: Sequence[int] | None = None,
|
||||
) -> Tensor:
|
||||
|
|
|
@@ -5,7 +5,6 @@
|
|||
|
||||
import glob
|
||||
import os
|
||||
import pathlib
|
||||
from collections.abc import Callable, Iterable
|
||||
from typing import Any, ClassVar, cast
|
||||
|
||||
|
@@ -193,7 +192,7 @@ class GlobBiomass(RasterDataset):
|
|||
IndexError: if query is not found in the index
|
||||
"""
|
||||
hits = self.index.intersection(tuple(query), objects=True)
|
||||
filepaths = cast(list[Path], [hit.object for hit in hits])
|
||||
filepaths = cast(list[str], [hit.object for hit in hits])
|
||||
|
||||
if not filepaths:
|
||||
raise IndexError(
|
||||
|
@@ -221,7 +220,7 @@ class GlobBiomass(RasterDataset):
|
|||
return
|
||||
|
||||
# Check if the zip files have already been downloaded
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
pathname = os.path.join(self.paths, f'*_{self.measurement}.zip')
|
||||
if glob.glob(pathname):
|
||||
for zipfile in glob.iglob(pathname):
|
||||
|
|
|
@@ -5,7 +5,6 @@
|
|||
|
||||
import glob
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
from collections.abc import Callable, Iterable, Sequence
|
||||
from typing import Any, ClassVar, cast
|
||||
|
@@ -94,7 +93,7 @@ class L7IrishMask(RasterDataset):
|
|||
filename_regex = re.compile(L7IrishImage.filename_regex, re.VERBOSE)
|
||||
index = Index(interleaved=False, properties=Property(dimension=3))
|
||||
for hit in self.index.intersection(self.index.bounds, objects=True):
|
||||
dirname = os.path.dirname(cast(Path, hit.object))
|
||||
dirname = os.path.dirname(cast(str, hit.object))
|
||||
image = glob.glob(os.path.join(dirname, L7IrishImage.filename_glob))[0]
|
||||
minx, maxx, miny, maxy, mint, maxt = hit.bounds
|
||||
if match := re.match(filename_regex, os.path.basename(image)):
|
||||
|
@@ -229,7 +228,7 @@ class L7Irish(IntersectionDataset):
|
|||
def _verify(self) -> None:
|
||||
"""Verify the integrity of the dataset."""
|
||||
# Check if the extracted files already exist
|
||||
if not isinstance(self.paths, str | pathlib.Path):
|
||||
if not isinstance(self.paths, str | os.PathLike):
|
||||
return
|
||||
|
||||
for classname in [L7IrishImage, L7IrishMask]:
|
||||
|
@@ -262,7 +261,7 @@ class L7Irish(IntersectionDataset):
|
|||
|
||||
def _extract(self) -> None:
|
||||
"""Extract the dataset."""
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
pathname = os.path.join(self.paths, '*.tar.gz')
|
||||
for tarfile in glob.iglob(pathname):
|
||||
extract_archive(tarfile)
|
||||
|
|
|
@@ -5,7 +5,6 @@
|
|||
|
||||
import glob
|
||||
import os
|
||||
import pathlib
|
||||
from collections.abc import Callable, Iterable, Sequence
|
||||
from typing import Any, ClassVar
|
||||
|
||||
|
@@ -174,7 +173,7 @@ class L8Biome(IntersectionDataset):
|
|||
def _verify(self) -> None:
|
||||
"""Verify the integrity of the dataset."""
|
||||
# Check if the extracted files already exist
|
||||
if not isinstance(self.paths, str | pathlib.Path):
|
||||
if not isinstance(self.paths, str | os.PathLike):
|
||||
return
|
||||
|
||||
for classname in [L8BiomeImage, L8BiomeMask]:
|
||||
|
@@ -207,7 +206,7 @@ class L8Biome(IntersectionDataset):
|
|||
|
||||
def _extract(self) -> None:
|
||||
"""Extract the dataset."""
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
pathname = os.path.join(self.paths, '*.tar.gz')
|
||||
for tarfile in glob.iglob(pathname):
|
||||
extract_archive(tarfile)
|
||||
|
|
|
@@ -254,7 +254,7 @@ class LandCoverAIGeo(LandCoverAIBase, RasterDataset):
|
|||
IndexError: if query is not found in the index
|
||||
"""
|
||||
hits = self.index.intersection(tuple(query), objects=True)
|
||||
img_filepaths = cast(list[Path], [hit.object for hit in hits])
|
||||
img_filepaths = cast(list[str], [hit.object for hit in hits])
|
||||
mask_filepaths = [
|
||||
str(path).replace('images', 'masks') for path in img_filepaths
|
||||
]
|
||||
|
|
|
@@ -5,7 +5,6 @@
|
|||
|
||||
import glob
|
||||
import os
|
||||
import pathlib
|
||||
from collections.abc import Callable, Iterable
|
||||
from typing import Any, ClassVar
|
||||
|
||||
|
@@ -192,7 +191,7 @@ class NLCD(RasterDataset):
|
|||
exists = []
|
||||
for year in self.years:
|
||||
zipfile_year = self.zipfile_glob.replace('*', str(year), 1)
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
pathname = os.path.join(self.paths, '**', zipfile_year)
|
||||
if glob.glob(pathname, recursive=True):
|
||||
exists.append(True)
|
||||
|
@@ -224,7 +223,7 @@ class NLCD(RasterDataset):
|
|||
"""Extract the dataset."""
|
||||
for year in self.years:
|
||||
zipfile_name = self.zipfile_glob.replace('*', str(year), 1)
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
pathname = os.path.join(self.paths, '**', zipfile_name)
|
||||
extract_archive(glob.glob(pathname, recursive=True)[0], self.paths)
|
||||
|
||||
|
|
|
@@ -6,7 +6,6 @@
|
|||
import glob
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
from collections.abc import Callable, Iterable
|
||||
from typing import Any, ClassVar, cast
|
||||
|
@@ -242,7 +241,7 @@ class OpenBuildings(VectorDataset):
|
|||
# Create an R-tree to index the dataset using the polygon centroid as bounds
|
||||
self.index = Index(interleaved=False, properties=Property(dimension=3))
|
||||
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
with open(os.path.join(self.paths, 'tiles.geojson')) as f:
|
||||
data = json.load(f)
|
||||
|
||||
|
@@ -305,7 +304,7 @@ class OpenBuildings(VectorDataset):
|
|||
IndexError: if query is not found in the index
|
||||
"""
|
||||
hits = self.index.intersection(tuple(query), objects=True)
|
||||
filepaths = cast(list[Path], [hit.object for hit in hits])
|
||||
filepaths = cast(list[str], [hit.object for hit in hits])
|
||||
|
||||
if not filepaths:
|
||||
raise IndexError(
|
||||
|
@@ -336,7 +335,7 @@ class OpenBuildings(VectorDataset):
|
|||
return sample
|
||||
|
||||
def _filter_geometries(
|
||||
self, query: BoundingBox, filepaths: list[Path]
|
||||
self, query: BoundingBox, filepaths: list[str]
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Filters a df read from the polygon csv file based on query and conf thresh.
|
||||
|
||||
|
@@ -398,7 +397,7 @@ class OpenBuildings(VectorDataset):
|
|||
def _verify(self) -> None:
|
||||
"""Verify the integrity of the dataset."""
|
||||
# Check if the zip files have already been downloaded and checksum
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
pathname = os.path.join(self.paths, self.zipfile_glob)
|
||||
i = 0
|
||||
for zipfile in glob.iglob(pathname):
|
||||
|
|
|
@@ -4,7 +4,6 @@
|
|||
"""South Africa Crop Type Competition Dataset."""
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
from collections.abc import Callable, Iterable, Sequence
|
||||
from typing import Any, ClassVar, cast
|
||||
|
@@ -161,11 +160,11 @@ class SouthAfricaCropType(RasterDataset):
|
|||
Returns:
|
||||
data and labels at that index
|
||||
"""
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
|
||||
# Get all files matching the given query
|
||||
hits = self.index.intersection(tuple(query), objects=True)
|
||||
filepaths = cast(list[Path], [hit.object for hit in hits])
|
||||
filepaths = cast(list[str], [hit.object for hit in hits])
|
||||
|
||||
if not filepaths:
|
||||
raise IndexError(
|
||||
|
@@ -253,7 +252,7 @@ class SouthAfricaCropType(RasterDataset):
|
|||
|
||||
def _download(self) -> None:
|
||||
"""Download the dataset."""
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
os.makedirs(self.paths, exist_ok=True)
|
||||
azcopy = which('azcopy')
|
||||
azcopy('sync', f'{self.url}', self.paths, '--recursive=true')
|
||||
|
|
|
@@ -3,7 +3,7 @@
|
|||
|
||||
"""South America Soybean Dataset."""
|
||||
|
||||
import pathlib
|
||||
import os
|
||||
from collections.abc import Callable, Iterable
|
||||
from typing import Any, ClassVar
|
||||
|
||||
|
@@ -113,7 +113,7 @@ class SouthAmericaSoybean(RasterDataset):
|
|||
# Check if the extracted files already exist
|
||||
if self.files:
|
||||
return
|
||||
assert isinstance(self.paths, str | pathlib.Path)
|
||||
assert isinstance(self.paths, str | os.PathLike)
|
||||
|
||||
# Check if the user requested to download the dataset
|
||||
if not self.download:
|
||||
|
|
|
@@ -10,7 +10,6 @@ import collections
|
|||
import contextlib
|
||||
import importlib
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
|
@@ -42,7 +41,7 @@ __all__ = (
|
|||
)
|
||||
|
||||
|
||||
Path: TypeAlias = str | pathlib.Path
|
||||
Path: TypeAlias = str | os.PathLike[str]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
|
Загрузка…
Ссылка в новой задаче