Mirror of https://github.com/microsoft/torchgeo.git
Feature/refactor vector root to paths (#1597)
* Make RasterDataset accept list of files
* Fix check if str
* Use isdir and isfile
* Rename root to paths and update type hint
* Update children of RasterDataset methods using root
* Fix check to cast str to list
* Update conf files for RasterDatasets
* Add initial suggested test
* Add workaround for lists LandCoverAIBase
* Add method handle_nonlocal_path for users to override
* Raise RuntimeError to support existing tests
* Remove redundant cast to set
* Remove required os.exists for paths
* Revert "Remove required os.exists for paths"

  This reverts commit 84bf62b944326c33d5ba8efdcab615c65b124792.

* Use arg as positional argument, not kwarg
* Improve comments and logs about arg paths
* Remove misleading comment
* Change type hint of 'paths' to Iterable
* Remove premature handling of non-local paths
* Replace root with paths in docstrings
* Add versionadded to list_files docstring
* Add versionchanged to docstrings
* Update type of paths in children of Raster
* Replace docstring for paths in all raster
* Swap root with paths for conf files for raster
* Add newline before versionchanged
* Revert name to root in conf for ChesapeakeCVPR
* Simplify EUDEM tests
* paths must be a string if you want autodownload support
* Convert list_files to a property
* Fix type hints
* Test with a real empty directory
* Move property `files` up to GeoDataset
* Rename root to paths for VectorDataset
* Fix mypy
* Fix tests
* Delete duplicate code
* Fix test coverage
* Document name change

---------

Co-authored-by: Adrian Tofting <adriantofting@mobmob14994.hq.k.grp>
Co-authored-by: Adrian Tofting <adrian@vake.ai>
Co-authored-by: Adam J. Stewart <ajstewart426@gmail.com>
Parent: 6ae0d78448
Commit: 3532f78383
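Taken together, the diff below boils down to one API change: datasets that used to take a single root directory now take paths, which may be a single path or an iterable of directories and files, passed positionally. A minimal sketch of the call-site difference (directory names here are hypothetical, and note that auto-download still requires a plain string, per the commit bullets above):

from torchgeo.datasets import CanadianBuildingFootprints

# Old (pre-0.5): a single root directory, usually passed by keyword
ds = CanadianBuildingFootprints(root="data")

# New (0.5): the first positional argument is `paths`
ds = CanadianBuildingFootprints("data")

# Datasets that do not need to download or verify archives can also take
# an iterable of directories and/or individual files, e.g. (hypothetical):
# ds = SomeVectorDataset(["data/a", "data/b/tile.geojson"])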
--- a/tests/datasets/test_canadian_building_footprints.py
+++ b/tests/datasets/test_canadian_building_footprints.py
@@ -61,7 +61,7 @@ class TestCanadianBuildingFootprints:
         assert isinstance(ds, UnionDataset)

     def test_already_downloaded(self, dataset: CanadianBuildingFootprints) -> None:
-        CanadianBuildingFootprints(root=dataset.root, download=True)
+        CanadianBuildingFootprints(dataset.paths, download=True)

     def test_plot(self, dataset: CanadianBuildingFootprints) -> None:
         query = dataset.bounds
--- a/tests/datasets/test_chesapeake.py
+++ b/tests/datasets/test_chesapeake.py
@@ -141,7 +141,7 @@ class TestChesapeakeCVPR:
         )
         monkeypatch.setattr(
             ChesapeakeCVPR,
-            "files",
+            "_files",
             ["de_1m_2013_extended-debuffered-test_tiles", "spatial_index.geojson"],
         )
         root = str(tmp_path)
--- a/tests/datasets/test_enviroatlas.py
+++ b/tests/datasets/test_enviroatlas.py
@@ -47,7 +47,7 @@ class TestEnviroAtlas:
         )
         monkeypatch.setattr(
             EnviroAtlas,
-            "files",
+            "_files",
             ["pittsburgh_pa-2010_1m-train_tiles-debuffered", "spatial_index.geojson"],
         )
         root = str(tmp_path)
--- a/tests/datasets/test_openbuildings.py
+++ b/tests/datasets/test_openbuildings.py
@@ -37,7 +37,7 @@ class TestOpenBuildings:

         monkeypatch.setattr(OpenBuildings, "md5s", md5s)
         transforms = nn.Identity()
-        return OpenBuildings(root=root, transforms=transforms)
+        return OpenBuildings(root, transforms=transforms)

     def test_no_shapes_to_rasterize(
         self, dataset: OpenBuildings, tmp_path: Path
@@ -61,19 +61,19 @@ class TestOpenBuildings:
         with pytest.raises(
             RuntimeError, match="have manually downloaded the dataset as suggested "
         ):
-            OpenBuildings(root=false_root)
+            OpenBuildings(false_root)

     def test_corrupted(self, dataset: OpenBuildings, tmp_path: Path) -> None:
         with open(os.path.join(tmp_path, "000_buildings.csv.gz"), "w") as f:
             f.write("bad")
         with pytest.raises(RuntimeError, match="Dataset found, but corrupted."):
-            OpenBuildings(dataset.root, checksum=True)
+            OpenBuildings(dataset.paths, checksum=True)

     def test_no_meta_data_found(self, tmp_path: Path) -> None:
         false_root = os.path.join(tmp_path, "empty")
         os.makedirs(false_root)
         with pytest.raises(FileNotFoundError, match="Meta data file"):
-            OpenBuildings(root=false_root)
+            OpenBuildings(false_root)

     def test_nothing_in_index(self, dataset: OpenBuildings, tmp_path: Path) -> None:
         # change meta data to another 'title_url' so that there is no match found
@@ -85,7 +85,7 @@ class TestOpenBuildings:
             json.dump(content, f)

         with pytest.raises(FileNotFoundError, match="data was found in"):
-            OpenBuildings(dataset.root)
+            OpenBuildings(dataset.paths)

     def test_getitem(self, dataset: OpenBuildings) -> None:
         x = dataset[dataset.bounds]
--- a/torchgeo/datasets/canadian_building_footprints.py
+++ b/torchgeo/datasets/canadian_building_footprints.py
@@ -4,7 +4,8 @@
 """Canadian Building Footprints dataset."""

 import os
-from typing import Any, Callable, Optional
+from collections.abc import Iterable
+from typing import Any, Callable, Optional, Union

 import matplotlib.pyplot as plt
 from matplotlib.figure import Figure
@@ -60,7 +61,7 @@ class CanadianBuildingFootprints(VectorDataset):

     def __init__(
         self,
-        root: str = "data",
+        paths: Union[str, Iterable[str]] = "data",
         crs: Optional[CRS] = None,
         res: float = 0.00001,
         transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None,
@@ -70,7 +71,7 @@ class CanadianBuildingFootprints(VectorDataset):
         """Initialize a new Dataset instance.

         Args:
-            root: root directory where dataset can be found
+            paths: one or more root directories to search or files to load
             crs: :term:`coordinate reference system (CRS)` to warp to
                 (defaults to the CRS of the first file found)
             res: resolution of the dataset in units of CRS
@@ -83,8 +84,11 @@ class CanadianBuildingFootprints(VectorDataset):
             FileNotFoundError: if no files are found in ``root``
             RuntimeError: if ``download=False`` and data is not found, or
                 ``checksum=True`` and checksums don't match
+
+        .. versionchanged:: 0.5
+           *root* was renamed to *paths*.
         """
-        self.root = root
+        self.paths = paths
         self.checksum = checksum

         if download:
@@ -96,7 +100,7 @@ class CanadianBuildingFootprints(VectorDataset):
                 + "You can use download=True to download it"
             )

-        super().__init__(root, crs, res, transforms)
+        super().__init__(paths, crs, res, transforms)

     def _check_integrity(self) -> bool:
         """Check integrity of dataset.
@@ -104,8 +108,9 @@ class CanadianBuildingFootprints(VectorDataset):
         Returns:
             True if dataset files are found and/or MD5s match, else False
         """
+        assert isinstance(self.paths, str)
         for prov_terr, md5 in zip(self.provinces_territories, self.md5s):
-            filepath = os.path.join(self.root, prov_terr + ".zip")
+            filepath = os.path.join(self.paths, prov_terr + ".zip")
             if not check_integrity(filepath, md5 if self.checksum else None):
                 return False
         return True
@@ -115,11 +120,11 @@ class CanadianBuildingFootprints(VectorDataset):
         if self._check_integrity():
             print("Files already downloaded and verified")
             return
-
+        assert isinstance(self.paths, str)
         for prov_terr, md5 in zip(self.provinces_territories, self.md5s):
             download_and_extract_archive(
                 self.url + prov_terr + ".zip",
-                self.root,
+                self.paths,
                 md5=md5 if self.checksum else None,
             )

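The assert isinstance(self.paths, str) lines above are mypy narrowing, not just defensive checks: the download and checksum code still assumes a single root directory, so auto-download only supports a plain string (one of the commit bullets says exactly this: "paths must be a string if you want autodownload support"). A sketch of the consequence, with hypothetical directory names:

# A single string root keeps auto-download working
ds = CanadianBuildingFootprints("data", download=True)

# Passing multiple paths to a downloadable dataset trips the assert,
# since there is no single directory to download into
ds = CanadianBuildingFootprints(["dir_a", "dir_b"], download=True)  # AssertionError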
--- a/torchgeo/datasets/chesapeake.py
+++ b/torchgeo/datasets/chesapeake.py
@@ -495,7 +495,7 @@ class ChesapeakeCVPR(GeoDataset):
     )

     # these are used to check the integrity of the dataset
-    files = [
+    _files = [
         "de_1m_2013_extended-debuffered-test_tiles",
         "de_1m_2013_extended-debuffered-train_tiles",
         "de_1m_2013_extended-debuffered-val_tiles",
@@ -704,7 +704,7 @@ class ChesapeakeCVPR(GeoDataset):
             return os.path.exists(os.path.join(self.root, filename))

         # Check if the extracted files already exist
-        if all(map(exists, self.files)):
+        if all(map(exists, self._files)):
             return

         # Check if the zip files have already been downloaded
--- a/torchgeo/datasets/enviroatlas.py
+++ b/torchgeo/datasets/enviroatlas.py
@@ -80,7 +80,7 @@ class EnviroAtlas(GeoDataset):
     )

     # these are used to check the integrity of the dataset
-    files = [
+    _files = [
         "austin_tx-2012_1m-test_tiles-debuffered",
         "austin_tx-2012_1m-val5_tiles-debuffered",
         "durham_nc-2012_1m-test_tiles-debuffered",
@@ -422,7 +422,7 @@ class EnviroAtlas(GeoDataset):
             return os.path.exists(os.path.join(self.root, "enviroatlas_lotp", filename))

         # Check if the extracted files already exist
-        if all(map(exists, self.files)):
+        if all(map(exists, self._files)):
            return

         # Check if the zip files have already been downloaded
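One side effect worth calling out before the geo.py hunks: ChesapeakeCVPR and EnviroAtlas rename their integrity-check list from files to _files because GeoDataset (below) now exposes a files property; a plain class attribute with the same name would shadow the inherited property and change its meaning.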
--- a/torchgeo/datasets/geo.py
+++ b/torchgeo/datasets/geo.py
@@ -72,9 +72,17 @@ class GeoDataset(Dataset[dict[str, Any]], abc.ABC):
         dataset = landsat7 | landsat8
     """

+    paths: Union[str, Iterable[str]]
     _crs = CRS.from_epsg(4326)
     _res = 0.0

+    #: Glob expression used to search for files.
+    #:
+    #: This expression should be specific enough that it will not pick up files from
+    #: other datasets. It should not include a file extension, as the dataset may be in
+    #: a different file format than what it was originally downloaded as.
+    filename_glob = "*"
+
     # NOTE: according to the Python docs:
     #
     # * https://docs.python.org/3/library/exceptions.html#NotImplementedError
@@ -269,17 +277,36 @@ class GeoDataset(Dataset[dict[str, Any]], abc.ABC):
         print(f"Converting {self.__class__.__name__} res from {self.res} to {new_res}")
         self._res = new_res

+    @property
+    def files(self) -> set[str]:
+        """A list of all files in the dataset.
+
+        Returns:
+            All files in the dataset.
+
+        .. versionadded:: 0.5
+        """
+        # Make iterable
+        if isinstance(self.paths, str):
+            paths: Iterable[str] = [self.paths]
+        else:
+            paths = self.paths
+
+        # Using set to remove any duplicates if directories are overlapping
+        files: set[str] = set()
+        for path in paths:
+            if os.path.isdir(path):
+                pathname = os.path.join(path, "**", self.filename_glob)
+                files |= set(glob.iglob(pathname, recursive=True))
+            else:
+                files.add(path)
+
+        return files
+

 class RasterDataset(GeoDataset):
     """Abstract base class for :class:`GeoDataset` stored as raster files."""

-    #: Glob expression used to search for files.
-    #:
-    #: This expression should be specific enough that it will not pick up files from
-    #: other datasets. It should not include a file extension, as the dataset may be in
-    #: a different file format than what it was originally downloaded as.
-    filename_glob = "*"
-
     #: Regular expression used to extract date from filename.
     #:
     #: The expression should use named groups. The expression may contain any number of
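This is the heart of the change: the files property, formerly defined on RasterDataset (and removed from it in the next hunk), now lives on GeoDataset, so raster and vector datasets share one file-discovery path. Directories in paths are globbed recursively against filename_glob, anything that is not a directory is treated as a file and kept as-is, and the set union collapses duplicates from overlapping directories. A rough sketch of the behavior, with a hypothetical subclass and hypothetical paths:

# Hypothetical layout: two overlapping directories plus one loose file
ds = SomeGeoDataset(  # any GeoDataset subclass; the name is made up
    paths=["/data/scenes", "/data/scenes/2019", "/data/extra/tile.tif"]
)

# Matches under /data/scenes/2019 are found twice (once via each
# directory) but collapsed by the set union; the loose file is included
# without being checked against filename_glob
print(sorted(ds.files))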
@@ -423,32 +450,6 @@ class RasterDataset(GeoDataset):
         self._crs = cast(CRS, crs)
         self._res = cast(float, res)

-    @property
-    def files(self) -> set[str]:
-        """A list of all files in the dataset.
-
-        Returns:
-            All files in the dataset.
-
-        .. versionadded:: 0.5
-        """
-        # Make iterable
-        if isinstance(self.paths, str):
-            paths: Iterable[str] = [self.paths]
-        else:
-            paths = self.paths
-
-        # Using set to remove any duplicates if directories are overlapping
-        files: set[str] = set()
-        for path in paths:
-            if os.path.isdir(path):
-                pathname = os.path.join(path, "**", self.filename_glob)
-                files |= set(glob.iglob(pathname, recursive=True))
-            else:
-                files.add(path)
-
-        return files
-
     def __getitem__(self, query: BoundingBox) -> dict[str, Any]:
         """Retrieve image/mask and metadata indexed by query.
@@ -571,16 +572,9 @@ class RasterDataset(GeoDataset):
 class VectorDataset(GeoDataset):
     """Abstract base class for :class:`GeoDataset` stored as vector files."""

-    #: Glob expression used to search for files.
-    #:
-    #: This expression should be specific enough that it will not pick up files from
-    #: other datasets. It should not include a file extension, as the dataset may be in
-    #: a different file format than what it was originally downloaded as.
-    filename_glob = "*"
-
     def __init__(
         self,
-        root: str = "data",
+        paths: Union[str, Iterable[str]] = "data",
         crs: Optional[CRS] = None,
         res: float = 0.0001,
         transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None,
@@ -589,7 +583,7 @@ class VectorDataset(GeoDataset):
         """Initialize a new Dataset instance.

         Args:
-            root: root directory where dataset can be found
+            paths: one or more root directories to search or files to load
             crs: :term:`coordinate reference system (CRS)` to warp to
                 (defaults to the CRS of the first file found)
             res: resolution of the dataset in units of CRS
@@ -603,16 +597,18 @@ class VectorDataset(GeoDataset):

         .. versionadded:: 0.4
            The *label_name* parameter.
+
+        .. versionchanged:: 0.5
+           *root* was renamed to *paths*.
         """
         super().__init__(transforms)

-        self.root = root
+        self.paths = paths
         self.label_name = label_name

         # Populate the dataset index
         i = 0
-        pathname = os.path.join(root, "**", self.filename_glob)
-        for filepath in glob.iglob(pathname, recursive=True):
+        for filepath in self.files:
             try:
                 with fiona.open(filepath) as src:
                     if crs is None:
@@ -633,7 +629,7 @@ class VectorDataset(GeoDataset):
                     i += 1

         if i == 0:
-            msg = f"No {self.__class__.__name__} data was found in `root='{root}'`"
+            msg = f"No {self.__class__.__name__} data was found in `root='{paths}'`"
             raise FileNotFoundError(msg)

         self._crs = crs
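Because VectorDataset.__init__ now builds its spatial index from self.files instead of globbing root itself, a subclass only has to set filename_glob, and callers can hand it loose vector files directly. A sketch under those assumptions (the class name and file paths are hypothetical):

from torchgeo.datasets import VectorDataset

class Buildings(VectorDataset):
    # Only the glob is dataset-specific; file discovery lives in GeoDataset.files
    filename_glob = "*_buildings.*"

# Directories are searched recursively; bare file paths are indexed as-is
ds = Buildings(["data/region1", "data/extra/z_buildings.geojson"])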
--- a/torchgeo/datasets/openbuildings.py
+++ b/torchgeo/datasets/openbuildings.py
@@ -7,7 +7,8 @@ import glob
 import json
 import os
 import sys
-from typing import Any, Callable, Optional, cast
+from collections.abc import Iterable
+from typing import Any, Callable, Optional, Union, cast

 import fiona
 import fiona.transform
@@ -205,7 +206,7 @@ class OpenBuildings(VectorDataset):

     def __init__(
         self,
-        root: str = "data",
+        paths: Union[str, Iterable[str]] = "data",
         crs: Optional[CRS] = None,
         res: float = 0.0001,
         transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None,
@@ -214,7 +215,7 @@ class OpenBuildings(VectorDataset):
         """Initialize a new Dataset instance.

         Args:
-            root: root directory where dataset can be found
+            paths: one or more root directories to search or files to load
             crs: :term:`coordinate reference system (CRS)` to warp to
                 (defaults to the CRS of the first file found)
             res: resolution of the dataset in units of CRS
@@ -224,11 +225,13 @@ class OpenBuildings(VectorDataset):

         Raises:
             FileNotFoundError: if no files are found in ``root``
+
+        .. versionchanged:: 0.5
+           *root* was renamed to *paths*.
         """
-        self.root = root
+        self.paths = paths
         self.res = res
         self.checksum = checksum
-        self.root = root
         self.res = res
         self.transforms = transforms

@@ -237,7 +240,8 @@ class OpenBuildings(VectorDataset):
         # Create an R-tree to index the dataset using the polygon centroid as bounds
         self.index = Index(interleaved=False, properties=Property(dimension=3))

-        with open(os.path.join(root, "tiles.geojson")) as f:
+        assert isinstance(self.paths, str)
+        with open(os.path.join(self.paths, "tiles.geojson")) as f:
             data = json.load(f)

         features = data["features"]
@@ -245,7 +249,7 @@ class OpenBuildings(VectorDataset):
             feature["properties"]["tile_url"].split("/")[-1] for feature in features
         ]  # get csv filename

-        polygon_files = glob.glob(os.path.join(self.root, self.zipfile_glob))
+        polygon_files = glob.glob(os.path.join(self.paths, self.zipfile_glob))
         polygon_filenames = [f.split(os.sep)[-1] for f in polygon_files]

         matched_features = [
@@ -274,14 +278,14 @@ class OpenBuildings(VectorDataset):
             coords = (minx, maxx, miny, maxy, mint, maxt)

             filepath = os.path.join(
-                self.root, feature["properties"]["tile_url"].split("/")[-1]
+                self.paths, feature["properties"]["tile_url"].split("/")[-1]
             )
             self.index.insert(i, coords, filepath)
             i += 1

         if i == 0:
             raise FileNotFoundError(
-                f"No {self.__class__.__name__} data was found in '{self.root}'"
+                f"No {self.__class__.__name__} data was found in '{self.paths}'"
             )

         self._crs = crs
@@ -398,7 +402,8 @@ class OpenBuildings(VectorDataset):
             FileNotFoundError: if metadata file is not found in root
         """
         # Check if the zip files have already been downloaded and checksum
-        pathname = os.path.join(self.root, self.zipfile_glob)
+        assert isinstance(self.paths, str)
+        pathname = os.path.join(self.paths, self.zipfile_glob)
         i = 0
         for zipfile in glob.iglob(pathname):
             filename = os.path.basename(zipfile)
@@ -410,14 +415,14 @@ class OpenBuildings(VectorDataset):
                 return

         # check if the metadata file has been downloaded
-        if not os.path.exists(os.path.join(self.root, self.meta_data_filename)):
+        if not os.path.exists(os.path.join(self.paths, self.meta_data_filename)):
             raise FileNotFoundError(
                 f"Meta data file {self.meta_data_filename} "
-                f"not found in in `root={self.root}`."
+                f"not found in in `root={self.paths}`."
             )

         raise RuntimeError(
-            f"Dataset not found in `root={self.root}` "
+            f"Dataset not found in `root={self.paths}` "
             "either specify a different `root` directory or make sure you "
             "have manually downloaded the dataset as suggested in the documentation."
         )