зеркало из https://github.com/microsoft/torchgeo.git
Add CanadianBuildingFootprints dataset
This commit is contained in:
Родитель
b5db261dce
Коммит
54d42b9022
|
@ -10,6 +10,11 @@ Geospatial Datasets
|
||||||
|
|
||||||
:class:`GeoDataset` is designed for datasets that contain geospatial information, like latitude, longitude, coordinate system, and projection. Datasets containing this kind of information can be combined using :class:`ZipDataset`.
|
:class:`GeoDataset` is designed for datasets that contain geospatial information, like latitude, longitude, coordinate system, and projection. Datasets containing this kind of information can be combined using :class:`ZipDataset`.
|
||||||
|
|
||||||
|
Canadian Building Footprints
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. autoclass:: CanadianBuildingFootprints
|
||||||
|
|
||||||
Chesapeake Bay High-Resolution Land Cover Project
|
Chesapeake Bay High-Resolution Land Cover Project
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
|
|
@ -15,6 +15,7 @@ dependencies:
|
||||||
- pip:
|
- pip:
|
||||||
- affine
|
- affine
|
||||||
- black[colorama]>=21b
|
- black[colorama]>=21b
|
||||||
|
- fiona
|
||||||
- flake8
|
- flake8
|
||||||
- isort[colors]>=4.3.5
|
- isort[colors]>=4.3.5
|
||||||
- mypy>=0.900
|
- mypy>=0.900
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
affine
|
affine
|
||||||
black[colorama]>=21b
|
black[colorama]>=21b
|
||||||
|
fiona
|
||||||
flake8
|
flake8
|
||||||
h5py
|
h5py
|
||||||
isort[colors]>=4.3.5
|
isort[colors]>=4.3.5
|
||||||
|
|
|
@ -26,6 +26,7 @@ setup_requires =
|
||||||
setuptools>=42
|
setuptools>=42
|
||||||
install_requires =
|
install_requires =
|
||||||
affine
|
affine
|
||||||
|
fiona
|
||||||
matplotlib
|
matplotlib
|
||||||
numpy
|
numpy
|
||||||
pillow
|
pillow
|
||||||
|
|
|
@ -5,6 +5,7 @@ spack:
|
||||||
- "python@3.7:+bz2"
|
- "python@3.7:+bz2"
|
||||||
- py-affine
|
- py-affine
|
||||||
- "py-black@21:+colorama"
|
- "py-black@21:+colorama"
|
||||||
|
- py-fiona
|
||||||
- py-flake8
|
- py-flake8
|
||||||
- py-h5py
|
- py-h5py
|
||||||
- "py-isort@4.3.5:+colors"
|
- "py-isort@4.3.5:+colors"
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
"""TorchGeo datasets."""
|
"""TorchGeo datasets."""
|
||||||
|
|
||||||
from .benin_cashews import BeninSmallHolderCashews
|
from .benin_cashews import BeninSmallHolderCashews
|
||||||
|
from .cbf import CanadianBuildingFootprints
|
||||||
from .cdl import CDL
|
from .cdl import CDL
|
||||||
from .chesapeake import (
|
from .chesapeake import (
|
||||||
Chesapeake,
|
Chesapeake,
|
||||||
|
@ -42,6 +43,7 @@ from .utils import BoundingBox, collate_dict
|
||||||
__all__ = (
|
__all__ = (
|
||||||
"BeninSmallHolderCashews",
|
"BeninSmallHolderCashews",
|
||||||
"BoundingBox",
|
"BoundingBox",
|
||||||
|
"CanadianBuildingFootprints",
|
||||||
"CDL",
|
"CDL",
|
||||||
"collate_dict",
|
"collate_dict",
|
||||||
"Chesapeake",
|
"Chesapeake",
|
||||||
|
|
|
@ -0,0 +1,207 @@
|
||||||
|
"""Canadian Building Footprints dataset."""
|
||||||
|
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import Any, Callable, Dict, Optional
|
||||||
|
|
||||||
|
import fiona
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import torch
|
||||||
|
from fiona.transform import transform, transform_geom
|
||||||
|
from rasterio.crs import CRS
|
||||||
|
from rasterio.features import rasterize
|
||||||
|
from rtree.index import Index, Property
|
||||||
|
from torch import Tensor
|
||||||
|
|
||||||
|
from .geo import GeoDataset
|
||||||
|
from .utils import BoundingBox, check_integrity, download_and_extract_archive
|
||||||
|
|
||||||
|
# Module-level default CRS (EPSG:4326); used as the default value of the ``crs``
# argument of ``CanadianBuildingFootprints.__init__``.
_crs = CRS.from_epsg(4326)
|
||||||
|
|
||||||
|
|
||||||
|
class CanadianBuildingFootprints(GeoDataset):
    """Canadian Building Footprints dataset.

    The `Canadian Building Footprints
    <https://github.com/Microsoft/CanadianBuildingFootprints>`_ dataset contains
    11,842,186 computer generated building footprints in all Canadian provinces and
    territories in GeoJSON format. This data is freely available for download and use.
    """

    # TODO: how does one cite this dataset?
    # https://github.com/microsoft/CanadianBuildingFootprints/issues/11

    # Base URL; each archive is named "<province or territory>.zip"
    url = "https://usbuildingdata.blob.core.windows.net/canadian-buildings-v2/"

    # Archive base names, kept in the same order as ``md5s`` below
    provinces_territories = [
        "Alberta",
        "BritishColumbia",
        "Manitoba",
        "NewBrunswick",
        "NewfoundlandAndLabrador",
        "NorthwestTerritories",
        "NovaScotia",
        "Nunavut",
        "Ontario",
        "PrinceEdwardIsland",
        "Quebec",
        "Saskatchewan",
        "YukonTerritory",
    ]

    # MD5 checksum of each archive, paired by position with ``provinces_territories``
    md5s = [
        "8b4190424e57bb0902bd8ecb95a9235b",
        "fea05d6eb0006710729c675de63db839",
        "adf11187362624d68f9c69aaa693c46f",
        "44269d4ec89521735389ef9752ee8642",
        "65dd92b1f3f5f7222ae5edfad616d266",
        "346d70a682b95b451b81b47f660fd0e2",
        "bd57cb1a7822d72610215fca20a12602",
        "c1f29b73cdff9a6a9dd7d086b31ef2cf",
        "76ba4b7059c5717989ce34977cad42b2",
        "2e4a3fa47b3558503e61572c59ac5963",
        "9ff4417ae00354d39a0cf193c8df592c",
        "a51078d8e60082c7d3a3818240da6dd5",
        "c11f3bd914ecabd7cac2cb2871ec0261",
    ]

    def __init__(
        self,
        root: str = "data",
        crs: CRS = _crs,
        transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
        download: bool = False,
        checksum: bool = False,
        res: float = 0.00001,
    ) -> None:
        """Initialize a new Canadian Building Footprints dataset.

        Args:
            root: root directory where dataset can be found
            crs: :term:`coordinate reference system (CRS)` to project to
            transforms: a function/transform that takes input sample and its target as
                entry and returns a transformed version
            download: if True, download dataset and store it in the root directory
            checksum: if True, check the MD5 of the downloaded files (may be slow)
            res: size of one pixel of the rasterized mask, in units of ``crs``

        Raises:
            RuntimeError: if ``download=False`` and data is not found, or
                ``checksum=True`` and checksums don't match
            ValueError: if ``res`` is not positive
        """
        if res <= 0:
            raise ValueError(f"res must be positive, got {res}")

        self.root = root
        self.crs = crs
        self.res = res
        self.transforms = transforms
        self.checksum = checksum

        if download:
            self._download()

        if not self._check_integrity():
            raise RuntimeError(
                "Dataset not found or corrupted. "
                + "You can use download=True to download it"
            )

        # Create an R-tree to index the dataset
        self.index = Index(interleaved=False, properties=Property(dimension=3))

        # "**" only recurses when it is a path component of its own, so it must be
        # joined separately from the "*.geojson" file pattern
        fileglob = os.path.join(root, "**", "*.geojson")
        for i, filename in enumerate(glob.iglob(fileglob, recursive=True)):
            with fiona.open(filename) as src:
                minx, miny, maxx, maxy = src.bounds
                xs, ys = transform(src.crs, crs.to_dict(), [minx, maxx], [miny, maxy])

            # Reprojection is not guaranteed to preserve ordering, so re-sort
            # The files carry no time information: cover the whole time axis
            coords = (min(xs), max(xs), min(ys), max(ys), 0, sys.maxsize)
            self.index.insert(i, coords, filename)

    def __getitem__(self, query: BoundingBox) -> Dict[str, Any]:
        """Retrieve image and metadata indexed by query.

        Args:
            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index

        Returns:
            sample of labels and metadata at that index

        Raises:
            IndexError: if query is not within bounds of the index
        """
        # Local import: keeps this method self-contained with respect to the
        # module-level imports
        from rasterio.transform import from_bounds

        if not query.intersects(self.bounds):
            raise IndexError(
                f"query: {query} is not within bounds of the index: {self.bounds}"
            )

        # Bounding boxes of neighbouring files can overlap, so gather footprints
        # from every file that intersects the query rather than only the first hit
        shapes = []
        for hit in self.index.intersection(query, objects=True):
            with fiona.open(hit.object) as src:
                # We need to know the bounding box of the query in the source CRS
                xs, ys = transform(
                    self.crs.to_dict(),
                    src.crs,
                    [query.minx, query.maxx],
                    [query.miny, query.maxy],
                )
                bbox = (min(xs), min(ys), max(xs), max(ys))

                # Filter geometries to those that intersect with the bounding box.
                # The box must be passed by keyword: positional arguments of
                # ``filter`` are interpreted as slice start/stop/step.
                for feature in src.filter(bbox=bbox):
                    # Warp geometries to requested CRS
                    shape = transform_geom(
                        src.crs, self.crs.to_dict(), feature["geometry"]
                    )
                    shapes.append(shape)

        # Size of the output mask in pixels (at least one pixel per dimension)
        width = max(1, round((query.maxx - query.minx) / self.res))
        height = max(1, round((query.maxy - query.miny) / self.res))

        if shapes:
            # Rasterize geometries onto a grid that exactly covers the query, which
            # also clips footprints that extend beyond the bounding box
            affine = from_bounds(
                query.minx, query.miny, query.maxx, query.maxy, width, height
            )
            raster = rasterize(shapes, out_shape=(height, width), transform=affine)
            masks = torch.tensor(raster)  # type: ignore[attr-defined]
        else:
            # rasterize() rejects an empty list of geometries: no footprint in the
            # query simply means an all-background mask
            masks = torch.zeros(  # type: ignore[attr-defined]
                (height, width), dtype=torch.uint8  # type: ignore[attr-defined]
            )

        sample = {
            "masks": masks,
            "crs": self.crs,
            "bbox": query,
        }

        if self.transforms is not None:
            sample = self.transforms(sample)

        return sample

    def _check_integrity(self) -> bool:
        """Check integrity of dataset.

        Every archive listed in ``provinces_territories`` must exist in ``root``;
        MD5s are only compared when ``self.checksum`` is set.

        Returns:
            True if dataset files are found and/or MD5s match, else False
        """
        for prov_terr, md5 in zip(self.provinces_territories, self.md5s):
            filepath = os.path.join(self.root, prov_terr + ".zip")
            if not check_integrity(filepath, md5 if self.checksum else None):
                return False
        return True

    def _download(self) -> None:
        """Download the dataset and extract it.

        Does nothing (besides printing a message) if all archives are already
        present and pass the integrity check.
        """
        if self._check_integrity():
            print("Files already downloaded and verified")
            return

        for prov_terr, md5 in zip(self.provinces_territories, self.md5s):
            download_and_extract_archive(
                self.url + prov_terr + ".zip",
                self.root,
                md5=md5 if self.checksum else None,
            )

    def plot(self, image: Tensor) -> None:
        """Plot an image on a map.

        Args:
            image: the image to plot
        """
        # Drop singleton dimensions before handing the array to matplotlib
        array = image.squeeze().numpy()

        # Plot the image
        ax = plt.axes()
        ax.imshow(array)
        ax.axis("off")
        plt.show()
        plt.close()
|
Загрузка…
Ссылка в новой задаче