Mirror of https://github.com/microsoft/torchgeo.git
Formatting
Parent
b8ba2ebc2b
Commit
04355ecc2f
@@ -13,7 +13,7 @@ dependencies:
   - pytorch-gpu>=1.7
   - rarfile>=3
   - rasterio>=1.0.16
-  - shapely>1.3.0
+  - shapely>=1.3.0
   - torchvision>=0.3
   - pip:
     - black>=21.4b0
@@ -1,39 +0,0 @@
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-
-import math
-
-import pyproj
-import shapely.ops
-
-
-def test_crs_with_pyproj() -> None:
-    src_crs = pyproj.CRS("epsg:4326")
-    dst_crs = pyproj.CRS(src_crs)
-
-    project = pyproj.Transformer.from_crs(src_crs, dst_crs, always_xy=True).transform
-
-    geom = {
-        "type": "Polygon",
-        "coordinates": [
-            [
-                [-125.068359375, 45.920587344733654],
-                [-116.56494140625001, 45.920587344733654],
-                [-116.56494140625001, 49.095452162534826],
-                [-125.068359375, 49.095452162534826],
-                [-125.068359375, 45.920587344733654],
-            ]
-        ],
-    }
-    geom_transformed = shapely.ops.transform(project, shapely.geometry.shape(geom))
-
-    bounds = geom_transformed.bounds
-    expected_bounds = (
-        -125.068359375,
-        45.920587344733654,
-        -116.56494140625001,
-        49.095452162534826,
-    )
-
-    for i in range(4):
-        assert math.isclose(bounds[i], expected_bounds[i])
@@ -10,6 +10,7 @@ from .chesapeake import (
     Chesapeake,
     Chesapeake7,
     Chesapeake13,
+    ChesapeakeCVPR,
     ChesapeakeDC,
     ChesapeakeDE,
     ChesapeakeMD,
@@ -20,7 +21,6 @@ from .chesapeake import (
 )
 from .cowc import COWC, COWCCounting, COWCDetection
 from .cv4a_kenya_crop_type import CV4AKenyaCropType
-from .cvpr_chesapeake import CVPRChesapeake
 from .cyclone import TropicalCycloneWindEstimation
 from .geo import GeoDataset, RasterDataset, VectorDataset, VisionDataset, ZipDataset
 from .landcoverai import LandCoverAI
@@ -58,7 +58,7 @@ __all__ = (
     "ChesapeakePA",
     "ChesapeakeVA",
     "ChesapeakeWV",
-    "CVPRChesapeake",
+    "ChesapeakeCVPR",
     "Landsat",
     "Landsat1",
     "Landsat2",
@@ -1,16 +1,23 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-"""Chesapeake Bay High-Resolution Land Cover Project dataset."""
+"""Chesapeake Bay High-Resolution Land Cover Project datasets."""
 
 import abc
 import os
-from typing import Any, Callable, Dict, Optional
+import sys
+from typing import Any, Callable, Dict, List, Optional
 
+import fiona
+import pyproj
+import rasterio
+import rasterio.mask
+import shapely.geometry
+import shapely.ops
 from rasterio.crs import CRS
 
-from .geo import RasterDataset
-from .utils import check_integrity, download_and_extract_archive
+from .geo import GeoDataset, RasterDataset
+from .utils import BoundingBox, check_integrity, download_and_extract_archive
 
 
 class Chesapeake(RasterDataset, abc.ABC):
@@ -262,3 +269,205 @@ class ChesapeakeWV(Chesapeake):
     filename = "WV_STATEWIDE.tif"
     zipfile = "_WV_STATEWIDE.zip"
     md5 = "350621ea293651fbc557a1c3e3c64cc3"
+
+
+class ChesapeakeCVPR(GeoDataset):
+    """CVPR 2019 Chesapeake Land Cover dataset.
+
+    The `CVPR 2019 Chesapeake Land Cover
+    <https://lila.science/datasets/chesapeakelandcover>`_ dataset contains two layers of
+    NAIP aerial imagery, Landsat 8 leaf-on and leaf-off imagery, Chesapeake Bay land
+    cover labels, NLCD land cover labels, and Microsoft building footprint labels.
+
+    This dataset was organized to accompany the 2019 CVPR paper, "Large Scale
+    High-Resolution Land Cover Mapping with Multi-Resolution Data".
+
+    If you use this dataset in your research, please cite the following paper:
+
+    * https://doi.org/10.1109/cvpr.2019.01301
+    """
+
+    url = "https://lilablobssc.blob.core.windows.net/lcmcvpr2019/cvpr_chesapeake_landcover.zip"  # noqa: E501
+    filename = "cvpr_chesapeake_landcover.zip"
+    md5 = "0ea5e7cb861be3fb8a06fedaaaf91af9"
+
+    valid_layers = [
+        "naip-new",
+        "naip-old",
+        "landsat-leaf-on",
+        "landsat-leaf-off",
+        "nlcd",
+        "lc",
+        "buildings",
+    ]
+    states = ["de", "md", "va", "wv", "pa", "ny"]
+    splits = (
+        [f"{state}-train" for state in states]
+        + [f"{state}-val" for state in states]
+        + [f"{state}-test" for state in states]
+    )
+
+    p_src_crs = pyproj.CRS("epsg:3857")
+    p_transformers = {
+        "epsg:26917": pyproj.Transformer.from_crs(
+            p_src_crs, pyproj.CRS("epsg:26917"), always_xy=True
+        ).transform,
+        "epsg:26918": pyproj.Transformer.from_crs(
+            p_src_crs, pyproj.CRS("epsg:26918"), always_xy=True
+        ).transform,
+    }
+
+    def __init__(
+        self,
+        root: str = "data",
+        split: str = "de-train",
+        layers: List[str] = ["naip-new", "lc"],
+        transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
+        cache: bool = True,
+        download: bool = False,
+        checksum: bool = False,
+    ) -> None:
+        """Initialize a new Dataset instance.
+
+        Args:
+            root: root directory where dataset can be found
+            split: a string in the format "{state}-{train,val,test}" indicating the
+                subset of data to use, for example "ny-train"
+            layers: a list containing a subset of "naip-new", "naip-old", "lc", "nlcd",
+                "landsat-leaf-on", "landsat-leaf-off", "buildings" indicating which
+                layers to load
+            transforms: a function/transform that takes an input sample
+                and returns a transformed version
+            cache: if True, cache file handle to speed up repeated sampling
+            download: if True, download dataset and store it in the root directory
+            checksum: if True, check the MD5 of the downloaded files (may be slow)
+
+        Raises:
+            FileNotFoundError: if no files are found in ``root``
+            RuntimeError: if ``download=False`` but dataset is missing or checksum fails
+        """
+        assert split in self.splits
+        assert all([layer in self.valid_layers for layer in layers])
+        super().__init__(transforms)  # creates self.index and self.transform
+        self.root = root
+        self.layers = layers
+        self.cache = cache
+        self.checksum = checksum
+
+        if download:
+            self._download()
+
+        if not self._check_integrity():
+            raise RuntimeError(
+                "Dataset not found or corrupted. "
+                + "You can use download=True to download it"
+            )
+
+        # Add all tiles into the index in epsg:3857 based on the included geojson
+        mint: float = 0
+        maxt: float = sys.maxsize
+        with fiona.open(os.path.join(root, "spatial_index.geojson"), "r") as f:
+            for i, row in enumerate(f):
+                if row["properties"]["split"] == split:
+                    box = shapely.geometry.shape(row["geometry"])
+                    minx, miny, maxx, maxy = box.bounds
+                    coords = (minx, maxx, miny, maxy, mint, maxt)
+                    self.index.insert(
+                        i,
+                        coords,
+                        {
+                            "naip-new": row["properties"]["naip-new"],
+                            "naip-old": row["properties"]["naip-old"],
+                            "landsat-leaf-on": row["properties"]["landsat-leaf-on"],
+                            "landsat-leaf-off": row["properties"]["landsat-leaf-off"],
+                            "lc": row["properties"]["lc"],
+                            "nlcd": row["properties"]["nlcd"],
+                            "buildings": row["properties"]["buildings"],
+                        },
+                    )
+
+    def __getitem__(self, query: BoundingBox) -> Dict[str, Any]:
+        """Retrieve image/mask and metadata indexed by query.
+
+        Args:
+            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index
+
+        Returns:
+            sample of image/mask and metadata at that index
+
+        Raises:
+            IndexError: if query is not found in the index
+        """
+        hits = self.index.intersection(query, objects=True)
+        filepaths = [hit.object for hit in hits]
+
+        sample = {
+            "crs": self.crs,
+            "bbox": query,
+        }
+
+        if len(filepaths) == 0:
+            raise IndexError(
+                f"query: {query} not found in index with bounds: {self.bounds}"
+            )
+        elif len(filepaths) == 1:
+            filenames = filepaths[0]
+            query_geom_transformed = None  # is set by the first layer
+
+            minx, maxx, miny, maxy, mint, maxt = query
+            query_box = shapely.geometry.box(minx, miny, maxx, maxy)
+
+            for layer in self.layers:
+
+                fn = filenames[layer]
+
+                with rasterio.open(os.path.join(self.root, fn)) as f:
+                    dst_crs = f.crs.to_string().lower()
+
+                    if query_geom_transformed is None:
+                        query_box_transformed = shapely.ops.transform(
+                            self.p_transformers[dst_crs], query_box
+                        ).envelope
+                        query_geom_transformed = shapely.geometry.mapping(
+                            query_box_transformed
+                        )
+
+                    data, _ = rasterio.mask.mask(
+                        f, [query_geom_transformed], crop=True, all_touched=True
+                    )
+
+                sample[layer] = data.squeeze()
+
+        else:
+            raise IndexError(f"query: {query} spans multiple tiles which is not valid")
+
+        if self.transforms is not None:
+            sample = self.transforms(sample)
+
+        return sample
+
+    def _check_integrity(self) -> bool:
+        """Check integrity of dataset.
+
+        Returns:
+            True if dataset files are found and/or MD5s match, else False
+        """
+        integrity: bool = check_integrity(
+            os.path.join(self.root, self.filename),
+            self.md5 if self.checksum else None,
+        )
+
+        return integrity
+
+    def _download(self) -> None:
+        """Download the dataset and extract it."""
+        if self._check_integrity():
+            print("Files already downloaded and verified")
+            return
+
+        download_and_extract_archive(
+            self.url,
+            self.root,
+            filename=self.filename,
+            md5=self.md5,
+        )
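For context, a minimal usage sketch of the new ChesapeakeCVPR class (not part of the commit). It assumes the archive has been extracted under root, and the query coordinates below are hypothetical EPSG:3857 placeholders that must fall within a single tile of the chosen split; the import paths follow the torchgeo layout shown in the diff above.

# Usage sketch under the assumptions stated above; not from the commit itself.
from torchgeo.datasets import ChesapeakeCVPR
from torchgeo.datasets.utils import BoundingBox

# Load two layers of the Delaware training split, downloading and
# extracting cvpr_chesapeake_landcover.zip on first use.
ds = ChesapeakeCVPR(
    root="data",
    split="de-train",
    layers=["naip-new", "lc"],
    download=True,
)

# Queries use the same (minx, maxx, miny, maxy, mint, maxt) ordering as the
# R-tree index built in __init__. These values are hypothetical and must lie
# inside one tile's extent, otherwise __getitem__ raises IndexError.
query = BoundingBox(
    minx=-8500000.0,
    maxx=-8499000.0,
    miny=4750000.0,
    maxy=4751000.0,
    mint=0,
    maxt=2**31,
)
sample = ds[query]  # dict with "crs", "bbox", and one array per requested layer
print(sample["naip-new"].shape, sample["lc"].shape)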
@@ -1,224 +0,0 @@
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-
-"""CVPR 2019 Chesapeake Land Cover dataset."""
-
-import os
-import sys
-from typing import Any, Callable, Dict, List, Optional
-
-import fiona
-import pyproj
-import rasterio
-import rasterio.mask
-import shapely.geometry
-import shapely.ops
-from rasterio.crs import CRS
-
-from .geo import GeoDataset
-from .utils import BoundingBox, check_integrity, download_and_extract_archive
-
-
-class CVPRChesapeake(GeoDataset):
-    """CVPR 2019 Chesapeake Land Cover dataset.
-
-    The `CVPR 2019 Chesapeake Land Cover
-    <https://lila.science/datasets/chesapeakelandcover>`_ dataset contains two layers of
-    NAIP aerial imagery, Landsat 8 leaf-on and leaf-off imagery, Chesapeake Bay land
-    cover labels, NLCD land cover labels, and Microsoft building footprint labels.
-
-    This dataset was organized to accompany the 2019 CVPR paper, "Large Scale
-    High-Resolution Land Cover Mapping with Multi-Resolution Data".
-
-    If you use this dataset in your research, please cite the following paper:
-
-    * https://doi.org/10.1109/cvpr.2019.01301
-    """
-
-    url = "https://lilablobssc.blob.core.windows.net/lcmcvpr2019/cvpr_chesapeake_landcover.zip"  # noqa: E501
-    filename = "cvpr_chesapeake_landcover.zip"
-    md5 = "0ea5e7cb861be3fb8a06fedaaaf91af9"
-
-    crs = CRS.from_epsg(3857)
-    res = 1
-
-    valid_layers = [
-        "naip-new",
-        "naip-old",
-        "landsat-leaf-on",
-        "landsat-leaf-off",
-        "nlcd",
-        "lc",
-        "buildings",
-    ]
-    states = ["de", "md", "va", "wv", "pa", "ny"]
-    splits = (
-        [f"{state}-train" for state in states]
-        + [f"{state}-val" for state in states]
-        + [f"{state}-test" for state in states]
-    )
-
-    p_src_crs = pyproj.CRS("epsg:3857")
-    p_transformers = {
-        "epsg:26917": pyproj.Transformer.from_crs(
-            p_src_crs, pyproj.CRS("epsg:26917"), always_xy=True
-        ).transform,
-        "epsg:26918": pyproj.Transformer.from_crs(
-            p_src_crs, pyproj.CRS("epsg:26918"), always_xy=True
-        ).transform,
-    }
-
-    def __init__(
-        self,
-        root: str = "data",
-        split: str = "de-train",
-        layers: List[str] = ["naip-new", "lc"],
-        transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
-        cache: bool = True,
-        download: bool = False,
-        checksum: bool = False,
-    ) -> None:
-        """Initialize a new Dataset instance.
-
-        Args:
-            root: root directory where dataset can be found
-            split: a string in the format "{state}-{train,val,test}" indicating the
-                subset of data to use
-            layers: a list containing a subset of "naip-new", "naip-old", "lc", "nlcd",
-                "landsat-leaf-on", "landsat-leaf-off", "buildings" indicating which
-                layers to load
-            transforms: a function/transform that takes an input sample
-                and returns a transformed version
-            cache: if True, cache file handle to speed up repeated sampling
-            download: if True, download dataset and store it in the root directory
-            checksum: if True, check the MD5 of the downloaded files (may be slow)
-
-        Raises:
-            FileNotFoundError: if no files are found in ``root``
-            RuntimeError: if ``download=False`` but dataset is missing or checksum fails
-        """
-        assert split in self.splits
-        assert all([layer in self.valid_layers for layer in layers])
-        super().__init__(transforms)  # creates self.index and self.transform
-        self.root = root
-        self.layers = layers
-        self.cache = cache
-        self.checksum = checksum
-
-        if download:
-            self._download()
-
-        if not self._check_integrity():
-            raise RuntimeError(
-                "Dataset not found or corrupted. "
-                + "You can use download=True to download it"
-            )
-
-        # Add all tiles into the index in epsg:3857 based on the included geojson
-        mint: float = 0
-        maxt: float = sys.maxsize
-        with fiona.open(os.path.join(root, "spatial_index.geojson"), "r") as f:
-            for i, row in enumerate(f):
-                if row["properties"]["split"] == split:
-                    box = shapely.geometry.shape(row["geometry"])
-                    minx, miny, maxx, maxy = box.bounds
-                    coords = (minx, maxx, miny, maxy, mint, maxt)
-                    self.index.insert(
-                        i,
-                        coords,
-                        {
-                            "naip-new": row["properties"]["naip-new"],
-                            "naip-old": row["properties"]["naip-old"],
-                            "landsat-leaf-on": row["properties"]["landsat-leaf-on"],
-                            "landsat-leaf-off": row["properties"]["landsat-leaf-off"],
-                            "lc": row["properties"]["lc"],
-                            "nlcd": row["properties"]["nlcd"],
-                            "buildings": row["properties"]["buildings"],
-                        },
-                    )
-
-    def __getitem__(self, query: BoundingBox) -> Dict[str, Any]:
-        """Retrieve image/mask and metadata indexed by query.
-
-        Args:
-            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index
-
-        Returns:
-            sample of image/mask and metadata at that index
-
-        Raises:
-            IndexError: if query is not found in the index
-        """
-        hits = self.index.intersection(query, objects=True)
-        filepaths = [hit.object for hit in hits]
-
-        sample = {
-            "crs": self.crs,
-            "bbox": query,
-        }
-
-        if len(filepaths) == 0:
-            raise IndexError(
-                f"query: {query} not found in index with bounds: {self.bounds}"
-            )
-        elif len(filepaths) == 1:
-            filenames = filepaths[0]
-            query_geom_transformed = None  # is set by the first layer
-
-            minx, maxx, miny, maxy, mint, maxt = query
-            query_box = shapely.geometry.box(minx, miny, maxx, maxy)
-
-            for layer in self.layers:
-
-                fn = filenames[layer]
-
-                with rasterio.open(os.path.join(self.root, fn)) as f:
-                    dst_crs = f.crs.to_string().lower()
-
-                    if query_geom_transformed is None:
-                        query_box_transformed = shapely.ops.transform(
-                            self.p_transformers[dst_crs], query_box
-                        ).envelope
-                        query_geom_transformed = shapely.geometry.mapping(
-                            query_box_transformed
-                        )
-
-                    data, _ = rasterio.mask.mask(
-                        f, [query_geom_transformed], crop=True, all_touched=True
-                    )
-
-                sample[layer] = data.squeeze()
-
-        else:
-            raise IndexError(f"query: {query} spans multiple tiles which is not valid")
-
-        if self.transforms is not None:
-            sample = self.transforms(sample)
-
-        return sample
-
-    def _check_integrity(self) -> bool:
-        """Check integrity of dataset.
-
-        Returns:
-            True if dataset files are found and/or MD5s match, else False
-        """
-        integrity: bool = check_integrity(
-            os.path.join(self.root, self.filename),
-            self.md5 if self.checksum else None,
-        )
-
-        return integrity
-
-    def _download(self) -> None:
-        """Download the dataset and extract it."""
-        if self._check_integrity():
-            print("Files already downloaded and verified")
-            return
-
-        download_and_extract_archive(
-            self.url,
-            self.root,
-            filename=self.filename,
-            md5=self.md5,
-        )