Mirror of https://github.com/microsoft/torchgeo.git
Add CMS Global Mangrove Canopy dataset (#391)
* Add CMS dataset
* Dynamically set filename
* Add warning in documentation
* Requested changes and data.py
* Single zip file and camel case
* MD5 check added
* Correct error messages
* Compression, smaller test file

Co-authored-by: Caleb Robinson <calebrob6@gmail.com>
This commit is contained in:
Parent
89277dc325
Commit
9cf36fac12
|
@ -32,6 +32,11 @@ Chesapeake Bay High-Resolution Land Cover Project

.. autoclass:: ChesapeakeWV
.. autoclass:: ChesapeakeCVPR

CMS Global Mangrove Canopy Dataset
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: CMSGlobalMangroveCanopy

Cropland Data Layer (CDL)
^^^^^^^^^^^^^^^^^^^^^^^^^
|
Binary file not shown.
Binary data
tests/data/cms_mangrove_canopy/CMS_Global_Map_Mangrove_Canopy_1665/data/Mangrove_agb_Angola.tif
Normal file
Binary file not shown.
Binary data
tests/data/cms_mangrove_canopy/CMS_Global_Map_Mangrove_Canopy_1665/data/Mangrove_hba95_Angola.tif
Normal file
Binary file not shown.
Binary data
tests/data/cms_mangrove_canopy/CMS_Global_Map_Mangrove_Canopy_1665/data/Mangrove_hmax95_Angola.tif
Normal file
Binary file not shown.
|
@ -0,0 +1,68 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
|
||||
import numpy as np
|
||||
import rasterio
|
||||
|
||||
# Seed both RNGs so the generated rasters (and therefore the archive MD5
# printed at the end of this script) are reproducible across runs.
np.random.seed(0)
random.seed(0)

# Height and width in pixels of each generated test GeoTIFF.
SIZE = 64


# One entry per measurement variant of the Angola test tile:
# aboveground biomass (agb), basal area weighted height (hba95),
# and maximum canopy height (hmax95).
files = [
    {"image": "Mangrove_agb_Angola.tif"},
    {"image": "Mangrove_hba95_Angola.tif"},
    {"image": "Mangrove_hmax95_Angola.tif"},
]
|
||||
|
||||
|
||||
def create_file(path: str, dtype: str, num_channels: int) -> None:
    """Write a small random GeoTIFF for use as test data.

    Args:
        path: output path of the GeoTIFF to create
        dtype: integer numpy dtype string for the pixel values (e.g. "int32")
        num_channels: number of bands to declare in the raster profile
    """
    profile = {
        "driver": "GTiff",
        "dtype": dtype,
        "count": num_channels,
        "crs": "epsg:4326",
        "transform": rasterio.transform.from_bounds(0, 0, 1, 1, 1, 1),
        "height": SIZE,
        "width": SIZE,
        # LZW compression with horizontal differencing (predictor=2) keeps
        # the checked-in test file small.
        "compress": "lzw",
        "predictor": 2,
    }

    Z = np.random.randint(
        np.iinfo(profile["dtype"]).max, size=(1, SIZE, SIZE), dtype=profile["dtype"]
    )
    # Use a context manager so the dataset handle is closed and data flushed
    # even on error; the original opened the file and never closed it.
    with rasterio.open(path, "w", **profile) as src:
        src.write(Z)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    directory = "CMS_Global_Map_Mangrove_Canopy_1665"

    # Remove old data so stale files never end up in the new archive
    if os.path.isdir(directory):
        shutil.rmtree(directory)

    os.makedirs(os.path.join(directory, "data"), exist_ok=True)

    # Create one mask file per measurement variant
    for file_dict in files:
        path = file_dict["image"]
        create_file(
            os.path.join(directory, "data", path), dtype="int32", num_channels=1
        )

    # Compress data into "<directory>.zip". The original passed
    # directory.replace(".zip", ""), a no-op since `directory` has no suffix.
    shutil.make_archive(directory, "zip", ".", directory)

    # Compute and print the checksum the dataset class should be pinned to
    with open(directory + ".zip", "rb") as f:
        md5 = hashlib.md5(f.read()).hexdigest()
    print(f"{directory}: {md5}")
|
|
@ -0,0 +1,93 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Generator
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from _pytest.monkeypatch import MonkeyPatch
|
||||
from rasterio.crs import CRS
|
||||
|
||||
from torchgeo.datasets import CMSGlobalMangroveCanopy, IntersectionDataset, UnionDataset
|
||||
|
||||
|
||||
def download_url(url: str, root: str, *args: str, **kwargs: str) -> None:
    """Stand-in downloader that copies a local file into *root*.

    Extra positional/keyword arguments are accepted (and ignored) so the
    signature matches the real download function when monkeypatched in.
    """
    shutil.copy(url, root)
|
||||
|
||||
|
||||
class TestCMSGlobalMangroveCanopy:
    """Tests for the CMSGlobalMangroveCanopy dataset."""

    @pytest.fixture
    def dataset(
        self, monkeypatch: MonkeyPatch, tmp_path: Path
    ) -> CMSGlobalMangroveCanopy:
        """Return a dataset instance backed by the small checked-in test zip."""
        # Point the class at the test archive instead of the real download.
        zipfile = "CMS_Global_Map_Mangrove_Canopy_1665.zip"
        monkeypatch.setattr(  # type: ignore[attr-defined]
            CMSGlobalMangroveCanopy, "zipfile", zipfile
        )

        # MD5 of the small test archive, not of the real dataset.
        md5 = "d6894fa6293cc9c0f3f95a810e842de5"
        monkeypatch.setattr(  # type: ignore[attr-defined]
            CMSGlobalMangroveCanopy, "md5", md5
        )

        root = os.path.join("tests", "data", "cms_mangrove_canopy")
        transforms = nn.Identity()  # type: ignore[attr-defined]
        country = "Angola"

        return CMSGlobalMangroveCanopy(
            root, country=country, transforms=transforms, checksum=True
        )

    def test_getitem(self, dataset: CMSGlobalMangroveCanopy) -> None:
        """Indexing by bounding box yields a sample dict with crs and mask."""
        x = dataset[dataset.bounds]
        assert isinstance(x, dict)
        assert isinstance(x["crs"], CRS)
        assert isinstance(x["mask"], torch.Tensor)

    def test_no_dataset(self) -> None:
        """A root containing neither data nor zip raises RuntimeError."""
        with pytest.raises(RuntimeError, match="Dataset not found in."):
            CMSGlobalMangroveCanopy(root="/test")

    def test_already_downloaded(self, tmp_path: Path) -> None:
        """A zip already present in root is extracted without error."""
        pathname = os.path.join(
            "tests",
            "data",
            "cms_mangrove_canopy",
            "CMS_Global_Map_Mangrove_Canopy_1665.zip",
        )
        root = str(tmp_path)
        shutil.copy(pathname, root)
        CMSGlobalMangroveCanopy(root, country="Angola")

    def test_corrupted(self, tmp_path: Path) -> None:
        """A zip that fails the MD5 check raises RuntimeError."""
        with open(
            os.path.join(tmp_path, "CMS_Global_Map_Mangrove_Canopy_1665.zip"), "w"
        ) as f:
            f.write("bad")
        with pytest.raises(RuntimeError, match="Dataset found, but corrupted."):
            CMSGlobalMangroveCanopy(root=str(tmp_path), country="Angola", checksum=True)

    def test_invalid_country(self) -> None:
        """An unknown country name is rejected."""
        with pytest.raises(AssertionError):
            CMSGlobalMangroveCanopy(country="fakeCountry")

    def test_invalid_measurement(self) -> None:
        """An unknown measurement name is rejected."""
        with pytest.raises(AssertionError):
            CMSGlobalMangroveCanopy(measurement="wrongMeasurement")

    def test_and(self, dataset: CMSGlobalMangroveCanopy) -> None:
        """The & operator forms an IntersectionDataset."""
        ds = dataset & dataset
        assert isinstance(ds, IntersectionDataset)

    def test_or(self, dataset: CMSGlobalMangroveCanopy) -> None:
        """The | operator forms a UnionDataset."""
        ds = dataset | dataset
        assert isinstance(ds, UnionDataset)

    def test_plot(self, dataset: CMSGlobalMangroveCanopy) -> None:
        """plot() accepts the mask tensor from a retrieved sample."""
        query = dataset.bounds
        x = dataset[query]
        dataset.plot(x["mask"])
|
|
@ -21,6 +21,7 @@ from .chesapeake import (
|
|||
ChesapeakeVA,
|
||||
ChesapeakeWV,
|
||||
)
|
||||
from .cms_mangrove_canopy import CMSGlobalMangroveCanopy
|
||||
from .cowc import COWC, COWCCounting, COWCDetection
|
||||
from .cv4a_kenya_crop_type import CV4AKenyaCropType
|
||||
from .cyclone import TropicalCycloneWindEstimation
|
||||
|
@ -97,6 +98,7 @@ __all__ = (
|
|||
"ChesapeakeVA",
|
||||
"ChesapeakeWV",
|
||||
"ChesapeakeCVPR",
|
||||
"CMSGlobalMangroveCanopy",
|
||||
"Esri2020",
|
||||
"Landsat",
|
||||
"Landsat1",
|
||||
|
|
|
@ -0,0 +1,251 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""CMS Global Mangrove Canopy dataset."""
|
||||
|
||||
import glob
|
||||
import os
|
||||
from typing import Any, Callable, Dict, Optional
|
||||
|
||||
from rasterio.crs import CRS
|
||||
|
||||
from .geo import RasterDataset
|
||||
from .utils import check_integrity, extract_archive
|
||||
|
||||
|
||||
class CMSGlobalMangroveCanopy(RasterDataset):
    """CMS Global Mangrove Canopy dataset.

    The `CMS Global Mangrove Canopy dataset
    <https://daac.ornl.gov/cgi-bin/dsviewer.pl?ds_id=1665>`_
    consists of a single band map at 30m resolution of either aboveground biomass (agb),
    basal area weighted height (hba95), or maximum canopy height (hmax95).

    The dataset needs to be manually downloaded from the above link, where you can make
    an account and subsequently download the dataset.

    .. versionadded:: 0.3
    """

    is_image = False

    # Files are named "Mangrove_<variable>_<Country>.tif"; the named groups let
    # the base class parse the measurement variable and country per file.
    filename_regex = r"""^
        (?P<mangrove>[A-Za-z]{8})
        _(?P<variable>[a-z0-9]*)
        _(?P<country>[A-Za-z][^.]*)
    """

    zipfile = "CMS_Global_Map_Mangrove_Canopy_1665.zip"
    md5 = "3e7f9f23bf971c25e828b36e6c5496e3"

    # Countries for which a per-country raster ships in the archive. Spellings
    # (including apparent typos and near-duplicates such as "Philipines",
    # "Newzealand", "Fiji2") mirror the actual filenames in the dataset and
    # must not be "corrected" here.
    all_countries = [
        "AndamanAndNicobar",
        "Angola",
        "Anguilla",
        "AntiguaAndBarbuda",
        "Aruba",
        "Australia",
        "Bahamas",
        "Bahrain",
        "Bangladesh",
        "Barbados",
        "Belize",
        "Benin",
        "Brazil",
        "BritishVirginIslands",
        "Brunei",
        "Cambodia",
        "Cameroon",
        "CarribeanCaymanIslands",
        "China",
        "Colombia",
        "Comoros",
        "CostaRica",
        "Cote",
        "CoteDivoire",
        "CotedIvoire",
        "Cuba",
        "DemocraticRepublicOfCongo",
        "Djibouti",
        "DominicanRepublic",
        "EcuadorWithGalapagos",
        "Egypt",
        "ElSalvador",
        "EquatorialGuinea",
        "Eritrea",
        "EuropaIsland",
        "Fiji",
        "Fiji2",
        "FrenchGuiana",
        "FrenchGuyana",
        "FrenchPolynesia",
        "Gabon",
        "Gambia",
        "Ghana",
        "Grenada",
        "Guadeloupe",
        "Guam",
        "Guatemala",
        "Guinea",
        "GuineaBissau",
        "Guyana",
        "Haiti",
        "Hawaii",
        "Honduras",
        "HongKong",
        "India",
        "Indonesia",
        "Iran",
        "Jamaica",
        "Japan",
        "Kenya",
        "Liberia",
        "Macau",
        "Madagascar",
        "Malaysia",
        "Martinique",
        "Mauritania",
        "Mayotte",
        "Mexico",
        "Micronesia",
        "Mozambique",
        "Myanmar",
        "NewCaledonia",
        "NewZealand",
        "Newzealand",
        "Nicaragua",
        "Nigeria",
        "NorthernMarianaIslands",
        "Oman",
        "Pakistan",
        "Palau",
        "Panama",
        "PapuaNewGuinea",
        "Peru",
        "Philipines",
        "PuertoRico",
        "Qatar",
        "ReunionAndMauritius",
        "SaintKittsAndNevis",
        "SaintLucia",
        "SaintVincentAndTheGrenadines",
        "Samoa",
        "SaudiArabia",
        "Senegal",
        "Seychelles",
        "SierraLeone",
        "Singapore",
        "SolomonIslands",
        "Somalia",
        "Somalia2",
        "Soudan",
        "SouthAfrica",
        "SriLanka",
        "Sudan",
        "Suriname",
        "Taiwan",
        "Tanzania",
        "Thailand",
        "TimorLeste",
        "Togo",
        "Tonga",
        "TrinidadAndTobago",
        "TurksAndCaicosIslands",
        "Tuvalu",
        "UnitedArabEmirates",
        "UnitedStates",
        "Vanuatu",
        "Venezuela",
        "Vietnam",
        "VirginIslandsUs",
        "WallisAndFutuna",
        "Yemen",
    ]

    # Valid values for the ``measurement`` argument.
    measurements = ["agb", "hba95", "hmax95"]

    def __init__(
        self,
        root: str = "data",
        crs: Optional[CRS] = None,
        res: Optional[float] = None,
        measurement: str = "agb",
        country: str = all_countries[0],
        transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
        cache: bool = True,
        checksum: bool = False,
    ) -> None:
        """Initialize a new Dataset instance.

        Args:
            root: root directory where dataset can be found
            crs: :term:`coordinate reference system (CRS)` to warp to
                (defaults to the CRS of the first file found)
            res: resolution of the dataset in units of CRS
                (defaults to the resolution of the first file found)
            measurement: which of the three measurements, 'agb', 'hba95', or 'hmax95'
            country: country for which to retrieve data
            transforms: a function/transform that takes an input sample
                and returns a transformed version
            cache: if True, cache file handle to speed up repeated sampling
            checksum: if True, check the MD5 of the downloaded files (may be slow)

        Raises:
            FileNotFoundError: if no files are found in ``root``
            RuntimeError: if dataset is missing or checksum fails
            AssertionError: if country or measurement arg are not str or invalid
        """
        self.root = root
        self.checksum = checksum

        assert isinstance(country, str), "Country argument must be a str."
        assert (
            country in self.all_countries
        ), "You have selected an invalid country, please choose one of {}".format(
            self.all_countries
        )
        self.country = country

        assert isinstance(measurement, str), "Measurement must be a string."
        assert (
            measurement in self.measurements
        ), "You have entered an invalid measurement, please choose one of {}.".format(
            self.measurements
        )
        self.measurement = measurement

        # Restrict file discovery to the single raster for this
        # measurement/country combination.
        self.filename_glob = f"**/Mangrove_{self.measurement}_{self.country}*"

        # Extract (and optionally checksum) the archive before the parent
        # class scans ``root`` for matching files.
        self._verify()

        super().__init__(root, crs, res, transforms, cache)

    def _verify(self) -> None:
        """Verify the integrity of the dataset.

        Raises:
            RuntimeError: if dataset is missing or checksum fails
        """
        # Check if the extracted files already exist. recursive=True is
        # required for the "**" components to match arbitrarily nested
        # directories; without it each "**" behaves like a plain "*".
        pathname = os.path.join(self.root, "**", self.filename_glob)
        if glob.glob(pathname, recursive=True):
            return

        # Check if the zip file has already been downloaded
        pathname = os.path.join(self.root, self.zipfile)
        if os.path.exists(pathname):
            if self.checksum and not check_integrity(pathname, self.md5):
                raise RuntimeError("Dataset found, but corrupted.")
            self._extract()
            return

        raise RuntimeError(
            f"Dataset not found in `root={self.root}` "
            "either specify a different `root` directory or make sure you "
            "have manually downloaded the dataset as instructed in the documentation."
        )

    def _extract(self) -> None:
        """Extract the dataset to ``root``."""
        pathname = os.path.join(self.root, self.zipfile)
        extract_archive(pathname)
|
Loading…
Reference in a new issue