зеркало из https://github.com/microsoft/torchgeo.git
Resolve NCCM checksum error and add years args (#1870)
* add new download links, years Args, and new test data * remove download test file * include all years by default * sort year and verify
This commit is contained in:
Родитель
8af188c72e
Коммит
f3270ca6ae
Двоичные данные
tests/data/nccm/13090442.zip
Двоичные данные
tests/data/nccm/13090442.zip
Двоичный файл не отображается.
Двоичные данные
tests/data/nccm/13090442/CDL2017_clip.tif
Двоичные данные
tests/data/nccm/13090442/CDL2017_clip.tif
Двоичный файл не отображается.
Двоичные данные
tests/data/nccm/13090442/CDL2018_clip1.tif
Двоичные данные
tests/data/nccm/13090442/CDL2018_clip1.tif
Двоичный файл не отображается.
Двоичные данные
tests/data/nccm/13090442/CDL2019_clip.tif
Двоичные данные
tests/data/nccm/13090442/CDL2019_clip.tif
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
|
@ -5,7 +5,6 @@
|
|||
|
||||
import hashlib
|
||||
import os
|
||||
import shutil
|
||||
|
||||
import numpy as np
|
||||
import rasterio
|
||||
|
@ -48,20 +47,14 @@ def create_file(path: str, dtype: str):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
dir = os.path.join(os.getcwd(), "13090442")
|
||||
|
||||
if os.path.exists(dir) and os.path.isdir(dir):
|
||||
shutil.rmtree(dir)
|
||||
|
||||
dir = os.path.join(os.getcwd())
|
||||
os.makedirs(dir, exist_ok=True)
|
||||
|
||||
for file in files:
|
||||
create_file(os.path.join(dir, file), dtype="int8")
|
||||
|
||||
# Compress data
|
||||
shutil.make_archive("13090442", "zip", ".", dir)
|
||||
|
||||
# Compute checksums
|
||||
with open("13090442.zip", "rb") as f:
|
||||
md5 = hashlib.md5(f.read()).hexdigest()
|
||||
print(f"13090442.zip: {md5}")
|
||||
for file in files:
|
||||
with open(file, "rb") as f:
|
||||
md5 = hashlib.md5(f.read()).hexdigest()
|
||||
print(f"{file}: {md5}")
|
||||
|
|
|
@ -25,9 +25,19 @@ class TestNCCM:
|
|||
@pytest.fixture
|
||||
def dataset(self, monkeypatch: MonkeyPatch, tmp_path: Path) -> NCCM:
|
||||
monkeypatch.setattr(torchgeo.datasets.nccm, "download_url", download_url)
|
||||
url = os.path.join("tests", "data", "nccm", "13090442.zip")
|
||||
md5s = {
|
||||
2017: "ae5c390d0ffb8970d544b8a09142759f",
|
||||
2018: "0d453bdb8ea5b7318c33e62513760580",
|
||||
2019: "d4ab7ab00bb57623eafb6b27747e5639",
|
||||
}
|
||||
monkeypatch.setattr(NCCM, "md5s", md5s)
|
||||
urls = {
|
||||
2017: os.path.join("tests", "data", "nccm", "CDL2017_clip.tif"),
|
||||
2018: os.path.join("tests", "data", "nccm", "CDL2018_clip1.tif"),
|
||||
2019: os.path.join("tests", "data", "nccm", "CDL2019_clip.tif"),
|
||||
}
|
||||
monkeypatch.setattr(NCCM, "urls", urls)
|
||||
transforms = nn.Identity()
|
||||
monkeypatch.setattr(NCCM, "url", url)
|
||||
root = str(tmp_path)
|
||||
return NCCM(root, transforms=transforms, download=True, checksum=True)
|
||||
|
||||
|
@ -48,11 +58,8 @@ class TestNCCM:
|
|||
def test_already_extracted(self, dataset: NCCM) -> None:
|
||||
NCCM(dataset.paths, download=True)
|
||||
|
||||
def test_already_downloaded(self, tmp_path: Path) -> None:
|
||||
pathname = os.path.join("tests", "data", "nccm", "13090442.zip")
|
||||
root = str(tmp_path)
|
||||
shutil.copy(pathname, root)
|
||||
NCCM(root)
|
||||
def test_already_downloaded(self, dataset: NCCM) -> None:
|
||||
NCCM(dataset.paths, download=True)
|
||||
|
||||
def test_plot(self, dataset: NCCM) -> None:
|
||||
query = dataset.bounds
|
||||
|
|
|
@ -3,8 +3,6 @@
|
|||
|
||||
"""Northeastern China Crop Map Dataset."""
|
||||
|
||||
import glob
|
||||
import os
|
||||
from collections.abc import Iterable
|
||||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
|
@ -14,7 +12,7 @@ from matplotlib.figure import Figure
|
|||
from rasterio.crs import CRS
|
||||
|
||||
from .geo import RasterDataset
|
||||
from .utils import BoundingBox, DatasetNotFoundError, download_url, extract_archive
|
||||
from .utils import BoundingBox, DatasetNotFoundError, download_url
|
||||
|
||||
|
||||
class NCCM(RasterDataset):
|
||||
|
@ -55,12 +53,24 @@ class NCCM(RasterDataset):
|
|||
|
||||
filename_regex = r"CDL(?P<year>\d{4})_clip"
|
||||
filename_glob = "CDL*.*"
|
||||
zipfile_glob = "13090442.zip"
|
||||
|
||||
date_format = "%Y"
|
||||
is_image = False
|
||||
url = "https://figshare.com/ndownloader/articles/13090442/versions/1"
|
||||
md5 = "eae952f1b346d7e649d027e8139a76f5"
|
||||
urls = {
|
||||
2019: "https://figshare.com/ndownloader/files/25070540",
|
||||
2018: "https://figshare.com/ndownloader/files/25070624",
|
||||
2017: "https://figshare.com/ndownloader/files/25070582",
|
||||
}
|
||||
md5s = {
|
||||
2019: "0d062bbd42e483fdc8239d22dba7020f",
|
||||
2018: "b3bb4894478d10786aa798fb11693ec1",
|
||||
2017: "d047fbe4a85341fa6248fd7e0badab6c",
|
||||
}
|
||||
fnames = {
|
||||
2019: "CDL2019_clip.tif",
|
||||
2018: "CDL2018_clip1.tif",
|
||||
2017: "CDL2017_clip.tif",
|
||||
}
|
||||
|
||||
cmap = {
|
||||
0: (0, 255, 0, 255),
|
||||
|
@ -75,6 +85,7 @@ class NCCM(RasterDataset):
|
|||
paths: Union[str, Iterable[str]] = "data",
|
||||
crs: Optional[CRS] = None,
|
||||
res: Optional[float] = None,
|
||||
years: list[int] = [2019],
|
||||
transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None,
|
||||
cache: bool = True,
|
||||
download: bool = False,
|
||||
|
@ -88,6 +99,7 @@ class NCCM(RasterDataset):
|
|||
(defaults to the CRS of the first file found)
|
||||
res: resolution of the dataset in units of CRS
|
||||
(defaults to the resolution of the first file found)
|
||||
years: list of years for which to use nccm layers
|
||||
transforms: a function/transform that takes an input sample
|
||||
and returns a transformed version
|
||||
cache: if True, cache file handle to speed up repeated sampling
|
||||
|
@ -97,7 +109,12 @@ class NCCM(RasterDataset):
|
|||
Raises:
|
||||
DatasetNotFoundError: If dataset is not found and *download* is False.
|
||||
"""
|
||||
assert set(years) <= self.md5s.keys(), (
|
||||
"NCCM data product only exists for the following years: "
|
||||
f"{list(self.md5s.keys())}."
|
||||
)
|
||||
self.paths = paths
|
||||
self.years = years
|
||||
self.download = download
|
||||
self.checksum = checksum
|
||||
self.ordinal_map = torch.full((max(self.cmap.keys()) + 1,), 4, dtype=self.dtype)
|
||||
|
@ -128,37 +145,26 @@ class NCCM(RasterDataset):
|
|||
|
||||
def _verify(self) -> None:
|
||||
"""Verify the integrity of the dataset."""
|
||||
# Check if the extracted files already exist
|
||||
# Check if the files already exist
|
||||
if self.files:
|
||||
return
|
||||
|
||||
# Check if the zip file has already been downloaded
|
||||
assert isinstance(self.paths, str)
|
||||
pathname = os.path.join(self.paths, "**", self.zipfile_glob)
|
||||
if glob.glob(pathname, recursive=True):
|
||||
self._extract()
|
||||
return
|
||||
|
||||
# Check if the user requested to download the dataset
|
||||
if not self.download:
|
||||
raise DatasetNotFoundError(self)
|
||||
|
||||
# Download the dataset
|
||||
self._download()
|
||||
self._extract()
|
||||
|
||||
def _download(self) -> None:
|
||||
"""Download the dataset."""
|
||||
filename = "13090442.zip"
|
||||
download_url(
|
||||
self.url, self.paths, filename, md5=self.md5 if self.checksum else None
|
||||
)
|
||||
|
||||
def _extract(self) -> None:
|
||||
"""Extract the dataset."""
|
||||
assert isinstance(self.paths, str)
|
||||
pathname = os.path.join(self.paths, "**", self.zipfile_glob)
|
||||
extract_archive(glob.glob(pathname, recursive=True)[0], self.paths)
|
||||
for year in self.years:
|
||||
download_url(
|
||||
self.urls[year],
|
||||
self.paths,
|
||||
filename=self.fnames[year],
|
||||
md5=self.md5s[year] if self.checksum else None,
|
||||
)
|
||||
|
||||
def plot(
|
||||
self,
|
||||
|
|
Загрузка…
Ссылка в новой задаче