Mirror of https://github.com/microsoft/torchgeo.git

Add EDDMapS dataset (#533)

* Add EDDMapS dataset
* Mypy hack
* Test fix

Parent: 369b36122a
Commit: 827985ad0a
docs/api/datasets.rst
@@ -52,6 +52,11 @@ Cropland Data Layer (CDL)
.. autoclass:: CDL

EDDMapS
^^^^^^^

.. autoclass:: EDDMapS

EnviroAtlas
^^^^^^^^^^^
tests/data/eddmaps/data.py
@@ -0,0 +1,97 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import pandas as pd

filename = "mappings.csv"

size = 3
data = {
    "gbifID": [""] * size,
    "decimalLatitude": [41.881832] * size,
    "decimalLongitude": [""] + [-87.623177] * (size - 1),
    "objectid": [""] * size,
    "reporter": [""] * size,
    "RecOwner": [""] * size,
    "SciName": ["Homo sapiens"] * size,
    "ComName": ["human"] * size,
    "Nativity": ["Native"] * size,
    "OccStatus": ["Detected"] * size,
    "Status": ["Positive"] * size,
    "ObsDate": ["", "", "05-07-22"],
    "DateEnt": ["05-07-22"] * size,
    "DateUp": ["05-07-22"] * size,
    "Location": ["Chicago, Illinois, United States"] * size,
    "Latitude": [41.881832] * size,
    "Longitude": [""] + [-87.623177] * (size - 1),
    "Datum": ["WGS84"] * size,
    "Method": [""] * size,
    "CoordAcc": [""] * size,
    "DataType": [""] * size,
    "Centroid": [""] * size,
    "Abundance": [""] * size,
    "InfestAcre": [""] * size,
    "GrossAcre": [""] * size,
    "Percentcov": [""] * size,
    "Density": [""] * size,
    "Quantity": [""] * size,
    "QuantityU": [""] * size,
    "APPXQuant": [""] * size,
    "NumCollect": [""] * size,
    "Smallest": [""] * size,
    "Largest": [""] * size,
    "Incidence": [""] * size,
    "Severity": [""] * size,
    "Host": [""] * size,
    "Host_Name": [""] * size,
    "HostPheno": [""] * size,
    "HostDamage": [""] * size,
    "ManageStat": ["Unknown"] * size,
    "PopStat": [""] * size,
    "Habitat": [""] * size,
    "LocalOwner": [""] * size,
    "Site": [""] * size,
    "RecBasis": [""] * size,
    "Museum": [""] * size,
    "MuseumRec": [""] * size,
    "Voucher": [""] * size,
    "ObsIDer": [""] * size,
    "CollectTme": [""] * size,
    "UUID": [""] * size,
    "OrgSrcID": [""] * size,
    "OrigName": ["Homo sapiens"] * size,
    "RecSrcTyp": ["Bulk Data"] * size,
    "Surveyor": [""] * size,
    "DateAcc": [""] * size,
    "VisitType": [""] * size,
    "DataMthd": [""] * size,
    "TrapType": [""] * size,
    "NumTraps": [""] * size,
    "TargetName": [""] * size,
    "TargetCnt": [""] * size,
    "TargetRnge": [""] * size,
    "Phenology": [""] * size,
    "LifeStatus": [""] * size,
    "Sex": [""] * size,
    "PID": [""] * size,
    "WaterName": [""] * size,
    "WaterType": [""] * size,
    "Substrate": [""] * size,
    "TreatArea": [""] * size,
    "PlantTreat": [""] * size,
    "TreatComm": [""] * size,
    "Reference": [""] * size,
    "Locality": [""] * size,
    "Comments": [""] * size,
    "ReviewDate": ["05-07-22"] * size,
    "Reviewer": ["Charles Darwin"] * size,
    "VerifyMthd": ["Bulk Verified"] * size,
    "Verified": ["Verified"] * size,
    "IDCred": ["Credible"] * size,
    "ReviewComm": [""] * size,
}

df = pd.DataFrame(data)
df.to_csv(filename, index=False)
tests/data/eddmaps/mappings.csv
@@ -0,0 +1,4 @@
gbifID,decimalLatitude,decimalLongitude,objectid,reporter,RecOwner,SciName,ComName,Nativity,OccStatus,Status,ObsDate,DateEnt,DateUp,Location,Latitude,Longitude,Datum,Method,CoordAcc,DataType,Centroid,Abundance,InfestAcre,GrossAcre,Percentcov,Density,Quantity,QuantityU,APPXQuant,NumCollect,Smallest,Largest,Incidence,Severity,Host,Host_Name,HostPheno,HostDamage,ManageStat,PopStat,Habitat,LocalOwner,Site,RecBasis,Museum,MuseumRec,Voucher,ObsIDer,CollectTme,UUID,OrgSrcID,OrigName,RecSrcTyp,Surveyor,DateAcc,VisitType,DataMthd,TrapType,NumTraps,TargetName,TargetCnt,TargetRnge,Phenology,LifeStatus,Sex,PID,WaterName,WaterType,Substrate,TreatArea,PlantTreat,TreatComm,Reference,Locality,Comments,ReviewDate,Reviewer,VerifyMthd,Verified,IDCred,ReviewComm
,41.881832,,,,,Homo sapiens,human,Native,Detected,Positive,,05-07-22,05-07-22,"Chicago, Illinois, United States",41.881832,,WGS84,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,Homo sapiens,Bulk Data,,,,,,,,,,,,,,,,,,,,,,,05-07-22,Charles Darwin,Bulk Verified,Verified,Credible,
,41.881832,-87.623177,,,,Homo sapiens,human,Native,Detected,Positive,,05-07-22,05-07-22,"Chicago, Illinois, United States",41.881832,-87.623177,WGS84,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,Homo sapiens,Bulk Data,,,,,,,,,,,,,,,,,,,,,,,05-07-22,Charles Darwin,Bulk Verified,Verified,Credible,
,41.881832,-87.623177,,,,Homo sapiens,human,Native,Detected,Positive,05-07-22,05-07-22,05-07-22,"Chicago, Illinois, United States",41.881832,-87.623177,WGS84,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,Homo sapiens,Bulk Data,,,,,,,,,,,,,,,,,,,,,,,05-07-22,Charles Darwin,Bulk Verified,Verified,Credible,
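In this fixture the first data row has no Longitude value, so the dataset constructor later in this diff skips it and indexes only two of the three records (only the last row also carries an ObsDate, which exercises the timestamp branch). A short sketch of that filtering, assuming pandas is installed and this file is saved as mappings.csv in the working directory:

import pandas as pd

# Read only the columns the dataset itself uses.
df = pd.read_csv("mappings.csv", usecols=["ObsDate", "Latitude", "Longitude"])

# Rows missing a latitude or longitude are skipped, which is why test_len
# below expects a length of 2.
valid = df.dropna(subset=["Latitude", "Longitude"])
print(len(valid))  # 2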
tests/datasets/test_eddmaps.py
@@ -0,0 +1,67 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import builtins
import os
from pathlib import Path
from typing import Any

import pytest
from _pytest.monkeypatch import MonkeyPatch

from torchgeo.datasets import BoundingBox, EDDMapS, IntersectionDataset, UnionDataset

pytest.importorskip("pandas", minversion="0.23.2")


class TestEDDMapS:
    @pytest.fixture(scope="class")
    def dataset(self) -> EDDMapS:
        root = os.path.join("tests", "data", "eddmaps")
        return EDDMapS(root)

    def test_getitem(self, dataset: EDDMapS) -> None:
        x = dataset[dataset.bounds]
        assert isinstance(x, dict)

    def test_len(self, dataset: EDDMapS) -> None:
        assert len(dataset) == 2

    def test_and(self, dataset: EDDMapS) -> None:
        ds = dataset & dataset
        assert isinstance(ds, IntersectionDataset)

    def test_or(self, dataset: EDDMapS) -> None:
        ds = dataset | dataset
        assert isinstance(ds, UnionDataset)

    def test_no_data(self, tmp_path: Path) -> None:
        with pytest.raises(FileNotFoundError, match="Dataset not found"):
            EDDMapS(str(tmp_path))

    @pytest.fixture
    def mock_missing_module(self, monkeypatch: MonkeyPatch) -> None:
        import_orig = builtins.__import__

        def mocked_import(name: str, *args: Any, **kwargs: Any) -> Any:
            if name == "pandas":
                raise ImportError()
            return import_orig(name, *args, **kwargs)

        monkeypatch.setattr(builtins, "__import__", mocked_import)

    def test_mock_missing_module(
        self, dataset: EDDMapS, mock_missing_module: None
    ) -> None:
        with pytest.raises(
            ImportError,
            match="pandas is not installed and is required to use this dataset",
        ):
            EDDMapS(dataset.root)

    def test_invalid_query(self, dataset: EDDMapS) -> None:
        query = BoundingBox(0, 0, 0, 0, 0, 0)
        with pytest.raises(
            IndexError, match="query: .* not found in index with bounds:"
        ):
            dataset[query]
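The mock_missing_module fixture works by wrapping builtins.__import__ so that any attempt to import pandas fails, letting test_mock_missing_module exercise the dataset's friendly ImportError message. A standalone sketch of that interception pattern, using only the standard library and illustrative names:

import builtins

real_import = builtins.__import__

def fake_import(name, *args, **kwargs):
    # Pretend pandas is missing; defer every other import to the real one.
    if name == "pandas":
        raise ImportError(name)
    return real_import(name, *args, **kwargs)

builtins.__import__ = fake_import
try:
    try:
        import pandas  # noqa: F401
    except ImportError:
        print("pandas import was intercepted")
finally:
    # Restore the real import, just as monkeypatch does on fixture teardown.
    builtins.__import__ = real_import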
torchgeo/datasets/__init__.py
@@ -28,6 +28,7 @@ from .cowc import COWC, COWCCounting, COWCDetection
from .cv4a_kenya_crop_type import CV4AKenyaCropType
from .cyclone import TropicalCycloneWindEstimation
from .dfc2022 import DFC2022
from .eddmaps import EDDMapS
from .enviroatlas import EnviroAtlas
from .esri2020 import Esri2020
from .etci2021 import ETCI2021
@@ -118,6 +119,7 @@ __all__ = (
    "ChesapeakeWV",
    "ChesapeakeCVPR",
    "CMSGlobalMangroveCanopy",
    "EDDMapS",
    "Esri2020",
    "EUDEM",
    "GBIF",
torchgeo/datasets/eddmaps.py
@@ -0,0 +1,116 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""Dataset for EDDMapS."""

import os
import sys
from typing import Any, Dict

import numpy as np
from rasterio.crs import CRS

from .geo import GeoDataset
from .utils import BoundingBox, disambiguate_timestamp


class EDDMapS(GeoDataset):
    """Dataset for EDDMapS.

    `EDDMapS <https://www.eddmaps.org/>`_, Early Detection and Distribution Mapping
    System, is a web-based mapping system for documenting invasive species and pest
    distribution. Launched in 2005 by the Center for Invasive Species and Ecosystem
    Health at the University of Georgia, it was originally designed as a tool for
    state Exotic Pest Plant Councils to develop more complete distribution data of
    invasive species. Since then, the program has expanded to include the entire US
    and Canada as well as to document certain native pest species.

    EDDMapS query results can be downloaded in CSV, KML, or Shapefile format. This
    dataset currently only supports CSV files.

    If you use an EDDMapS dataset in your research, please cite it like so:

    * EDDMapS. *YEAR*. Early Detection & Distribution Mapping System. The University of
      Georgia - Center for Invasive Species and Ecosystem Health. Available online at
      http://www.eddmaps.org/; last accessed *DATE*.

    .. note::
       This dataset requires the following additional library to be installed:

       * `pandas <https://pypi.org/project/pandas/>`_ to load CSV files

    .. versionadded:: 0.3
    """

    res = 0
    _crs = CRS.from_epsg(4326)  # Lat/Lon

    def __init__(self, root: str = "data") -> None:
        """Initialize a new Dataset instance.

        Args:
            root: root directory where dataset can be found

        Raises:
            FileNotFoundError: if no files are found in ``root``
            ImportError: if pandas is not installed
        """
        super().__init__()

        self.root = root

        filepath = os.path.join(root, "mappings.csv")
        if not os.path.exists(filepath):
            raise FileNotFoundError(f"Dataset not found in `root={self.root}`")

        try:
            import pandas as pd  # noqa: F401
        except ImportError:
            raise ImportError(
                "pandas is not installed and is required to use this dataset"
            )

        # Read CSV file
        data = pd.read_csv(
            filepath, engine="c", usecols=["ObsDate", "Latitude", "Longitude"]
        )

        # Convert from pandas DataFrame to rtree Index
        i = 0
        for date, y, x in data.itertuples(index=False, name=None):
            # Skip rows without lat/lon
            if np.isnan(y) or np.isnan(x):
                continue

            if not pd.isna(date):
                mint, maxt = disambiguate_timestamp(date, "%m-%d-%y")
            else:
                mint, maxt = 0, sys.maxsize

            coords = (x, x, y, y, mint, maxt)
            self.index.insert(i, coords)
            i += 1

    def __getitem__(self, query: BoundingBox) -> Dict[str, Any]:
        """Retrieve metadata indexed by query.

        Args:
            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index

        Returns:
            sample of metadata at that index

        Raises:
            IndexError: if query is not found in the index
        """
        hits = self.index.intersection(tuple(query), objects=True)
        bboxes = [hit.bbox for hit in hits]

        if not bboxes:
            raise IndexError(
                f"query: {query} not found in index with bounds: {self.bounds}"
            )

        sample = {"crs": self.crs, "bbox": bboxes}

        return sample
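A minimal usage sketch for the new dataset, assuming torchgeo with this module is importable and that root points at a directory containing an EDDMapS CSV export saved as mappings.csv (the test fixture above works):

import os

from torchgeo.datasets import EDDMapS

# Any directory containing an EDDMapS-format "mappings.csv"; the test
# fixture shipped with this change is used here as an example.
root = os.path.join("tests", "data", "eddmaps")
ds = EDDMapS(root)

print(len(ds))    # number of records with usable coordinates
print(ds.bounds)  # BoundingBox covering every indexed record

# Indexing with a BoundingBox returns the CRS plus the bounding boxes of all
# matching records; the dataset's own bounds match everything.
sample = ds[ds.bounds]
print(sample["crs"], len(sample["bbox"]))

Like other GeoDataset subclasses, EDDMapS can be combined with & or | to build IntersectionDataset and UnionDataset objects, as the tests above exercise.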