* Add EDDMapS dataset

* Mypy hack

* Test fix
This commit is contained in:
Adam J. Stewart 2022-05-14 21:29:47 -05:00 коммит произвёл GitHub
Родитель 369b36122a
Коммит 827985ad0a
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 291 добавлений и 0 удалений

Просмотреть файл

@ -52,6 +52,11 @@ Cropland Data Layer (CDL)
.. autoclass:: CDL
EDDMapS
^^^^^^^
.. autoclass:: EDDMapS
EnviroAtlas
^^^^^^^^^^^

97
tests/data/eddmaps/data.py Executable file
Просмотреть файл

@ -0,0 +1,97 @@
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import pandas as pd
# Name of the CSV fixture written into the current working directory.
filename = "mappings.csv"

# Number of fake observation rows to generate.
size = 3

# Every column of the generated CSV, in output order.
columns = [
    "gbifID",
    "decimalLatitude",
    "decimalLongitude",
    "objectid",
    "reporter",
    "RecOwner",
    "SciName",
    "ComName",
    "Nativity",
    "OccStatus",
    "Status",
    "ObsDate",
    "DateEnt",
    "DateUp",
    "Location",
    "Latitude",
    "Longitude",
    "Datum",
    "Method",
    "CoordAcc",
    "DataType",
    "Centroid",
    "Abundance",
    "InfestAcre",
    "GrossAcre",
    "Percentcov",
    "Density",
    "Quantity",
    "QuantityU",
    "APPXQuant",
    "NumCollect",
    "Smallest",
    "Largest",
    "Incidence",
    "Severity",
    "Host",
    "Host_Name",
    "HostPheno",
    "HostDamage",
    "ManageStat",
    "PopStat",
    "Habitat",
    "LocalOwner",
    "Site",
    "RecBasis",
    "Museum",
    "MuseumRec",
    "Voucher",
    "ObsIDer",
    "CollectTme",
    "UUID",
    "OrgSrcID",
    "OrigName",
    "RecSrcTyp",
    "Surveyor",
    "DateAcc",
    "VisitType",
    "DataMthd",
    "TrapType",
    "NumTraps",
    "TargetName",
    "TargetCnt",
    "TargetRnge",
    "Phenology",
    "LifeStatus",
    "Sex",
    "PID",
    "WaterName",
    "WaterType",
    "Substrate",
    "TreatArea",
    "PlantTreat",
    "TreatComm",
    "Reference",
    "Locality",
    "Comments",
    "ReviewDate",
    "Reviewer",
    "VerifyMthd",
    "Verified",
    "IDCred",
    "ReviewComm",
]

# Columns that carry real values; every other column is filled with "".
# The first row deliberately has no longitude and only the last row has an
# observation date, so the fixture contains both complete and partial rows.
filled = {
    "decimalLatitude": [41.881832] * size,
    "decimalLongitude": [""] + [-87.623177] * (size - 1),
    "SciName": ["Homo sapiens"] * size,
    "ComName": ["human"] * size,
    "Nativity": ["Native"] * size,
    "OccStatus": ["Detected"] * size,
    "Status": ["Positive"] * size,
    "ObsDate": ["", "", "05-07-22"],
    "DateEnt": ["05-07-22"] * size,
    "DateUp": ["05-07-22"] * size,
    "Location": ["Chicago, Illinois, United States"] * size,
    "Latitude": [41.881832] * size,
    "Longitude": [""] + [-87.623177] * (size - 1),
    "Datum": ["WGS84"] * size,
    "ManageStat": ["Unknown"] * size,
    "OrigName": ["Homo sapiens"] * size,
    "RecSrcTyp": ["Bulk Data"] * size,
    "ReviewDate": ["05-07-22"] * size,
    "Reviewer": ["Charles Darwin"] * size,
    "VerifyMthd": ["Bulk Verified"] * size,
    "Verified": ["Verified"] * size,
    "IDCred": ["Credible"] * size,
}

# Assemble the table column by column, preserving the order of `columns`.
data = {name: filled.get(name, [""] * size) for name in columns}

df = pd.DataFrame(data)
df.to_csv(filename, index=False)

Просмотреть файл

@ -0,0 +1,4 @@
gbifID,decimalLatitude,decimalLongitude,objectid,reporter,RecOwner,SciName,ComName,Nativity,OccStatus,Status,ObsDate,DateEnt,DateUp,Location,Latitude,Longitude,Datum,Method,CoordAcc,DataType,Centroid,Abundance,InfestAcre,GrossAcre,Percentcov,Density,Quantity,QuantityU,APPXQuant,NumCollect,Smallest,Largest,Incidence,Severity,Host,Host_Name,HostPheno,HostDamage,ManageStat,PopStat,Habitat,LocalOwner,Site,RecBasis,Museum,MuseumRec,Voucher,ObsIDer,CollectTme,UUID,OrgSrcID,OrigName,RecSrcTyp,Surveyor,DateAcc,VisitType,DataMthd,TrapType,NumTraps,TargetName,TargetCnt,TargetRnge,Phenology,LifeStatus,Sex,PID,WaterName,WaterType,Substrate,TreatArea,PlantTreat,TreatComm,Reference,Locality,Comments,ReviewDate,Reviewer,VerifyMthd,Verified,IDCred,ReviewComm
,41.881832,,,,,Homo sapiens,human,Native,Detected,Positive,,05-07-22,05-07-22,"Chicago, Illinois, United States",41.881832,,WGS84,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,Homo sapiens,Bulk Data,,,,,,,,,,,,,,,,,,,,,,,05-07-22,Charles Darwin,Bulk Verified,Verified,Credible,
,41.881832,-87.623177,,,,Homo sapiens,human,Native,Detected,Positive,,05-07-22,05-07-22,"Chicago, Illinois, United States",41.881832,-87.623177,WGS84,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,Homo sapiens,Bulk Data,,,,,,,,,,,,,,,,,,,,,,,05-07-22,Charles Darwin,Bulk Verified,Verified,Credible,
,41.881832,-87.623177,,,,Homo sapiens,human,Native,Detected,Positive,05-07-22,05-07-22,05-07-22,"Chicago, Illinois, United States",41.881832,-87.623177,WGS84,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,Homo sapiens,Bulk Data,,,,,,,,,,,,,,,,,,,,,,,05-07-22,Charles Darwin,Bulk Verified,Verified,Credible,
1 gbifID decimalLatitude decimalLongitude objectid reporter RecOwner SciName ComName Nativity OccStatus Status ObsDate DateEnt DateUp Location Latitude Longitude Datum Method CoordAcc DataType Centroid Abundance InfestAcre GrossAcre Percentcov Density Quantity QuantityU APPXQuant NumCollect Smallest Largest Incidence Severity Host Host_Name HostPheno HostDamage ManageStat PopStat Habitat LocalOwner Site RecBasis Museum MuseumRec Voucher ObsIDer CollectTme UUID OrgSrcID OrigName RecSrcTyp Surveyor DateAcc VisitType DataMthd TrapType NumTraps TargetName TargetCnt TargetRnge Phenology LifeStatus Sex PID WaterName WaterType Substrate TreatArea PlantTreat TreatComm Reference Locality Comments ReviewDate Reviewer VerifyMthd Verified IDCred ReviewComm
2 41.881832 Homo sapiens human Native Detected Positive 05-07-22 05-07-22 Chicago, Illinois, United States 41.881832 WGS84 Unknown Homo sapiens Bulk Data 05-07-22 Charles Darwin Bulk Verified Verified Credible
3 41.881832 -87.623177 Homo sapiens human Native Detected Positive 05-07-22 05-07-22 Chicago, Illinois, United States 41.881832 -87.623177 WGS84 Unknown Homo sapiens Bulk Data 05-07-22 Charles Darwin Bulk Verified Verified Credible
4 41.881832 -87.623177 Homo sapiens human Native Detected Positive 05-07-22 05-07-22 05-07-22 Chicago, Illinois, United States 41.881832 -87.623177 WGS84 Unknown Homo sapiens Bulk Data 05-07-22 Charles Darwin Bulk Verified Verified Credible

Просмотреть файл

@ -0,0 +1,67 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import builtins
import os
from pathlib import Path
from typing import Any
import pytest
from _pytest.monkeypatch import MonkeyPatch
from torchgeo.datasets import BoundingBox, EDDMapS, IntersectionDataset, UnionDataset
# Skip this whole test module unless pandas >= 0.23.2 can be imported.
pytest.importorskip("pandas", minversion="0.23.2")
class TestEDDMapS:
    """Tests for the EDDMapS dataset, run against the CSV under tests/data/eddmaps."""

    @pytest.fixture(scope="class")
    def dataset(self) -> EDDMapS:
        """Create one dataset instance, shared by all tests in this class."""
        return EDDMapS(os.path.join("tests", "data", "eddmaps"))

    def test_getitem(self, dataset: EDDMapS) -> None:
        """Indexing with the dataset's own bounds returns a dict sample."""
        sample = dataset[dataset.bounds]
        assert isinstance(sample, dict)

    def test_len(self, dataset: EDDMapS) -> None:
        """The dataset reports a length of two for the test data."""
        assert len(dataset) == 2

    def test_and(self, dataset: EDDMapS) -> None:
        """The ``&`` operator produces an IntersectionDataset."""
        intersection = dataset & dataset
        assert isinstance(intersection, IntersectionDataset)

    def test_or(self, dataset: EDDMapS) -> None:
        """The ``|`` operator produces a UnionDataset."""
        union = dataset | dataset
        assert isinstance(union, UnionDataset)

    def test_no_data(self, tmp_path: Path) -> None:
        """An empty root directory raises FileNotFoundError."""
        with pytest.raises(FileNotFoundError, match="Dataset not found"):
            EDDMapS(str(tmp_path))

    @pytest.fixture
    def mock_missing_module(self, monkeypatch: MonkeyPatch) -> None:
        """Patch ``builtins.__import__`` so that importing pandas fails."""
        real_import = builtins.__import__

        def fake_import(name: str, *args: Any, **kwargs: Any) -> Any:
            # Every module other than pandas is imported normally.
            if name != "pandas":
                return real_import(name, *args, **kwargs)
            raise ImportError()

        monkeypatch.setattr(builtins, "__import__", fake_import)

    def test_mock_missing_module(
        self, dataset: EDDMapS, mock_missing_module: None
    ) -> None:
        """Constructing the dataset without pandas raises a descriptive error."""
        expected = "pandas is not installed and is required to use this dataset"
        with pytest.raises(ImportError, match=expected):
            EDDMapS(dataset.root)

    def test_invalid_query(self, dataset: EDDMapS) -> None:
        """A query box that matches nothing raises IndexError."""
        empty_box = BoundingBox(0, 0, 0, 0, 0, 0)
        expected = "query: .* not found in index with bounds:"
        with pytest.raises(IndexError, match=expected):
            dataset[empty_box]

Просмотреть файл

@ -28,6 +28,7 @@ from .cowc import COWC, COWCCounting, COWCDetection
from .cv4a_kenya_crop_type import CV4AKenyaCropType
from .cyclone import TropicalCycloneWindEstimation
from .dfc2022 import DFC2022
from .eddmaps import EDDMapS
from .enviroatlas import EnviroAtlas
from .esri2020 import Esri2020
from .etci2021 import ETCI2021
@ -118,6 +119,7 @@ __all__ = (
"ChesapeakeWV",
"ChesapeakeCVPR",
"CMSGlobalMangroveCanopy",
"EDDMapS",
"Esri2020",
"EUDEM",
"GBIF",

Просмотреть файл

@ -0,0 +1,116 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""Dataset for EDDMapS."""
import os
import sys
from typing import Any, Dict
import numpy as np
from rasterio.crs import CRS
from .geo import GeoDataset
from .utils import BoundingBox, disambiguate_timestamp
class EDDMapS(GeoDataset):
    """Dataset for EDDMapS.

    `EDDMapS <https://www.eddmaps.org/>`_, Early Detection and Distribution Mapping
    System, is a web-based mapping system for documenting invasive species and pest
    distribution. Launched in 2005 by the Center for Invasive Species and Ecosystem
    Health at the University of Georgia, it was originally designed as a tool for
    state Exotic Pest Plant Councils to develop more complete distribution data of
    invasive species. Since then, the program has expanded to include the entire US
    and Canada as well as to document certain native pest species.

    EDDMapS query results can be downloaded in CSV, KML, or Shapefile format. This
    dataset currently only supports CSV files.

    If you use an EDDMapS dataset in your research, please cite it like so:

    * EDDMapS. *YEAR*. Early Detection & Distribution Mapping System. The University of
      Georgia - Center for Invasive Species and Ecosystem Health. Available online at
      http://www.eddmaps.org/; last accessed *DATE*.

    .. note::
       This dataset requires the following additional library to be installed:

       * `pandas <https://pypi.org/project/pandas/>`_ to load CSV files

    .. versionadded:: 0.3
    """

    res = 0
    _crs = CRS.from_epsg(4326)  # Lat/Lon

    def __init__(self, root: str = "data") -> None:
        """Initialize a new Dataset instance.

        Args:
            root: root directory where dataset can be found

        Raises:
            FileNotFoundError: if ``mappings.csv`` is not found in ``root``
            ImportError: if pandas is not installed
        """
        super().__init__()

        self.root = root

        filepath = os.path.join(root, "mappings.csv")
        if not os.path.exists(filepath):
            raise FileNotFoundError(f"Dataset not found in `root={self.root}`")

        try:
            import pandas as pd  # noqa: F401
        except ImportError as err:
            raise ImportError(
                "pandas is not installed and is required to use this dataset"
            ) from err

        # Read CSV file. ``read_csv`` ignores the order of ``usecols`` and returns
        # the columns in file order, so reorder explicitly: the loop below unpacks
        # each row by position.
        columns = ["ObsDate", "Latitude", "Longitude"]
        data = pd.read_csv(filepath, engine="c", usecols=columns)[columns]

        # Convert from pandas DataFrame to rtree Index
        i = 0
        for date, y, x in data.itertuples(index=False, name=None):
            # Skip rows without lat/lon
            if np.isnan(y) or np.isnan(x):
                continue

            if not pd.isna(date):
                mint, maxt = disambiguate_timestamp(date, "%m-%d-%y")
            else:
                # No observation date: treat the record as valid for all time
                mint, maxt = 0, sys.maxsize

            # Each observation is a single point, so min and max coincide
            coords = (x, x, y, y, mint, maxt)
            self.index.insert(i, coords)
            # Only rows that were actually inserted consume an index id
            i += 1

    def __getitem__(self, query: BoundingBox) -> Dict[str, Any]:
        """Retrieve metadata indexed by query.

        Args:
            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index

        Returns:
            sample of metadata at that index: a dict with the dataset ``crs`` and a
            ``bbox`` list holding the bounding box of every index hit

        Raises:
            IndexError: if query is not found in the index
        """
        hits = self.index.intersection(tuple(query), objects=True)
        bboxes = [hit.bbox for hit in hits]

        if not bboxes:
            raise IndexError(
                f"query: {query} not found in index with bounds: {self.bounds}"
            )

        sample = {"crs": self.crs, "bbox": bboxes}

        return sample