зеркало из https://github.com/microsoft/torchgeo.git
Add EDDMapS dataset (#533)
* Add EDDMapS dataset * Mypy hack * Test fix
This commit is contained in:
Родитель
369b36122a
Коммит
827985ad0a
|
@ -52,6 +52,11 @@ Cropland Data Layer (CDL)
|
|||
|
||||
.. autoclass:: CDL
|
||||
|
||||
EDDMapS
|
||||
^^^^^^^
|
||||
|
||||
.. autoclass:: EDDMapS
|
||||
|
||||
EnviroAtlas
|
||||
^^^^^^^^^^^
|
||||
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import pandas as pd
|
||||
|
||||
filename = "mappings.csv"
size = 3

# Column names, in the order they appear in the CSV header
columns = [
    "gbifID",
    "decimalLatitude",
    "decimalLongitude",
    "objectid",
    "reporter",
    "RecOwner",
    "SciName",
    "ComName",
    "Nativity",
    "OccStatus",
    "Status",
    "ObsDate",
    "DateEnt",
    "DateUp",
    "Location",
    "Latitude",
    "Longitude",
    "Datum",
    "Method",
    "CoordAcc",
    "DataType",
    "Centroid",
    "Abundance",
    "InfestAcre",
    "GrossAcre",
    "Percentcov",
    "Density",
    "Quantity",
    "QuantityU",
    "APPXQuant",
    "NumCollect",
    "Smallest",
    "Largest",
    "Incidence",
    "Severity",
    "Host",
    "Host_Name",
    "HostPheno",
    "HostDamage",
    "ManageStat",
    "PopStat",
    "Habitat",
    "LocalOwner",
    "Site",
    "RecBasis",
    "Museum",
    "MuseumRec",
    "Voucher",
    "ObsIDer",
    "CollectTme",
    "UUID",
    "OrgSrcID",
    "OrigName",
    "RecSrcTyp",
    "Surveyor",
    "DateAcc",
    "VisitType",
    "DataMthd",
    "TrapType",
    "NumTraps",
    "TargetName",
    "TargetCnt",
    "TargetRnge",
    "Phenology",
    "LifeStatus",
    "Sex",
    "PID",
    "WaterName",
    "WaterType",
    "Substrate",
    "TreatArea",
    "PlantTreat",
    "TreatComm",
    "Reference",
    "Locality",
    "Comments",
    "ReviewDate",
    "Reviewer",
    "VerifyMthd",
    "Verified",
    "IDCred",
    "ReviewComm",
]

# Cell values reused by more than one populated column
latitude = [41.881832] * size
# The first record has an empty longitude, the remaining ones are filled in
longitude = [""] + [-87.623177] * (size - 1)
species = ["Homo sapiens"] * size
date = ["05-07-22"] * size

# Columns that carry a value; every column not listed here is left blank
populated = {
    "decimalLatitude": latitude,
    "decimalLongitude": longitude,
    "SciName": species,
    "ComName": ["human"] * size,
    "Nativity": ["Native"] * size,
    "OccStatus": ["Detected"] * size,
    "Status": ["Positive"] * size,
    # Only the last record has an observation date
    "ObsDate": ["", "", "05-07-22"],
    "DateEnt": date,
    "DateUp": date,
    "Location": ["Chicago, Illinois, United States"] * size,
    "Latitude": latitude,
    "Longitude": longitude,
    "Datum": ["WGS84"] * size,
    "ManageStat": ["Unknown"] * size,
    "OrigName": species,
    "RecSrcTyp": ["Bulk Data"] * size,
    "ReviewDate": date,
    "Reviewer": ["Charles Darwin"] * size,
    "VerifyMthd": ["Bulk Verified"] * size,
    "Verified": ["Verified"] * size,
    "IDCred": ["Credible"] * size,
}

# Assemble the table column by column; each column gets its own list
data = {name: list(populated.get(name, [""] * size)) for name in columns}

df = pd.DataFrame(data)
df.to_csv(filename, index=False)
|
|
@ -0,0 +1,4 @@
|
|||
gbifID,decimalLatitude,decimalLongitude,objectid,reporter,RecOwner,SciName,ComName,Nativity,OccStatus,Status,ObsDate,DateEnt,DateUp,Location,Latitude,Longitude,Datum,Method,CoordAcc,DataType,Centroid,Abundance,InfestAcre,GrossAcre,Percentcov,Density,Quantity,QuantityU,APPXQuant,NumCollect,Smallest,Largest,Incidence,Severity,Host,Host_Name,HostPheno,HostDamage,ManageStat,PopStat,Habitat,LocalOwner,Site,RecBasis,Museum,MuseumRec,Voucher,ObsIDer,CollectTme,UUID,OrgSrcID,OrigName,RecSrcTyp,Surveyor,DateAcc,VisitType,DataMthd,TrapType,NumTraps,TargetName,TargetCnt,TargetRnge,Phenology,LifeStatus,Sex,PID,WaterName,WaterType,Substrate,TreatArea,PlantTreat,TreatComm,Reference,Locality,Comments,ReviewDate,Reviewer,VerifyMthd,Verified,IDCred,ReviewComm
|
||||
,41.881832,,,,,Homo sapiens,human,Native,Detected,Positive,,05-07-22,05-07-22,"Chicago, Illinois, United States",41.881832,,WGS84,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,Homo sapiens,Bulk Data,,,,,,,,,,,,,,,,,,,,,,,05-07-22,Charles Darwin,Bulk Verified,Verified,Credible,
|
||||
,41.881832,-87.623177,,,,Homo sapiens,human,Native,Detected,Positive,,05-07-22,05-07-22,"Chicago, Illinois, United States",41.881832,-87.623177,WGS84,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,Homo sapiens,Bulk Data,,,,,,,,,,,,,,,,,,,,,,,05-07-22,Charles Darwin,Bulk Verified,Verified,Credible,
|
||||
,41.881832,-87.623177,,,,Homo sapiens,human,Native,Detected,Positive,05-07-22,05-07-22,05-07-22,"Chicago, Illinois, United States",41.881832,-87.623177,WGS84,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,Homo sapiens,Bulk Data,,,,,,,,,,,,,,,,,,,,,,,05-07-22,Charles Darwin,Bulk Verified,Verified,Credible,
|
|
|
@ -0,0 +1,67 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import builtins
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from _pytest.monkeypatch import MonkeyPatch
|
||||
|
||||
from torchgeo.datasets import BoundingBox, EDDMapS, IntersectionDataset, UnionDataset
|
||||
|
||||
pytest.importorskip("pandas", minversion="0.23.2")
|
||||
|
||||
|
||||
class TestEDDMapS:
    """Tests for the EDDMapS dataset, run against the CSV under tests/data/eddmaps."""

    @pytest.fixture(scope="class")
    def dataset(self) -> EDDMapS:
        """Build one dataset instance shared by the tests in this class."""
        return EDDMapS(os.path.join("tests", "data", "eddmaps"))

    def test_getitem(self, dataset: EDDMapS) -> None:
        # Querying with the dataset's own bounds must return a sample dict
        sample = dataset[dataset.bounds]
        assert isinstance(sample, dict)

    def test_len(self, dataset: EDDMapS) -> None:
        expected = 2
        assert len(dataset) == expected

    def test_and(self, dataset: EDDMapS) -> None:
        assert isinstance(dataset & dataset, IntersectionDataset)

    def test_or(self, dataset: EDDMapS) -> None:
        assert isinstance(dataset | dataset, UnionDataset)

    def test_no_data(self, tmp_path: Path) -> None:
        # An empty temporary directory contains no mappings.csv
        empty_root = str(tmp_path)
        with pytest.raises(FileNotFoundError, match="Dataset not found"):
            EDDMapS(empty_root)

    @pytest.fixture
    def mock_missing_module(self, monkeypatch: MonkeyPatch) -> None:
        """Make every ``import pandas`` fail for the duration of a test."""
        real_import = builtins.__import__

        def fake_import(name: str, *args: Any, **kwargs: Any) -> Any:
            # Everything except pandas is delegated to the real import machinery
            if name != "pandas":
                return real_import(name, *args, **kwargs)
            raise ImportError()

        monkeypatch.setattr(builtins, "__import__", fake_import)

    def test_mock_missing_module(
        self, dataset: EDDMapS, mock_missing_module: None
    ) -> None:
        message = "pandas is not installed and is required to use this dataset"
        with pytest.raises(ImportError, match=message):
            EDDMapS(dataset.root)

    def test_invalid_query(self, dataset: EDDMapS) -> None:
        pattern = "query: .* not found in index with bounds:"
        degenerate_box = BoundingBox(0, 0, 0, 0, 0, 0)
        with pytest.raises(IndexError, match=pattern):
            dataset[degenerate_box]
|
|
@ -28,6 +28,7 @@ from .cowc import COWC, COWCCounting, COWCDetection
|
|||
from .cv4a_kenya_crop_type import CV4AKenyaCropType
|
||||
from .cyclone import TropicalCycloneWindEstimation
|
||||
from .dfc2022 import DFC2022
|
||||
from .eddmaps import EDDMapS
|
||||
from .enviroatlas import EnviroAtlas
|
||||
from .esri2020 import Esri2020
|
||||
from .etci2021 import ETCI2021
|
||||
|
@ -118,6 +119,7 @@ __all__ = (
|
|||
"ChesapeakeWV",
|
||||
"ChesapeakeCVPR",
|
||||
"CMSGlobalMangroveCanopy",
|
||||
"EDDMapS",
|
||||
"Esri2020",
|
||||
"EUDEM",
|
||||
"GBIF",
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""Dataset for EDDMapS."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, Dict
|
||||
|
||||
import numpy as np
|
||||
from rasterio.crs import CRS
|
||||
|
||||
from .geo import GeoDataset
|
||||
from .utils import BoundingBox, disambiguate_timestamp
|
||||
|
||||
|
||||
class EDDMapS(GeoDataset):
    """Dataset for EDDMapS.

    `EDDMapS <https://www.eddmaps.org/>`_, Early Detection and Distribution Mapping
    System, is a web-based mapping system for documenting invasive species and pest
    distribution. Launched in 2005 by the Center for Invasive Species and Ecosystem
    Health at the University of Georgia, it was originally designed as a tool for
    state Exotic Pest Plant Councils to develop more complete distribution data of
    invasive species. Since then, the program has expanded to include the entire US
    and Canada as well as to document certain native pest species.

    EDDMapS query results can be downloaded in CSV, KML, or Shapefile format. This
    dataset currently only supports CSV files.

    If you use an EDDMapS dataset in your research, please cite it like so:

    * EDDMapS. *YEAR*. Early Detection & Distribution Mapping System. The University of
      Georgia - Center for Invasive Species and Ecosystem Health. Available online at
      http://www.eddmaps.org/; last accessed *DATE*.

    .. note::
       This dataset requires the following additional library to be installed:

       * `pandas <https://pypi.org/project/pandas/>`_ to load CSV files

    .. versionadded:: 0.3
    """

    # NOTE(review): records are indexed as single points below; 0 presumably means
    # "no pixel resolution" to the base class -- confirm against GeoDataset.
    res = 0
    _crs = CRS.from_epsg(4326)  # Lat/Lon

    def __init__(self, root: str = "data") -> None:
        """Initialize a new Dataset instance.

        Reads ``mappings.csv`` from ``root`` and inserts one entry into the spatial
        index for every row that has both a latitude and a longitude.

        Args:
            root: root directory where dataset can be found

        Raises:
            FileNotFoundError: if no files are found in ``root``
            ImportError: if pandas is not installed
        """
        super().__init__()

        self.root = root

        filepath = os.path.join(root, "mappings.csv")
        if not os.path.exists(filepath):
            raise FileNotFoundError(f"Dataset not found in `root={self.root}`")

        try:
            import pandas as pd  # noqa: F401
        except ImportError:
            raise ImportError(
                "pandas is not installed and is required to use this dataset"
            )

        # Read CSV file
        columns = ["ObsDate", "Latitude", "Longitude"]
        data = pd.read_csv(filepath, engine="c", usecols=columns)

        # ``usecols`` only selects columns, the resulting frame keeps the order
        # found in the file. Reindex explicitly so the positional unpacking below
        # does not depend on how the CSV happens to be laid out.
        data = data[columns]

        # Convert from pandas DataFrame to rtree Index
        i = 0
        for date, y, x in data.itertuples(index=False, name=None):
            # Skip rows without lat/lon
            if np.isnan(y) or np.isnan(x):
                continue

            if not pd.isna(date):
                # presumably returns the (min, max) timestamps spanned by the
                # parsed date -- see disambiguate_timestamp in .utils
                mint, maxt = disambiguate_timestamp(date, "%m-%d-%y")
            else:
                # No observation date: index the record over the widest time range
                mint, maxt = 0, sys.maxsize

            # A point observation, so min and max coincide on both spatial axes
            coords = (x, x, y, y, mint, maxt)
            self.index.insert(i, coords)
            # Only rows that were actually indexed consume an id
            i += 1

    def __getitem__(self, query: BoundingBox) -> Dict[str, Any]:
        """Retrieve metadata indexed by query.

        Args:
            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index

        Returns:
            sample of metadata at that index: the dataset ``crs`` and the list of
            bounding boxes of all indexed records intersecting ``query``

        Raises:
            IndexError: if query is not found in the index
        """
        hits = self.index.intersection(tuple(query), objects=True)
        bboxes = [hit.bbox for hit in hits]

        if not bboxes:
            raise IndexError(
                f"query: {query} not found in index with bounds: {self.bounds}"
            )

        sample = {"crs": self.crs, "bbox": bboxes}

        return sample
|
Загрузка…
Ссылка в новой задаче