This commit is contained in:
Adam J. Stewart 2022-05-14 21:29:34 -05:00 коммит произвёл GitHub
Родитель 1f2006e62c
Коммит 369b36122a
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 265 добавлений и 0 удалений

Просмотреть файл

@ -77,6 +77,11 @@ GlobBiomass
.. autoclass:: GlobBiomass
iNaturalist
^^^^^^^^^^^
.. autoclass:: INaturalist
Landsat
^^^^^^^

58
tests/data/inaturalist/data.py Executable file
Просмотреть файл

@ -0,0 +1,58 @@
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import pandas as pd
# Name of the CSV fixture that this script writes into the current directory.
filename = "observations-012345.csv"

# User can select which columns to export. The following are the default columns.
# Not all columns may exist in the actual dataset.
size = 4

# Values shared by several columns.
_DAY = "2022-05-07"
_TIMESTAMP = "2022-05-07 11:02:53 +0100"


def _repeat(value):
    """Return a new list containing ``value`` once for every fixture row."""
    return [value] * size


# Column name -> per-row values. Insertion order defines the CSV column order.
data = {
    "id": _repeat(""),
    "observed_on_string": _repeat(""),
    # Only the last two rows carry a date, and only the last one a full timestamp.
    "observed_on": ["", "", _DAY, _DAY],
    "time_observed_at": ["", "", "", _TIMESTAMP],
    "time_zone": _repeat("Central Time (US & Canada)"),
    "user_id": _repeat(123),
    "user_login": _repeat("darwin"),
    "created_at": _repeat(_TIMESTAMP),
    "updated_at": _repeat(_TIMESTAMP),
    "quality_grade": _repeat("research"),
    "license": _repeat("CCO"),
    "url": _repeat("https://inaturalist.org/observations/123"),
    "image_url": _repeat(
        "https://inaturalist-open-data.s3.amazonaws.com/photos/123/medium.jpg"
    ),
    "sound_url": _repeat("https://static.inaturalist.org/sounds/123.m4a?123"),
    "tag_list": _repeat("Chicago"),
    "description": _repeat(""),
    "num_identification_agreements": _repeat(1),
    "num_identification_disagreements": _repeat(0),
    "captive_cultivated": _repeat("false"),
    "oauth_application_id": _repeat(""),
    "place_guess": _repeat("Chicago"),
    "latitude": _repeat(41.881832),
    # The first row deliberately has no longitude.
    "longitude": [""] + [-87.623177] * (size - 1),
    "positional_accuracy": _repeat(5),
    "private_place_guess": _repeat(""),
    "private_latitude": _repeat(""),
    "private_longitude": _repeat(""),
    "public_positional_accuracy": _repeat(5),
    "geoprivacy": _repeat(""),
    "taxon_geoprivacy": _repeat(""),
    "coordinates_obscured": _repeat("false"),
    "positioning_method": _repeat("gps"),
    "positioning_device": _repeat("gps"),
    "species_guess": _repeat("Homo sapiens"),
    "scientific_name": _repeat("Homo sapiens"),
    "common_name": _repeat("human"),
    "iconic_taxon_name": _repeat("Animalia"),
    "taxon_id": _repeat(123),
}

# Write the table without the pandas row index so only the columns above appear.
df = pd.DataFrame(data)
df.to_csv(filename, index=False)

Просмотреть файл

@ -0,0 +1,5 @@
id,observed_on_string,observed_on,time_observed_at,time_zone,user_id,user_login,created_at,updated_at,quality_grade,license,url,image_url,sound_url,tag_list,description,num_identification_agreements,num_identification_disagreements,captive_cultivated,oauth_application_id,place_guess,latitude,longitude,positional_accuracy,private_place_guess,private_latitude,private_longitude,public_positional_accuracy,geoprivacy,taxon_geoprivacy,coordinates_obscured,positioning_method,positioning_device,species_guess,scientific_name,common_name,iconic_taxon_name,taxon_id
,,,,Central Time (US & Canada),123,darwin,2022-05-07 11:02:53 +0100,2022-05-07 11:02:53 +0100,research,CCO,https://inaturalist.org/observations/123,https://inaturalist-open-data.s3.amazonaws.com/photos/123/medium.jpg,https://static.inaturalist.org/sounds/123.m4a?123,Chicago,,1,0,false,,Chicago,41.881832,,5,,,,5,,,false,gps,gps,Homo sapiens,Homo sapiens,human,Animalia,123
,,,,Central Time (US & Canada),123,darwin,2022-05-07 11:02:53 +0100,2022-05-07 11:02:53 +0100,research,CCO,https://inaturalist.org/observations/123,https://inaturalist-open-data.s3.amazonaws.com/photos/123/medium.jpg,https://static.inaturalist.org/sounds/123.m4a?123,Chicago,,1,0,false,,Chicago,41.881832,-87.623177,5,,,,5,,,false,gps,gps,Homo sapiens,Homo sapiens,human,Animalia,123
,,2022-05-07,,Central Time (US & Canada),123,darwin,2022-05-07 11:02:53 +0100,2022-05-07 11:02:53 +0100,research,CCO,https://inaturalist.org/observations/123,https://inaturalist-open-data.s3.amazonaws.com/photos/123/medium.jpg,https://static.inaturalist.org/sounds/123.m4a?123,Chicago,,1,0,false,,Chicago,41.881832,-87.623177,5,,,,5,,,false,gps,gps,Homo sapiens,Homo sapiens,human,Animalia,123
,,2022-05-07,2022-05-07 11:02:53 +0100,Central Time (US & Canada),123,darwin,2022-05-07 11:02:53 +0100,2022-05-07 11:02:53 +0100,research,CCO,https://inaturalist.org/observations/123,https://inaturalist-open-data.s3.amazonaws.com/photos/123/medium.jpg,https://static.inaturalist.org/sounds/123.m4a?123,Chicago,,1,0,false,,Chicago,41.881832,-87.623177,5,,,,5,,,false,gps,gps,Homo sapiens,Homo sapiens,human,Animalia,123
1 id observed_on_string observed_on time_observed_at time_zone user_id user_login created_at updated_at quality_grade license url image_url sound_url tag_list description num_identification_agreements num_identification_disagreements captive_cultivated oauth_application_id place_guess latitude longitude positional_accuracy private_place_guess private_latitude private_longitude public_positional_accuracy geoprivacy taxon_geoprivacy coordinates_obscured positioning_method positioning_device species_guess scientific_name common_name iconic_taxon_name taxon_id
2 Central Time (US & Canada) 123 darwin 2022-05-07 11:02:53 +0100 2022-05-07 11:02:53 +0100 research CCO https://inaturalist.org/observations/123 https://inaturalist-open-data.s3.amazonaws.com/photos/123/medium.jpg https://static.inaturalist.org/sounds/123.m4a?123 Chicago 1 0 false Chicago 41.881832 5 5 false gps gps Homo sapiens Homo sapiens human Animalia 123
3 Central Time (US & Canada) 123 darwin 2022-05-07 11:02:53 +0100 2022-05-07 11:02:53 +0100 research CCO https://inaturalist.org/observations/123 https://inaturalist-open-data.s3.amazonaws.com/photos/123/medium.jpg https://static.inaturalist.org/sounds/123.m4a?123 Chicago 1 0 false Chicago 41.881832 -87.623177 5 5 false gps gps Homo sapiens Homo sapiens human Animalia 123
4 2022-05-07 Central Time (US & Canada) 123 darwin 2022-05-07 11:02:53 +0100 2022-05-07 11:02:53 +0100 research CCO https://inaturalist.org/observations/123 https://inaturalist-open-data.s3.amazonaws.com/photos/123/medium.jpg https://static.inaturalist.org/sounds/123.m4a?123 Chicago 1 0 false Chicago 41.881832 -87.623177 5 5 false gps gps Homo sapiens Homo sapiens human Animalia 123
5 2022-05-07 2022-05-07 11:02:53 +0100 Central Time (US & Canada) 123 darwin 2022-05-07 11:02:53 +0100 2022-05-07 11:02:53 +0100 research CCO https://inaturalist.org/observations/123 https://inaturalist-open-data.s3.amazonaws.com/photos/123/medium.jpg https://static.inaturalist.org/sounds/123.m4a?123 Chicago 1 0 false Chicago 41.881832 -87.623177 5 5 false gps gps Homo sapiens Homo sapiens human Animalia 123

Просмотреть файл

@ -0,0 +1,72 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import builtins
import os
from pathlib import Path
from typing import Any
import pytest
from _pytest.monkeypatch import MonkeyPatch
from torchgeo.datasets import (
BoundingBox,
INaturalist,
IntersectionDataset,
UnionDataset,
)
pytest.importorskip("pandas", minversion="0.23.2")
class TestINaturalist:
    """Tests for the ``INaturalist`` dataset using the CSV under tests/data."""

    @pytest.fixture(scope="class")
    def dataset(self) -> INaturalist:
        """Create one dataset instance, shared by every test in this class."""
        return INaturalist(os.path.join("tests", "data", "inaturalist"))

    def test_getitem(self, dataset: INaturalist) -> None:
        """Indexing with the dataset's own bounds returns a dict sample."""
        sample = dataset[dataset.bounds]
        assert isinstance(sample, dict)

    def test_len(self, dataset: INaturalist) -> None:
        """The dataset reports three entries for the test CSV."""
        # Presumably 3 because one of the 4 fixture rows has no longitude.
        assert len(dataset) == 3

    def test_and(self, dataset: INaturalist) -> None:
        """``&`` produces an IntersectionDataset."""
        combined = dataset & dataset
        assert isinstance(combined, IntersectionDataset)

    def test_or(self, dataset: INaturalist) -> None:
        """``|`` produces a UnionDataset."""
        combined = dataset | dataset
        assert isinstance(combined, UnionDataset)

    def test_no_data(self, tmp_path: Path) -> None:
        """An empty root directory raises FileNotFoundError."""
        with pytest.raises(FileNotFoundError, match="Dataset not found"):
            INaturalist(str(tmp_path))

    @pytest.fixture
    def mock_missing_module(self, monkeypatch: MonkeyPatch) -> None:
        """Patch ``builtins.__import__`` so that importing pandas fails."""
        real_import = builtins.__import__

        def fake_import(name: str, *args: Any, **kwargs: Any) -> Any:
            # Every module except pandas is delegated to the real importer.
            if name != "pandas":
                return real_import(name, *args, **kwargs)
            raise ImportError()

        monkeypatch.setattr(builtins, "__import__", fake_import)

    def test_mock_missing_module(
        self, dataset: INaturalist, mock_missing_module: None
    ) -> None:
        """Constructing the dataset without pandas raises a helpful ImportError."""
        expected = "pandas is not installed and is required to use this dataset"
        with pytest.raises(ImportError, match=expected):
            INaturalist(dataset.root)

    def test_invalid_query(self, dataset: INaturalist) -> None:
        """A query that matches nothing in the index raises IndexError."""
        empty_box = BoundingBox(0, 0, 0, 0, 0, 0)
        with pytest.raises(
            IndexError, match="query: .* not found in index with bounds:"
        ):
            dataset[empty_box]

Просмотреть файл

@ -48,6 +48,7 @@ from .geo import (
from .gid15 import GID15
from .globbiomass import GlobBiomass
from .idtrees import IDTReeS
from .inaturalist import INaturalist
from .inria import InriaAerialImageLabeling
from .landcoverai import LandCoverAI
from .landsat import (
@ -121,6 +122,7 @@ __all__ = (
"EUDEM",
"GBIF",
"GlobBiomass",
"INaturalist",
"Landsat",
"Landsat1",
"Landsat2",

Просмотреть файл

@ -0,0 +1,123 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""Dataset for iNaturalist."""
import glob
import os
import sys
from typing import Any, Dict
from rasterio.crs import CRS
from .geo import GeoDataset
from .utils import BoundingBox, disambiguate_timestamp
class INaturalist(GeoDataset):
    """Dataset for iNaturalist.

    `iNaturalist <https://www.inaturalist.org/>`_ is a joint initiative of the
    California Academy of Sciences and the National Geographic Society. It allows
    citizen scientists to upload observations of organisms that can be downloaded by
    scientists and researchers.

    If you use an iNaturalist dataset in your research, please cite it according to:

    * https://www.inaturalist.org/pages/help#cite

    .. note::
       This dataset requires the following additional library to be installed:

       * `pandas <https://pypi.org/project/pandas/>`_ to load CSV files

    .. versionadded:: 0.3
    """

    res = 0
    _crs = CRS.from_epsg(4326)  # Lat/Lon

    def __init__(self, root: str = "data") -> None:
        """Initialize a new Dataset instance.

        Args:
            root: root directory where dataset can be found

        Raises:
            FileNotFoundError: if no files are found in ``root``
            ImportError: if pandas is not installed
        """
        super().__init__()

        self.root = root

        # glob returns matches in arbitrary order; sort so that the file which
        # gets loaded is deterministic when ``root`` holds several CSV files.
        files = sorted(glob.glob(os.path.join(root, "**.csv")))
        if not files:
            raise FileNotFoundError(f"Dataset not found in `root={self.root}`")

        try:
            import pandas as pd  # noqa: F401
        except ImportError as err:
            raise ImportError(
                "pandas is not installed and is required to use this dataset"
            ) from err

        # Read CSV file. ``usecols`` only filters columns, it does not reorder
        # them, so index explicitly to guarantee the order that the positional
        # unpacking in the loop below relies on.
        columns = ["observed_on", "time_observed_at", "latitude", "longitude"]
        data = pd.read_csv(files[0], engine="c", usecols=columns)[columns]

        # Dataset contains many possible timestamps:
        #
        # * observed_on_string: no consistent format (can't use)
        # * observed_on: day precision (better)
        # * time_observed_at: second precision (best)
        # * created_at: when observation was submitted (shouldn't use)
        # * updated_at: when submission was updated (shouldn't use)
        #
        # The created_at/updated_at timestamps can be years after the actual
        # observation, so they shouldn't be used, even if
        # observed_on/time_observed_at are missing.

        # Convert from pandas DataFrame to rtree Index
        i = 0  # counts inserted rows only, so index ids stay contiguous
        for date, time, y, x in data.itertuples(index=False, name=None):
            # Skip rows without lat/lon
            if pd.isna(y) or pd.isna(x):
                continue

            # Prefer the most precise timestamp available
            if not pd.isna(time):
                mint, maxt = disambiguate_timestamp(time, "%Y-%m-%d %H:%M:%S %z")
            elif not pd.isna(date):
                mint, maxt = disambiguate_timestamp(date, "%Y-%m-%d")
            else:
                # No usable timestamp: fall back to the widest possible range
                mint, maxt = 0, sys.maxsize

            # A point observation: min and max coincide on both spatial axes
            coords = (x, x, y, y, mint, maxt)
            self.index.insert(i, coords)
            i += 1

    def __getitem__(self, query: BoundingBox) -> Dict[str, Any]:
        """Retrieve metadata indexed by query.

        Args:
            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index

        Returns:
            sample of metadata at that index: a dict with the dataset ``crs``
            and the list of bounding boxes (``bbox``) of all matching entries

        Raises:
            IndexError: if query is not found in the index
        """
        hits = self.index.intersection(tuple(query), objects=True)
        # NOTE(review): rtree describes ``Item.bbox`` as the interleaved form of
        # ``Item.bounds`` -- confirm this ordering is what consumers of "bbox"
        # expect, given that coordinates are inserted as (minx, maxx, miny, ...).
        bboxes = [hit.bbox for hit in hits]

        if not bboxes:
            raise IndexError(
                f"query: {query} not found in index with bounds: {self.bounds}"
            )

        sample = {"crs": self.crs, "bbox": bboxes}

        return sample