diff --git a/docs/api/datamodules.rst b/docs/api/datamodules.rst
new file mode 100644
index 000000000..ad47f9cd6
--- /dev/null
+++ b/docs/api/datamodules.rst
@@ -0,0 +1,105 @@
+torchgeo.datamodules
+====================
+
+.. module:: torchgeo.datamodules
+
+Geospatial DataModules
+----------------------
+
+Chesapeake Bay High-Resolution Land Cover Project
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: ChesapeakeCVPRDataModule
+
+National Agriculture Imagery Program (NAIP)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: NAIPChesapeakeDataModule
+
+Non-geospatial DataModules
+--------------------------
+
+BigEarthNet
+^^^^^^^^^^^
+
+.. autoclass:: BigEarthNetDataModule
+
+Cars Overhead With Context (COWC)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: COWCCountingDataModule
+
+ETCI2021 Flood Detection
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: ETCI2021DataModule
+
+EuroSAT
+^^^^^^^
+
+.. autoclass:: EuroSATDataModule
+
+FAIR1M (Fine-grAined object recognItion in high-Resolution imagery)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: FAIR1MDataModule
+
+LandCover.ai (Land Cover from Aerial Imagery)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: LandCoverAIDataModule
+
+LoveDA (Land-cOVEr Domain Adaptive semantic segmentation)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: LoveDADataModule
+
+NASA Marine Debris
+^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: NASAMarineDebrisDataModule
+
+OSCD (Onera Satellite Change Detection)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: OSCDDataModule
+
+Potsdam
+^^^^^^^
+
+.. autoclass:: Potsdam2DDataModule
+
+RESISC45 (Remote Sensing Image Scene Classification)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: RESISC45DataModule
+
+SEN12MS
+^^^^^^^
+
+.. autoclass:: SEN12MSDataModule
+
+So2Sat
+^^^^^^
+
+.. autoclass:: So2SatDataModule
+
+Tropical Cyclone Wind Estimation Competition
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: CycloneDataModule
+
+UC Merced
+^^^^^^^^^
+
+.. autoclass:: UCMercedDataModule
+
+Vaihingen
+^^^^^^^^^
+
+.. autoclass:: Vaihingen2DDataModule
+
+xView2
+^^^^^^
+
+.. autoclass:: XView2DataModule
diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst
index 4b30c762a..f3df706a8 100644
--- a/docs/api/datasets.rst
+++ b/docs/api/datasets.rst
@@ -31,7 +31,6 @@ Chesapeake Bay High-Resolution Land Cover Project
 .. autoclass:: ChesapeakeVA
 .. autoclass:: ChesapeakeWV
 .. autoclass:: ChesapeakeCVPR
-.. autoclass:: ChesapeakeCVPRDataModule
 
 Cropland Data Layer (CDL)
 ^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -57,7 +56,6 @@ National Agriculture Imagery Program (NAIP)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 .. autoclass:: NAIP
-.. autoclass:: NAIPChesapeakeDataModule
 
 Sentinel
 ^^^^^^^^
@@ -86,7 +84,6 @@ BigEarthNet
 ^^^^^^^^^^^
 
 .. autoclass:: BigEarthNet
-.. autoclass:: BigEarthNetDataModule
 
 Cars Overhead With Context (COWC)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -94,7 +91,6 @@ Cars Overhead With Context (COWC)
 .. autoclass:: COWC
 .. autoclass:: COWCCounting
 .. autoclass:: COWCDetection
-.. autoclass:: COWCCountingDataModule
 
 CV4A Kenya Crop Type Competition
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -105,19 +101,16 @@ ETCI2021 Flood Detection
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 .. autoclass:: ETCI2021
-.. autoclass:: ETCI2021DataModule
 
 EuroSAT
-^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^
 
 .. autoclass:: EuroSAT
-.. autoclass:: EuroSATDataModule
 
 FAIR1M (Fine-grAined object recognItion in high-Resolution imagery)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 .. autoclass:: FAIR1M
-.. autoclass:: FAIR1MDataModule
 
 GID-15 (Gaofen Image Dataset)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -133,7 +126,6 @@ LandCover.ai (Land Cover from Aerial Imagery)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 .. autoclass:: LandCoverAI
-.. autoclass:: LandCoverAIDataModule
 
 LEVIR-CD+ (LEVIR Change Detection +)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -144,19 +136,16 @@ LoveDA (Land-cOVEr Domain Adaptive semantic segmentation)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 .. autoclass:: LoveDA
-.. autoclass:: LoveDADataModule
 
 NASA Marine Debris
 ^^^^^^^^^^^^^^^^^^
 
 .. autoclass:: NASAMarineDebris
-.. autoclass:: NASAMarineDebrisDataModule
 
 OSCD (Onera Satellite Change Detection)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 .. autoclass:: OSCD
-.. autoclass:: OSCDDataModule
 
 PatternNet
 ^^^^^^^^^^
@@ -167,13 +156,11 @@ Potsdam
 ^^^^^^^
 
 .. autoclass:: Potsdam2D
-.. autoclass:: Potsdam2DDataModule
 
 RESISC45 (Remote Sensing Image Scene Classification)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 .. autoclass:: RESISC45
-.. autoclass:: RESISC45DataModule
 
 Seasonal Contrast
 ^^^^^^^^^^^^^^^^^
@@ -184,13 +171,11 @@ SEN12MS
 ^^^^^^^
 
 .. autoclass:: SEN12MS
-.. autoclass:: SEN12MSDataModule
 
 So2Sat
 ^^^^^^
 
 .. autoclass:: So2Sat
-.. autoclass:: So2SatDataModule
 
 SpaceNet
 ^^^^^^^^
@@ -206,30 +191,26 @@ Tropical Cyclone Wind Estimation Competition
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 .. autoclass:: TropicalCycloneWindEstimation
-.. autoclass:: CycloneDataModule
+
+UC Merced
+^^^^^^^^^
+
+.. autoclass:: UCMerced
 
 Vaihingen
 ^^^^^^^^^
 
 .. autoclass:: Vaihingen2D
-.. autoclass:: Vaihingen2DDataModule
 
 NWPU VHR-10
 ^^^^^^^^^^^
 
 .. autoclass:: VHR10
 
-UC Merced
-^^^^^^^^^
-
-.. autoclass:: UCMerced
-.. autoclass:: UCMercedDataModule
-
 xView2
 ^^^^^^
 
 .. autoclass:: XView2
-.. autoclass:: XView2DataModule
 
 ZueriCrop
 ^^^^^^^^^
diff --git a/docs/index.rst b/docs/index.rst
index d8fe309a2..d2e85fa5d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -15,6 +15,7 @@ torchgeo
    :maxdepth: 2
    :caption: Package Reference
 
+   api/datamodules
    api/datasets
    api/models
    api/samplers
diff --git a/experiments/test_chesapeakecvpr_models.py b/experiments/test_chesapeakecvpr_models.py
index 249dd20bc..38ab24fa8 100755
--- a/experiments/test_chesapeakecvpr_models.py
+++ b/experiments/test_chesapeakecvpr_models.py
@@ -11,7 +11,7 @@ import os
 import pytorch_lightning as pl
 import torch
 
-from torchgeo.datasets import ChesapeakeCVPRDataModule
+from torchgeo.datamodules import ChesapeakeCVPRDataModule
 from torchgeo.trainers.chesapeake import ChesapeakeCVPRSegmentationTask
 
 ALL_TEST_SPLITS = [["de-val"], ["pa-test"], ["ny-test"], ["pa-test", "ny-test"]]
diff --git a/pyproject.toml b/pyproject.toml
index cfb341315..f449ed089 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -73,7 +73,7 @@ strict_equality = true
 
 [tool.pydocstyle]
 convention = "google"
-match_dir = "(datasets|models|samplers|torchgeo|trainers|transforms)"
+match_dir = "(datamodules|datasets|models|samplers|torchgeo|trainers|transforms)"
 
 [tool.pytest.ini_options]
 # Skip slow tests by default
diff --git a/tests/datamodules/__init__.py b/tests/datamodules/__init__.py
new file mode 100644
index 000000000..5b7f7a925
--- /dev/null
+++ b/tests/datamodules/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
diff --git a/tests/datamodules/test_bigearthnet.py b/tests/datamodules/test_bigearthnet.py
new file mode 100644
index 000000000..b460877a7
--- /dev/null
+++ b/tests/datamodules/test_bigearthnet.py
@@ -0,0 +1,32 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+from _pytest.fixtures import SubRequest
+
+from torchgeo.datamodules import BigEarthNetDataModule
+
+
+class TestBigEarthNetDataModule:
+    @pytest.fixture(scope="class", params=["s1", "s2", "all"])
+    def datamodule(self, request: SubRequest) -> BigEarthNetDataModule:
+        bands = request.param
+        root = os.path.join("tests", "data", "bigearthnet")
+        num_classes = 19
+        batch_size = 1
+        num_workers = 0
+        dm = BigEarthNetDataModule(root, bands, num_classes, batch_size, num_workers)
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: BigEarthNetDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: BigEarthNetDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: BigEarthNetDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_chesapeake.py b/tests/datamodules/test_chesapeake.py
new file mode 100644
index 000000000..f04d470b7
--- /dev/null
+++ b/tests/datamodules/test_chesapeake.py
@@ -0,0 +1,52 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+import torch
+from _pytest.fixtures import SubRequest
+
+from torchgeo.datamodules import ChesapeakeCVPRDataModule
+
+
+class TestChesapeakeCVPRDataModule:
+    @pytest.fixture(scope="class", params=[5, 7])
+    def datamodule(self, request: SubRequest) -> ChesapeakeCVPRDataModule:
+        dm = ChesapeakeCVPRDataModule(
+            os.path.join("tests", "data", "chesapeake", "cvpr"),
+            ["de-test"],
+            ["de-test"],
+            ["de-test"],
+            patch_size=32,
+            patches_per_tile=2,
+            batch_size=2,
+            num_workers=0,
+            class_set=request.param,
+        )
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: ChesapeakeCVPRDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: ChesapeakeCVPRDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: ChesapeakeCVPRDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
+
+    def test_nodata_check(self, datamodule: ChesapeakeCVPRDataModule) -> None:
+        nodata_check = datamodule.nodata_check(4)
+        sample = {
+            "image": torch.ones(1, 2, 2),  # type: ignore[attr-defined]
+            "mask": torch.ones(2, 2),  # type: ignore[attr-defined]
+        }
+        out = nodata_check(sample)
+        assert torch.equal(  # type: ignore[attr-defined]
+            out["image"], torch.zeros(1, 4, 4)  # type: ignore[attr-defined]
+        )
+        assert torch.equal(  # type: ignore[attr-defined]
+            out["mask"], torch.zeros(4, 4)  # type: ignore[attr-defined]
+        )
diff --git a/tests/datamodules/test_cowc.py b/tests/datamodules/test_cowc.py
new file mode 100644
index 000000000..8b6e974f3
--- /dev/null
+++ b/tests/datamodules/test_cowc.py
@@ -0,0 +1,30 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+
+from torchgeo.datamodules import COWCCountingDataModule
+
+
+class TestCOWCCountingDataModule:
+    @pytest.fixture(scope="class")
+    def datamodule(self) -> COWCCountingDataModule:
+        root = os.path.join("tests", "data", "cowc_counting")
+        seed = 0
+        batch_size = 1
+        num_workers = 0
+        dm = COWCCountingDataModule(root, seed, batch_size, num_workers)
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: COWCCountingDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: COWCCountingDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: COWCCountingDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_cyclone.py b/tests/datamodules/test_cyclone.py
new file mode 100644
index 000000000..843cc1656
--- /dev/null
+++ b/tests/datamodules/test_cyclone.py
@@ -0,0 +1,30 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+
+from torchgeo.datamodules import CycloneDataModule
+
+
+class TestCycloneDataModule:
+    @pytest.fixture(scope="class")
+    def datamodule(self) -> CycloneDataModule:
+        root = os.path.join("tests", "data", "cyclone")
+        seed = 0
+        batch_size = 1
+        num_workers = 0
+        dm = CycloneDataModule(root, seed, batch_size, num_workers)
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: CycloneDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: CycloneDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: CycloneDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_etci2021.py b/tests/datamodules/test_etci2021.py
new file mode 100644
index 000000000..b51e8daf1
--- /dev/null
+++ b/tests/datamodules/test_etci2021.py
@@ -0,0 +1,30 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+
+from torchgeo.datamodules import ETCI2021DataModule
+
+
+class TestETCI2021DataModule:
+    @pytest.fixture(scope="class")
+    def datamodule(self) -> ETCI2021DataModule:
+        root = os.path.join("tests", "data", "etci2021")
+        seed = 0
+        batch_size = 2
+        num_workers = 0
+        dm = ETCI2021DataModule(root, seed, batch_size, num_workers)
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: ETCI2021DataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: ETCI2021DataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: ETCI2021DataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_eurosat.py b/tests/datamodules/test_eurosat.py
new file mode 100644
index 000000000..a8a51cd7b
--- /dev/null
+++ b/tests/datamodules/test_eurosat.py
@@ -0,0 +1,29 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+
+from torchgeo.datamodules import EuroSATDataModule
+
+
+class TestEuroSATDataModule:
+    @pytest.fixture(scope="class")
+    def datamodule(self) -> EuroSATDataModule:
+        root = os.path.join("tests", "data", "eurosat")
+        batch_size = 1
+        num_workers = 0
+        dm = EuroSATDataModule(root, batch_size, num_workers)
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: EuroSATDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: EuroSATDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: EuroSATDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_fair1m.py b/tests/datamodules/test_fair1m.py
new file mode 100644
index 000000000..1f19922f1
--- /dev/null
+++ b/tests/datamodules/test_fair1m.py
@@ -0,0 +1,30 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+
+from torchgeo.datamodules import FAIR1MDataModule
+
+
+class TestFAIR1MDataModule:
+    @pytest.fixture(scope="class", params=[True, False])
+    def datamodule(self) -> FAIR1MDataModule:
+        root = os.path.join("tests", "data", "fair1m")
+        batch_size = 2
+        num_workers = 0
+        dm = FAIR1MDataModule(
+            root, batch_size, num_workers, val_split_pct=0.33, test_split_pct=0.33
+        )
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: FAIR1MDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: FAIR1MDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: FAIR1MDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_landcoverai.py b/tests/datamodules/test_landcoverai.py
new file mode 100644
index 000000000..1d4f2e431
--- /dev/null
+++ b/tests/datamodules/test_landcoverai.py
@@ -0,0 +1,29 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+
+from torchgeo.datamodules import LandCoverAIDataModule
+
+
+class TestLandCoverAIDataModule:
+    @pytest.fixture(scope="class")
+    def datamodule(self) -> LandCoverAIDataModule:
+        root = os.path.join("tests", "data", "landcoverai")
+        batch_size = 2
+        num_workers = 0
+        dm = LandCoverAIDataModule(root, batch_size, num_workers)
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: LandCoverAIDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: LandCoverAIDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: LandCoverAIDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_loveda.py b/tests/datamodules/test_loveda.py
new file mode 100644
index 000000000..c19e8cb0a
--- /dev/null
+++ b/tests/datamodules/test_loveda.py
@@ -0,0 +1,34 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+
+from torchgeo.datamodules import LoveDADataModule
+
+
+class TestLoveDADataModule:
+    @pytest.fixture(scope="class")
+    def datamodule(self) -> LoveDADataModule:
+        root = os.path.join("tests", "data", "loveda")
+        batch_size = 2
+        num_workers = 0
+        scene = ["rural", "urban"]
+
+        dm = LoveDADataModule(
+            root_dir=root, scene=scene, batch_size=batch_size, num_workers=num_workers
+        )
+
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: LoveDADataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: LoveDADataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: LoveDADataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_naip.py b/tests/datamodules/test_naip.py
new file mode 100644
index 000000000..5f9d676f2
--- /dev/null
+++ b/tests/datamodules/test_naip.py
@@ -0,0 +1,32 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+
+from torchgeo.datamodules import NAIPChesapeakeDataModule
+
+
+class TestNAIPChesapeakeDataModule:
+    @pytest.fixture(scope="class")
+    def datamodule(self) -> NAIPChesapeakeDataModule:
+        dm = NAIPChesapeakeDataModule(
+            os.path.join("tests", "data", "naip"),
+            os.path.join("tests", "data", "chesapeake", "BAYWIDE"),
+            batch_size=2,
+            num_workers=0,
+        )
+        dm.patch_size = 32
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: NAIPChesapeakeDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: NAIPChesapeakeDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: NAIPChesapeakeDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_nasa_marine_debris.py b/tests/datamodules/test_nasa_marine_debris.py
new file mode 100644
index 000000000..eff571f95
--- /dev/null
+++ b/tests/datamodules/test_nasa_marine_debris.py
@@ -0,0 +1,33 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+
+from torchgeo.datamodules import NASAMarineDebrisDataModule
+
+
+class TestNASAMarineDebrisDataModule:
+    @pytest.fixture(scope="class")
+    def datamodule(self) -> NASAMarineDebrisDataModule:
+        root = os.path.join("tests", "data", "nasa_marine_debris")
+        batch_size = 2
+        num_workers = 0
+        val_split_pct = 0.3
+        test_split_pct = 0.3
+        dm = NASAMarineDebrisDataModule(
+            root, batch_size, num_workers, val_split_pct, test_split_pct
+        )
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: NASAMarineDebrisDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: NASAMarineDebrisDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: NASAMarineDebrisDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_oscd.py b/tests/datamodules/test_oscd.py
new file mode 100644
index 000000000..7d090f99c
--- /dev/null
+++ b/tests/datamodules/test_oscd.py
@@ -0,0 +1,62 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+from _pytest.fixtures import SubRequest
+
+from torchgeo.datamodules import OSCDDataModule
+
+
+class TestOSCDDataModule:
+    @pytest.fixture(scope="class", params=zip(["all", "rgb"], [0.0, 0.5]))
+    def datamodule(self, request: SubRequest) -> OSCDDataModule:
+        bands, val_split_pct = request.param
+        patch_size = (2, 2)
+        num_patches_per_tile = 2
+        root = os.path.join("tests", "data", "oscd")
+        batch_size = 1
+        num_workers = 0
+        dm = OSCDDataModule(
+            root,
+            bands,
+            batch_size,
+            num_workers,
+            val_split_pct,
+            patch_size,
+            num_patches_per_tile,
+        )
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: OSCDDataModule) -> None:
+        sample = next(iter(datamodule.train_dataloader()))
+        assert sample["image"].shape[-2:] == sample["mask"].shape[-2:] == (2, 2)
+        assert sample["image"].shape[0] == sample["mask"].shape[0] == 2
+        if datamodule.bands == "all":
+            assert sample["image"].shape[1] == 26
+        else:
+            assert sample["image"].shape[1] == 6
+
+    def test_val_dataloader(self, datamodule: OSCDDataModule) -> None:
+        sample = next(iter(datamodule.val_dataloader()))
+        if datamodule.val_split_pct > 0.0:
+            assert (
+                sample["image"].shape[-2:] == sample["mask"].shape[-2:] == (1280, 1280)
+            )
+            assert sample["image"].shape[0] == sample["mask"].shape[0] == 1
+            if datamodule.bands == "all":
+                assert sample["image"].shape[1] == 26
+            else:
+                assert sample["image"].shape[1] == 6
+
+    def test_test_dataloader(self, datamodule: OSCDDataModule) -> None:
+        sample = next(iter(datamodule.test_dataloader()))
+        assert sample["image"].shape[-2:] == sample["mask"].shape[-2:] == (1280, 1280)
+        assert sample["image"].shape[0] == sample["mask"].shape[0] == 1
+        if datamodule.bands == "all":
+            assert sample["image"].shape[1] == 26
+        else:
+            assert sample["image"].shape[1] == 6
diff --git a/tests/datamodules/test_potsdam.py b/tests/datamodules/test_potsdam.py
new file mode 100644
index 000000000..f67be0fea
--- /dev/null
+++ b/tests/datamodules/test_potsdam.py
@@ -0,0 +1,33 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+from _pytest.fixtures import SubRequest
+
+from torchgeo.datamodules import Potsdam2DDataModule
+
+
+class TestPotsdam2DDataModule:
+    @pytest.fixture(scope="class", params=[0.0, 0.5])
+    def datamodule(self, request: SubRequest) -> Potsdam2DDataModule:
+        root = os.path.join("tests", "data", "potsdam")
+        batch_size = 1
+        num_workers = 0
+        val_split_size = request.param
+        dm = Potsdam2DDataModule(
+            root, batch_size, num_workers, val_split_pct=val_split_size
+        )
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: Potsdam2DDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: Potsdam2DDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: Potsdam2DDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_resisc45.py b/tests/datamodules/test_resisc45.py
new file mode 100644
index 000000000..e1b9baa83
--- /dev/null
+++ b/tests/datamodules/test_resisc45.py
@@ -0,0 +1,29 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+
+from torchgeo.datamodules import RESISC45DataModule
+
+
+class TestRESISC45DataModule:
+    @pytest.fixture(scope="class")
+    def datamodule(self) -> RESISC45DataModule:
+        root = os.path.join("tests", "data", "resisc45")
+        batch_size = 2
+        num_workers = 0
+        dm = RESISC45DataModule(root, batch_size, num_workers)
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: RESISC45DataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: RESISC45DataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: RESISC45DataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_sen12ms.py b/tests/datamodules/test_sen12ms.py
new file mode 100644
index 000000000..7cd6df738
--- /dev/null
+++ b/tests/datamodules/test_sen12ms.py
@@ -0,0 +1,32 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+from _pytest.fixtures import SubRequest
+
+from torchgeo.datamodules import SEN12MSDataModule
+
+
+class TestSEN12MSDataModule:
+    @pytest.fixture(scope="class", params=["all", "s1", "s2-all", "s2-reduced"])
+    def datamodule(self, request: SubRequest) -> SEN12MSDataModule:
+        root = os.path.join("tests", "data", "sen12ms")
+        seed = 0
+        bands = request.param
+        batch_size = 1
+        num_workers = 0
+        dm = SEN12MSDataModule(root, seed, bands, batch_size, num_workers)
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: SEN12MSDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: SEN12MSDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: SEN12MSDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_so2sat.py b/tests/datamodules/test_so2sat.py
new file mode 100644
index 000000000..2f7323069
--- /dev/null
+++ b/tests/datamodules/test_so2sat.py
@@ -0,0 +1,33 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+from _pytest.fixtures import SubRequest
+
+from torchgeo.datamodules import So2SatDataModule
+
+pytest.importorskip("h5py")
+
+
+class TestSo2SatDataModule:
+    @pytest.fixture(scope="class", params=zip([True, False], ["rgb", "s2"]))
+    def datamodule(self, request: SubRequest) -> So2SatDataModule:
+        unsupervised_mode, bands = request.param
+        root = os.path.join("tests", "data", "so2sat")
+        batch_size = 2
+        num_workers = 0
+        dm = So2SatDataModule(root, batch_size, num_workers, bands, unsupervised_mode)
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: So2SatDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: So2SatDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: So2SatDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_ucmerced.py b/tests/datamodules/test_ucmerced.py
new file mode 100644
index 000000000..8dd7ab833
--- /dev/null
+++ b/tests/datamodules/test_ucmerced.py
@@ -0,0 +1,29 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+
+from torchgeo.datamodules import UCMercedDataModule
+
+
+class TestUCMercedDataModule:
+    @pytest.fixture(scope="class")
+    def datamodule(self) -> UCMercedDataModule:
+        root = os.path.join("tests", "data", "ucmerced")
+        batch_size = 2
+        num_workers = 0
+        dm = UCMercedDataModule(root, batch_size, num_workers)
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: UCMercedDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: UCMercedDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: UCMercedDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_utils.py b/tests/datamodules/test_utils.py
new file mode 100644
index 000000000..e5bc527f6
--- /dev/null
+++ b/tests/datamodules/test_utils.py
@@ -0,0 +1,25 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import torch
+from torch.utils.data import TensorDataset
+
+from torchgeo.datamodules.utils import dataset_split
+
+
+def test_dataset_split() -> None:
+    num_samples = 24
+    x = torch.ones(num_samples, 5)  # type: ignore[attr-defined]
+    y = torch.randint(low=0, high=2, size=(num_samples,))  # type: ignore[attr-defined]
+    ds = TensorDataset(x, y)
+
+    # Test only train/val set split
+    train_ds, val_ds = dataset_split(ds, val_pct=1 / 2)
+    assert len(train_ds) == num_samples // 2
+    assert len(val_ds) == num_samples // 2
+
+    # Test train/val/test set split
+    train_ds, val_ds, test_ds = dataset_split(ds, val_pct=1 / 3, test_pct=1 / 3)
+    assert len(train_ds) == num_samples // 3
+    assert len(val_ds) == num_samples // 3
+    assert len(test_ds) == num_samples // 3
diff --git a/tests/datamodules/test_vaihingen.py b/tests/datamodules/test_vaihingen.py
new file mode 100644
index 000000000..453a987ec
--- /dev/null
+++ b/tests/datamodules/test_vaihingen.py
@@ -0,0 +1,33 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+from _pytest.fixtures import SubRequest
+
+from torchgeo.datamodules import Vaihingen2DDataModule
+
+
+class TestVaihingen2DDataModule:
+    @pytest.fixture(scope="class", params=[0.0, 0.5])
+    def datamodule(self, request: SubRequest) -> Vaihingen2DDataModule:
+        root = os.path.join("tests", "data", "vaihingen")
+        batch_size = 1
+        num_workers = 0
+        val_split_size = request.param
+        dm = Vaihingen2DDataModule(
+            root, batch_size, num_workers, val_split_pct=val_split_size
+        )
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: Vaihingen2DDataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: Vaihingen2DDataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: Vaihingen2DDataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datamodules/test_xview2.py b/tests/datamodules/test_xview2.py
new file mode 100644
index 000000000..5e1637533
--- /dev/null
+++ b/tests/datamodules/test_xview2.py
@@ -0,0 +1,33 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+
+import pytest
+from _pytest.fixtures import SubRequest
+
+from torchgeo.datamodules import XView2DataModule
+
+
+class TestXView2DataModule:
+    @pytest.fixture(scope="class", params=[0.0, 0.5])
+    def datamodule(self, request: SubRequest) -> XView2DataModule:
+        root = os.path.join("tests", "data", "xview2")
+        batch_size = 1
+        num_workers = 0
+        val_split_size = request.param
+        dm = XView2DataModule(
+            root, batch_size, num_workers, val_split_pct=val_split_size
+        )
+        dm.prepare_data()
+        dm.setup()
+        return dm
+
+    def test_train_dataloader(self, datamodule: XView2DataModule) -> None:
+        next(iter(datamodule.train_dataloader()))
+
+    def test_val_dataloader(self, datamodule: XView2DataModule) -> None:
+        next(iter(datamodule.val_dataloader()))
+
+    def test_test_dataloader(self, datamodule: XView2DataModule) -> None:
+        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_bigearthnet.py b/tests/datasets/test_bigearthnet.py
index 2561eb9f8..d1f9ca80f 100644
--- a/tests/datasets/test_bigearthnet.py
+++ b/tests/datasets/test_bigearthnet.py
@@ -13,7 +13,7 @@ from _pytest.fixtures import SubRequest
 from _pytest.monkeypatch import MonkeyPatch
 
 import torchgeo.datasets.utils
-from torchgeo.datasets import BigEarthNet, BigEarthNetDataModule
+from torchgeo.datasets import BigEarthNet
 
 
 def download_url(url: str, root: str, *args: str, **kwargs: str) -> None:
@@ -148,26 +148,3 @@ class TestBigEarthNet:
         "to automaticaly download the dataset."
         with pytest.raises(RuntimeError, match=err):
             BigEarthNet(str(tmp_path))
-
-
-class TestBigEarthNetDataModule:
-    @pytest.fixture(scope="class", params=["s1", "s2", "all"])
-    def datamodule(self, request: SubRequest) -> BigEarthNetDataModule:
-        bands = request.param
-        root = os.path.join("tests", "data", "bigearthnet")
-        num_classes = 19
-        batch_size = 1
-        num_workers = 0
-        dm = BigEarthNetDataModule(root, bands, num_classes, batch_size, num_workers)
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: BigEarthNetDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: BigEarthNetDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: BigEarthNetDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_chesapeake.py b/tests/datasets/test_chesapeake.py
index 573719ce9..2f321967b 100644
--- a/tests/datasets/test_chesapeake.py
+++ b/tests/datasets/test_chesapeake.py
@@ -19,7 +19,6 @@ from torchgeo.datasets import (
     BoundingBox,
     Chesapeake13,
     ChesapeakeCVPR,
-    ChesapeakeCVPRDataModule,
     IntersectionDataset,
     UnionDataset,
 )
@@ -179,45 +178,3 @@ class TestChesapeakeCVPR:
             IndexError, match="query: .* spans multiple tiles which is not valid"
         ):
             ds[dataset.bounds]
-
-
-class TestChesapeakeCVPRDataModule:
-    @pytest.fixture(scope="class", params=[5, 7])
-    def datamodule(self, request: SubRequest) -> ChesapeakeCVPRDataModule:
-        dm = ChesapeakeCVPRDataModule(
-            os.path.join("tests", "data", "chesapeake", "cvpr"),
-            ["de-test"],
-            ["de-test"],
-            ["de-test"],
-            patch_size=32,
-            patches_per_tile=2,
-            batch_size=2,
-            num_workers=0,
-            class_set=request.param,
-        )
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: ChesapeakeCVPRDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: ChesapeakeCVPRDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: ChesapeakeCVPRDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
-
-    def test_nodata_check(self, datamodule: ChesapeakeCVPRDataModule) -> None:
-        nodata_check = datamodule.nodata_check(4)
-        sample = {
-            "image": torch.ones(1, 2, 2),  # type: ignore[attr-defined]
-            "mask": torch.ones(2, 2),  # type: ignore[attr-defined]
-        }
-        out = nodata_check(sample)
-        assert torch.equal(  # type: ignore[attr-defined]
-            out["image"], torch.zeros(1, 4, 4)  # type: ignore[attr-defined]
-        )
-        assert torch.equal(  # type: ignore[attr-defined]
-            out["mask"], torch.zeros(4, 4)  # type: ignore[attr-defined]
-        )
diff --git a/tests/datasets/test_cowc.py b/tests/datasets/test_cowc.py
index 6ec7b5330..11cc3744c 100644
--- a/tests/datasets/test_cowc.py
+++ b/tests/datasets/test_cowc.py
@@ -14,7 +14,7 @@ from _pytest.monkeypatch import MonkeyPatch
 from torch.utils.data import ConcatDataset
 
 import torchgeo.datasets.utils
-from torchgeo.datasets import COWCCounting, COWCCountingDataModule, COWCDetection
+from torchgeo.datasets import COWCCounting, COWCDetection
 from torchgeo.datasets.cowc import COWC
 
 
@@ -148,25 +148,3 @@ class TestCOWCDetection:
     def test_not_downloaded(self, tmp_path: Path) -> None:
         with pytest.raises(RuntimeError, match="Dataset not found or corrupted."):
             COWCDetection(str(tmp_path))
-
-
-class TestCOWCCountingDataModule:
-    @pytest.fixture(scope="class")
-    def datamodule(self) -> COWCCountingDataModule:
-        root = os.path.join("tests", "data", "cowc_counting")
-        seed = 0
-        batch_size = 1
-        num_workers = 0
-        dm = COWCCountingDataModule(root, seed, batch_size, num_workers)
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: COWCCountingDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: COWCCountingDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: COWCCountingDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_cyclone.py b/tests/datasets/test_cyclone.py
index 6955143a1..c9bb803c8 100644
--- a/tests/datasets/test_cyclone.py
+++ b/tests/datasets/test_cyclone.py
@@ -15,7 +15,7 @@ from _pytest.fixtures import SubRequest
 from _pytest.monkeypatch import MonkeyPatch
 from torch.utils.data import ConcatDataset
 
-from torchgeo.datasets import CycloneDataModule, TropicalCycloneWindEstimation
+from torchgeo.datasets import TropicalCycloneWindEstimation
 
 
 class Dataset:
@@ -103,25 +103,3 @@ class TestTropicalCycloneWindEstimation:
         )
         dataset.plot(sample)
         plt.close()
-
-
-class TestCycloneDataModule:
-    @pytest.fixture(scope="class")
-    def datamodule(self) -> CycloneDataModule:
-        root = os.path.join("tests", "data", "cyclone")
-        seed = 0
-        batch_size = 1
-        num_workers = 0
-        dm = CycloneDataModule(root, seed, batch_size, num_workers)
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: CycloneDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: CycloneDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: CycloneDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_etci2021.py b/tests/datasets/test_etci2021.py
index 0aaee918b..89750dd0d 100644
--- a/tests/datasets/test_etci2021.py
+++ b/tests/datasets/test_etci2021.py
@@ -14,7 +14,7 @@ from _pytest.fixtures import SubRequest
 from _pytest.monkeypatch import MonkeyPatch
 
 import torchgeo.datasets.utils
-from torchgeo.datasets import ETCI2021, ETCI2021DataModule
+from torchgeo.datasets import ETCI2021
 
 
 def download_url(url: str, root: str, *args: str) -> None:
@@ -95,25 +95,3 @@ class TestETCI2021:
         x["prediction"] = x["mask"][0].clone()
         dataset.plot(x)
         plt.close()
-
-
-class TestETCI2021DataModule:
-    @pytest.fixture(scope="class")
-    def datamodule(self) -> ETCI2021DataModule:
-        root = os.path.join("tests", "data", "etci2021")
-        seed = 0
-        batch_size = 2
-        num_workers = 0
-        dm = ETCI2021DataModule(root, seed, batch_size, num_workers)
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: ETCI2021DataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: ETCI2021DataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: ETCI2021DataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_eurosat.py b/tests/datasets/test_eurosat.py
index 008195bb7..a8b47ea25 100644
--- a/tests/datasets/test_eurosat.py
+++ b/tests/datasets/test_eurosat.py
@@ -15,7 +15,7 @@ from _pytest.monkeypatch import MonkeyPatch
 from torch.utils.data import ConcatDataset
 
 import torchgeo.datasets.utils
-from torchgeo.datasets import EuroSAT, EuroSATDataModule
+from torchgeo.datasets import EuroSAT
 
 
 def download_url(url: str, root: str, *args: str, **kwargs: str) -> None:
@@ -100,24 +100,3 @@ class TestEuroSAT:
         x["prediction"] = x["label"].clone()
         dataset.plot(x)
         plt.close()
-
-
-class TestEuroSATDataModule:
-    @pytest.fixture(scope="class")
-    def datamodule(self) -> EuroSATDataModule:
-        root = os.path.join("tests", "data", "eurosat")
-        batch_size = 1
-        num_workers = 0
-        dm = EuroSATDataModule(root, batch_size, num_workers)
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: EuroSATDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: EuroSATDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: EuroSATDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_fair1m.py b/tests/datasets/test_fair1m.py
index 9b175c649..3f188ebb6 100644
--- a/tests/datasets/test_fair1m.py
+++ b/tests/datasets/test_fair1m.py
@@ -12,7 +12,7 @@ import torch
 import torch.nn as nn
 from _pytest.monkeypatch import MonkeyPatch
 
-from torchgeo.datasets import FAIR1M, FAIR1MDataModule
+from torchgeo.datasets import FAIR1M
 
 
 class TestFAIR1M:
@@ -73,25 +73,3 @@ class TestFAIR1M:
         x["prediction_boxes"] = x["boxes"].clone()
         dataset.plot(x)
         plt.close()
-
-
-class TestFAIR1MDataModule:
-    @pytest.fixture(scope="class", params=[True, False])
-    def datamodule(self) -> FAIR1MDataModule:
-        root = os.path.join("tests", "data", "fair1m")
-        batch_size = 2
-        num_workers = 0
-        dm = FAIR1MDataModule(
-            root, batch_size, num_workers, val_split_pct=0.33, test_split_pct=0.33
-        )
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: FAIR1MDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: FAIR1MDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: FAIR1MDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_landcoverai.py b/tests/datasets/test_landcoverai.py
index 8a6942e58..e971f88ae 100644
--- a/tests/datasets/test_landcoverai.py
+++ b/tests/datasets/test_landcoverai.py
@@ -15,7 +15,7 @@ from _pytest.monkeypatch import MonkeyPatch
 from torch.utils.data import ConcatDataset
 
 import torchgeo.datasets.utils
-from torchgeo.datasets import LandCoverAI, LandCoverAIDataModule
+from torchgeo.datasets import LandCoverAI
 
 
 def download_url(url: str, root: str, *args: str) -> None:
@@ -78,24 +78,3 @@ class TestLandCoverAI:
         x["prediction"] = x["mask"].clone()
         dataset.plot(x)
         plt.close()
-
-
-class TestLandCoverAIDataModule:
-    @pytest.fixture(scope="class")
-    def datamodule(self) -> LandCoverAIDataModule:
-        root = os.path.join("tests", "data", "landcoverai")
-        batch_size = 2
-        num_workers = 0
-        dm = LandCoverAIDataModule(root, batch_size, num_workers)
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: LandCoverAIDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: LandCoverAIDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: LandCoverAIDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_loveda.py b/tests/datasets/test_loveda.py
index 0bfca7bc6..e445ae9d3 100644
--- a/tests/datasets/test_loveda.py
+++ b/tests/datasets/test_loveda.py
@@ -14,7 +14,7 @@ from _pytest.fixtures import SubRequest
 from _pytest.monkeypatch import MonkeyPatch
 
 import torchgeo.datasets.utils
-from torchgeo.datasets import LoveDA, LoveDADataModule
+from torchgeo.datasets import LoveDA
 
 
 def download_url(url: str, root: str, *args: str) -> None:
@@ -99,29 +99,3 @@ class TestLoveDA:
     def test_plot(self, dataset: LoveDA) -> None:
         dataset.plot(dataset[0], suptitle="Test")
         plt.close()
-
-
-class TestLoveDADataModule:
-    @pytest.fixture(scope="class")
-    def datamodule(self) -> LoveDADataModule:
-        root = os.path.join("tests", "data", "loveda")
-        batch_size = 2
-        num_workers = 0
-        scene = ["rural", "urban"]
-
-        dm = LoveDADataModule(
-            root_dir=root, scene=scene, batch_size=batch_size, num_workers=num_workers
-        )
-
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: LoveDADataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: LoveDADataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: LoveDADataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_naip.py b/tests/datasets/test_naip.py
index 4ef17e7cf..2089d09ac 100644
--- a/tests/datasets/test_naip.py
+++ b/tests/datasets/test_naip.py
@@ -12,13 +12,7 @@ import torch.nn as nn
 from _pytest.monkeypatch import MonkeyPatch
 from rasterio.crs import CRS
 
-from torchgeo.datasets import (
-    NAIP,
-    BoundingBox,
-    IntersectionDataset,
-    NAIPChesapeakeDataModule,
-    UnionDataset,
-)
+from torchgeo.datasets import NAIP, BoundingBox, IntersectionDataset, UnionDataset
 
 
 class TestNAIP:
@@ -60,27 +54,3 @@ class TestNAIP:
             IndexError, match="query: .* not found in index with bounds:"
         ):
             dataset[query]
-
-
-class TestNAIPChesapeakeDataModule:
-    @pytest.fixture(scope="class")
-    def datamodule(self) -> NAIPChesapeakeDataModule:
-        dm = NAIPChesapeakeDataModule(
-            os.path.join("tests", "data", "naip"),
-            os.path.join("tests", "data", "chesapeake", "BAYWIDE"),
-            batch_size=2,
-            num_workers=0,
-        )
-        dm.patch_size = 32
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: NAIPChesapeakeDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: NAIPChesapeakeDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: NAIPChesapeakeDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_nasa_marine_debris.py b/tests/datasets/test_nasa_marine_debris.py
index a8b20c2cd..deb8366dd 100644
--- a/tests/datasets/test_nasa_marine_debris.py
+++ b/tests/datasets/test_nasa_marine_debris.py
@@ -13,7 +13,7 @@ import torch
 import torch.nn as nn
 from _pytest.monkeypatch import MonkeyPatch
 
-from torchgeo.datasets import NASAMarineDebris, NASAMarineDebrisDataModule
+from torchgeo.datasets import NASAMarineDebris
 
 
 class Dataset:
@@ -85,28 +85,3 @@ class TestNASAMarineDebris:
         x["prediction_boxes"] = x["boxes"].clone()
         dataset.plot(x)
         plt.close()
-
-
-class TestNASAMarineDebrisDataModule:
-    @pytest.fixture(scope="class")
-    def datamodule(self) -> NASAMarineDebrisDataModule:
-        root = os.path.join("tests", "data", "nasa_marine_debris")
-        batch_size = 2
-        num_workers = 0
-        val_split_pct = 0.3
-        test_split_pct = 0.3
-        dm = NASAMarineDebrisDataModule(
-            root, batch_size, num_workers, val_split_pct, test_split_pct
-        )
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: NASAMarineDebrisDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: NASAMarineDebrisDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: NASAMarineDebrisDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_oscd.py b/tests/datasets/test_oscd.py
index 2bfaf25d5..be61d08b8 100644
--- a/tests/datasets/test_oscd.py
+++ b/tests/datasets/test_oscd.py
@@ -16,7 +16,7 @@ from matplotlib import pyplot as plt
 from torch.utils.data import ConcatDataset
 
 import torchgeo.datasets.utils
-from torchgeo.datasets import OSCD, OSCDDataModule
+from torchgeo.datasets import OSCD
 
 
 def download_url(url: str, root: str, *args: str, **kwargs: str) -> None:
@@ -105,56 +105,3 @@ class TestOSCD:
     def test_plot(self, dataset: OSCD) -> None:
         dataset.plot(dataset[0], suptitle="Test")
         plt.close()
-
-
-class TestOSCDDataModule:
-    @pytest.fixture(scope="class", params=zip(["all", "rgb"], [0.0, 0.5]))
-    def datamodule(self, request: SubRequest) -> OSCDDataModule:
-        bands, val_split_pct = request.param
-        patch_size = (2, 2)
-        num_patches_per_tile = 2
-        root = os.path.join("tests", "data", "oscd")
-        batch_size = 1
-        num_workers = 0
-        dm = OSCDDataModule(
-            root,
-            bands,
-            batch_size,
-            num_workers,
-            val_split_pct,
-            patch_size,
-            num_patches_per_tile,
-        )
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: OSCDDataModule) -> None:
-        sample = next(iter(datamodule.train_dataloader()))
-        assert sample["image"].shape[-2:] == sample["mask"].shape[-2:] == (2, 2)
-        assert sample["image"].shape[0] == sample["mask"].shape[0] == 2
-        if datamodule.bands == "all":
-            assert sample["image"].shape[1] == 26
-        else:
-            assert sample["image"].shape[1] == 6
-
-    def test_val_dataloader(self, datamodule: OSCDDataModule) -> None:
-        sample = next(iter(datamodule.val_dataloader()))
-        if datamodule.val_split_pct > 0.0:
-            assert (
-                sample["image"].shape[-2:] == sample["mask"].shape[-2:] == (1280, 1280)
-            )
-            assert sample["image"].shape[0] == sample["mask"].shape[0] == 1
-            if datamodule.bands == "all":
-                assert sample["image"].shape[1] == 26
-            else:
-                assert sample["image"].shape[1] == 6
-
-    def test_test_dataloader(self, datamodule: OSCDDataModule) -> None:
-        sample = next(iter(datamodule.test_dataloader()))
-        assert sample["image"].shape[-2:] == sample["mask"].shape[-2:] == (1280, 1280)
-        assert sample["image"].shape[0] == sample["mask"].shape[0] == 1
-        if datamodule.bands == "all":
-            assert sample["image"].shape[1] == 26
-        else:
-            assert sample["image"].shape[1] == 6
diff --git a/tests/datasets/test_potsdam.py b/tests/datasets/test_potsdam.py
index b11d0dc13..6a298baf3 100644
--- a/tests/datasets/test_potsdam.py
+++ b/tests/datasets/test_potsdam.py
@@ -13,7 +13,7 @@ import torch.nn as nn
 from _pytest.fixtures import SubRequest
 from _pytest.monkeypatch import MonkeyPatch
 
-from torchgeo.datasets import Potsdam2D, Potsdam2DDataModule
+from torchgeo.datasets import Potsdam2D
 
 
 class TestPotsdam2D:
@@ -75,27 +75,3 @@ class TestPotsdam2D:
         x["prediction"] = x["mask"].clone()
         dataset.plot(x)
         plt.close()
-
-
-class TestPotsdam2DDataModule:
-    @pytest.fixture(scope="class", params=[0.0, 0.5])
-    def datamodule(self, request: SubRequest) -> Potsdam2DDataModule:
-        root = os.path.join("tests", "data", "potsdam")
-        batch_size = 1
-        num_workers = 0
-        val_split_size = request.param
-        dm = Potsdam2DDataModule(
-            root, batch_size, num_workers, val_split_pct=val_split_size
-        )
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: Potsdam2DDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: Potsdam2DDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: Potsdam2DDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_resisc45.py b/tests/datasets/test_resisc45.py
index 75ed6ee2d..c8f4d9157 100644
--- a/tests/datasets/test_resisc45.py
+++ b/tests/datasets/test_resisc45.py
@@ -15,7 +15,7 @@ from _pytest.fixtures import SubRequest
 from _pytest.monkeypatch import MonkeyPatch
 
 import torchgeo.datasets.utils
-from torchgeo.datasets import RESISC45, RESISC45DataModule
+from torchgeo.datasets import RESISC45
 
 
 def download_url(url: str, root: str, *args: str, **kwargs: str) -> None:
@@ -101,24 +101,3 @@ class TestRESISC45:
         x["prediction"] = x["label"].clone()
         dataset.plot(x)
         plt.close()
-
-
-class TestRESISC45DataModule:
-    @pytest.fixture(scope="class")
-    def datamodule(self) -> RESISC45DataModule:
-        root = os.path.join("tests", "data", "resisc45")
-        batch_size = 2
-        num_workers = 0
-        dm = RESISC45DataModule(root, batch_size, num_workers)
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: RESISC45DataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: RESISC45DataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: RESISC45DataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_sen12ms.py b/tests/datasets/test_sen12ms.py
index 2332e70f3..94c796434 100644
--- a/tests/datasets/test_sen12ms.py
+++ b/tests/datasets/test_sen12ms.py
@@ -12,7 +12,7 @@ from _pytest.fixtures import SubRequest
 from _pytest.monkeypatch import MonkeyPatch
 from torch.utils.data import ConcatDataset
 
-from torchgeo.datasets import SEN12MS, SEN12MSDataModule
+from torchgeo.datasets import SEN12MS
 
 
 class TestSEN12MS:
@@ -82,26 +82,3 @@ class TestSEN12MS:
         ds = SEN12MS(root, bands=bands, checksum=False)
         x = ds[0]["image"]
         assert x.shape[0] == len(bands)
-
-
-class TestSEN12MSDataModule:
-    @pytest.fixture(scope="class", params=["all", "s1", "s2-all", "s2-reduced"])
-    def datamodule(self, request: SubRequest) -> SEN12MSDataModule:
-        root = os.path.join("tests", "data", "sen12ms")
-        seed = 0
-        bands = request.param
-        batch_size = 1
-        num_workers = 0
-        dm = SEN12MSDataModule(root, seed, bands, batch_size, num_workers)
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: SEN12MSDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: SEN12MSDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: SEN12MSDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_so2sat.py b/tests/datasets/test_so2sat.py
index ab4085ba5..7df9fe5dd 100644
--- a/tests/datasets/test_so2sat.py
+++ b/tests/datasets/test_so2sat.py
@@ -13,7 +13,7 @@ import torch.nn as nn
 from _pytest.fixtures import SubRequest
 from _pytest.monkeypatch import MonkeyPatch
 
-from torchgeo.datasets import So2Sat, So2SatDataModule
+from torchgeo.datasets import So2Sat
 
 pytest.importorskip("h5py")
 
@@ -91,25 +91,3 @@ class TestSo2Sat:
             match="h5py is not installed and is required to use this dataset",
         ):
             So2Sat(dataset.root)
-
-
-class TestSo2SatDataModule:
-    @pytest.fixture(scope="class", params=zip([True, False], ["rgb", "s2"]))
-    def datamodule(self, request: SubRequest) -> So2SatDataModule:
-        unsupervised_mode, bands = request.param
-        root = os.path.join("tests", "data", "so2sat")
-        batch_size = 2
-        num_workers = 0
-        dm = So2SatDataModule(root, batch_size, num_workers, bands, unsupervised_mode)
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: So2SatDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: So2SatDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: So2SatDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_ucmerced.py b/tests/datasets/test_ucmerced.py
index ad6efb662..600c2595d 100644
--- a/tests/datasets/test_ucmerced.py
+++ b/tests/datasets/test_ucmerced.py
@@ -15,7 +15,7 @@ from _pytest.monkeypatch import MonkeyPatch
 from torch.utils.data import ConcatDataset
 
 import torchgeo.datasets.utils
-from torchgeo.datasets import UCMerced, UCMercedDataModule
+from torchgeo.datasets import UCMerced
 
 
 def download_url(url: str, root: str, *args: str, **kwargs: str) -> None:
@@ -102,24 +102,3 @@ class TestUCMerced:
         x["prediction"] = x["label"].clone()
         dataset.plot(x)
         plt.close()
-
-
-class TestUCMercedDataModule:
-    @pytest.fixture(scope="class")
-    def datamodule(self) -> UCMercedDataModule:
-        root = os.path.join("tests", "data", "ucmerced")
-        batch_size = 2
-        num_workers = 0
-        dm = UCMercedDataModule(root, batch_size, num_workers)
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: UCMercedDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: UCMercedDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: UCMercedDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_utils.py b/tests/datasets/test_utils.py
index 9e8732dea..631897bb7 100644
--- a/tests/datasets/test_utils.py
+++ b/tests/datasets/test_utils.py
@@ -18,13 +18,11 @@ import pytest
 import torch
 from _pytest.monkeypatch import MonkeyPatch
 from rasterio.crs import CRS
-from torch.utils.data import TensorDataset
 
 import torchgeo.datasets.utils
 from torchgeo.datasets.utils import (
     BoundingBox,
     concat_samples,
-    dataset_split,
     disambiguate_timestamp,
     download_and_extract_archive,
     download_radiant_mlhub_collection,
@@ -563,24 +561,6 @@ def test_nonexisting_directory(tmp_path: Path) -> None:
         assert subdir.cwd() == subdir
 
 
-def test_dataset_split() -> None:
-    num_samples = 24
-    x = torch.ones(num_samples, 5)  # type: ignore[attr-defined]
-    y = torch.randint(low=0, high=2, size=(num_samples,))  # type: ignore[attr-defined]
-    ds = TensorDataset(x, y)
-
-    # Test only train/val set split
-    train_ds, val_ds = dataset_split(ds, val_pct=1 / 2)
-    assert len(train_ds) == num_samples // 2
-    assert len(val_ds) == num_samples // 2
-
-    # Test train/val/test set split
-    train_ds, val_ds, test_ds = dataset_split(ds, val_pct=1 / 3, test_pct=1 / 3)
-    assert len(train_ds) == num_samples // 3
-    assert len(val_ds) == num_samples // 3
-    assert len(test_ds) == num_samples // 3
-
-
 def test_percentile_normalization() -> None:
     img = np.array([[1, 2], [98, 100]])
diff --git a/tests/datasets/test_vaihingen.py b/tests/datasets/test_vaihingen.py
index 531dd24e5..033017ea0 100644
--- a/tests/datasets/test_vaihingen.py
+++ b/tests/datasets/test_vaihingen.py
@@ -13,7 +13,7 @@ import torch.nn as nn
 from _pytest.fixtures import SubRequest
 from _pytest.monkeypatch import MonkeyPatch
 
-from torchgeo.datasets import Vaihingen2D, Vaihingen2DDataModule
+from torchgeo.datasets import Vaihingen2D
 
 
 class TestVaihingen2D:
@@ -84,27 +84,3 @@ class TestVaihingen2D:
         x["prediction"] = x["mask"].clone()
         dataset.plot(x)
         plt.close()
-
-
-class TestVaihingen2DDataModule:
-    @pytest.fixture(scope="class", params=[0.0, 0.5])
-    def datamodule(self, request: SubRequest) -> Vaihingen2DDataModule:
-        root = os.path.join("tests", "data", "vaihingen")
-        batch_size = 1
-        num_workers = 0
-        val_split_size = request.param
-        dm = Vaihingen2DDataModule(
-            root, batch_size, num_workers, val_split_pct=val_split_size
-        )
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: Vaihingen2DDataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: Vaihingen2DDataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: Vaihingen2DDataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/datasets/test_xview2.py b/tests/datasets/test_xview2.py
index b35833761..92e00f4c7 100644
--- a/tests/datasets/test_xview2.py
+++ b/tests/datasets/test_xview2.py
@@ -13,7 +13,7 @@ import torch.nn as nn
 from _pytest.fixtures import SubRequest
 from _pytest.monkeypatch import MonkeyPatch
 
-from torchgeo.datasets import XView2, XView2DataModule
+from torchgeo.datasets import XView2
 
 
 class TestXView2:
@@ -95,27 +95,3 @@ class TestXView2:
         x["prediction"] = x["mask"][0].clone()
         dataset.plot(x)
         plt.close()
-
-
-class TestXView2DataModule:
-    @pytest.fixture(scope="class", params=[0.0, 0.5])
-    def datamodule(self, request: SubRequest) -> XView2DataModule:
-        root = os.path.join("tests", "data", "xview2")
-        batch_size = 1
-        num_workers = 0
-        val_split_size = request.param
-        dm = XView2DataModule(
-            root, batch_size, num_workers, val_split_pct=val_split_size
-        )
-        dm.prepare_data()
-        dm.setup()
-        return dm
-
-    def test_train_dataloader(self, datamodule: XView2DataModule) -> None:
-        next(iter(datamodule.train_dataloader()))
-
-    def test_val_dataloader(self, datamodule: XView2DataModule) -> None:
-        next(iter(datamodule.val_dataloader()))
-
-    def test_test_dataloader(self, datamodule: XView2DataModule) -> None:
-        next(iter(datamodule.test_dataloader()))
diff --git a/tests/trainers/test_byol.py b/tests/trainers/test_byol.py
index ac5e9e2b7..304d4a5bf 100644
--- a/tests/trainers/test_byol.py
+++ b/tests/trainers/test_byol.py
@@ -12,7 +12,7 @@ from omegaconf import OmegaConf
 from pytorch_lightning.core.lightning import LightningModule
 from torchvision.models import resnet18
 
-from torchgeo.datasets import ChesapeakeCVPRDataModule
+from torchgeo.datamodules import ChesapeakeCVPRDataModule
 from torchgeo.trainers import BYOLTask
 from torchgeo.trainers.byol import BYOL, SimCLRAugmentation
diff --git a/tests/trainers/test_chesapeake.py b/tests/trainers/test_chesapeake.py
index a9c95907d..920936802 100644
--- a/tests/trainers/test_chesapeake.py
+++ b/tests/trainers/test_chesapeake.py
@@ -9,7 +9,7 @@ from _pytest.fixtures import SubRequest
 from _pytest.monkeypatch import MonkeyPatch
 from omegaconf import OmegaConf
 
-from torchgeo.datasets import ChesapeakeCVPRDataModule
+from torchgeo.datamodules import ChesapeakeCVPRDataModule
 from torchgeo.trainers.chesapeake import ChesapeakeCVPRSegmentationTask
 
 from .test_utils import FakeTrainer, mocked_log
diff --git a/tests/trainers/test_landcoverai.py b/tests/trainers/test_landcoverai.py
index b14d5b466..d3e70dfb0 100644
--- a/tests/trainers/test_landcoverai.py
+++ b/tests/trainers/test_landcoverai.py
@@ -8,7 +8,7 @@ import pytest
 from _pytest.monkeypatch import MonkeyPatch
 from omegaconf import OmegaConf
 
-from torchgeo.datasets import LandCoverAIDataModule
+from torchgeo.datamodules import LandCoverAIDataModule
 from torchgeo.trainers.landcoverai import LandCoverAISegmentationTask
 
 from .test_utils import FakeTrainer, mocked_log
diff --git a/tests/trainers/test_naipchesapeake.py b/tests/trainers/test_naipchesapeake.py
index 3b8cce5ac..37d94cb0e 100644
--- a/tests/trainers/test_naipchesapeake.py
+++ b/tests/trainers/test_naipchesapeake.py
@@ -8,7 +8,7 @@ import pytest
 from _pytest.monkeypatch import MonkeyPatch
 from omegaconf import OmegaConf
 
-from torchgeo.datasets import NAIPChesapeakeDataModule
+from torchgeo.datamodules import NAIPChesapeakeDataModule
 from torchgeo.trainers.naipchesapeake import NAIPChesapeakeSegmentationTask
 
 from .test_utils import FakeTrainer, mocked_log
diff --git a/tests/trainers/test_regression.py b/tests/trainers/test_regression.py
index cfa7e1692..ed3af3a3b 100644
--- a/tests/trainers/test_regression.py
+++ b/tests/trainers/test_regression.py
@@ -8,7 +8,7 @@ import pytest
 from _pytest.monkeypatch import MonkeyPatch
 from omegaconf import OmegaConf
 
-from torchgeo.datasets import CycloneDataModule
+from torchgeo.datamodules import CycloneDataModule
 from torchgeo.trainers import RegressionTask
 
 from .test_utils import mocked_log
diff --git a/tests/trainers/test_resisc45.py b/tests/trainers/test_resisc45.py
index 0b832295f..1eec36e2f 100644
--- a/tests/trainers/test_resisc45.py
+++ b/tests/trainers/test_resisc45.py
@@ -7,7 +7,7 @@ from typing import Any, Dict, Generator
 import pytest
 from _pytest.monkeypatch import MonkeyPatch
 
-from torchgeo.datasets import RESISC45DataModule
+from torchgeo.datamodules import RESISC45DataModule
 from torchgeo.trainers.resisc45 import RESISC45ClassificationTask
 
 from .test_utils import FakeTrainer, mocked_log
diff --git a/tests/trainers/test_segmentation.py b/tests/trainers/test_segmentation.py
index 058f94170..658e4870f 100644
--- a/tests/trainers/test_segmentation.py
+++ b/tests/trainers/test_segmentation.py
@@ -9,7 +9,7 @@ from _pytest.fixtures import SubRequest
 from _pytest.monkeypatch import MonkeyPatch
 from omegaconf import OmegaConf
 
-from torchgeo.datasets import ChesapeakeCVPRDataModule
+from torchgeo.datamodules import ChesapeakeCVPRDataModule
 from torchgeo.trainers import SemanticSegmentationTask
 
 from .test_utils import FakeTrainer, mocked_log
diff --git a/torchgeo/__init__.py b/torchgeo/__init__.py
index cc1b2abb9..a447a5f87 100644
--- a/torchgeo/__init__.py
+++ b/torchgeo/__init__.py
@@ -13,7 +13,7 @@ from typing import Dict, Tuple, Type
 
 import pytorch_lightning as pl
 
-from .datasets import (
+from .datamodules import (
     BigEarthNetDataModule,
     ChesapeakeCVPRDataModule,
     COWCCountingDataModule,
diff --git a/torchgeo/datamodules/__init__.py b/torchgeo/datamodules/__init__.py
new file mode 100644
index 000000000..e09fe0ab3
--- /dev/null
+++ b/torchgeo/datamodules/__init__.py
@@ -0,0 +1,52 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""TorchGeo datamodules."""
+
+from .bigearthnet import BigEarthNetDataModule
+from .chesapeake import ChesapeakeCVPRDataModule
+from .cowc import COWCCountingDataModule
+from .cyclone import CycloneDataModule
+from .etci2021 import ETCI2021DataModule
+from .eurosat import EuroSATDataModule
+from .fair1m import FAIR1MDataModule
+from .landcoverai import LandCoverAIDataModule
+from .loveda import LoveDADataModule
+from .naip import NAIPChesapeakeDataModule
+from .nasa_marine_debris import NASAMarineDebrisDataModule
+from .oscd import OSCDDataModule
+from .potsdam import Potsdam2DDataModule
+from .resisc45 import RESISC45DataModule
+from .sen12ms import SEN12MSDataModule
+from .so2sat import So2SatDataModule
+from .ucmerced import UCMercedDataModule
+from .vaihingen import Vaihingen2DDataModule
+from .xview import XView2DataModule
+
+__all__ = (
+    # GeoDataset
+    "ChesapeakeCVPRDataModule",
+    "NAIPChesapeakeDataModule",
+    # VisionDataset
+    "BigEarthNetDataModule",
+    "COWCCountingDataModule",
+    "ETCI2021DataModule",
+    "EuroSATDataModule",
+    "FAIR1MDataModule",
+    "LandCoverAIDataModule",
+    "LoveDADataModule",
+    "NASAMarineDebrisDataModule",
+    "OSCDDataModule",
+    "Potsdam2DDataModule",
+    "RESISC45DataModule",
+    "SEN12MSDataModule",
+    "So2SatDataModule",
+    "CycloneDataModule",
+    "UCMercedDataModule",
+    "Vaihingen2DDataModule",
+    "XView2DataModule",
+)
+
+# https://stackoverflow.com/questions/40018681
+for module in __all__:
+    globals()[module].__module__ = "torchgeo.datamodules"
diff --git a/torchgeo/datamodules/bigearthnet.py b/torchgeo/datamodules/bigearthnet.py
new file mode 100644
index 000000000..11c2e4ed9
--- /dev/null
+++ b/torchgeo/datamodules/bigearthnet.py
@@ -0,0 +1,178 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""BigEarthNet datamodule."""
+
+from typing import Any, Dict, Optional
+
+import pytorch_lightning as pl
+import torch
+from torch.utils.data import DataLoader
+from torchvision.transforms import Compose
+
+from ..datasets import BigEarthNet
+
+# https://github.com/pytorch/pytorch/issues/60979
+# https://github.com/pytorch/pytorch/pull/61045
+DataLoader.__module__ = "torch.utils.data"
+
+
+class BigEarthNetDataModule(pl.LightningDataModule):
+    """LightningDataModule implementation for the BigEarthNet dataset.
+ + Uses the train/val/test splits from the dataset. + """ + + # (VV, VH, B01, B02, B03, B04, B05, B06, B07, B08, B8A, B09, B11, B12) + # min/max band statistics computed on 100k random samples + band_mins_raw = torch.tensor( # type: ignore[attr-defined] + [-70.0, -72.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0] + ) + band_maxs_raw = torch.tensor( # type: ignore[attr-defined] + [ + 31.0, + 35.0, + 18556.0, + 20528.0, + 18976.0, + 17874.0, + 16611.0, + 16512.0, + 16394.0, + 16672.0, + 16141.0, + 16097.0, + 15336.0, + 15203.0, + ] + ) + + # min/max band statistics computed by percentile clipping the + # above samples to the [2, 98] percentile range + band_mins = torch.tensor( # type: ignore[attr-defined] + [-48.0, -42.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + ) + band_maxs = torch.tensor( # type: ignore[attr-defined] + [ + 6.0, + 16.0, + 9859.0, + 12872.0, + 13163.0, + 14445.0, + 12477.0, + 12563.0, + 12289.0, + 15596.0, + 12183.0, + 9458.0, + 5897.0, + 5544.0, + ] + ) + + def __init__( + self, + root_dir: str, + bands: str = "all", + num_classes: int = 19, + batch_size: int = 64, + num_workers: int = 0, + **kwargs: Any, + ) -> None: + """Initialize a LightningDataModule for BigEarthNet based DataLoaders. + + Args: + root_dir: The ``root`` argument to pass to the BigEarthNet Dataset classes + bands: load Sentinel-1 bands, Sentinel-2 bands, or both. One of {s1, s2, all} + num_classes: number of classes to load in target. One of {19, 43} + batch_size: The batch size to use in all created DataLoaders + num_workers: The number of workers to use in all created DataLoaders + """ + super().__init__() # type: ignore[no-untyped-call] + self.root_dir = root_dir + self.bands = bands + self.num_classes = num_classes + self.batch_size = batch_size + self.num_workers = num_workers + + if bands == "all": + self.mins = self.band_mins[:, None, None] + self.maxs = self.band_maxs[:, None, None] + elif bands == "s1": + self.mins = self.band_mins[:2, None, None] + self.maxs = self.band_maxs[:2, None, None] + else: + self.mins = self.band_mins[2:, None, None] + self.maxs = self.band_maxs[2:, None, None] + + def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single sample from the Dataset.""" + sample["image"] = sample["image"].float() + sample["image"] = (sample["image"] - self.mins) / (self.maxs - self.mins) + sample["image"] = torch.clip( # type: ignore[attr-defined] + sample["image"], min=0.0, max=1.0 + ) + return sample + + def prepare_data(self) -> None: + """Make sure that the dataset is downloaded. + + This method is only called once per run. + """ + BigEarthNet(self.root_dir, split="train", bands=self.bands, checksum=False) + + def setup(self, stage: Optional[str] = None) -> None: + """Initialize the main ``Dataset`` objects. + + This method is called once per GPU per run.
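# --- Editor's note: a minimal, hypothetical sketch (not part of the diff) of
# the min/max normalization that BigEarthNetDataModule.preprocess applies
# above. The two-band statistics below are invented for illustration; the real
# module uses the 14-band percentile-clipped statistics defined on the class.
import torch

mins = torch.tensor([-48.0, -42.0])[:, None, None]  # per-band minima, broadcastable over (C, H, W)
maxs = torch.tensor([6.0, 16.0])[:, None, None]     # per-band maxima

image = torch.randn(2, 4, 4) * 30.0                 # fake two-band patch with outliers
image = (image - mins) / (maxs - mins)              # rescale each band toward [0, 1]
image = torch.clip(image, min=0.0, max=1.0)         # clamp values outside the clipped stats
assert image.min() >= 0.0 and image.max() <= 1.0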
+ """ + transforms = Compose([self.preprocess]) + self.train_dataset = BigEarthNet( + self.root_dir, + split="train", + bands=self.bands, + num_classes=self.num_classes, + transforms=transforms, + ) + self.val_dataset = BigEarthNet( + self.root_dir, + split="val", + bands=self.bands, + num_classes=self.num_classes, + transforms=transforms, + ) + self.test_dataset = BigEarthNet( + self.root_dir, + split="test", + bands=self.bands, + num_classes=self.num_classes, + transforms=transforms, + ) + + def train_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for training.""" + return DataLoader( + self.train_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=True, + ) + + def val_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for validation.""" + return DataLoader( + self.val_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) + + def test_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for testing.""" + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) diff --git a/torchgeo/datamodules/chesapeake.py b/torchgeo/datamodules/chesapeake.py new file mode 100644 index 000000000..225f4c311 --- /dev/null +++ b/torchgeo/datamodules/chesapeake.py @@ -0,0 +1,312 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""Chesapeake Bay High-Resolution Land Cover Project datamodule.""" + +from typing import Any, Callable, Dict, List, Optional + +import torch +import torch.nn.functional as F +from pytorch_lightning.core.datamodule import LightningDataModule +from torch import Tensor +from torch.utils.data import DataLoader +from torchvision.transforms import Compose + +from ..datasets import ChesapeakeCVPR, stack_samples +from ..samplers.batch import RandomBatchGeoSampler +from ..samplers.single import GridGeoSampler + +# https://github.com/pytorch/pytorch/issues/60979 +# https://github.com/pytorch/pytorch/pull/61045 +DataLoader.__module__ = "torch.utils.data" + + +class ChesapeakeCVPRDataModule(LightningDataModule): + """LightningDataModule implementation for the Chesapeake CVPR Land Cover dataset. + + Uses the random splits defined per state to partition tiles into train, val, + and test sets. + """ + + def __init__( + self, + root_dir: str, + train_splits: List[str], + val_splits: List[str], + test_splits: List[str], + patches_per_tile: int = 200, + patch_size: int = 256, + batch_size: int = 64, + num_workers: int = 0, + class_set: int = 7, + **kwargs: Any, + ) -> None: + """Initialize a LightningDataModule for Chesapeake CVPR based DataLoaders. + + Args: + root_dir: The ``root`` arugment to pass to the ChesapeakeCVPR Dataset + classes + train_splits: The splits used to train the model, e.g. ["ny-train"] + val_splits: The splits used to validate the model, e.g. ["ny-val"] + test_splits: The splits used to test the model, e.g. 
["ny-test"] + patches_per_tile: The number of patches per tile to sample + patch_size: The size of each patch in pixels (test patches will be 1.5 times + this size) + batch_size: The batch size to use in all created DataLoaders + num_workers: The number of workers to use in all created DataLoaders + class_set: The high-resolution land cover class set to use - 5 or 7 + """ + super().__init__() # type: ignore[no-untyped-call] + for state in train_splits + val_splits + test_splits: + assert state in ChesapeakeCVPR.splits + assert class_set in [5, 7] + + self.root_dir = root_dir + self.train_splits = train_splits + self.val_splits = val_splits + self.test_splits = test_splits + self.layers = ["naip-new", "lc"] + self.patches_per_tile = patches_per_tile + self.patch_size = patch_size + # This is a rough estimate of how large of a patch we will need to sample in + # EPSG:3857 in order to guarantee a large enough patch in the local CRS. + self.original_patch_size = int(patch_size * 2.0) + self.batch_size = batch_size + self.num_workers = num_workers + self.class_set = class_set + + def pad_to( + self, size: int = 512, image_value: int = 0, mask_value: int = 0 + ) -> Callable[[Dict[str, Tensor]], Dict[str, Tensor]]: + """Returns a function to perform a padding transform on a single sample. + + Args: + size: output image size + image_value: value to pad image with + mask_value: value to pad mask with + + Returns: + function to perform padding + """ + + def pad_inner(sample: Dict[str, Tensor]) -> Dict[str, Tensor]: + _, height, width = sample["image"].shape + assert height <= size and width <= size + + height_pad = size - height + width_pad = size - width + + # See https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html + # for a description of the format of the padding tuple + sample["image"] = F.pad( + sample["image"], + (0, width_pad, 0, height_pad), + mode="constant", + value=image_value, + ) + sample["mask"] = F.pad( + sample["mask"], + (0, width_pad, 0, height_pad), + mode="constant", + value=mask_value, + ) + return sample + + return pad_inner + + def center_crop( + self, size: int = 512 + ) -> Callable[[Dict[str, Tensor]], Dict[str, Tensor]]: + """Returns a function to perform a center crop transform on a single sample. + + Args: + size: output image size + + Returns: + function to perform center crop + """ + + def center_crop_inner(sample: Dict[str, Tensor]) -> Dict[str, Tensor]: + _, height, width = sample["image"].shape + + y1 = (height - size) // 2 + x1 = (width - size) // 2 + sample["image"] = sample["image"][:, y1 : y1 + size, x1 : x1 + size] + sample["mask"] = sample["mask"][:, y1 : y1 + size, x1 : x1 + size] + + return sample + + return center_crop_inner + + def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Preprocesses a single sample. + + Args: + sample: sample dictionary containing image and mask + + Returns: + preprocessed sample + """ + sample["image"] = sample["image"] / 255.0 + sample["mask"] = sample["mask"] + sample["mask"] = sample["mask"].squeeze() + + if self.class_set == 5: + sample["mask"][sample["mask"] == 5] = 4 + sample["mask"][sample["mask"] == 6] = 4 + + sample["image"] = sample["image"].float() + sample["mask"] = sample["mask"].long() + + return sample + + def nodata_check( + self, size: int = 512 + ) -> Callable[[Dict[str, Tensor]], Dict[str, Tensor]]: + """Returns a function to check for nodata or mis-sized input. 
+ + Args: + size: output image size + + Returns: + function to check for nodata values + """ + + def nodata_check_inner(sample: Dict[str, Tensor]) -> Dict[str, Tensor]: + num_channels, height, width = sample["image"].shape + + if height < size or width < size: + sample["image"] = torch.zeros( # type: ignore[attr-defined] + (num_channels, size, size) + ) + sample["mask"] = torch.zeros((size, size)) # type: ignore[attr-defined] + + return sample + + return nodata_check_inner + + def prepare_data(self) -> None: + """Confirms that the dataset is downloaded on the local node. + + This method is called once per node, while :func:`setup` is called once per GPU. + """ + ChesapeakeCVPR( + self.root_dir, + splits=self.train_splits, + layers=self.layers, + transforms=None, + download=False, + checksum=False, + ) + + def setup(self, stage: Optional[str] = None) -> None: + """Create the train/val/test splits based on the original Dataset objects. + + The splits should be done here vs. in :func:`__init__` per the docs: + https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html#setup. + + Args: + stage: stage to set up + """ + train_transforms = Compose( + [ + self.center_crop(self.patch_size), + self.nodata_check(self.patch_size), + self.preprocess, + ] + ) + val_transforms = Compose( + [ + self.center_crop(self.patch_size), + self.nodata_check(self.patch_size), + self.preprocess, + ] + ) + test_transforms = Compose( + [ + self.pad_to(self.original_patch_size, image_value=0, mask_value=0), + self.preprocess, + ] + ) + + self.train_dataset = ChesapeakeCVPR( + self.root_dir, + splits=self.train_splits, + layers=self.layers, + transforms=train_transforms, + download=False, + checksum=False, + ) + self.val_dataset = ChesapeakeCVPR( + self.root_dir, + splits=self.val_splits, + layers=self.layers, + transforms=val_transforms, + download=False, + checksum=False, + ) + self.test_dataset = ChesapeakeCVPR( + self.root_dir, + splits=self.test_splits, + layers=self.layers, + transforms=test_transforms, + download=False, + checksum=False, + ) + + def train_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for training. + + Returns: + training data loader + """ + sampler = RandomBatchGeoSampler( + self.train_dataset, + size=self.original_patch_size, + batch_size=self.batch_size, + length=self.patches_per_tile * len(self.train_dataset), + ) + return DataLoader( + self.train_dataset, + batch_sampler=sampler, + num_workers=self.num_workers, + collate_fn=stack_samples, + ) + + def val_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for validation. + + Returns: + validation data loader + """ + sampler = GridGeoSampler( + self.val_dataset, + size=self.original_patch_size, + stride=self.original_patch_size, + ) + return DataLoader( + self.val_dataset, + batch_size=self.batch_size, + sampler=sampler, + num_workers=self.num_workers, + collate_fn=stack_samples, + ) + + def test_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for testing. 
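# --- Editor's note: a hypothetical end-to-end usage sketch (not part of the
# diff). The root path and split names are placeholders, and the data is
# assumed to be already downloaded; prepare_data/setup mirror how the trainers
# drive this module.
from torchgeo.datamodules import ChesapeakeCVPRDataModule

dm = ChesapeakeCVPRDataModule(
    root_dir="data/chesapeake/cvpr",
    train_splits=["de-train"],
    val_splits=["de-val"],
    test_splits=["de-test"],
    patch_size=256,
    batch_size=8,
    class_set=7,
)
dm.prepare_data()  # checks the download once per node
dm.setup()         # builds datasets and geo samplers once per GPU
batch = next(iter(dm.train_dataloader()))  # dict with "image" and "mask"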
+ + Returns: + testing data loader + """ + sampler = GridGeoSampler( + self.test_dataset, + size=self.original_patch_size, + stride=self.original_patch_size, + ) + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + sampler=sampler, + num_workers=self.num_workers, + collate_fn=stack_samples, + ) diff --git a/torchgeo/datamodules/cowc.py b/torchgeo/datamodules/cowc.py new file mode 100644 index 000000000..4d6e4a7cd --- /dev/null +++ b/torchgeo/datamodules/cowc.py @@ -0,0 +1,123 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""COWC datamodule.""" + +from typing import Any, Dict, Optional + +import pytorch_lightning as pl +from torch import Generator # type: ignore[attr-defined] +from torch.utils.data import DataLoader, random_split + +from ..datasets import COWCCounting + +# https://github.com/pytorch/pytorch/issues/60979 +# https://github.com/pytorch/pytorch/pull/61045 +DataLoader.__module__ = "torch.utils.data" + + +class COWCCountingDataModule(pl.LightningDataModule): + """LightningDataModule implementation for the COWC Counting dataset.""" + + def __init__( + self, + root_dir: str, + seed: int, + batch_size: int = 64, + num_workers: int = 0, + **kwargs: Any, + ) -> None: + """Initialize a LightningDataModule for COWC Counting based DataLoaders. + + Args: + root_dir: The ``root`` argument to pass to the COWCCounting Dataset class + seed: The seed value to use when doing the dataset random_split + batch_size: The batch size to use in all created DataLoaders + num_workers: The number of workers to use in all created DataLoaders + """ + super().__init__() # type: ignore[no-untyped-call] + self.root_dir = root_dir + self.seed = seed + self.batch_size = batch_size + self.num_workers = num_workers + + def custom_transform(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single sample from the Dataset. + + Args: + sample: dictionary containing image and target + + Returns: + preprocessed sample + """ + sample["image"] = sample["image"] / 255.0 # scale to [0, 1] + sample["label"] = sample["label"].float() + return sample + + def prepare_data(self) -> None: + """Initialize the main ``Dataset`` objects for use in :func:`setup`. + + This includes optionally downloading the dataset. This is done once per node, + while :func:`setup` is done once per GPU. + """ + COWCCounting(self.root_dir, download=False) + + def setup(self, stage: Optional[str] = None) -> None: + """Create the train/val/test splits based on the original Dataset objects. + + The splits should be done here vs. in :func:`__init__` per the docs: + https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html#setup. + + Args: + stage: stage to set up + """ + train_val_dataset = COWCCounting( + self.root_dir, split="train", transforms=self.custom_transform + ) + self.test_dataset = COWCCounting( + self.root_dir, split="test", transforms=self.custom_transform + ) + self.train_dataset, self.val_dataset = random_split( + train_val_dataset, + [len(train_val_dataset) - len(self.test_dataset), len(self.test_dataset)], + generator=Generator().manual_seed(self.seed), + ) + + def train_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for training. + + Returns: + training data loader + """ + return DataLoader( + self.train_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=True, + ) + + def val_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for validation.
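# --- Editor's note: a runnable sketch (not part of the diff) of the split
# rule in COWCCountingDataModule.setup above: the validation set is carved
# out of the train split and sized to match the test set, with a seeded
# generator so the split is reproducible across runs.
import torch
from torch import Generator
from torch.utils.data import TensorDataset, random_split

train_val = TensorDataset(torch.arange(100.0))  # stand-in for the COWC train split
test_len = 20                                   # stand-in for len(test_dataset)
train, val = random_split(
    train_val,
    [len(train_val) - test_len, test_len],
    generator=Generator().manual_seed(0),
)
assert len(train) == 80 and len(val) == 20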
+ + Returns: + validation data loader + """ + return DataLoader( + self.val_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) + + def test_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for testing. + + Returns: + testing data loader + """ + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) diff --git a/torchgeo/datamodules/cyclone.py b/torchgeo/datamodules/cyclone.py new file mode 100644 index 000000000..929628e7c --- /dev/null +++ b/torchgeo/datamodules/cyclone.py @@ -0,0 +1,171 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""Tropical Cyclone Wind Estimation Competition datamodule.""" + +from typing import Any, Dict, Optional + +import pytorch_lightning as pl +import torch +from sklearn.model_selection import GroupShuffleSplit +from torch.utils.data import DataLoader, Subset + +from ..datasets import TropicalCycloneWindEstimation + +# https://github.com/pytorch/pytorch/issues/60979 +# https://github.com/pytorch/pytorch/pull/61045 +DataLoader.__module__ = "torch.utils.data" + + +class CycloneDataModule(pl.LightningDataModule): + """LightningDataModule implementation for the NASA Cyclone dataset. + + Implements 80/20 train/val splits based on hurricane storm ids. + See :func:`setup` for more details. + """ + + def __init__( + self, + root_dir: str, + seed: int, + batch_size: int = 64, + num_workers: int = 0, + api_key: Optional[str] = None, + **kwargs: Any, + ) -> None: + """Initialize a LightningDataModule for NASA Cyclone based DataLoaders. + + Args: + root_dir: The ``root`` argument to pass to the + TropicalCycloneWindEstimation Dataset classes + seed: The seed value to use when doing the sklearn based GroupShuffleSplit + batch_size: The batch size to use in all created DataLoaders + num_workers: The number of workers to use in all created DataLoaders + api_key: The RadiantEarth MLHub API key to use if the dataset needs to be + downloaded + """ + super().__init__() # type: ignore[no-untyped-call] + self.root_dir = root_dir + self.seed = seed + self.batch_size = batch_size + self.num_workers = num_workers + self.api_key = api_key + + def custom_transform(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single sample from the Dataset. + + Args: + sample: dictionary containing image and target + + Returns: + preprocessed sample + """ + sample["image"] = sample["image"] / 255.0 # scale to [0, 1] + sample["image"] = ( + sample["image"].unsqueeze(0).repeat(3, 1, 1) + ) # convert to 3 channel + sample["label"] = torch.as_tensor( # type: ignore[attr-defined] + sample["label"] + ).float() + + return sample + + def prepare_data(self) -> None: + """Initialize the main ``Dataset`` objects for use in :func:`setup`. + + This includes optionally downloading the dataset. This is done once per node, + while :func:`setup` is done once per GPU. + """ + TropicalCycloneWindEstimation( + self.root_dir, + split="train", + transforms=self.custom_transform, + download=self.api_key is not None, + api_key=self.api_key, + ) + + def setup(self, stage: Optional[str] = None) -> None: + """Create the train/val/test splits based on the original Dataset objects. + + The splits should be done here vs. in :func:`__init__` per the docs: + https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html#setup. + + We split samples between train/val by the ``storm_id`` property. I.e.
all + samples with the same ``storm_id`` value will be either in the train or the val + split. This is important to test one type of generalizability -- given a new + storm, can we predict its wind speed? The test set, however, contains *some* + storms from the training set (specifically, the latter parts of the storms) as + well as some novel storms. + + Args: + stage: stage to set up + """ + self.all_train_dataset = TropicalCycloneWindEstimation( + self.root_dir, + split="train", + transforms=self.custom_transform, + download=False, + ) + + self.all_test_dataset = TropicalCycloneWindEstimation( + self.root_dir, + split="test", + transforms=self.custom_transform, + download=False, + ) + + storm_ids = [] + for item in self.all_train_dataset.collection: + storm_id = item["href"].split("/")[0].split("_")[-2] + storm_ids.append(storm_id) + + train_indices, val_indices = next( + GroupShuffleSplit(test_size=0.2, n_splits=2, random_state=self.seed).split( + storm_ids, groups=storm_ids + ) + ) + + self.train_dataset = Subset(self.all_train_dataset, train_indices) + self.val_dataset = Subset(self.all_train_dataset, val_indices) + self.test_dataset = Subset( + self.all_test_dataset, range(len(self.all_test_dataset)) + ) + + def train_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for training. + + Returns: + training data loader + """ + return DataLoader( + self.train_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=True, + ) + + def val_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for validation. + + Returns: + validation data loader + """ + return DataLoader( + self.val_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) + + def test_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for testing. + + Returns: + testing data loader + """ + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) diff --git a/torchgeo/datamodules/etci2021.py b/torchgeo/datamodules/etci2021.py new file mode 100644 index 000000000..5db89a073 --- /dev/null +++ b/torchgeo/datamodules/etci2021.py @@ -0,0 +1,151 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""ETCI 2021 datamodule.""" + +from typing import Any, Dict, Optional + +import pytorch_lightning as pl +import torch +from torch import Generator # type: ignore[attr-defined] +from torch.utils.data import DataLoader, random_split +from torchvision.transforms import Normalize + +from ..datasets import ETCI2021 + + +class ETCI2021DataModule(pl.LightningDataModule): + """LightningDataModule implementation for the ETCI2021 dataset. + + Splits the existing train split from the dataset into train/val with 80/20 + proportions, then uses the existing val dataset as the test data. + + .. versionadded:: 0.2 + """ + + band_means = torch.tensor( # type: ignore[attr-defined] + [0.52253931, 0.52253931, 0.52253931, 0.61221701, 0.61221701, 0.61221701, 0] + ) + + band_stds = torch.tensor( # type: ignore[attr-defined] + [0.35221376, 0.35221376, 0.35221376, 0.37364622, 0.37364622, 0.37364622, 1] + ) + + def __init__( + self, + root_dir: str, + seed: int = 0, + batch_size: int = 64, + num_workers: int = 0, + **kwargs: Any, + ) -> None: + """Initialize a LightningDataModule for ETCI2021 based DataLoaders.
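# --- Editor's note: a runnable sketch (not part of the diff) of the grouped
# 80/20 split performed in CycloneDataModule.setup above. The storm ids are
# invented; the point is that all samples of a given storm land entirely in
# train or entirely in val.
from sklearn.model_selection import GroupShuffleSplit

storm_ids = ["a", "a", "a", "b", "b", "c", "c", "c", "d", "d"]
splitter = GroupShuffleSplit(test_size=0.2, n_splits=2, random_state=0)
train_idx, val_idx = next(splitter.split(storm_ids, groups=storm_ids))
train_groups = {storm_ids[i] for i in train_idx}
val_groups = {storm_ids[i] for i in val_idx}
assert train_groups.isdisjoint(val_groups)  # no storm straddles the split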
+ + Args: + root_dir: The ``root`` arugment to pass to the ETCI2021 Dataset classes + seed: The seed value to use when doing the dataset random_split + batch_size: The batch size to use in all created DataLoaders + num_workers: The number of workers to use in all created DataLoaders + """ + super().__init__() # type: ignore[no-untyped-call] + self.root_dir = root_dir + self.seed = seed + self.batch_size = batch_size + self.num_workers = num_workers + + self.norm = Normalize(self.band_means, self.band_stds) + + def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single sample from the Dataset. + + Notably, moves the given water mask to act as an input layer. + + Args: + sample: input image dictionary + + Returns: + preprocessed sample + """ + image = sample["image"] + water_mask = sample["mask"][0].unsqueeze(0) + flood_mask = sample["mask"][1] + flood_mask = (flood_mask > 0).long() + + sample["image"] = torch.cat( # type: ignore[attr-defined] + [image, water_mask], dim=0 + ).float() + sample["image"] /= 255.0 + sample["image"] = self.norm(sample["image"]) + sample["mask"] = flood_mask + return sample + + def prepare_data(self) -> None: + """Make sure that the dataset is downloaded. + + This method is only called once per run. + """ + ETCI2021(self.root_dir, checksum=False) + + def setup(self, stage: Optional[str] = None) -> None: + """Initialize the main ``Dataset`` objects. + + This method is called once per GPU per run. + + Args: + stage: stage to set up + """ + train_val_dataset = ETCI2021( + self.root_dir, split="train", transforms=self.preprocess + ) + self.test_dataset = ETCI2021( + self.root_dir, split="val", transforms=self.preprocess + ) + + size_train_val = len(train_val_dataset) + size_train = int(0.8 * size_train_val) + size_val = size_train_val - size_train + + self.train_dataset, self.val_dataset = random_split( + train_val_dataset, + [size_train, size_val], + generator=Generator().manual_seed(self.seed), + ) + + def train_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for training. + + Returns: + training data loader + """ + return DataLoader( + self.train_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=True, + ) + + def val_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for validation. + + Returns: + validation data loader + """ + return DataLoader( + self.val_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) + + def test_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for testing. + + Returns: + testing data loader + """ + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) diff --git a/torchgeo/datamodules/eurosat.py b/torchgeo/datamodules/eurosat.py new file mode 100644 index 000000000..72708e070 --- /dev/null +++ b/torchgeo/datamodules/eurosat.py @@ -0,0 +1,148 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""EuroSAT datamodule.""" + +from typing import Any, Dict, Optional + +import pytorch_lightning as pl +import torch +from torch.utils.data import DataLoader +from torchvision.transforms import Compose, Normalize + +from ..datasets import EuroSAT + + +class EuroSATDataModule(pl.LightningDataModule): + """LightningDataModule implementation for the EuroSAT dataset. + + Uses the train/val/test splits from the dataset. + + .. 
versionadded:: 0.2 + """ + + band_means = torch.tensor( # type: ignore[attr-defined] + [ + 1354.40546513, + 1118.24399958, + 1042.92983953, + 947.62620298, + 1199.47283961, + 1999.79090914, + 2369.22292565, + 2296.82608323, + 732.08340178, + 12.11327804, + 1819.01027855, + 1118.92391149, + 2594.14080798, + ] + ) + + band_stds = torch.tensor( # type: ignore[attr-defined] + [ + 245.71762908, + 333.00778264, + 395.09249139, + 593.75055589, + 566.4170017, + 861.18399006, + 1086.63139075, + 1117.98170791, + 404.91978886, + 4.77584468, + 1002.58768311, + 761.30323499, + 1231.58581042, + ] + ) + + def __init__( + self, root_dir: str, batch_size: int = 64, num_workers: int = 0, **kwargs: Any + ) -> None: + """Initialize a LightningDataModule for EuroSAT based DataLoaders. + + Args: + root_dir: The ``root`` arugment to pass to the EuroSAT Dataset classes + batch_size: The batch size to use in all created DataLoaders + num_workers: The number of workers to use in all created DataLoaders + """ + super().__init__() # type: ignore[no-untyped-call] + self.root_dir = root_dir + self.batch_size = batch_size + self.num_workers = num_workers + + self.norm = Normalize(self.band_means, self.band_stds) + + def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single sample from the Dataset. + + Args: + sample: input image dictionary + + Returns: + preprocessed sample + """ + sample["image"] = sample["image"].float() + sample["image"] = self.norm(sample["image"]) + return sample + + def prepare_data(self) -> None: + """Make sure that the dataset is downloaded. + + This method is only called once per run. + """ + EuroSAT(self.root_dir) + + def setup(self, stage: Optional[str] = None) -> None: + """Initialize the main ``Dataset`` objects. + + This method is called once per GPU per run. + + Args: + stage: stage to set up + """ + transforms = Compose([self.preprocess]) + + self.train_dataset = EuroSAT(self.root_dir, "train", transforms=transforms) + self.val_dataset = EuroSAT(self.root_dir, "val", transforms=transforms) + self.test_dataset = EuroSAT(self.root_dir, "test", transforms=transforms) + + def train_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for training. + + Returns: + training data loader + """ + return DataLoader( + self.train_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=True, + ) + + def val_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for validation. + + Returns: + validation data loader + """ + return DataLoader( + self.val_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) + + def test_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for testing. + + Returns: + testing data loader + """ + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) diff --git a/torchgeo/datamodules/fair1m.py b/torchgeo/datamodules/fair1m.py new file mode 100644 index 000000000..15a8cbfca --- /dev/null +++ b/torchgeo/datamodules/fair1m.py @@ -0,0 +1,132 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +"""FAIR1M datamodule.""" + +from typing import Any, Dict, List, Optional + +import pytorch_lightning as pl +import torch +from torch import Tensor +from torch.utils.data import DataLoader +from torchvision.transforms import Compose + +from ..datasets import FAIR1M +from .utils import dataset_split + +# https://github.com/pytorch/pytorch/issues/60979 +# https://github.com/pytorch/pytorch/pull/61045 +DataLoader.__module__ = "torch.utils.data" + + +def collate_fn(batch: List[Dict[str, Tensor]]) -> Dict[str, Any]: + """Custom object detection collate fn to handle variable number of boxes. + + Args: + batch: list of sample dicts return by dataset + Returns: + batch dict output + """ + output: Dict[str, Any] = {} + output["image"] = torch.stack([sample["image"] for sample in batch]) + output["boxes"] = [sample["boxes"] for sample in batch] + return output + + +class FAIR1MDataModule(pl.LightningDataModule): + """LightningDataModule implementation for the FAIR1M dataset.""" + + def __init__( + self, + root_dir: str, + batch_size: int = 64, + num_workers: int = 0, + val_split_pct: float = 0.2, + test_split_pct: float = 0.2, + **kwargs: Any, + ) -> None: + """Initialize a LightningDataModule for FAIR1M based DataLoaders. + + Args: + root_dir: The ``root`` arugment to pass to the FAIR1M Dataset classes + batch_size: The batch size to use in all created DataLoaders + num_workers: The number of workers to use in all created DataLoaders + val_split_pct: What percentage of the dataset to use as a validation set + test_split_pct: What percentage of the dataset to use as a test set + """ + super().__init__() # type: ignore[no-untyped-call] + self.root_dir = root_dir + self.batch_size = batch_size + self.num_workers = num_workers + self.val_split_pct = val_split_pct + self.test_split_pct = test_split_pct + + def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single sample from the Dataset. + + Args: + sample: input image dictionary + + Returns: + preprocessed sample + """ + sample["image"] = sample["image"].float() + sample["image"] /= 255.0 + return sample + + def setup(self, stage: Optional[str] = None) -> None: + """Initialize the main ``Dataset`` objects. + + This method is called once per GPU per run. + + Args: + stage: stage to set up + """ + transforms = Compose([self.preprocess]) + + dataset = FAIR1M(self.root_dir, transforms=transforms) + self.train_dataset, self.val_dataset, self.test_dataset = dataset_split( + dataset, val_pct=self.val_split_pct, test_pct=self.test_split_pct + ) + + def train_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for training. + + Returns: + training data loader + """ + return DataLoader( + self.train_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=True, + collate_fn=collate_fn, + ) + + def val_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for validation. + + Returns: + validation data loader + """ + return DataLoader( + self.val_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + collate_fn=collate_fn, + ) + + def test_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for testing. 
+ + Returns: + testing data loader + """ + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + collate_fn=collate_fn, + ) diff --git a/torchgeo/datamodules/landcoverai.py b/torchgeo/datamodules/landcoverai.py new file mode 100644 index 000000000..95256dffe --- /dev/null +++ b/torchgeo/datamodules/landcoverai.py @@ -0,0 +1,122 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""LandCover.ai datamodule.""" + +from typing import Any, Dict, Optional + +import pytorch_lightning as pl +from torch.utils.data import DataLoader + +from ..datasets import LandCoverAI + +# https://github.com/pytorch/pytorch/issues/60979 +# https://github.com/pytorch/pytorch/pull/61045 +DataLoader.__module__ = "torch.utils.data" + + +class LandCoverAIDataModule(pl.LightningDataModule): + """LightningDataModule implementation for the LandCover.ai dataset. + + Uses the train/val/test splits from the dataset. + """ + + def __init__( + self, root_dir: str, batch_size: int = 64, num_workers: int = 0, **kwargs: Any + ) -> None: + """Initialize a LightningDataModule for LandCover.ai based DataLoaders. + + Args: + root_dir: The ``root`` argument to pass to the LandCover.ai Dataset classes + batch_size: The batch size to use in all created DataLoaders + num_workers: The number of workers to use in all created DataLoaders + """ + super().__init__() # type: ignore[no-untyped-call] + self.root_dir = root_dir + self.batch_size = batch_size + self.num_workers = num_workers + + def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single sample from the Dataset. + + Args: + sample: dictionary containing image and mask + + Returns: + preprocessed sample + """ + sample["image"] = sample["image"] / 255.0 + + sample["image"] = sample["image"].float() + sample["mask"] = sample["mask"].float().unsqueeze(0) + 1 + + return sample + + def prepare_data(self) -> None: + """Make sure that the dataset is downloaded. + + This method is only called once per run. + """ + _ = LandCoverAI(self.root_dir, download=False, checksum=False) + + def setup(self, stage: Optional[str] = None) -> None: + """Initialize the main ``Dataset`` objects. + + This method is called once per GPU per run. + + Args: + stage: stage to set up + """ + train_transforms = self.preprocess + val_test_transforms = self.preprocess + + self.train_dataset = LandCoverAI( + self.root_dir, split="train", transforms=train_transforms + ) + + self.val_dataset = LandCoverAI( + self.root_dir, split="val", transforms=val_test_transforms + ) + + self.test_dataset = LandCoverAI( + self.root_dir, split="test", transforms=val_test_transforms + ) + + def train_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for training. + + Returns: + training data loader + """ + return DataLoader( + self.train_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=True, + ) + + def val_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for validation. + + Returns: + validation data loader + """ + return DataLoader( + self.val_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) + + def test_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for testing.
+ + Returns: + testing data loader + """ + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) diff --git a/torchgeo/datamodules/loveda.py b/torchgeo/datamodules/loveda.py new file mode 100644 index 000000000..4aeae5323 --- /dev/null +++ b/torchgeo/datamodules/loveda.py @@ -0,0 +1,129 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""LoveDA datamodule.""" + +from typing import Any, Dict, List, Optional + +import pytorch_lightning as pl +from torch.utils.data import DataLoader + +from ..datasets import LoveDA + +# https://github.com/pytorch/pytorch/issues/60979 +# https://github.com/pytorch/pytorch/pull/61045 +DataLoader.__module__ = "torch.utils.data" + + +class LoveDADataModule(pl.LightningDataModule): + """LightningDataModule implementation for the LoveDA dataset. + + Uses the train/val/test splits from the dataset. + """ + + def __init__( + self, + root_dir: str, + scene: List[str], + batch_size: int = 32, + num_workers: int = 0, + **kwargs: Any, + ) -> None: + """Initialize a LightningDataModule for LoveDA based DataLoaders. + + Args: + root_dir: The ``root`` argument to pass to LoveDA Dataset classes + scene: specify whether to load only 'urban', only 'rural' or both + batch_size: The batch size to use in all created DataLoaders + num_workers: The number of workers to use in all created DataLoaders + """ + super().__init__() # type: ignore[no-untyped-call] + self.root_dir = root_dir + self.scene = scene + self.batch_size = batch_size + self.num_workers = num_workers + + def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single sample from the Dataset. + + Args: + sample: dictionary containing image and mask + + Returns: + preprocessed sample + """ + sample["image"] = sample["image"] / 255.0 + + return sample + + def prepare_data(self) -> None: + """Make sure that the dataset is downloaded. + + This method is only called once per run. + """ + _ = LoveDA(self.root_dir, scene=self.scene, download=False, checksum=False) + + def setup(self, stage: Optional[str] = None) -> None: + """Initialize the main ``Dataset`` objects. + + This method is called once per GPU per run. + + Args: + stage: stage to set up + """ + train_transforms = self.preprocess + val_test_transforms = self.preprocess + + self.train_dataset = LoveDA( + self.root_dir, split="train", scene=self.scene, transforms=train_transforms + ) + + self.val_dataset = LoveDA( + self.root_dir, split="val", scene=self.scene, transforms=val_test_transforms + ) + + self.test_dataset = LoveDA( + self.root_dir, + split="test", + scene=self.scene, + transforms=val_test_transforms, + ) + + def train_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for training. + + Returns: + training data loader + """ + return DataLoader( + self.train_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=True, + ) + + def val_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for validation. + + Returns: + validation data loader + """ + return DataLoader( + self.val_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) + + def test_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for testing. 
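# --- Editor's note: a hypothetical usage sketch (not part of the diff); the
# root path is a placeholder and LoveDA is assumed to be downloaded already.
from torchgeo.datamodules import LoveDADataModule

dm = LoveDADataModule(root_dir="data/loveda", scene=["urban", "rural"], batch_size=16)
dm.prepare_data()
dm.setup()
batch = next(iter(dm.train_dataloader()))  # images scaled to [0, 1] by preprocess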
+ + Returns: + testing data loader + """ + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) diff --git a/torchgeo/datamodules/naip.py b/torchgeo/datamodules/naip.py new file mode 100644 index 000000000..b00d142ed --- /dev/null +++ b/torchgeo/datamodules/naip.py @@ -0,0 +1,161 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""National Agriculture Imagery Program (NAIP) datamodule.""" + +from typing import Any, Dict, Optional + +import pytorch_lightning as pl +from torch.utils.data import DataLoader + +from ..datasets import NAIP, BoundingBox, Chesapeake13, stack_samples +from ..samplers.batch import RandomBatchGeoSampler +from ..samplers.single import GridGeoSampler + +# https://github.com/pytorch/pytorch/issues/60979 +# https://github.com/pytorch/pytorch/pull/61045 +DataLoader.__module__ = "torch.utils.data" + + +class NAIPChesapeakeDataModule(pl.LightningDataModule): + """LightningDataModule implementation for the NAIP and Chesapeake datasets. + + Uses the train/val/test splits from the dataset. + """ + + # TODO: tune these hyperparams + length = 1000 + stride = 128 + + def __init__( + self, + naip_root_dir: str, + chesapeake_root_dir: str, + batch_size: int = 64, + num_workers: int = 0, + patch_size: int = 256, + **kwargs: Any, + ) -> None: + """Initialize a LightningDataModule for NAIP and Chesapeake based DataLoaders. + + Args: + naip_root_dir: directory containing NAIP data + chesapeake_root_dir: directory containing Chesapeake data + batch_size: The batch size to use in all created DataLoaders + num_workers: The number of workers to use in all created DataLoaders + patch_size: size of patches to sample + """ + super().__init__() # type: ignore[no-untyped-call] + self.naip_root_dir = naip_root_dir + self.chesapeake_root_dir = chesapeake_root_dir + self.batch_size = batch_size + self.num_workers = num_workers + self.patch_size = patch_size + + def naip_transform(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single sample from the NAIP Dataset. + + Args: + sample: NAIP image dictionary + + Returns: + preprocessed NAIP data + """ + sample["image"] = sample["image"] / 255.0 + sample["image"] = sample["image"].float() + return sample + + def chesapeake_transform(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single sample from the Chesapeake Dataset. + + Args: + sample: Chesapeake mask dictionary + + Returns: + preprocessed Chesapeake data + """ + sample["mask"] = sample["mask"].long()[0] + return sample + + def prepare_data(self) -> None: + """Make sure that the dataset is downloaded. + + This method is only called once per run. + """ + Chesapeake13(self.chesapeake_root_dir, download=False, checksum=False) + + def setup(self, stage: Optional[str] = None) -> None: + """Initialize the main ``Dataset`` objects. + + This method is called once per GPU per run. 
+ + Args: + stage: stage to set up + """ + # TODO: these transforms will be applied independently, this won't work if we + # add things like random horizontal flip + chesapeake = Chesapeake13( + self.chesapeake_root_dir, transforms=self.chesapeake_transform + ) + naip = NAIP( + self.naip_root_dir, + chesapeake.crs, + chesapeake.res, + transforms=self.naip_transform, + ) + self.dataset = chesapeake & naip + + # TODO: figure out better train/val/test split + roi = self.dataset.bounds + midx = roi.minx + (roi.maxx - roi.minx) / 2 + midy = roi.miny + (roi.maxy - roi.miny) / 2 + train_roi = BoundingBox(roi.minx, midx, roi.miny, roi.maxy, roi.mint, roi.maxt) + val_roi = BoundingBox(midx, roi.maxx, roi.miny, midy, roi.mint, roi.maxt) + test_roi = BoundingBox(roi.minx, roi.maxx, midy, roi.maxy, roi.mint, roi.maxt) + + self.train_sampler = RandomBatchGeoSampler( + naip, self.patch_size, self.batch_size, self.length, train_roi + ) + self.val_sampler = GridGeoSampler(naip, self.patch_size, self.stride, val_roi) + self.test_sampler = GridGeoSampler(naip, self.patch_size, self.stride, test_roi) + + def train_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for training. + + Returns: + training data loader + """ + return DataLoader( + self.dataset, + batch_sampler=self.train_sampler, + num_workers=self.num_workers, + collate_fn=stack_samples, + ) + + def val_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for validation. + + Returns: + validation data loader + """ + return DataLoader( + self.dataset, + batch_size=self.batch_size, + sampler=self.val_sampler, + num_workers=self.num_workers, + collate_fn=stack_samples, + ) + + def test_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for testing. + + Returns: + testing data loader + """ + return DataLoader( + self.dataset, + batch_size=self.batch_size, + sampler=self.test_sampler, + num_workers=self.num_workers, + collate_fn=stack_samples, + ) diff --git a/torchgeo/datamodules/nasa_marine_debris.py b/torchgeo/datamodules/nasa_marine_debris.py new file mode 100644 index 000000000..e6337e9fb --- /dev/null +++ b/torchgeo/datamodules/nasa_marine_debris.py @@ -0,0 +1,140 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""NASA Marine Debris datamodule.""" + +from typing import Any, Dict, List, Optional + +import pytorch_lightning as pl +import torch +from torch import Tensor +from torch.utils.data import DataLoader +from torchvision.transforms import Compose + +from ..datasets import NASAMarineDebris +from .utils import dataset_split + +# https://github.com/pytorch/pytorch/issues/60979 +# https://github.com/pytorch/pytorch/pull/61045 +DataLoader.__module__ = "torch.utils.data" + + +def collate_fn(batch: List[Dict[str, Tensor]]) -> Dict[str, Any]: + """Custom object detection collate fn to handle variable boxes.
+ + Args: + batch: list of sample dicts returned by dataset + + Returns: + batch dict output + """ + output: Dict[str, Any] = {} + output["image"] = torch.stack([sample["image"] for sample in batch]) + output["boxes"] = [sample["boxes"] for sample in batch] + return output + + +class NASAMarineDebrisDataModule(pl.LightningDataModule): + """LightningDataModule implementation for the NASA Marine Debris dataset.""" + + def __init__( + self, + root_dir: str, + batch_size: int = 64, + num_workers: int = 0, + val_split_pct: float = 0.2, + test_split_pct: float = 0.2, + **kwargs: Any, + ) -> None: + """Initialize a LightningDataModule for NASA Marine Debris based DataLoaders. + + Args: + root_dir: The ``root`` argument to pass to the Dataset class + batch_size: The batch size to use in all created DataLoaders + num_workers: The number of workers to use in all created DataLoaders + val_split_pct: What percentage of the dataset to use as a validation set + test_split_pct: What percentage of the dataset to use as a test set + """ + super().__init__() # type: ignore[no-untyped-call] + self.root_dir = root_dir + self.batch_size = batch_size + self.num_workers = num_workers + self.val_split_pct = val_split_pct + self.test_split_pct = test_split_pct + + def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single sample from the Dataset. + + Args: + sample: input image dictionary + + Returns: + preprocessed sample + """ + sample["image"] = sample["image"].float() + sample["image"] /= 255.0 + return sample + + def prepare_data(self) -> None: + """Make sure that the dataset is downloaded. + + This method is only called once per run. + """ + NASAMarineDebris(self.root_dir, checksum=False) + + def setup(self, stage: Optional[str] = None) -> None: + """Initialize the main ``Dataset`` objects. + + This method is called once per GPU per run. + + Args: + stage: stage to set up + """ + transforms = Compose([self.preprocess]) + + dataset = NASAMarineDebris(self.root_dir, transforms=transforms) + self.train_dataset, self.val_dataset, self.test_dataset = dataset_split( + dataset, val_pct=self.val_split_pct, test_pct=self.test_split_pct + ) + + def train_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for training. + + Returns: + training data loader + """ + return DataLoader( + self.train_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=True, + collate_fn=collate_fn, + ) + + def val_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for validation. + + Returns: + validation data loader + """ + return DataLoader( + self.val_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + collate_fn=collate_fn, + ) + + def test_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for testing. + + Returns: + testing data loader + """ + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + collate_fn=collate_fn, + ) diff --git a/torchgeo/datamodules/oscd.py b/torchgeo/datamodules/oscd.py new file mode 100644 index 000000000..f77f95310 --- /dev/null +++ b/torchgeo/datamodules/oscd.py @@ -0,0 +1,214 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License.
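# --- Editor's note: torchgeo/datamodules/utils.py is not shown in this diff,
# yet several modules above import dataset_split from it. The following is a
# plausible reconstruction inferred from the call sites (2-way split when
# test_pct is omitted, 3-way otherwise), not the verbatim implementation.
from typing import Any, List, Optional

from torch.utils.data import Dataset, Subset, random_split


def dataset_split(
    dataset: Dataset[Any], val_pct: float, test_pct: Optional[float] = None
) -> List[Subset[Any]]:
    """Split a dataset into train/val(/test) subsets by fraction."""
    length = len(dataset)  # type: ignore[arg-type]
    val_length = int(length * val_pct)
    if test_pct is None:
        return random_split(dataset, [length - val_length, val_length])
    test_length = int(length * test_pct)
    train_length = length - val_length - test_length
    return random_split(dataset, [train_length, val_length, test_length])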
+ +"""OSCD datamodule.""" + +from typing import Any, Dict, List, Optional, Tuple + +import kornia.augmentation as K +import pytorch_lightning as pl +import torch +from einops import repeat +from torch.utils.data import DataLoader, Dataset +from torch.utils.data._utils.collate import default_collate +from torchvision.transforms import Compose, Normalize + +from ..datasets import OSCD +from .utils import dataset_split + + +class OSCDDataModule(pl.LightningDataModule): + """LightningDataModule implementation for the OSCD dataset. + + Uses the train/test splits from the dataset and further splits + the train split into train/val splits. + + .. versionadded: 0.2 + """ + + band_means = torch.tensor( # type: ignore[attr-defined] + [ + 1583.0741, + 1374.3202, + 1294.1616, + 1325.6158, + 1478.7408, + 1933.0822, + 2166.0608, + 2076.4868, + 2306.0652, + 690.9814, + 16.2360, + 2080.3347, + 1524.6930, + ] + ) + + band_stds = torch.tensor( # type: ignore[attr-defined] + [ + 52.1937, + 83.4168, + 105.6966, + 151.1401, + 147.4615, + 115.9289, + 123.1974, + 114.6483, + 141.4530, + 73.2758, + 4.8368, + 213.4821, + 179.4793, + ] + ) + + def __init__( + self, + root_dir: str, + bands: str = "all", + train_batch_size: int = 32, + num_workers: int = 0, + val_split_pct: float = 0.2, + patch_size: Tuple[int, int] = (64, 64), + num_patches_per_tile: int = 32, + **kwargs: Any, + ) -> None: + """Initialize a LightningDataModule for OSCD based DataLoaders. + + Args: + root_dir: The ``root`` arugment to pass to the OSCD Dataset classes + bands: "rgb" or "all" + train_batch_size: The batch size used in the train DataLoader + (val_batch_size == test_batch_size == 1) + num_workers: The number of workers to use in all created DataLoaders + val_split_pct: What percentage of the dataset to use as a validation set + patch_size: Size of random patch from image and mask (height, width) + num_patches_per_tile: number of random patches per sample + """ + super().__init__() # type: ignore[no-untyped-call] + self.root_dir = root_dir + self.bands = bands + self.train_batch_size = train_batch_size + self.num_workers = num_workers + self.val_split_pct = val_split_pct + self.patch_size = patch_size + self.num_patches_per_tile = num_patches_per_tile + + if bands == "rgb": + self.band_means = self.band_means[[3, 2, 1], None, None] + self.band_stds = self.band_stds[[3, 2, 1], None, None] + else: + self.band_means = self.band_means[:, None, None] + self.band_stds = self.band_stds[:, None, None] + + self.norm = Normalize(self.band_means, self.band_stds) + self.rcrop = K.AugmentationSequential( + K.RandomCrop(patch_size), data_keys=["input", "mask"], same_on_batch=True + ) + self.padto = K.PadTo((1280, 1280)) + + def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: + """Transform a single sample from the Dataset.""" + sample["image"] = sample["image"].float() + sample["mask"] = sample["mask"] + sample["image"] = self.norm(sample["image"]) + sample["image"] = torch.flatten( # type: ignore[attr-defined] + sample["image"], 0, 1 + ) + return sample + + def prepare_data(self) -> None: + """Make sure that the dataset is downloaded. + + This method is only called once per run. + """ + OSCD(self.root_dir, split="train", bands=self.bands, checksum=False) + + def setup(self, stage: Optional[str] = None) -> None: + """Initialize the main ``Dataset`` objects. + + This method is called once per GPU per run. 
+        """
+
+        def n_random_crop(sample: Dict[str, Any]) -> Dict[str, Any]:
+            images, masks = [], []
+            for _ in range(self.num_patches_per_tile):
+                mask = repeat(sample["mask"], "h w -> t h w", t=2).float()
+                image, mask = self.rcrop(sample["image"], mask)
+                mask = mask.squeeze()[0]
+                images.append(image.squeeze())
+                masks.append(mask.long())
+            sample["image"] = torch.stack(images)
+            sample["mask"] = torch.stack(masks)
+            return sample
+
+        def pad_to(sample: Dict[str, Any]) -> Dict[str, Any]:
+            sample["image"] = self.padto(sample["image"])[0]
+            sample["mask"] = self.padto(sample["mask"].float()).long()[0, 0]
+            return sample
+
+        train_transforms = Compose([self.preprocess, n_random_crop])
+        # for testing and validation we pad all inputs to a fixed size to avoid issues
+        # with the upsampling paths in encoder-decoder architectures
+        test_transforms = Compose([self.preprocess, pad_to])
+
+        train_dataset = OSCD(
+            self.root_dir, split="train", bands=self.bands, transforms=train_transforms
+        )
+
+        self.train_dataset: Dataset[Any]
+        self.val_dataset: Dataset[Any]
+
+        if self.val_split_pct > 0.0:
+            val_dataset = OSCD(
+                self.root_dir,
+                split="train",
+                bands=self.bands,
+                transforms=test_transforms,
+            )
+            self.train_dataset, self.val_dataset, _ = dataset_split(
+                train_dataset, val_pct=self.val_split_pct, test_pct=0.0
+            )
+            # point the val subset at a copy of the dataset that applies the
+            # test-time transforms instead of the random-crop training transforms
+            self.val_dataset.dataset = val_dataset
+        else:
+            self.train_dataset = train_dataset
+            self.val_dataset = train_dataset
+
+        self.test_dataset = OSCD(
+            self.root_dir, split="test", bands=self.bands, transforms=test_transforms
+        )
+
+    def train_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for training."""
+
+        def collate_wrapper(batch: List[Dict[str, Any]]) -> Dict[str, Any]:
+            r_batch: Dict[str, Any] = default_collate(  # type: ignore[no-untyped-call]
+                batch
+            )
+            r_batch["image"] = torch.flatten(  # type: ignore[attr-defined]
+                r_batch["image"], 0, 1
+            )
+            r_batch["mask"] = torch.flatten(  # type: ignore[attr-defined]
+                r_batch["mask"], 0, 1
+            )
+            return r_batch
+
+        return DataLoader(
+            self.train_dataset,
+            batch_size=self.train_batch_size,
+            num_workers=self.num_workers,
+            collate_fn=collate_wrapper,
+            shuffle=True,
+        )
+
+    def val_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for validation."""
+        return DataLoader(
+            self.val_dataset, batch_size=1, num_workers=self.num_workers, shuffle=False
+        )
+
+    def test_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for testing."""
+        return DataLoader(
+            self.test_dataset, batch_size=1, num_workers=self.num_workers, shuffle=False
+        )
diff --git a/torchgeo/datamodules/potsdam.py b/torchgeo/datamodules/potsdam.py
new file mode 100644
index 000000000..0ddbd2dba
--- /dev/null
+++ b/torchgeo/datamodules/potsdam.py
@@ -0,0 +1,121 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""Potsdam datamodule."""
+
+from typing import Any, Dict, Optional
+
+import pytorch_lightning as pl
+from torch.utils.data import DataLoader, Dataset
+from torchvision.transforms import Compose
+
+from ..datasets import Potsdam2D
+from .utils import dataset_split
+
+
+class Potsdam2DDataModule(pl.LightningDataModule):
+    """LightningDataModule implementation for the Potsdam2D dataset.
+
+    Uses the train/test splits from the dataset.
+
+    .. versionadded:: 0.2
+    """
+
+    def __init__(
+        self,
+        root_dir: str,
+        batch_size: int = 64,
+        num_workers: int = 0,
+        val_split_pct: float = 0.2,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize a LightningDataModule for Potsdam2D based DataLoaders.
+
+        Args:
+            root_dir: The ``root`` argument to pass to the Potsdam2D Dataset classes
+            batch_size: The batch size to use in all created DataLoaders
+            num_workers: The number of workers to use in all created DataLoaders
+            val_split_pct: What percentage of the dataset to use as a validation set
+        """
+        super().__init__()  # type: ignore[no-untyped-call]
+        self.root_dir = root_dir
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+        self.val_split_pct = val_split_pct
+
+    def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]:
+        """Transform a single sample from the Dataset.
+
+        Args:
+            sample: input image dictionary
+
+        Returns:
+            preprocessed sample
+        """
+        sample["image"] = sample["image"].float()
+        sample["image"] /= 255.0
+        return sample
+
+    def setup(self, stage: Optional[str] = None) -> None:
+        """Initialize the main ``Dataset`` objects.
+
+        This method is called once per GPU per run.
+
+        Args:
+            stage: stage to set up
+        """
+        transforms = Compose([self.preprocess])
+
+        dataset = Potsdam2D(self.root_dir, "train", transforms=transforms)
+
+        self.train_dataset: Dataset[Any]
+        self.val_dataset: Dataset[Any]
+
+        if self.val_split_pct > 0.0:
+            self.train_dataset, self.val_dataset, _ = dataset_split(
+                dataset, val_pct=self.val_split_pct, test_pct=0.0
+            )
+        else:
+            self.train_dataset = dataset
+            self.val_dataset = dataset
+
+        self.test_dataset = Potsdam2D(self.root_dir, "test", transforms=transforms)
+
+    def train_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for training.
+
+        Returns:
+            training data loader
+        """
+        return DataLoader(
+            self.train_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=True,
+        )
+
+    def val_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for validation.
+
+        Returns:
+            validation data loader
+        """
+        return DataLoader(
+            self.val_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
+
+    def test_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for testing.
+
+        Returns:
+            testing data loader
+        """
+        return DataLoader(
+            self.test_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
diff --git a/torchgeo/datamodules/resisc45.py b/torchgeo/datamodules/resisc45.py
new file mode 100644
index 000000000..844ee0968
--- /dev/null
+++ b/torchgeo/datamodules/resisc45.py
@@ -0,0 +1,123 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""RESISC45 datamodule."""
+
+from typing import Any, Dict, Optional
+
+import pytorch_lightning as pl
+import torch
+from torch.utils.data import DataLoader
+from torchvision.transforms import Compose, Normalize
+
+from ..datasets import RESISC45
+
+# https://github.com/pytorch/pytorch/issues/60979
+# https://github.com/pytorch/pytorch/pull/61045
+DataLoader.__module__ = "torch.utils.data"
+
+
+class RESISC45DataModule(pl.LightningDataModule):
+    """LightningDataModule implementation for the RESISC45 dataset.
+
+    Uses the train/val/test splits from the dataset.
+    """
+
+    band_means = torch.tensor(  # type: ignore[attr-defined]
+        [0.36801773, 0.38097873, 0.343583]
+    )
+
+    band_stds = torch.tensor(  # type: ignore[attr-defined]
+        [0.14540215, 0.13558227, 0.13203649]
+    )
+
+    def __init__(
+        self, root_dir: str, batch_size: int = 64, num_workers: int = 0, **kwargs: Any
+    ) -> None:
+        """Initialize a LightningDataModule for RESISC45 based DataLoaders.
+
+        Args:
+            root_dir: The ``root`` argument to pass to the RESISC45 Dataset classes
+            batch_size: The batch size to use in all created DataLoaders
+            num_workers: The number of workers to use in all created DataLoaders
+        """
+        super().__init__()  # type: ignore[no-untyped-call]
+        self.root_dir = root_dir
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+
+        self.norm = Normalize(self.band_means, self.band_stds)
+
+    def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]:
+        """Transform a single sample from the Dataset.
+
+        Args:
+            sample: input image dictionary
+
+        Returns:
+            preprocessed sample
+        """
+        sample["image"] = sample["image"].float()
+        sample["image"] /= 255.0
+        sample["image"] = self.norm(sample["image"])
+        return sample
+
+    def prepare_data(self) -> None:
+        """Make sure that the dataset is downloaded.
+
+        This method is only called once per run.
+        """
+        RESISC45(self.root_dir, checksum=False)
+
+    def setup(self, stage: Optional[str] = None) -> None:
+        """Initialize the main ``Dataset`` objects.
+
+        This method is called once per GPU per run.
+
+        Args:
+            stage: stage to set up
+        """
+        transforms = Compose([self.preprocess])
+
+        self.train_dataset = RESISC45(self.root_dir, "train", transforms=transforms)
+        self.val_dataset = RESISC45(self.root_dir, "val", transforms=transforms)
+        self.test_dataset = RESISC45(self.root_dir, "test", transforms=transforms)
+
+    def train_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for training.
+
+        Returns:
+            training data loader
+        """
+        return DataLoader(
+            self.train_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=True,
+        )
+
+    def val_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for validation.
+
+        Returns:
+            validation data loader
+        """
+        return DataLoader(
+            self.val_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
+
+    def test_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for testing.
+
+        Returns:
+            testing data loader
+        """
+        return DataLoader(
+            self.test_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
diff --git a/torchgeo/datamodules/sen12ms.py b/torchgeo/datamodules/sen12ms.py
new file mode 100644
index 000000000..cfe5900c4
--- /dev/null
+++ b/torchgeo/datamodules/sen12ms.py
@@ -0,0 +1,202 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""SEN12MS datamodule."""
+
+from typing import Any, Dict, Optional
+
+import pytorch_lightning as pl
+import torch
+from sklearn.model_selection import GroupShuffleSplit
+from torch.utils.data import DataLoader, Subset
+
+from ..datasets import SEN12MS
+
+# https://github.com/pytorch/pytorch/issues/60979
+# https://github.com/pytorch/pytorch/pull/61045
+DataLoader.__module__ = "torch.utils.data"
+
+
+class SEN12MSDataModule(pl.LightningDataModule):
+    """LightningDataModule implementation for the SEN12MS dataset.
+
+    Implements 80/20 geographic train/val splits and uses the test split from the
+    classification dataset definitions. See :func:`setup` for more details.
+
+    Uses the Simplified IGBP scheme defined in the 2020 Data Fusion Competition. See
+    https://arxiv.org/abs/2002.08254.
+    """
+
+    #: Mapping from the IGBP class definitions to the DFC2020 classes, taken from
+    #: the dataloader at https://github.com/lukasliebel/dfc2020_baseline.
+    DFC2020_CLASS_MAPPING = torch.tensor(  # type: ignore[attr-defined]
+        [
+            0,  # maps 0s to 0
+            1,  # maps 1s to 1
+            1,  # maps 2s to 1
+            1,  # ...
+            1,
+            1,
+            2,
+            2,
+            3,
+            3,
+            4,
+            5,
+            6,
+            7,
+            6,
+            8,
+            9,
+            10,
+        ]
+    )
+
+    def __init__(
+        self,
+        root_dir: str,
+        seed: int,
+        band_set: str = "all",
+        batch_size: int = 64,
+        num_workers: int = 0,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize a LightningDataModule for SEN12MS based DataLoaders.
+
+        Args:
+            root_dir: The ``root`` argument to pass to the SEN12MS Dataset classes
+            seed: The seed value to use when doing the sklearn based ShuffleSplit
+            band_set: The subset of S1/S2 bands to use. Options are: "all",
+                "s1", "s2-all", and "s2-reduced" where the "s2-reduced" set includes:
+                B2, B3, B4, B8, B11, and B12.
+            batch_size: The batch size to use in all created DataLoaders
+            num_workers: The number of workers to use in all created DataLoaders
+        """
+        super().__init__()  # type: ignore[no-untyped-call]
+        assert band_set in SEN12MS.BAND_SETS.keys()
+
+        self.root_dir = root_dir
+        self.seed = seed
+        self.band_set = band_set
+        self.band_indices = SEN12MS.BAND_SETS[band_set]
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+
+    def custom_transform(self, sample: Dict[str, Any]) -> Dict[str, Any]:
+        """Transform a single sample from the Dataset.
+
+        Args:
+            sample: dictionary containing image and mask
+
+        Returns:
+            preprocessed sample
+        """
+        sample["image"] = sample["image"].float()
+
+        if self.band_set == "all":
+            sample["image"][:2] = sample["image"][:2].clamp(-25, 0) / -25
+            sample["image"][2:] = sample["image"][2:].clamp(0, 10000) / 10000
+        elif self.band_set == "s1":
+            sample["image"][:2] = sample["image"][:2].clamp(-25, 0) / -25
+        else:
+            sample["image"][:] = sample["image"][:].clamp(0, 10000) / 10000
+
+        sample["mask"] = sample["mask"][0, :, :].long()
+        sample["mask"] = torch.take(  # type: ignore[attr-defined]
+            self.DFC2020_CLASS_MAPPING, sample["mask"]
+        )
+
+        return sample
+
+    def setup(self, stage: Optional[str] = None) -> None:
+        """Create the train/val/test splits based on the original Dataset objects.
+
+        The splits should be done here vs. in :func:`__init__` per the docs:
+        https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html#setup.
+
+        We split samples between train and val geographically with proportions of
+        80/20. This mimics the geographic test set split.
+
+        Args:
+            stage: stage to set up
+        """
+        season_to_int = {"winter": 0, "spring": 1000, "summer": 2000, "fall": 3000}
+
+        self.all_train_dataset = SEN12MS(
+            self.root_dir,
+            split="train",
+            bands=self.band_indices,
+            transforms=self.custom_transform,
+            checksum=False,
+        )
+
+        self.all_test_dataset = SEN12MS(
+            self.root_dir,
+            split="test",
+            bands=self.band_indices,
+            transforms=self.custom_transform,
+            checksum=False,
+        )
+
+        # A patch is a filename like: "ROIs{num}_{season}_s2_{scene_id}_p{patch_id}.tif"
+        # This patch will belong to the scene that is uniquely identified by its
+        # (season, scene_id) tuple. Because the largest scene_id is 149, we can simply
+        # give each season a large number and represent a `unique_scene_id` as
+        # `season_id + scene_id`.
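+        # For example, a file matching the pattern above such as
+        # "ROIs1868_summer_s2_146_p202.tif" belongs to scene (summer, 146),
+        # giving unique_scene_id = 2000 + 146 = 2146.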
+ scenes = [] + for scene_fn in self.all_train_dataset.ids: + parts = scene_fn.split("_") + season_id = season_to_int[parts[1]] + scene_id = int(parts[3]) + scenes.append(season_id + scene_id) + + train_indices, val_indices = next( + GroupShuffleSplit(test_size=0.2, n_splits=2, random_state=self.seed).split( + scenes, groups=scenes + ) + ) + + self.train_dataset = Subset(self.all_train_dataset, train_indices) + self.val_dataset = Subset(self.all_train_dataset, val_indices) + self.test_dataset = Subset( + self.all_test_dataset, range(len(self.all_test_dataset)) + ) + + def train_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for training. + + Returns: + training data loader + """ + return DataLoader( + self.train_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=True, + ) + + def val_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for validation. + + Returns: + validation data loader + """ + return DataLoader( + self.val_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) + + def test_dataloader(self) -> DataLoader[Any]: + """Return a DataLoader for testing. + + Returns: + testing data loader + """ + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) diff --git a/torchgeo/datamodules/so2sat.py b/torchgeo/datamodules/so2sat.py new file mode 100644 index 000000000..9f072edbf --- /dev/null +++ b/torchgeo/datamodules/so2sat.py @@ -0,0 +1,225 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""So2Sat datamodule.""" + +from typing import Any, Dict, Optional, cast + +import pytorch_lightning as pl +import torch +from torch.utils.data import DataLoader +from torchvision.transforms import Compose + +from ..datasets import So2Sat + +# https://github.com/pytorch/pytorch/issues/60979 +# https://github.com/pytorch/pytorch/pull/61045 +DataLoader.__module__ = "torch.utils.data" + + +class So2SatDataModule(pl.LightningDataModule): + """LightningDataModule implementation for the So2Sat dataset. + + Uses the train/val/test splits from the dataset. 
+    """
+
+    band_means = torch.tensor(  # type: ignore[attr-defined]
+        [
+            -3.591224256609313e-05,
+            -7.658561276843396e-06,
+            5.9373857475971184e-05,
+            2.5166231537121083e-05,
+            0.04420110659759328,
+            0.25761027084996196,
+            0.0007556743372573258,
+            0.0013503466830024448,
+            0.12375696117681859,
+            0.1092774636368323,
+            0.1010855203267882,
+            0.1142398616114001,
+            0.1592656692023089,
+            0.18147236008771792,
+            0.1745740312291377,
+            0.19501607349635292,
+            0.15428468872076637,
+            0.10905050699570007,
+        ]
+    ).reshape(18, 1, 1)
+
+    band_stds = torch.tensor(  # type: ignore[attr-defined]
+        [
+            0.17555201137417686,
+            0.17556463274968204,
+            0.45998793417834255,
+            0.455988755730148,
+            2.8559909213125763,
+            8.324800606439833,
+            2.4498757382563103,
+            1.4647352984509094,
+            0.03958795985905458,
+            0.047778262752410296,
+            0.06636616706371974,
+            0.06358874912497474,
+            0.07744387147984592,
+            0.09101635085921553,
+            0.09218466562387101,
+            0.10164581233948201,
+            0.09991773043519253,
+            0.08780632509122865,
+        ]
+    ).reshape(18, 1, 1)
+
+    # Reorders the bands to put the S2 RGB bands first, then the remainder of S2.
+    # The S1 bands (original indices 0-7) are currently dropped entirely.
+    reindex_to_rgb_first = [
+        10,
+        9,
+        8,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+    ]
+
+    def __init__(
+        self,
+        root_dir: str,
+        batch_size: int = 64,
+        num_workers: int = 0,
+        bands: str = "rgb",
+        unsupervised_mode: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize a LightningDataModule for So2Sat based DataLoaders.
+
+        Args:
+            root_dir: The ``root`` argument to pass to the So2Sat Dataset classes
+            batch_size: The batch size to use in all created DataLoaders
+            num_workers: The number of workers to use in all created DataLoaders
+            bands: Either "rgb" or "s2"
+            unsupervised_mode: Makes the train dataloader return imagery from the
+                train, val, and test sets
+        """
+        super().__init__()  # type: ignore[no-untyped-call]
+        self.root_dir = root_dir
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+        self.bands = bands
+        self.unsupervised_mode = unsupervised_mode
+
+    def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]:
+        """Transform a single sample from the Dataset.
+
+        Args:
+            sample: dictionary containing image
+
+        Returns:
+            preprocessed sample
+        """
+        # NOTE: normalization with the band statistics above is currently disabled:
+        # sample["image"] = (sample["image"] - self.band_means) / self.band_stds
+        sample["image"] = sample["image"].float()
+        sample["image"] = sample["image"][self.reindex_to_rgb_first, :, :]
+
+        if self.bands == "rgb":
+            sample["image"] = sample["image"][:3, :, :]
+
+        return sample
+
+    def prepare_data(self) -> None:
+        """Make sure that the dataset is downloaded.
+
+        This method is only called once per run.
+        """
+        So2Sat(self.root_dir, checksum=False)
+
+    def setup(self, stage: Optional[str] = None) -> None:
+        """Initialize the main ``Dataset`` objects.
+
+        This method is called once per GPU per run.
+
+        Args:
+            stage: stage to set up
+        """
+        train_transforms = Compose([self.preprocess])
+        val_test_transforms = self.preprocess
+
+        if not self.unsupervised_mode:
+            self.train_dataset = So2Sat(
+                self.root_dir, split="train", transforms=train_transforms
+            )
+
+            self.val_dataset = So2Sat(
+                self.root_dir, split="validation", transforms=val_test_transforms
+            )
+
+            self.test_dataset = So2Sat(
+                self.root_dir, split="test", transforms=val_test_transforms
+            )
+        else:
+            temp_train = So2Sat(
+                self.root_dir, split="train", transforms=train_transforms
+            )
+
+            self.val_dataset = So2Sat(
+                self.root_dir, split="validation", transforms=train_transforms
+            )
+
+            self.test_dataset = So2Sat(
+                self.root_dir, split="test", transforms=train_transforms
+            )
+
+            self.train_dataset = cast(
+                So2Sat, temp_train + self.val_dataset + self.test_dataset
+            )
+
+    def train_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for training.
+
+        Returns:
+            training data loader
+        """
+        return DataLoader(
+            self.train_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=True,
+        )
+
+    def val_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for validation.
+
+        Returns:
+            validation data loader
+        """
+        return DataLoader(
+            self.val_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
+
+    def test_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for testing.
+
+        Returns:
+            testing data loader
+        """
+        return DataLoader(
+            self.test_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
diff --git a/torchgeo/datamodules/ucmerced.py b/torchgeo/datamodules/ucmerced.py
new file mode 100644
index 000000000..69cd97733
--- /dev/null
+++ b/torchgeo/datamodules/ucmerced.py
@@ -0,0 +1,125 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""UC Merced datamodule."""
+
+from typing import Any, Dict, Optional
+
+import pytorch_lightning as pl
+import torch
+import torchvision
+from torch.utils.data import DataLoader
+from torchvision.transforms import Compose, Normalize
+
+from ..datasets import UCMerced
+
+# https://github.com/pytorch/pytorch/issues/60979
+# https://github.com/pytorch/pytorch/pull/61045
+DataLoader.__module__ = "torch.utils.data"
+
+
+class UCMercedDataModule(pl.LightningDataModule):
+    """LightningDataModule implementation for the UC Merced dataset.
+
+    Uses the train/val/test splits from the dataset.
+    """
+
+    band_means = torch.tensor([0, 0, 0])  # type: ignore[attr-defined]
+
+    band_stds = torch.tensor([1, 1, 1])  # type: ignore[attr-defined]
+
+    def __init__(
+        self, root_dir: str, batch_size: int = 64, num_workers: int = 0, **kwargs: Any
+    ) -> None:
+        """Initialize a LightningDataModule for UCMerced based DataLoaders.
+
+        Args:
+            root_dir: The ``root`` argument to pass to the UCMerced Dataset classes
+            batch_size: The batch size to use in all created DataLoaders
+            num_workers: The number of workers to use in all created DataLoaders
+        """
+        super().__init__()  # type: ignore[no-untyped-call]
+        self.root_dir = root_dir
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+
+        self.norm = Normalize(self.band_means, self.band_stds)
+
+    def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]:
+        """Transform a single sample from the Dataset.
+
+        Args:
+            sample: dictionary containing image
+
+        Returns:
+            preprocessed sample
+        """
+        sample["image"] = sample["image"].float()
+        sample["image"] /= 255.0
+        _, h, w = sample["image"].shape
+        if h != 256 or w != 256:
+            sample["image"] = torchvision.transforms.functional.resize(
+                sample["image"], size=(256, 256)
+            )
+        sample["image"] = self.norm(sample["image"])
+        return sample
+
+    def prepare_data(self) -> None:
+        """Make sure that the dataset is downloaded.
+
+        This method is only called once per run.
+        """
+        UCMerced(self.root_dir, download=False, checksum=False)
+
+    def setup(self, stage: Optional[str] = None) -> None:
+        """Initialize the main ``Dataset`` objects.
+
+        This method is called once per GPU per run.
+
+        Args:
+            stage: stage to set up
+        """
+        transforms = Compose([self.preprocess])
+
+        self.train_dataset = UCMerced(self.root_dir, "train", transforms=transforms)
+        self.val_dataset = UCMerced(self.root_dir, "val", transforms=transforms)
+        self.test_dataset = UCMerced(self.root_dir, "test", transforms=transforms)
+
+    def train_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for training.
+
+        Returns:
+            training data loader
+        """
+        return DataLoader(
+            self.train_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=True,
+        )
+
+    def val_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for validation.
+
+        Returns:
+            validation data loader
+        """
+        return DataLoader(
+            self.val_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
+
+    def test_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for testing.
+
+        Returns:
+            testing data loader
+        """
+        return DataLoader(
+            self.test_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
diff --git a/torchgeo/datamodules/utils.py b/torchgeo/datamodules/utils.py
new file mode 100644
index 000000000..ff1f571c2
--- /dev/null
+++ b/torchgeo/datamodules/utils.py
@@ -0,0 +1,33 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""Common datamodule utilities."""
+
+from typing import Any, List, Optional
+
+from torch.utils.data import Dataset, Subset, random_split
+
+
+def dataset_split(
+    dataset: Dataset[Any], val_pct: float, test_pct: Optional[float] = None
+) -> List[Subset[Any]]:
+    """Split a torch Dataset into train/val/test sets.
+
+    If ``test_pct`` is not set then only train and validation splits are returned.
+
+    Args:
+        dataset: dataset to be split into train/val or train/val/test subsets
+        val_pct: percentage of samples to be in validation set
+        test_pct: (Optional) percentage of samples to be in test set
+
+    Returns:
+        a list of the subset datasets. Either [train, val] or [train, val, test]
+    """
+    if test_pct is None:
+        val_length = int(len(dataset) * val_pct)
+        train_length = len(dataset) - val_length
+        return random_split(dataset, [train_length, val_length])
+    else:
+        val_length = int(len(dataset) * val_pct)
+        test_length = int(len(dataset) * test_pct)
+        train_length = len(dataset) - (val_length + test_length)
+        return random_split(dataset, [train_length, val_length, test_length])
diff --git a/torchgeo/datamodules/vaihingen.py b/torchgeo/datamodules/vaihingen.py
new file mode 100644
index 000000000..afc36892c
--- /dev/null
+++ b/torchgeo/datamodules/vaihingen.py
@@ -0,0 +1,121 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
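As a quick sanity check of the ``dataset_split`` helper above (a sketch; any map-style dataset with a length works):

import torch
from torch.utils.data import TensorDataset

from torchgeo.datamodules.utils import dataset_split

dataset = TensorDataset(torch.arange(100))

# With both percentages set, a 100-sample dataset splits 60/20/20.
train, val, test = dataset_split(dataset, val_pct=0.2, test_pct=0.2)
assert (len(train), len(val), len(test)) == (60, 20, 20)

# With test_pct omitted, only train/val subsets are returned (80/20 here).
train, val = dataset_split(dataset, val_pct=0.2)
assert (len(train), len(val)) == (80, 20)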
+
+"""Vaihingen datamodule."""
+
+from typing import Any, Dict, Optional
+
+import pytorch_lightning as pl
+from torch.utils.data import DataLoader, Dataset
+from torchvision.transforms import Compose
+
+from ..datasets import Vaihingen2D
+from .utils import dataset_split
+
+
+class Vaihingen2DDataModule(pl.LightningDataModule):
+    """LightningDataModule implementation for the Vaihingen2D dataset.
+
+    Uses the train/test splits from the dataset.
+
+    .. versionadded:: 0.2
+    """
+
+    def __init__(
+        self,
+        root_dir: str,
+        batch_size: int = 64,
+        num_workers: int = 0,
+        val_split_pct: float = 0.2,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize a LightningDataModule for Vaihingen2D based DataLoaders.
+
+        Args:
+            root_dir: The ``root`` argument to pass to the Vaihingen2D Dataset classes
+            batch_size: The batch size to use in all created DataLoaders
+            num_workers: The number of workers to use in all created DataLoaders
+            val_split_pct: What percentage of the dataset to use as a validation set
+        """
+        super().__init__()  # type: ignore[no-untyped-call]
+        self.root_dir = root_dir
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+        self.val_split_pct = val_split_pct
+
+    def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]:
+        """Transform a single sample from the Dataset.
+
+        Args:
+            sample: input image dictionary
+
+        Returns:
+            preprocessed sample
+        """
+        sample["image"] = sample["image"].float()
+        sample["image"] /= 255.0
+        return sample
+
+    def setup(self, stage: Optional[str] = None) -> None:
+        """Initialize the main ``Dataset`` objects.
+
+        This method is called once per GPU per run.
+
+        Args:
+            stage: stage to set up
+        """
+        transforms = Compose([self.preprocess])
+
+        dataset = Vaihingen2D(self.root_dir, "train", transforms=transforms)
+
+        self.train_dataset: Dataset[Any]
+        self.val_dataset: Dataset[Any]
+
+        if self.val_split_pct > 0.0:
+            self.train_dataset, self.val_dataset, _ = dataset_split(
+                dataset, val_pct=self.val_split_pct, test_pct=0.0
+            )
+        else:
+            self.train_dataset = dataset
+            self.val_dataset = dataset
+
+        self.test_dataset = Vaihingen2D(self.root_dir, "test", transforms=transforms)
+
+    def train_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for training.
+
+        Returns:
+            training data loader
+        """
+        return DataLoader(
+            self.train_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=True,
+        )
+
+    def val_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for validation.
+
+        Returns:
+            validation data loader
+        """
+        return DataLoader(
+            self.val_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
+
+    def test_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for testing.
+
+        Returns:
+            testing data loader
+        """
+        return DataLoader(
+            self.test_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
diff --git a/torchgeo/datamodules/xview.py b/torchgeo/datamodules/xview.py
new file mode 100644
index 000000000..a8b5e1188
--- /dev/null
+++ b/torchgeo/datamodules/xview.py
@@ -0,0 +1,121 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
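For orientation, a typical way to wire one of the datamodules above into a Lightning training loop (a sketch only: the root path is a placeholder, the dataset must already be on disk, and ``model`` stands for any ``LightningModule`` such as a torchgeo segmentation task):

import pytorch_lightning as pl

from torchgeo.datamodules import Vaihingen2DDataModule

datamodule = Vaihingen2DDataModule(
    root_dir="data/vaihingen",  # placeholder path
    batch_size=32,
    num_workers=4,
    val_split_pct=0.1,
)
trainer = pl.Trainer(max_epochs=10)
# trainer.fit(model, datamodule=datamodule)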
+
+"""xView2 datamodule."""
+
+from typing import Any, Dict, Optional
+
+import pytorch_lightning as pl
+from torch.utils.data import DataLoader, Dataset
+from torchvision.transforms import Compose
+
+from ..datasets import XView2
+from .utils import dataset_split
+
+
+class XView2DataModule(pl.LightningDataModule):
+    """LightningDataModule implementation for the xView2 dataset.
+
+    Uses the train/test splits from the dataset.
+
+    .. versionadded:: 0.2
+    """
+
+    def __init__(
+        self,
+        root_dir: str,
+        batch_size: int = 64,
+        num_workers: int = 0,
+        val_split_pct: float = 0.2,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize a LightningDataModule for xView2 based DataLoaders.
+
+        Args:
+            root_dir: The ``root`` argument to pass to the xView2 Dataset classes
+            batch_size: The batch size to use in all created DataLoaders
+            num_workers: The number of workers to use in all created DataLoaders
+            val_split_pct: What percentage of the dataset to use as a validation set
+        """
+        super().__init__()  # type: ignore[no-untyped-call]
+        self.root_dir = root_dir
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+        self.val_split_pct = val_split_pct
+
+    def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]:
+        """Transform a single sample from the Dataset.
+
+        Args:
+            sample: input image dictionary
+
+        Returns:
+            preprocessed sample
+        """
+        sample["image"] = sample["image"].float()
+        sample["image"] /= 255.0
+        return sample
+
+    def setup(self, stage: Optional[str] = None) -> None:
+        """Initialize the main ``Dataset`` objects.
+
+        This method is called once per GPU per run.
+
+        Args:
+            stage: stage to set up
+        """
+        transforms = Compose([self.preprocess])
+
+        dataset = XView2(self.root_dir, "train", transforms=transforms)
+
+        self.train_dataset: Dataset[Any]
+        self.val_dataset: Dataset[Any]
+
+        if self.val_split_pct > 0.0:
+            self.train_dataset, self.val_dataset, _ = dataset_split(
+                dataset, val_pct=self.val_split_pct, test_pct=0.0
+            )
+        else:
+            self.train_dataset = dataset
+            self.val_dataset = dataset
+
+        self.test_dataset = XView2(self.root_dir, "test", transforms=transforms)
+
+    def train_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for training.
+
+        Returns:
+            training data loader
+        """
+        return DataLoader(
+            self.train_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=True,
+        )
+
+    def val_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for validation.
+
+        Returns:
+            validation data loader
+        """
+        return DataLoader(
+            self.val_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
+
+    def test_dataloader(self) -> DataLoader[Any]:
+        """Return a DataLoader for testing.
+ + Returns: + testing data loader + """ + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=False, + ) diff --git a/torchgeo/datasets/__init__.py b/torchgeo/datasets/__init__.py index 0f5e24b38..7e3cf7811 100644 --- a/torchgeo/datasets/__init__.py +++ b/torchgeo/datasets/__init__.py @@ -5,7 +5,7 @@ from .advance import ADVANCE from .benin_cashews import BeninSmallHolderCashews -from .bigearthnet import BigEarthNet, BigEarthNetDataModule +from .bigearthnet import BigEarthNet from .cbf import CanadianBuildingFootprints from .cdl import CDL from .chesapeake import ( @@ -13,7 +13,6 @@ from .chesapeake import ( Chesapeake7, Chesapeake13, ChesapeakeCVPR, - ChesapeakeCVPRDataModule, ChesapeakeDC, ChesapeakeDE, ChesapeakeMD, @@ -22,12 +21,12 @@ from .chesapeake import ( ChesapeakeVA, ChesapeakeWV, ) -from .cowc import COWC, COWCCounting, COWCCountingDataModule, COWCDetection +from .cowc import COWC, COWCCounting, COWCDetection from .cv4a_kenya_crop_type import CV4AKenyaCropType -from .cyclone import CycloneDataModule, TropicalCycloneWindEstimation -from .etci2021 import ETCI2021, ETCI2021DataModule -from .eurosat import EuroSAT, EuroSATDataModule -from .fair1m import FAIR1M, FAIR1MDataModule +from .cyclone import TropicalCycloneWindEstimation +from .etci2021 import ETCI2021 +from .eurosat import EuroSAT +from .fair1m import FAIR1M from .geo import ( GeoDataset, IntersectionDataset, @@ -39,7 +38,7 @@ from .geo import ( ) from .gid15 import GID15 from .idtrees import IDTReeS -from .landcoverai import LandCoverAI, LandCoverAIDataModule +from .landcoverai import LandCoverAI from .landsat import ( Landsat, Landsat1, @@ -54,23 +53,23 @@ from .landsat import ( Landsat9, ) from .levircd import LEVIRCDPlus -from .loveda import LoveDA, LoveDADataModule -from .naip import NAIP, NAIPChesapeakeDataModule -from .nasa_marine_debris import NASAMarineDebris, NASAMarineDebrisDataModule +from .loveda import LoveDA +from .naip import NAIP +from .nasa_marine_debris import NASAMarineDebris from .nwpu import VHR10 -from .oscd import OSCD, OSCDDataModule +from .oscd import OSCD from .patternnet import PatternNet -from .potsdam import Potsdam2D, Potsdam2DDataModule -from .resisc45 import RESISC45, RESISC45DataModule +from .potsdam import Potsdam2D +from .resisc45 import RESISC45 from .seco import SeasonalContrastS2 -from .sen12ms import SEN12MS, SEN12MSDataModule +from .sen12ms import SEN12MS from .sentinel import Sentinel, Sentinel2 -from .so2sat import So2Sat, So2SatDataModule +from .so2sat import So2Sat from .spacenet import SpaceNet, SpaceNet1, SpaceNet2, SpaceNet4, SpaceNet5, SpaceNet7 -from .ucmerced import UCMerced, UCMercedDataModule +from .ucmerced import UCMerced from .utils import BoundingBox, concat_samples, merge_samples, stack_samples -from .vaihingen import Vaihingen2D, Vaihingen2DDataModule -from .xview import XView2, XView2DataModule +from .vaihingen import Vaihingen2D +from .xview import XView2 from .zuericrop import ZueriCrop __all__ = ( @@ -88,7 +87,6 @@ __all__ = ( "ChesapeakeVA", "ChesapeakeWV", "ChesapeakeCVPR", - "ChesapeakeCVPRDataModule", "Landsat", "Landsat1", "Landsat2", @@ -101,46 +99,32 @@ __all__ = ( "Landsat8", "Landsat9", "NAIP", - "NAIPChesapeakeDataModule", "Sentinel", "Sentinel2", # VisionDataset "ADVANCE", "BeninSmallHolderCashews", "BigEarthNet", - "BigEarthNetDataModule", "COWC", "COWCCounting", "COWCDetection", - "COWCCountingDataModule", "CV4AKenyaCropType", "ETCI2021", - "ETCI2021DataModule", "EuroSAT", - 
"EuroSATDataModule", "FAIR1M", - "FAIR1MDataModule", "GID15", "IDTReeS", "LandCoverAI", - "LandCoverAIDataModule", "LEVIRCDPlus", "LoveDA", - "LoveDADataModule", "NASAMarineDebris", - "NASAMarineDebrisDataModule", "OSCD", - "OSCDDataModule", "PatternNet", "Potsdam2D", - "Potsdam2DDataModule", "RESISC45", - "RESISC45DataModule", "SeasonalContrastS2", "SEN12MS", - "SEN12MSDataModule", "So2Sat", - "So2SatDataModule", "SpaceNet", "SpaceNet1", "SpaceNet2", @@ -148,14 +132,10 @@ __all__ = ( "SpaceNet5", "SpaceNet7", "TropicalCycloneWindEstimation", - "CycloneDataModule", "UCMerced", - "UCMercedDataModule", "Vaihingen2D", - "Vaihingen2DDataModule", "VHR10", "XView2", - "XView2DataModule", "ZueriCrop", # Base classes "GeoDataset", diff --git a/torchgeo/datasets/bigearthnet.py b/torchgeo/datasets/bigearthnet.py index 409e0b822..48d247665 100644 --- a/torchgeo/datasets/bigearthnet.py +++ b/torchgeo/datasets/bigearthnet.py @@ -6,24 +6,17 @@ import glob import json import os -from typing import Any, Callable, Dict, List, Optional +from typing import Callable, Dict, List, Optional import numpy as np -import pytorch_lightning as pl import rasterio import torch from rasterio.enums import Resampling from torch import Tensor -from torch.utils.data import DataLoader -from torchvision.transforms import Compose from .geo import VisionDataset from .utils import download_url, extract_archive, sort_sentinel2_bands -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" - class BigEarthNet(VisionDataset): """BigEarthNet dataset. @@ -511,164 +504,3 @@ class BigEarthNet(VisionDataset): """ if not filepath.endswith(".csv"): extract_archive(filepath) - - -class BigEarthNetDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the BigEarthNet dataset. - - Uses the train/val/test splits from the dataset. - """ - - # (VV, VH, B01, B02, B03, B04, B05, B06, B07, B08, B8A, B09, B11, B12) - # min/max band statistics computed on 100k random samples - band_mins_raw = torch.tensor( # type: ignore[attr-defined] - [-70.0, -72.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0] - ) - band_maxs_raw = torch.tensor( # type: ignore[attr-defined] - [ - 31.0, - 35.0, - 18556.0, - 20528.0, - 18976.0, - 17874.0, - 16611.0, - 16512.0, - 16394.0, - 16672.0, - 16141.0, - 16097.0, - 15336.0, - 15203.0, - ] - ) - - # min/max band statistics computed by percentile clipping the - # above to samples to [2, 98] - band_mins = torch.tensor( # type: ignore[attr-defined] - [-48.0, -42.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] - ) - band_maxs = torch.tensor( # type: ignore[attr-defined] - [ - 6.0, - 16.0, - 9859.0, - 12872.0, - 13163.0, - 14445.0, - 12477.0, - 12563.0, - 12289.0, - 15596.0, - 12183.0, - 9458.0, - 5897.0, - 5544.0, - ] - ) - - def __init__( - self, - root_dir: str, - bands: str = "all", - num_classes: int = 19, - batch_size: int = 64, - num_workers: int = 0, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for BigEarthNet based DataLoaders. - - Args: - root_dir: The ``root`` arugment to pass to the BigEarthNet Dataset classes - bands: load Sentinel-1 bands, Sentinel-2, or both. one of {s1, s2, all} - num_classes: number of classes to load in target. 
one of {19, 43} - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.bands = bands - self.num_classes = num_classes - self.batch_size = batch_size - self.num_workers = num_workers - - if bands == "all": - self.mins = self.band_mins[:, None, None] - self.maxs = self.band_maxs[:, None, None] - elif bands == "s1": - self.mins = self.band_mins[:2, None, None] - self.maxs = self.band_maxs[:2, None, None] - else: - self.mins = self.band_mins[2:, None, None] - self.maxs = self.band_maxs[2:, None, None] - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset.""" - sample["image"] = sample["image"].float() - sample["image"] = (sample["image"] - self.mins) / (self.maxs - self.mins) - sample["image"] = torch.clip( # type: ignore[attr-defined] - sample["image"], min=0.0, max=1.0 - ) - return sample - - def prepare_data(self) -> None: - """Make sure that the dataset is downloaded. - - This method is only called once per run. - """ - BigEarthNet(self.root_dir, split="train", bands=self.bands, checksum=False) - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. - - This method is called once per GPU per run. - """ - transforms = Compose([self.preprocess]) - self.train_dataset = BigEarthNet( - self.root_dir, - split="train", - bands=self.bands, - num_classes=self.num_classes, - transforms=transforms, - ) - self.val_dataset = BigEarthNet( - self.root_dir, - split="val", - bands=self.bands, - num_classes=self.num_classes, - transforms=transforms, - ) - self.test_dataset = BigEarthNet( - self.root_dir, - split="test", - bands=self.bands, - num_classes=self.num_classes, - transforms=transforms, - ) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training.""" - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation.""" - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing.""" - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) diff --git a/torchgeo/datasets/chesapeake.py b/torchgeo/datasets/chesapeake.py index 1709b1a9e..d697d96de 100644 --- a/torchgeo/datasets/chesapeake.py +++ b/torchgeo/datasets/chesapeake.py @@ -16,21 +16,10 @@ import rasterio.mask import shapely.geometry import shapely.ops import torch -import torch.nn.functional as F -from pytorch_lightning.core.datamodule import LightningDataModule from rasterio.crs import CRS -from torch import Tensor -from torch.utils.data import DataLoader -from torchvision.transforms import Compose -from ..samplers.batch import RandomBatchGeoSampler -from ..samplers.single import GridGeoSampler from .geo import GeoDataset, RasterDataset -from .utils import BoundingBox, download_url, extract_archive, stack_samples - -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" +from .utils import BoundingBox, download_url, extract_archive class Chesapeake(RasterDataset, abc.ABC): @@ -537,294 
+526,3 @@ class ChesapeakeCVPR(GeoDataset): def _extract(self) -> None: """Extract the dataset.""" extract_archive(os.path.join(self.root, self.filename)) - - -class ChesapeakeCVPRDataModule(LightningDataModule): - """LightningDataModule implementation for the Chesapeake CVPR Land Cover dataset. - - Uses the random splits defined per state to partition tiles into train, val, - and test sets. - """ - - def __init__( - self, - root_dir: str, - train_splits: List[str], - val_splits: List[str], - test_splits: List[str], - patches_per_tile: int = 200, - patch_size: int = 256, - batch_size: int = 64, - num_workers: int = 0, - class_set: int = 7, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for Chesapeake CVPR based DataLoaders. - - Args: - root_dir: The ``root`` arugment to pass to the ChesapeakeCVPR Dataset - classes - train_splits: The splits used to train the model, e.g. ["ny-train"] - val_splits: The splits used to validate the model, e.g. ["ny-val"] - test_splits: The splits used to test the model, e.g. ["ny-test"] - patches_per_tile: The number of patches per tile to sample - patch_size: The size of each patch in pixels (test patches will be 1.5 times - this size) - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - class_set: The high-resolution land cover class set to use - 5 or 7 - """ - super().__init__() # type: ignore[no-untyped-call] - for state in train_splits + val_splits + test_splits: - assert state in ChesapeakeCVPR.splits - assert class_set in [5, 7] - - self.root_dir = root_dir - self.train_splits = train_splits - self.val_splits = val_splits - self.test_splits = test_splits - self.layers = ["naip-new", "lc"] - self.patches_per_tile = patches_per_tile - self.patch_size = patch_size - # This is a rough estimate of how large of a patch we will need to sample in - # EPSG:3857 in order to guarantee a large enough patch in the local CRS. - self.original_patch_size = int(patch_size * 2.0) - self.batch_size = batch_size - self.num_workers = num_workers - self.class_set = class_set - - def pad_to( - self, size: int = 512, image_value: int = 0, mask_value: int = 0 - ) -> Callable[[Dict[str, Tensor]], Dict[str, Tensor]]: - """Returns a function to perform a padding transform on a single sample. - - Args: - size: output image size - image_value: value to pad image with - mask_value: value to pad mask with - - Returns: - function to perform padding - """ - - def pad_inner(sample: Dict[str, Tensor]) -> Dict[str, Tensor]: - _, height, width = sample["image"].shape - assert height <= size and width <= size - - height_pad = size - height - width_pad = size - width - - # See https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html - # for a description of the format of the padding tuple - sample["image"] = F.pad( - sample["image"], - (0, width_pad, 0, height_pad), - mode="constant", - value=image_value, - ) - sample["mask"] = F.pad( - sample["mask"], - (0, width_pad, 0, height_pad), - mode="constant", - value=mask_value, - ) - return sample - - return pad_inner - - def center_crop( - self, size: int = 512 - ) -> Callable[[Dict[str, Tensor]], Dict[str, Tensor]]: - """Returns a function to perform a center crop transform on a single sample. 
- - Args: - size: output image size - - Returns: - function to perform center crop - """ - - def center_crop_inner(sample: Dict[str, Tensor]) -> Dict[str, Tensor]: - _, height, width = sample["image"].shape - - y1 = (height - size) // 2 - x1 = (width - size) // 2 - sample["image"] = sample["image"][:, y1 : y1 + size, x1 : x1 + size] - sample["mask"] = sample["mask"][:, y1 : y1 + size, x1 : x1 + size] - - return sample - - return center_crop_inner - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Preprocesses a single sample. - - Args: - sample: sample dictionary containing image and mask - - Returns: - preprocessed sample - """ - sample["image"] = sample["image"] / 255.0 - sample["mask"] = sample["mask"] - sample["mask"] = sample["mask"].squeeze() - - if self.class_set == 5: - sample["mask"][sample["mask"] == 5] = 4 - sample["mask"][sample["mask"] == 6] = 4 - - sample["image"] = sample["image"].float() - sample["mask"] = sample["mask"].long() - - return sample - - def nodata_check( - self, size: int = 512 - ) -> Callable[[Dict[str, Tensor]], Dict[str, Tensor]]: - """Returns a function to check for nodata or mis-sized input. - - Args: - size: output image size - - Returns: - function to check for nodata values - """ - - def nodata_check_inner(sample: Dict[str, Tensor]) -> Dict[str, Tensor]: - num_channels, height, width = sample["image"].shape - - if height < size or width < size: - sample["image"] = torch.zeros( # type: ignore[attr-defined] - (num_channels, size, size) - ) - sample["mask"] = torch.zeros((size, size)) # type: ignore[attr-defined] - - return sample - - return nodata_check_inner - - def prepare_data(self) -> None: - """Confirms that the dataset is downloaded on the local node. - - This method is called once per node, while :func:`setup` is called once per GPU. - """ - ChesapeakeCVPR( - self.root_dir, - splits=self.train_splits, - layers=self.layers, - transforms=None, - download=False, - checksum=False, - ) - - def setup(self, stage: Optional[str] = None) -> None: - """Create the train/val/test splits based on the original Dataset objects. - - The splits should be done here vs. in :func:`__init__` per the docs: - https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html#setup. - - Args: - stage: stage to set up - """ - train_transforms = Compose( - [ - self.center_crop(self.patch_size), - self.nodata_check(self.patch_size), - self.preprocess, - ] - ) - val_transforms = Compose( - [ - self.center_crop(self.patch_size), - self.nodata_check(self.patch_size), - self.preprocess, - ] - ) - test_transforms = Compose( - [ - self.pad_to(self.original_patch_size, image_value=0, mask_value=0), - self.preprocess, - ] - ) - - self.train_dataset = ChesapeakeCVPR( - self.root_dir, - splits=self.train_splits, - layers=self.layers, - transforms=train_transforms, - download=False, - checksum=False, - ) - self.val_dataset = ChesapeakeCVPR( - self.root_dir, - splits=self.val_splits, - layers=self.layers, - transforms=val_transforms, - download=False, - checksum=False, - ) - self.test_dataset = ChesapeakeCVPR( - self.root_dir, - splits=self.test_splits, - layers=self.layers, - transforms=test_transforms, - download=False, - checksum=False, - ) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. 
- - Returns: - training data loader - """ - sampler = RandomBatchGeoSampler( - self.train_dataset, - size=self.original_patch_size, - batch_size=self.batch_size, - length=self.patches_per_tile * len(self.train_dataset), - ) - return DataLoader( - self.train_dataset, - batch_sampler=sampler, - num_workers=self.num_workers, - collate_fn=stack_samples, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. - - Returns: - validation data loader - """ - sampler = GridGeoSampler( - self.val_dataset, - size=self.original_patch_size, - stride=self.original_patch_size, - ) - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - sampler=sampler, - num_workers=self.num_workers, - collate_fn=stack_samples, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. - - Returns: - testing data loader - """ - sampler = GridGeoSampler( - self.test_dataset, - size=self.original_patch_size, - stride=self.original_patch_size, - ) - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - sampler=sampler, - num_workers=self.num_workers, - collate_fn=stack_samples, - ) diff --git a/torchgeo/datasets/cowc.py b/torchgeo/datasets/cowc.py index 35bbdc54b..f16448824 100644 --- a/torchgeo/datasets/cowc.py +++ b/torchgeo/datasets/cowc.py @@ -6,22 +6,16 @@ import abc import csv import os -from typing import Any, Callable, Dict, List, Optional +from typing import Callable, Dict, List, Optional import numpy as np -import pytorch_lightning as pl import torch from PIL import Image -from torch import Generator, Tensor # type: ignore[attr-defined] -from torch.utils.data import DataLoader, random_split +from torch import Tensor from .geo import VisionDataset from .utils import check_integrity, download_and_extract_archive -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" - class COWC(VisionDataset, abc.ABC): """Abstract base class for the COWC dataset. @@ -268,110 +262,3 @@ class COWCDetection(COWC): # 4. Unknown # # May need new abstract base class. Will need subclasses for different patch sizes. - - -class COWCCountingDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the COWC Counting dataset.""" - - def __init__( - self, - root_dir: str, - seed: int, - batch_size: int = 64, - num_workers: int = 0, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for COWC Counting based DataLoaders. - - Args: - root_dir: The ``root`` arugment to pass to the COWCCounting Dataset class - seed: The seed value to use when doing the dataset random_split - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.seed = seed - self.batch_size = batch_size - self.num_workers = num_workers - - def custom_transform(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. - - Args: - sample: dictionary containing image and target - - Returns: - preprocessed sample - """ - sample["image"] = sample["image"] / 255.0 # scale to [0, 1] - sample["label"] = sample["label"].float() - return sample - - def prepare_data(self) -> None: - """Initialize the main ``Dataset`` objects for use in :func:`setup`. - - This includes optionally downloading the dataset. 
This is done once per node, - while :func:`setup` is done once per GPU. - """ - COWCCounting(self.root_dir, download=False) - - def setup(self, stage: Optional[str] = None) -> None: - """Create the train/val/test splits based on the original Dataset objects. - - The splits should be done here vs. in :func:`__init__` per the docs: - https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html#setup. - - Args: - stage: stage to set up - """ - train_val_dataset = COWCCounting( - self.root_dir, split="train", transforms=self.custom_transform - ) - self.test_dataset = COWCCounting( - self.root_dir, split="test", transforms=self.custom_transform - ) - self.train_dataset, self.val_dataset = random_split( - train_val_dataset, - [len(train_val_dataset) - len(self.test_dataset), len(self.test_dataset)], - generator=Generator().manual_seed(self.seed), - ) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. - - Returns: - training data loader - """ - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. - - Returns: - validation data loader - """ - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. - - Returns: - testing data loader - """ - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) diff --git a/torchgeo/datasets/cyclone.py b/torchgeo/datasets/cyclone.py index 37c20ca42..0229f1f85 100644 --- a/torchgeo/datasets/cyclone.py +++ b/torchgeo/datasets/cyclone.py @@ -10,20 +10,13 @@ from typing import Any, Callable, Dict, Optional, cast import matplotlib.pyplot as plt import numpy as np -import pytorch_lightning as pl import torch from PIL import Image -from sklearn.model_selection import GroupShuffleSplit from torch import Tensor -from torch.utils.data import DataLoader, Subset from .geo import VisionDataset from .utils import check_integrity, download_radiant_mlhub_dataset, extract_archive -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" - class TropicalCycloneWindEstimation(VisionDataset): """Tropical Cyclone Wind Estimation Competition dataset. @@ -254,157 +247,3 @@ class TropicalCycloneWindEstimation(VisionDataset): plt.suptitle(suptitle) return fig - - -class CycloneDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the NASA Cyclone dataset. - - Implements 80/20 train/val splits based on hurricane storm ids. - See :func:`setup` for more details. - """ - - def __init__( - self, - root_dir: str, - seed: int, - batch_size: int = 64, - num_workers: int = 0, - api_key: Optional[str] = None, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for NASA Cyclone based DataLoaders. 
- - Args: - root_dir: The ``root`` arugment to pass to the - TropicalCycloneWindEstimation Datasets classes - seed: The seed value to use when doing the sklearn based GroupShuffleSplit - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - api_key: The RadiantEarth MLHub API key to use if the dataset needs to be - downloaded - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.seed = seed - self.batch_size = batch_size - self.num_workers = num_workers - self.api_key = api_key - - def custom_transform(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. - - Args: - sample: dictionary containing image and target - - Returns: - preprocessed sample - """ - sample["image"] = sample["image"] / 255.0 # scale to [0,1] - sample["image"] = ( - sample["image"].unsqueeze(0).repeat(3, 1, 1) - ) # convert to 3 channel - sample["label"] = torch.as_tensor( # type: ignore[attr-defined] - sample["label"] - ).float() - - return sample - - def prepare_data(self) -> None: - """Initialize the main ``Dataset`` objects for use in :func:`setup`. - - This includes optionally downloading the dataset. This is done once per node, - while :func:`setup` is done once per GPU. - """ - TropicalCycloneWindEstimation( - self.root_dir, - split="train", - transforms=self.custom_transform, - download=self.api_key is not None, - api_key=self.api_key, - ) - - def setup(self, stage: Optional[str] = None) -> None: - """Create the train/val/test splits based on the original Dataset objects. - - The splits should be done here vs. in :func:`__init__` per the docs: - https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html#setup. - - We split samples between train/val by the ``storm_id`` property. I.e. all - samples with the same ``storm_id`` value will be either in the train or the val - split. This is important to test one type of generalizability -- given a new - storm, can we predict its windspeed. The test set, however, contains *some* - storms from the training set (specifically, the latter parts of the storms) as - well as some novel storms. - - Args: - stage: stage to set up - """ - self.all_train_dataset = TropicalCycloneWindEstimation( - self.root_dir, - split="train", - transforms=self.custom_transform, - download=False, - ) - - self.all_test_dataset = TropicalCycloneWindEstimation( - self.root_dir, - split="test", - transforms=self.custom_transform, - download=False, - ) - - storm_ids = [] - for item in self.all_train_dataset.collection: - storm_id = item["href"].split("/")[0].split("_")[-2] - storm_ids.append(storm_id) - - train_indices, val_indices = next( - GroupShuffleSplit(test_size=0.2, n_splits=2, random_state=self.seed).split( - storm_ids, groups=storm_ids - ) - ) - - self.train_dataset = Subset(self.all_train_dataset, train_indices) - self.val_dataset = Subset(self.all_train_dataset, val_indices) - self.test_dataset = Subset( - self.all_test_dataset, range(len(self.all_test_dataset)) - ) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. - - Returns: - training data loader - """ - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. 
- - Returns: - validation data loader - """ - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. - - Returns: - testing data loader - """ - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) diff --git a/torchgeo/datasets/etci2021.py b/torchgeo/datasets/etci2021.py index bb10da22b..dbcf667ce 100644 --- a/torchgeo/datasets/etci2021.py +++ b/torchgeo/datasets/etci2021.py @@ -5,16 +5,13 @@ import glob import os -from typing import Any, Callable, Dict, List, Optional +from typing import Callable, Dict, List, Optional import matplotlib.pyplot as plt import numpy as np -import pytorch_lightning as pl import torch from PIL import Image -from torch import Generator, Tensor # type: ignore[attr-defined] -from torch.utils.data import DataLoader, random_split -from torchvision.transforms import Normalize +from torch import Tensor from .geo import VisionDataset from .utils import download_and_extract_archive @@ -320,140 +317,3 @@ class ETCI2021(VisionDataset): if suptitle is not None: plt.suptitle(suptitle) return fig - - -class ETCI2021DataModule(pl.LightningDataModule): - """LightningDataModule implementation for the ETCI2021 dataset. - - Splits the existing train split from the dataset into train/val with 80/20 - proportions, then uses the existing val dataset as the test data. - - .. versionadded:: 0.2 - """ - - band_means = torch.tensor( # type: ignore[attr-defined] - [0.52253931, 0.52253931, 0.52253931, 0.61221701, 0.61221701, 0.61221701, 0] - ) - - band_stds = torch.tensor( # type: ignore[attr-defined] - [0.35221376, 0.35221376, 0.35221376, 0.37364622, 0.37364622, 0.37364622, 1] - ) - - def __init__( - self, - root_dir: str, - seed: int = 0, - batch_size: int = 64, - num_workers: int = 0, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for ETCI2021 based DataLoaders. - - Args: - root_dir: The ``root`` arugment to pass to the ETCI2021 Dataset classes - seed: The seed value to use when doing the dataset random_split - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.seed = seed - self.batch_size = batch_size - self.num_workers = num_workers - - self.norm = Normalize(self.band_means, self.band_stds) - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. - - Notably, moves the given water mask to act as an input layer. - - Args: - sample: input image dictionary - - Returns: - preprocessed sample - """ - image = sample["image"] - water_mask = sample["mask"][0].unsqueeze(0) - flood_mask = sample["mask"][1] - flood_mask = (flood_mask > 0).long() - - sample["image"] = torch.cat( # type: ignore[attr-defined] - [image, water_mask], dim=0 - ).float() - sample["image"] /= 255.0 - sample["image"] = self.norm(sample["image"]) - sample["mask"] = flood_mask - return sample - - def prepare_data(self) -> None: - """Make sure that the dataset is downloaded. - - This method is only called once per run. - """ - ETCI2021(self.root_dir, checksum=False) - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. - - This method is called once per GPU per run. 
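The ETCI preprocessing above folds the permanent-water mask into the input stack, which is why ``band_means``/``band_stds`` carry seven entries: six image channels plus a mask channel whose 0/1 statistics pass through ``Normalize`` unchanged. A condensed sketch (the normalization step then follows)::

    import torch

    def preprocess(sample):
        image = sample["image"]                      # six stacked VV/VH channels
        water_mask = sample["mask"][0].unsqueeze(0)  # permanent-water layer
        flood_mask = (sample["mask"][1] > 0).long()  # binarized target
        sample["image"] = torch.cat([image, water_mask], dim=0).float() / 255.0
        sample["mask"] = flood_mask
        return sample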
- - Args: - stage: stage to set up - """ - train_val_dataset = ETCI2021( - self.root_dir, split="train", transforms=self.preprocess - ) - self.test_dataset = ETCI2021( - self.root_dir, split="val", transforms=self.preprocess - ) - - size_train_val = len(train_val_dataset) - size_train = int(0.8 * size_train_val) - size_val = size_train_val - size_train - - self.train_dataset, self.val_dataset = random_split( - train_val_dataset, - [size_train, size_val], - generator=Generator().manual_seed(self.seed), - ) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. - - Returns: - training data loader - """ - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. - - Returns: - validation data loader - """ - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. - - Returns: - testing data loader - """ - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) diff --git a/torchgeo/datasets/eurosat.py b/torchgeo/datasets/eurosat.py index 9ba06c1c6..e1e140720 100644 --- a/torchgeo/datasets/eurosat.py +++ b/torchgeo/datasets/eurosat.py @@ -4,15 +4,11 @@ """EuroSAT dataset.""" import os -from typing import Any, Callable, Dict, Optional, cast +from typing import Callable, Dict, Optional, cast import matplotlib.pyplot as plt import numpy as np -import pytorch_lightning as pl -import torch from torch import Tensor -from torch.utils.data import DataLoader -from torchvision.transforms import Compose, Normalize from .geo import VisionClassificationDataset from .utils import check_integrity, download_url, extract_archive, rasterio_loader @@ -229,138 +225,3 @@ class EuroSAT(VisionClassificationDataset): if suptitle is not None: plt.suptitle(suptitle) return fig - - -class EuroSATDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the EuroSAT dataset. - - Uses the train/val/test splits from the dataset. - - .. versionadded:: 0.2 - """ - - band_means = torch.tensor( # type: ignore[attr-defined] - [ - 1354.40546513, - 1118.24399958, - 1042.92983953, - 947.62620298, - 1199.47283961, - 1999.79090914, - 2369.22292565, - 2296.82608323, - 732.08340178, - 12.11327804, - 1819.01027855, - 1118.92391149, - 2594.14080798, - ] - ) - - band_stds = torch.tensor( # type: ignore[attr-defined] - [ - 245.71762908, - 333.00778264, - 395.09249139, - 593.75055589, - 566.4170017, - 861.18399006, - 1086.63139075, - 1117.98170791, - 404.91978886, - 4.77584468, - 1002.58768311, - 761.30323499, - 1231.58581042, - ] - ) - - def __init__( - self, root_dir: str, batch_size: int = 64, num_workers: int = 0, **kwargs: Any - ) -> None: - """Initialize a LightningDataModule for EuroSAT based DataLoaders. 
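``torchvision.transforms.Normalize`` standardizes per channel and is not limited to RGB, so the 13-entry Sentinel-2 statistics above apply to EuroSAT tensors directly; a sketch, with ``band_means``/``band_stds`` the tensors defined above::

    from torchvision.transforms import Normalize

    norm = Normalize(band_means, band_stds)

    def preprocess(sample):
        # cast from the raster's integer dtype before normalizing
        sample["image"] = norm(sample["image"].float())
        return sample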
- - Args: - root_dir: The ``root`` arugment to pass to the EuroSAT Dataset classes - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.batch_size = batch_size - self.num_workers = num_workers - - self.norm = Normalize(self.band_means, self.band_stds) - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. - - Args: - sample: input image dictionary - - Returns: - preprocessed sample - """ - sample["image"] = sample["image"].float() - sample["image"] = self.norm(sample["image"]) - return sample - - def prepare_data(self) -> None: - """Make sure that the dataset is downloaded. - - This method is only called once per run. - """ - EuroSAT(self.root_dir) - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. - - This method is called once per GPU per run. - - Args: - stage: stage to set up - """ - transforms = Compose([self.preprocess]) - - self.train_dataset = EuroSAT(self.root_dir, "train", transforms=transforms) - self.val_dataset = EuroSAT(self.root_dir, "val", transforms=transforms) - self.test_dataset = EuroSAT(self.root_dir, "test", transforms=transforms) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. - - Returns: - training data loader - """ - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. - - Returns: - validation data loader - """ - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. - - Returns: - testing data loader - """ - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) diff --git a/torchgeo/datasets/fair1m.py b/torchgeo/datasets/fair1m.py index e75f27be3..c8e2184f2 100644 --- a/torchgeo/datasets/fair1m.py +++ b/torchgeo/datasets/fair1m.py @@ -11,33 +11,12 @@ from xml.etree import ElementTree import matplotlib.patches as patches import matplotlib.pyplot as plt import numpy as np -import pytorch_lightning as pl import torch from PIL import Image from torch import Tensor -from torch.utils.data import DataLoader -from torchvision.transforms import Compose -from ..datasets.utils import check_integrity, dataset_split, extract_archive from .geo import VisionDataset - -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" - - -def collate_fn(batch: List[Dict[str, Tensor]]) -> Dict[str, Any]: - """Custom object detection collate fn to handle variable number of boxes. 
- - Args: - batch: list of sample dicts return by dataset - Returns: - batch dict output - """ - output: Dict[str, Any] = {} - output["image"] = torch.stack([sample["image"] for sample in batch]) - output["boxes"] = [sample["boxes"] for sample in batch] - return output +from .utils import check_integrity, extract_archive def parse_pascal_voc(path: str) -> Dict[str, Any]: @@ -350,102 +329,3 @@ class FAIR1M(VisionDataset): plt.suptitle(suptitle) return fig - - -class FAIR1MDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the FAIR1M dataset.""" - - def __init__( - self, - root_dir: str, - batch_size: int = 64, - num_workers: int = 0, - val_split_pct: float = 0.2, - test_split_pct: float = 0.2, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for FAIR1M based DataLoaders. - - Args: - root_dir: The ``root`` arugment to pass to the FAIR1M Dataset classes - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - val_split_pct: What percentage of the dataset to use as a validation set - test_split_pct: What percentage of the dataset to use as a test set - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.batch_size = batch_size - self.num_workers = num_workers - self.val_split_pct = val_split_pct - self.test_split_pct = test_split_pct - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. - - Args: - sample: input image dictionary - - Returns: - preprocessed sample - """ - sample["image"] = sample["image"].float() - sample["image"] /= 255.0 - return sample - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. - - This method is called once per GPU per run. - - Args: - stage: stage to set up - """ - transforms = Compose([self.preprocess]) - - dataset = FAIR1M(self.root_dir, transforms=transforms) - self.train_dataset, self.val_dataset, self.test_dataset = dataset_split( - dataset, val_pct=self.val_split_pct, test_pct=self.test_split_pct - ) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. - - Returns: - training data loader - """ - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - collate_fn=collate_fn, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. - - Returns: - validation data loader - """ - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - collate_fn=collate_fn, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. 
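Since each FAIR1M image carries a different number of boxes, the default collate cannot stack the targets; the removed ``collate_fn`` above stacks images into a tensor and leaves boxes as a per-image list. Wiring it into a loader::

    from torch.utils.data import DataLoader

    loader = DataLoader(
        dataset,
        batch_size=4,
        collate_fn=collate_fn,  # batch["image"]: Tensor, batch["boxes"]: list
    )
    batch = next(iter(loader))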
- - Returns: - testing data loader - """ - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - collate_fn=collate_fn, - ) diff --git a/torchgeo/datasets/landcoverai.py b/torchgeo/datasets/landcoverai.py index e579d668d..2fecb5d6e 100644 --- a/torchgeo/datasets/landcoverai.py +++ b/torchgeo/datasets/landcoverai.py @@ -6,24 +6,18 @@ import hashlib import os from functools import lru_cache -from typing import Any, Callable, Dict, Optional +from typing import Callable, Dict, Optional import matplotlib.pyplot as plt import numpy as np -import pytorch_lightning as pl import torch from matplotlib.colors import ListedColormap from PIL import Image from torch import Tensor -from torch.utils.data import DataLoader from .geo import VisionDataset from .utils import check_integrity, download_and_extract_archive, working_dir -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" - class LandCoverAI(VisionDataset): r"""LandCover.ai dataset. @@ -266,110 +260,3 @@ class LandCoverAI(VisionDataset): if suptitle is not None: plt.suptitle(suptitle) return fig - - -class LandCoverAIDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the LandCover.ai dataset. - - Uses the train/val/test splits from the dataset. - """ - - def __init__( - self, root_dir: str, batch_size: int = 64, num_workers: int = 0, **kwargs: Any - ) -> None: - """Initialize a LightningDataModule for LandCover.ai based DataLoaders. - - Args: - root_dir: The ``root`` arugment to pass to the Landcover.AI Dataset classes - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.batch_size = batch_size - self.num_workers = num_workers - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. - - Args: - sample: dictionary containing image and mask - - Returns: - preprocessed sample - """ - sample["image"] = sample["image"] / 255.0 - - sample["image"] = sample["image"].float() - sample["mask"] = sample["mask"].float().unsqueeze(0) + 1 - - return sample - - def prepare_data(self) -> None: - """Make sure that the dataset is downloaded. - - This method is only called once per run. - """ - _ = LandCoverAI(self.root_dir, download=False, checksum=False) - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. - - This method is called once per GPU per run. - - Args: - stage: stage to set up - """ - train_transforms = self.preprocess - val_test_transforms = self.preprocess - - self.train_dataset = LandCoverAI( - self.root_dir, split="train", transforms=train_transforms - ) - - self.val_dataset = LandCoverAI( - self.root_dir, split="val", transforms=val_test_transforms - ) - - self.test_dataset = LandCoverAI( - self.root_dir, split="test", transforms=val_test_transforms - ) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. - - Returns: - training data loader - """ - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. 
- - Returns: - validation data loader - """ - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. - - Returns: - testing data loader - """ - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) diff --git a/torchgeo/datasets/loveda.py b/torchgeo/datasets/loveda.py index b3a0a52e8..30fe98adf 100644 --- a/torchgeo/datasets/loveda.py +++ b/torchgeo/datasets/loveda.py @@ -5,23 +5,17 @@ import glob import os -from typing import Any, Callable, Dict, List, Optional +from typing import Callable, Dict, List, Optional import matplotlib.pyplot as plt import numpy as np -import pytorch_lightning as pl import torch from PIL import Image from torch import Tensor -from torch.utils.data import DataLoader from .geo import VisionDataset from .utils import download_and_extract_archive -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" - class LoveDA(VisionDataset): """LoveDA dataset. @@ -305,117 +299,3 @@ class LoveDA(VisionDataset): plt.suptitle(suptitle) return fig - - -class LoveDADataModule(pl.LightningDataModule): - """LightningDataModule implementation for the LoveDA dataset. - - Uses the train/val/test splits from the dataset. - """ - - def __init__( - self, - root_dir: str, - scene: List[str], - batch_size: int = 32, - num_workers: int = 0, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for LoveDA based DataLoaders. - - Args: - root_dir: The ``root`` argument to pass to LoveDA Dataset classes - scene: specify whether to load only 'urban', only 'rural' or both - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.scene = scene - self.batch_size = batch_size - self.num_workers = num_workers - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. - - Args: - sample: dictionary containing image and mask - - Returns: - preprocessed sample - """ - sample["image"] = sample["image"] / 255.0 - - return sample - - def prepare_data(self) -> None: - """Make sure that the dataset is downloaded. - - This method is only called once per run. - """ - _ = LoveDA(self.root_dir, scene=self.scene, download=False, checksum=False) - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. - - This method is called once per GPU per run. - - Args: - stage: stage to set up - """ - train_transforms = self.preprocess - val_test_transforms = self.preprocess - - self.train_dataset = LoveDA( - self.root_dir, split="train", scene=self.scene, transforms=train_transforms - ) - - self.val_dataset = LoveDA( - self.root_dir, split="val", scene=self.scene, transforms=val_test_transforms - ) - - self.test_dataset = LoveDA( - self.root_dir, - split="test", - scene=self.scene, - transforms=val_test_transforms, - ) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. 
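Outside a Lightning ``Trainer`` the datamodule hooks can also be driven by hand. A sketch assuming the class is now importable from the ``torchgeo.datamodules`` package this diff introduces, with a placeholder data root::

    from torchgeo.datamodules import LoveDADataModule

    dm = LoveDADataModule(
        root_dir="data/loveda",  # placeholder path
        scene=["urban", "rural"],
        batch_size=32,
        num_workers=4,
    )
    dm.prepare_data()  # once per node: verifies the data is present
    dm.setup()         # once per process: builds the split datasets
    batch = next(iter(dm.train_dataloader()))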
- - Returns: - training data loader - """ - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. - - Returns: - validation data loader - """ - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. - - Returns: - testing data loader - """ - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) diff --git a/torchgeo/datasets/naip.py b/torchgeo/datasets/naip.py index 02cfe1e33..b6b4bcece 100644 --- a/torchgeo/datasets/naip.py +++ b/torchgeo/datasets/naip.py @@ -3,20 +3,7 @@ """National Agriculture Imagery Program (NAIP) dataset.""" -from typing import Any, Dict, Optional - -import pytorch_lightning as pl -from torch.utils.data import DataLoader - -from ..samplers.batch import RandomBatchGeoSampler -from ..samplers.single import GridGeoSampler -from .chesapeake import Chesapeake13 from .geo import RasterDataset -from .utils import BoundingBox, stack_samples - -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" class NAIP(RasterDataset): @@ -55,147 +42,3 @@ class NAIP(RasterDataset): # Plotting all_bands = ["R", "G", "B", "NIR"] rgb_bands = ["R", "G", "B"] - - -class NAIPChesapeakeDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the NAIP and Chesapeake datasets. - - Uses the train/val/test splits from the dataset. - """ - - # TODO: tune these hyperparams - length = 1000 - stride = 128 - - def __init__( - self, - naip_root_dir: str, - chesapeake_root_dir: str, - batch_size: int = 64, - num_workers: int = 0, - patch_size: int = 256, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for NAIP and Chesapeake based DataLoaders. - - Args: - naip_root_dir: directory containing NAIP data - chesapeake_root_dir: directory containing Chesapeake data - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - patch_size: size of patches to sample - """ - super().__init__() # type: ignore[no-untyped-call] - self.naip_root_dir = naip_root_dir - self.chesapeake_root_dir = chesapeake_root_dir - self.batch_size = batch_size - self.num_workers = num_workers - self.patch_size = patch_size - - def naip_transform(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the NAIP Dataset. - - Args: - sample: NAIP image dictionary - - Returns: - preprocessed NAIP data - """ - sample["image"] = sample["image"] / 255.0 - sample["image"] = sample["image"].float() - return sample - - def chesapeake_transform(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Chesapeake Dataset. - - Args: - sample: Chesapeake mask dictionary - - Returns: - preprocessed Chesapeake data - """ - sample["mask"] = sample["mask"].long()[0] - return sample - - def prepare_data(self) -> None: - """Make sure that the dataset is downloaded. - - This method is only called once per run. - """ - Chesapeake13(self.chesapeake_root_dir, download=False, checksum=False) - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. 
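The two transforms above only rescale values, so letting each dataset apply its own is safe; a geometric augmentation would instead have to see the merged sample, as the TODO in the setup below notes. A sketch of what such a joint transform could look like::

    import torch

    def joint_hflip(sample):
        # one random draw governs both tensors, keeping image and mask aligned
        if torch.rand(1).item() < 0.5:
            sample["image"] = sample["image"].flip(-1)
            sample["mask"] = sample["mask"].flip(-1)
        return sample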
- - This method is called once per GPU per run. - - Args: - stage: state to set up - """ - # TODO: these transforms will be applied independently, this won't work if we - # add things like random horizontal flip - chesapeake = Chesapeake13( - self.chesapeake_root_dir, transforms=self.chesapeake_transform - ) - naip = NAIP( - self.naip_root_dir, - chesapeake.crs, - chesapeake.res, - transforms=self.naip_transform, - ) - self.dataset = chesapeake & naip - - # TODO: figure out better train/val/test split - roi = self.dataset.bounds - midx = roi.minx + (roi.maxx - roi.minx) / 2 - midy = roi.miny + (roi.maxy - roi.miny) / 2 - train_roi = BoundingBox(roi.minx, midx, roi.miny, roi.maxy, roi.mint, roi.maxt) - val_roi = BoundingBox(midx, roi.maxx, roi.miny, midy, roi.mint, roi.maxt) - test_roi = BoundingBox(roi.minx, roi.maxx, midy, roi.maxy, roi.mint, roi.maxt) - - self.train_sampler = RandomBatchGeoSampler( - naip, self.patch_size, self.batch_size, self.length, train_roi - ) - self.val_sampler = GridGeoSampler(naip, self.patch_size, self.stride, val_roi) - self.test_sampler = GridGeoSampler(naip, self.patch_size, self.stride, test_roi) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. - - Returns: - training data loader - """ - return DataLoader( - self.dataset, - batch_sampler=self.train_sampler, - num_workers=self.num_workers, - collate_fn=stack_samples, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. - - Returns: - validation data loader - """ - return DataLoader( - self.dataset, - batch_size=self.batch_size, - sampler=self.val_sampler, - num_workers=self.num_workers, - collate_fn=stack_samples, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. - - Returns: - testing data loader - """ - return DataLoader( - self.dataset, - batch_size=self.batch_size, - sampler=self.test_sampler, - num_workers=self.num_workers, - collate_fn=stack_samples, - ) diff --git a/torchgeo/datasets/nasa_marine_debris.py b/torchgeo/datasets/nasa_marine_debris.py index bd239e658..2b5027519 100644 --- a/torchgeo/datasets/nasa_marine_debris.py +++ b/torchgeo/datasets/nasa_marine_debris.py @@ -4,39 +4,17 @@ """NASA Marine Debris dataset.""" import os -from typing import Any, Callable, Dict, List, Optional +from typing import Callable, Dict, List, Optional import matplotlib.pyplot as plt import numpy as np -import pytorch_lightning as pl import rasterio import torch from torch import Tensor -from torch.utils.data import DataLoader -from torchvision.transforms import Compose from torchvision.utils import draw_bounding_boxes from .geo import VisionDataset -from .utils import dataset_split, download_radiant_mlhub_dataset, extract_archive - -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" - - -def collate_fn(batch: List[Dict[str, Tensor]]) -> Dict[str, Any]: - """Custom object detection collate fn to handle variable boxes. 
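The geographic split above never builds separate datasets: it carves the bounds of the NAIP/Chesapeake intersection into regions and gives each region its own sampler over the shared dataset. Condensed here to a train/val pair, with ``chesapeake`` and ``naip`` the datasets constructed above (the samplers index ``naip`` while the loaders read from the intersection, as in the removed code)::

    from torch.utils.data import DataLoader
    from torchgeo.datasets.utils import BoundingBox, stack_samples
    from torchgeo.samplers import GridGeoSampler, RandomBatchGeoSampler

    dataset = chesapeake & naip  # spatiotemporal intersection
    roi = dataset.bounds
    midx = roi.minx + (roi.maxx - roi.minx) / 2

    # west half for training, east half for validation
    train_roi = BoundingBox(roi.minx, midx, roi.miny, roi.maxy, roi.mint, roi.maxt)
    val_roi = BoundingBox(midx, roi.maxx, roi.miny, roi.maxy, roi.mint, roi.maxt)

    train_sampler = RandomBatchGeoSampler(naip, 256, 64, 1000, train_roi)
    val_sampler = GridGeoSampler(naip, 256, 128, val_roi)

    train_loader = DataLoader(
        dataset, batch_sampler=train_sampler, collate_fn=stack_samples
    )
    val_loader = DataLoader(
        dataset, batch_size=64, sampler=val_sampler, collate_fn=stack_samples
    )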
- - Args: - batch: list of sample dicts return by dataset - - Returns: - batch dict output - """ - output: Dict[str, Any] = {} - output["image"] = torch.stack([sample["image"] for sample in batch]) - output["boxes"] = [sample["boxes"] for sample in batch] - return output +from .utils import download_radiant_mlhub_dataset, extract_archive class NASAMarineDebris(VisionDataset): @@ -279,109 +257,3 @@ class NASAMarineDebris(VisionDataset): plt.suptitle(suptitle) return fig - - -class NASAMarineDebrisDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the NASA Marine Debris dataset.""" - - def __init__( - self, - root_dir: str, - batch_size: int = 64, - num_workers: int = 0, - val_split_pct: float = 0.2, - test_split_pct: float = 0.2, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for NASA Marine Debris based DataLoaders. - - Args: - root_dir: The ``root`` argument to pass to the Dataset class - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - val_split_pct: What percentage of the dataset to use as a validation set - test_split_pct: What percentage of the dataset to use as a test set - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.batch_size = batch_size - self.num_workers = num_workers - self.val_split_pct = val_split_pct - self.test_split_pct = test_split_pct - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. - - Args: - sample: input image dictionary - - Returns: - preprocessed sample - """ - sample["image"] = sample["image"].float() - sample["image"] /= 255.0 - return sample - - def prepare_data(self) -> None: - """Make sure that the dataset is downloaded. - - This method is only called once per run. - """ - NASAMarineDebris(self.root_dir, checksum=False) - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. - - This method is called once per GPU per run. - - Args: - stage: stage to set up - """ - transforms = Compose([self.preprocess]) - - dataset = NASAMarineDebris(self.root_dir, transforms=transforms) - self.train_dataset, self.val_dataset, self.test_dataset = dataset_split( - dataset, val_pct=self.val_split_pct, test_pct=self.test_split_pct - ) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. - - Returns: - training data loader - """ - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - collate_fn=collate_fn, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. - - Returns: - validation data loader - """ - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - collate_fn=collate_fn, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. 
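Both detection modules above rely on the ``dataset_split`` helper, whose definition is removed from ``torchgeo/datasets/utils.py`` at the end of this diff, presumably migrating alongside the datamodules. Its two calling conventions::

    # train/val only: returns two subsets
    train_ds, val_ds = dataset_split(dataset, val_pct=0.2)

    # train/val/test: returns three subsets
    train_ds, val_ds, test_ds = dataset_split(dataset, val_pct=0.2, test_pct=0.2)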
- - Returns: - testing data loader - """ - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - collate_fn=collate_fn, - ) diff --git a/torchgeo/datasets/oscd.py b/torchgeo/datasets/oscd.py index c2f807b49..803d7405c 100644 --- a/torchgeo/datasets/oscd.py +++ b/torchgeo/datasets/oscd.py @@ -5,25 +5,23 @@ import glob import os -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union +from typing import Callable, Dict, List, Optional, Sequence, Union -import kornia.augmentation as K import matplotlib.pyplot as plt import numpy as np -import pytorch_lightning as pl import torch -from einops import repeat from matplotlib.figure import Figure from numpy import ndarray as Array from PIL import Image from torch import Tensor -from torch.utils.data import DataLoader -from torch.utils.data._utils.collate import default_collate -from torchvision.transforms import Compose, Normalize -from ..datasets.utils import dataset_split, draw_semantic_segmentation_masks from .geo import VisionDataset -from .utils import download_url, extract_archive, sort_sentinel2_bands +from .utils import ( + download_url, + draw_semantic_segmentation_masks, + extract_archive, + sort_sentinel2_bands, +) class OSCD(VisionDataset): @@ -317,202 +315,3 @@ class OSCD(VisionDataset): plt.suptitle(suptitle) return fig - - -class OSCDDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the OSCD dataset. - - Uses the train/test splits from the dataset and further splits - the train split into train/val splits. - - .. versionadded: 0.2 - """ - - band_means = torch.tensor( # type: ignore[attr-defined] - [ - 1583.0741, - 1374.3202, - 1294.1616, - 1325.6158, - 1478.7408, - 1933.0822, - 2166.0608, - 2076.4868, - 2306.0652, - 690.9814, - 16.2360, - 2080.3347, - 1524.6930, - ] - ) - - band_stds = torch.tensor( # type: ignore[attr-defined] - [ - 52.1937, - 83.4168, - 105.6966, - 151.1401, - 147.4615, - 115.9289, - 123.1974, - 114.6483, - 141.4530, - 73.2758, - 4.8368, - 213.4821, - 179.4793, - ] - ) - - def __init__( - self, - root_dir: str, - bands: str = "all", - train_batch_size: int = 32, - num_workers: int = 0, - val_split_pct: float = 0.2, - patch_size: Tuple[int, int] = (64, 64), - num_patches_per_tile: int = 32, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for OSCD based DataLoaders. 
- - Args: - root_dir: The ``root`` arugment to pass to the OSCD Dataset classes - bands: "rgb" or "all" - train_batch_size: The batch size used in the train DataLoader - (val_batch_size == test_batch_size == 1) - num_workers: The number of workers to use in all created DataLoaders - val_split_pct: What percentage of the dataset to use as a validation set - patch_size: Size of random patch from image and mask (height, width) - num_patches_per_tile: number of random patches per sample - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.bands = bands - self.train_batch_size = train_batch_size - self.num_workers = num_workers - self.val_split_pct = val_split_pct - self.patch_size = patch_size - self.num_patches_per_tile = num_patches_per_tile - - if bands == "rgb": - self.band_means = self.band_means[[3, 2, 1], None, None] - self.band_stds = self.band_stds[[3, 2, 1], None, None] - else: - self.band_means = self.band_means[:, None, None] - self.band_stds = self.band_stds[:, None, None] - - self.norm = Normalize(self.band_means, self.band_stds) - self.rcrop = K.AugmentationSequential( - K.RandomCrop(patch_size), data_keys=["input", "mask"], same_on_batch=True - ) - self.padto = K.PadTo((1280, 1280)) - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset.""" - sample["image"] = sample["image"].float() - sample["mask"] = sample["mask"] - sample["image"] = self.norm(sample["image"]) - sample["image"] = torch.flatten( # type: ignore[attr-defined] - sample["image"], 0, 1 - ) - return sample - - def prepare_data(self) -> None: - """Make sure that the dataset is downloaded. - - This method is only called once per run. - """ - OSCD(self.root_dir, split="train", bands=self.bands, checksum=False) - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. - - This method is called once per GPU per run. 
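In the ``rcrop`` pipeline above, ``data_keys=["input", "mask"]`` makes ``RandomCrop`` cut the image and mask at the same location. A minimal sketch with explicit batch dimensions and illustrative OSCD shapes::

    import kornia.augmentation as K
    import torch

    rcrop = K.AugmentationSequential(
        K.RandomCrop((64, 64)), data_keys=["input", "mask"], same_on_batch=True
    )

    image = torch.rand(1, 26, 1280, 1280)  # bitemporal 13-band pair, flattened
    mask = torch.randint(0, 2, (1, 1, 1280, 1280)).float()

    patches, mask_patches = [], []
    for _ in range(32):  # num_patches_per_tile
        img_p, msk_p = rcrop(image, mask)  # cropped at the same location
        patches.append(img_p.squeeze(0))
        mask_patches.append(msk_p.squeeze(0).squeeze(0).long())

    batch_images = torch.stack(patches)      # (32, 26, 64, 64)
    batch_masks = torch.stack(mask_patches)  # (32, 64, 64)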
- """ - - def n_random_crop(sample: Dict[str, Any]) -> Dict[str, Any]: - images, masks = [], [] - for i in range(self.num_patches_per_tile): - mask = repeat(sample["mask"], "h w -> t h w", t=2).float() - image, mask = self.rcrop(sample["image"], mask) - mask = mask.squeeze()[0] - images.append(image.squeeze()) - masks.append(mask.long()) - sample["image"] = torch.stack(images) - sample["mask"] = torch.stack(masks) - return sample - - def pad_to(sample: Dict[str, Any]) -> Dict[str, Any]: - sample["image"] = self.padto(sample["image"])[0] - sample["mask"] = self.padto(sample["mask"].float()).long()[0, 0] - return sample - - train_transforms = Compose([self.preprocess, n_random_crop]) - # for testing and validation we pad all inputs to a fixed size to avoid issues - # with the upsampling paths in encoder-decoder architectures - test_transforms = Compose([self.preprocess, pad_to]) - - train_dataset = OSCD( - self.root_dir, split="train", bands=self.bands, transforms=train_transforms - ) - if self.val_split_pct > 0.0: - val_dataset = OSCD( - self.root_dir, - split="train", - bands=self.bands, - transforms=test_transforms, - ) - self.train_dataset, self.val_dataset, _ = dataset_split( - train_dataset, val_pct=self.val_split_pct, test_pct=0.0 - ) - self.val_dataset.dataset = val_dataset - else: - self.train_dataset = train_dataset # type: ignore[assignment] - self.val_dataset = None # type: ignore[assignment] - - self.test_dataset = OSCD( - self.root_dir, split="test", bands=self.bands, transforms=test_transforms - ) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training.""" - - def collate_wrapper(batch: List[Dict[str, Any]]) -> Dict[str, Any]: - r_batch: Dict[str, Any] = default_collate( # type: ignore[no-untyped-call] - batch - ) - r_batch["image"] = torch.flatten( # type: ignore[attr-defined] - r_batch["image"], 0, 1 - ) - r_batch["mask"] = torch.flatten( # type: ignore[attr-defined] - r_batch["mask"], 0, 1 - ) - return r_batch - - return DataLoader( - self.train_dataset, - batch_size=self.train_batch_size, - num_workers=self.num_workers, - collate_fn=collate_wrapper, - shuffle=True, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation.""" - if self.val_split_pct == 0.0: - return self.train_dataloader() - else: - return DataLoader( - self.val_dataset, - batch_size=1, - num_workers=self.num_workers, - shuffle=False, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing.""" - return DataLoader( - self.test_dataset, batch_size=1, num_workers=self.num_workers, shuffle=False - ) diff --git a/torchgeo/datasets/potsdam.py b/torchgeo/datasets/potsdam.py index 40149d042..a54e4b18f 100644 --- a/torchgeo/datasets/potsdam.py +++ b/torchgeo/datasets/potsdam.py @@ -4,22 +4,23 @@ """Potsdam dataset.""" import os -from typing import Any, Callable, Dict, Optional +from typing import Callable, Dict, Optional import matplotlib.pyplot as plt import numpy as np -import pytorch_lightning as pl import rasterio import torch from matplotlib.figure import Figure from PIL import Image from torch import Tensor -from torch.utils.data import DataLoader -from torchvision.transforms import Compose -from ..datasets.utils import dataset_split, draw_semantic_segmentation_masks from .geo import VisionDataset -from .utils import check_integrity, extract_archive, rgb_to_mask +from .utils import ( + check_integrity, + draw_semantic_segmentation_masks, + extract_archive, + rgb_to_mask, +) class Potsdam2D(VisionDataset): @@ 
-293,111 +294,3 @@ class Potsdam2D(VisionDataset): plt.suptitle(suptitle) return fig - - -class Potsdam2DDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the Potsdam2D dataset. - - Uses the train/test splits from the dataset. - - .. versionadded: 0.2 - """ - - def __init__( - self, - root_dir: str, - batch_size: int = 64, - num_workers: int = 0, - val_split_pct: float = 0.2, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for Potsdam2D based DataLoaders. - - Args: - root_dir: The ``root`` argument to pass to the Potsdam2D Dataset classes - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - val_split_pct: What percentage of the dataset to use as a validation set - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.batch_size = batch_size - self.num_workers = num_workers - self.val_split_pct = val_split_pct - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. - - Args: - sample: input image dictionary - - Returns: - preprocessed sample - """ - sample["image"] = sample["image"].float() - sample["image"] /= 255.0 - return sample - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. - - This method is called once per GPU per run. - - Args: - stage: stage to set up - """ - transforms = Compose([self.preprocess]) - - dataset = Potsdam2D(self.root_dir, "train", transforms=transforms) - - if self.val_split_pct > 0.0: - self.train_dataset, self.val_dataset, _ = dataset_split( - dataset, val_pct=self.val_split_pct, test_pct=0.0 - ) - else: - self.train_dataset = dataset # type: ignore[assignment] - self.val_dataset = None # type: ignore[assignment] - - self.test_dataset = Potsdam2D(self.root_dir, "test", transforms=transforms) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. - - Returns: - training data loader - """ - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. - - Returns: - validation data loader - """ - if self.val_split_pct == 0.0: - return self.train_dataloader() - else: - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. 
- - Returns: - testing data loader - """ - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) diff --git a/torchgeo/datasets/resisc45.py b/torchgeo/datasets/resisc45.py index 4b5c9560a..13117d546 100644 --- a/torchgeo/datasets/resisc45.py +++ b/torchgeo/datasets/resisc45.py @@ -4,23 +4,15 @@ """RESISC45 dataset.""" import os -from typing import Any, Callable, Dict, Optional, cast +from typing import Callable, Dict, Optional, cast import matplotlib.pyplot as plt import numpy as np -import pytorch_lightning as pl -import torch from torch import Tensor -from torch.utils.data import DataLoader -from torchvision.transforms import Compose, Normalize from .geo import VisionClassificationDataset from .utils import download_url, extract_archive -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" - class RESISC45(VisionClassificationDataset): """RESISC45 dataset. @@ -288,109 +280,3 @@ class RESISC45(VisionClassificationDataset): if suptitle is not None: plt.suptitle(suptitle) return fig - - -class RESISC45DataModule(pl.LightningDataModule): - """LightningDataModule implementation for the RESISC45 dataset. - - Uses the train/val/test splits from the dataset. - """ - - band_means = torch.tensor( # type: ignore[attr-defined] - [0.36801773, 0.38097873, 0.343583] - ) - - band_stds = torch.tensor( # type: ignore[attr-defined] - [0.14540215, 0.13558227, 0.13203649] - ) - - def __init__( - self, root_dir: str, batch_size: int = 64, num_workers: int = 0, **kwargs: Any - ) -> None: - """Initialize a LightningDataModule for RESISC45 based DataLoaders. - - Args: - root_dir: The ``root`` arugment to pass to the RESISC45 Dataset classes - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.batch_size = batch_size - self.num_workers = num_workers - - self.norm = Normalize(self.band_means, self.band_stds) - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. - - Args: - sample: input image dictionary - - Returns: - preprocessed sample - """ - sample["image"] = sample["image"].float() - sample["image"] /= 255.0 - sample["image"] = self.norm(sample["image"]) - return sample - - def prepare_data(self) -> None: - """Make sure that the dataset is downloaded. - - This method is only called once per run. - """ - RESISC45(self.root_dir, checksum=False) - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. - - This method is called once per GPU per run. - - Args: - stage: stage to set up - """ - transforms = Compose([self.preprocess]) - - self.train_dataset = RESISC45(self.root_dir, "train", transforms=transforms) - self.val_dataset = RESISC45(self.root_dir, "val", transforms=transforms) - self.test_dataset = RESISC45(self.root_dir, "test", transforms=transforms) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. - - Returns: - training data loader - """ - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. 
- - Returns: - validation data loader - """ - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. - - Returns: - testing data loader - """ - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) diff --git a/torchgeo/datasets/sen12ms.py b/torchgeo/datasets/sen12ms.py index f1cf8b2ad..8ff0e9a44 100644 --- a/torchgeo/datasets/sen12ms.py +++ b/torchgeo/datasets/sen12ms.py @@ -4,23 +4,16 @@ """SEN12MS dataset.""" import os -from typing import Any, Callable, Dict, List, Optional +from typing import Callable, Dict, List, Optional import numpy as np -import pytorch_lightning as pl import rasterio import torch -from sklearn.model_selection import GroupShuffleSplit from torch import Tensor -from torch.utils.data import DataLoader, Subset from .geo import VisionDataset from .utils import check_integrity -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" - class SEN12MS(VisionDataset): """SEN12MS dataset. @@ -246,188 +239,3 @@ class SEN12MS(VisionDataset): if not check_integrity(filepath, md5 if self.checksum else None): return False return True - - -class SEN12MSDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the SEN12MS dataset. - - Implements 80/20 geographic train/val splits and uses the test split from the - classification dataset definitions. See :func:`setup` for more details. - - Uses the Simplified IGBP scheme defined in the 2020 Data Fusion Competition. See - https://arxiv.org/abs/2002.08254. - """ - - #: Mapping from the IGBP class definitions to the DFC2020, taken from the dataloader - #: here https://github.com/lukasliebel/dfc2020_baseline. - DFC2020_CLASS_MAPPING = torch.tensor( # type: ignore[attr-defined] - [ - 0, # maps 0s to 0 - 1, # maps 1s to 1 - 1, # maps 2s to 1 - 1, # ... - 1, - 1, - 2, - 2, - 3, - 3, - 4, - 5, - 6, - 7, - 6, - 8, - 9, - 10, - ] - ) - - def __init__( - self, - root_dir: str, - seed: int, - band_set: str = "all", - batch_size: int = 64, - num_workers: int = 0, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for SEN12MS based DataLoaders. - - Args: - root_dir: The ``root`` arugment to pass to the SEN12MS Dataset classes - seed: The seed value to use when doing the sklearn based ShuffleSplit - band_set: The subset of S1/S2 bands to use. Options are: "all", - "s1", "s2-all", and "s2-reduced" where the "s2-reduced" set includes: - B2, B3, B4, B8, B11, and B12. - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - """ - super().__init__() # type: ignore[no-untyped-call] - assert band_set in SEN12MS.BAND_SETS.keys() - - self.root_dir = root_dir - self.seed = seed - self.band_set = band_set - self.band_indices = SEN12MS.BAND_SETS[band_set] - self.batch_size = batch_size - self.num_workers = num_workers - - def custom_transform(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. 
- - Args: - sample: dictionary containing image and mask - - Returns: - preprocessed sample - """ - sample["image"] = sample["image"].float() - - if self.band_set == "all": - sample["image"][:2] = sample["image"][:2].clamp(-25, 0) / -25 - sample["image"][2:] = sample["image"][2:].clamp(0, 10000) / 10000 - elif self.band_set == "s1": - sample["image"][:2] = sample["image"][:2].clamp(-25, 0) / -25 - else: - sample["image"][:] = sample["image"][:].clamp(0, 10000) / 10000 - - sample["mask"] = sample["mask"][0, :, :].long() - sample["mask"] = torch.take( # type: ignore[attr-defined] - self.DFC2020_CLASS_MAPPING, sample["mask"] - ) - - return sample - - def setup(self, stage: Optional[str] = None) -> None: - """Create the train/val/test splits based on the original Dataset objects. - - The splits should be done here vs. in :func:`__init__` per the docs: - https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html#setup. - - We split samples between train and val geographically with proportions of 80/20. - This mimics the geographic test set split. - - Args: - stage: stage to set up - """ - season_to_int = {"winter": 0, "spring": 1000, "summer": 2000, "fall": 3000} - - self.all_train_dataset = SEN12MS( - self.root_dir, - split="train", - bands=self.band_indices, - transforms=self.custom_transform, - checksum=False, - ) - - self.all_test_dataset = SEN12MS( - self.root_dir, - split="test", - bands=self.band_indices, - transforms=self.custom_transform, - checksum=False, - ) - - # A patch is a filename like: "ROIs{num}_{season}_s2_{scene_id}_p{patch_id}.tif" - # This patch will belong to the scene that is uniquelly identified by its - # (season, scene_id) tuple. Because the largest scene_id is 149, we can simply - # give each season a large number and representing a `unique_scene_id` as - # `season_id + scene_id`. - scenes = [] - for scene_fn in self.all_train_dataset.ids: - parts = scene_fn.split("_") - season_id = season_to_int[parts[1]] - scene_id = int(parts[3]) - scenes.append(season_id + scene_id) - - train_indices, val_indices = next( - GroupShuffleSplit(test_size=0.2, n_splits=2, random_state=self.seed).split( - scenes, groups=scenes - ) - ) - - self.train_dataset = Subset(self.all_train_dataset, train_indices) - self.val_dataset = Subset(self.all_train_dataset, val_indices) - self.test_dataset = Subset( - self.all_test_dataset, range(len(self.all_test_dataset)) - ) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. - - Returns: - training data loader - """ - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. - - Returns: - validation data loader - """ - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. 
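The IGBP to DFC2020 collapse above is a pure table lookup: ``torch.take`` indexes the flattened mapping tensor with every element of the mask and returns a tensor shaped like the mask. Sketch::

    import torch

    DFC2020_CLASS_MAPPING = torch.tensor(
        [0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 6, 8, 9, 10]
    )

    igbp_mask = torch.randint(0, 18, (256, 256))  # simplified IGBP labels
    dfc_mask = torch.take(DFC2020_CLASS_MAPPING, igbp_mask)
    assert dfc_mask.shape == igbp_mask.shape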
- - Returns: - testing data loader - """ - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) diff --git a/torchgeo/datasets/so2sat.py b/torchgeo/datasets/so2sat.py index 606a73e62..aaee0ecd2 100644 --- a/torchgeo/datasets/so2sat.py +++ b/torchgeo/datasets/so2sat.py @@ -4,23 +4,16 @@ """So2Sat dataset.""" import os -from typing import Any, Callable, Dict, Optional, cast +from typing import Callable, Dict, Optional, cast import matplotlib.pyplot as plt import numpy as np -import pytorch_lightning as pl import torch from torch import Tensor -from torch.utils.data import DataLoader -from torchvision.transforms import Compose from .geo import VisionDataset from .utils import check_integrity, percentile_normalization -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" - class So2Sat(VisionDataset): """So2Sat dataset. @@ -250,211 +243,3 @@ class So2Sat(VisionDataset): if suptitle is not None: plt.suptitle(suptitle) return fig - - -class So2SatDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the So2Sat dataset. - - Uses the train/val/test splits from the dataset. - """ - - band_means = torch.tensor( # type: ignore[attr-defined] - [ - -3.591224256609313e-05, - -7.658561276843396e-06, - 5.9373857475971184e-05, - 2.5166231537121083e-05, - 0.04420110659759328, - 0.25761027084996196, - 0.0007556743372573258, - 0.0013503466830024448, - 0.12375696117681859, - 0.1092774636368323, - 0.1010855203267882, - 0.1142398616114001, - 0.1592656692023089, - 0.18147236008771792, - 0.1745740312291377, - 0.19501607349635292, - 0.15428468872076637, - 0.10905050699570007, - ] - ).reshape(18, 1, 1) - - band_stds = torch.tensor( # type: ignore[attr-defined] - [ - 0.17555201137417686, - 0.17556463274968204, - 0.45998793417834255, - 0.455988755730148, - 2.8559909213125763, - 8.324800606439833, - 2.4498757382563103, - 1.4647352984509094, - 0.03958795985905458, - 0.047778262752410296, - 0.06636616706371974, - 0.06358874912497474, - 0.07744387147984592, - 0.09101635085921553, - 0.09218466562387101, - 0.10164581233948201, - 0.09991773043519253, - 0.08780632509122865, - ] - ).reshape(18, 1, 1) - - # this reorders the bands to put S2 RGB first, then remainder of S2, then S1 - reindex_to_rgb_first = [ - 10, - 9, - 8, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - # 0, - # 1, - # 2, - # 3, - # 4, - # 5, - # 6, - # 7, - ] - - def __init__( - self, - root_dir: str, - batch_size: int = 64, - num_workers: int = 0, - bands: str = "rgb", - unsupervised_mode: bool = False, - **kwargs: Any, - ) -> None: - """Initialize a LightningDataModule for So2Sat based DataLoaders. - - Args: - root_dir: The ``root`` arugment to pass to the So2Sat Dataset classes - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - bands: Either "rgb" or "s2" - unsupervised_mode: Makes the train dataloader return imagery from the train, - val, and test sets - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.batch_size = batch_size - self.num_workers = num_workers - self.bands = bands - self.unsupervised_mode = unsupervised_mode - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. 
- - Args: - sample: dictionary containing image - - Returns: - preprocessed sample - """ - # sample["image"] = (sample["image"] - self.band_means) / self.band_stds - sample["image"] = sample["image"].float() - sample["image"] = sample["image"][self.reindex_to_rgb_first, :, :] - - if self.bands == "rgb": - sample["image"] = sample["image"][:3, :, :] - - return sample - - def prepare_data(self) -> None: - """Make sure that the dataset is downloaded. - - This method is only called once per run. - """ - So2Sat(self.root_dir, checksum=False) - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. - - This method is called once per GPU per run. - - Args: - stage: stage to set up - """ - train_transforms = Compose([self.preprocess]) - val_test_transforms = self.preprocess - - if not self.unsupervised_mode: - - self.train_dataset = So2Sat( - self.root_dir, split="train", transforms=train_transforms - ) - - self.val_dataset = So2Sat( - self.root_dir, split="validation", transforms=val_test_transforms - ) - - self.test_dataset = So2Sat( - self.root_dir, split="test", transforms=val_test_transforms - ) - - else: - - temp_train = So2Sat( - self.root_dir, split="train", transforms=train_transforms - ) - - self.val_dataset = So2Sat( - self.root_dir, split="validation", transforms=train_transforms - ) - - self.test_dataset = So2Sat( - self.root_dir, split="test", transforms=train_transforms - ) - - self.train_dataset = cast( - So2Sat, temp_train + self.val_dataset + self.test_dataset - ) - - def train_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for training. - - Returns: - training data loader - """ - return DataLoader( - self.train_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=True, - ) - - def val_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for validation. - - Returns: - validation data loader - """ - return DataLoader( - self.val_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) - - def test_dataloader(self) -> DataLoader[Any]: - """Return a DataLoader for testing. 
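In unsupervised mode the module above trains on the imagery of all three splits; ``Dataset.__add__`` builds a ``ConcatDataset`` under the hood, shown explicitly here::

    from torch.utils.data import ConcatDataset

    # labels are unused in unsupervised pretraining, so every split can
    # contribute imagery to the training loader
    unsup_train = ConcatDataset([train_ds, val_ds, test_ds])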
- - Returns: - testing data loader - """ - return DataLoader( - self.test_dataset, - batch_size=self.batch_size, - num_workers=self.num_workers, - shuffle=False, - ) diff --git a/torchgeo/datasets/spacenet.py b/torchgeo/datasets/spacenet.py index 41cb1f2ff..38ac5c6a1 100644 --- a/torchgeo/datasets/spacenet.py +++ b/torchgeo/datasets/spacenet.py @@ -24,8 +24,8 @@ from rasterio.features import rasterize from rasterio.transform import Affine from torch import Tensor -from torchgeo.datasets.geo import VisionDataset -from torchgeo.datasets.utils import ( +from .geo import VisionDataset +from .utils import ( check_integrity, download_radiant_mlhub_collection, extract_archive, diff --git a/torchgeo/datasets/ucmerced.py b/torchgeo/datasets/ucmerced.py index 431b526b7..21b09e32a 100644 --- a/torchgeo/datasets/ucmerced.py +++ b/torchgeo/datasets/ucmerced.py @@ -3,24 +3,15 @@ """UC Merced dataset.""" import os -from typing import Any, Callable, Dict, Optional, cast +from typing import Callable, Dict, Optional, cast import matplotlib.pyplot as plt import numpy as np -import pytorch_lightning as pl -import torch -import torchvision from torch import Tensor -from torch.utils.data import DataLoader -from torchvision.transforms import Compose, Normalize from .geo import VisionClassificationDataset from .utils import check_integrity, download_url, extract_archive -# https://github.com/pytorch/pytorch/issues/60979 -# https://github.com/pytorch/pytorch/pull/61045 -DataLoader.__module__ = "torch.utils.data" - class UCMerced(VisionClassificationDataset): """UC Merced dataset. @@ -251,110 +242,3 @@ class UCMerced(VisionClassificationDataset): if suptitle is not None: plt.suptitle(suptitle) return fig - - -class UCMercedDataModule(pl.LightningDataModule): - """LightningDataModule implementation for the UC Merced dataset. - - Uses random train/val/test splits. - """ - - band_means = torch.tensor([0, 0, 0]) # type: ignore[attr-defined] - - band_stds = torch.tensor([1, 1, 1]) # type: ignore[attr-defined] - - def __init__( - self, root_dir: str, batch_size: int = 64, num_workers: int = 0, **kwargs: Any - ) -> None: - """Initialize a LightningDataModule for UCMerced based DataLoaders. - - Args: - root_dir: The ``root`` arugment to pass to the UCMerced Dataset classes - batch_size: The batch size to use in all created DataLoaders - num_workers: The number of workers to use in all created DataLoaders - """ - super().__init__() # type: ignore[no-untyped-call] - self.root_dir = root_dir - self.batch_size = batch_size - self.num_workers = num_workers - - self.norm = Normalize(self.band_means, self.band_stds) - - def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]: - """Transform a single sample from the Dataset. - - Args: - sample: dictionary containing image - - Returns: - preprocessed sample - """ - sample["image"] = sample["image"].float() - sample["image"] /= 255.0 - c, h, w = sample["image"].shape - if h != 256 or w != 256: - sample["image"] = torchvision.transforms.functional.resize( - sample["image"], size=(256, 256) - ) - sample["image"] = self.norm(sample["image"]) - return sample - - def prepare_data(self) -> None: - """Make sure that the dataset is downloaded. - - This method is only called once per run. - """ - UCMerced(self.root_dir, download=False, checksum=False) - - def setup(self, stage: Optional[str] = None) -> None: - """Initialize the main ``Dataset`` objects. - - This method is called once per GPU per run. 
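Not every UC Merced tile is exactly 256x256, hence the shape guard in the preprocessing above; condensed::

    import torchvision.transforms.functional as TF

    def preprocess(sample):
        sample["image"] = sample["image"].float() / 255.0
        _, h, w = sample["image"].shape
        if (h, w) != (256, 256):
            # a few tiles are off-size; bring everything to a common shape
            sample["image"] = TF.resize(sample["image"], size=(256, 256))
        return sample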
diff --git a/torchgeo/datasets/utils.py b/torchgeo/datasets/utils.py
index fceb6201a..9a68be30c 100644
--- a/torchgeo/datasets/utils.py
+++ b/torchgeo/datasets/utils.py
@@ -32,7 +32,6 @@ import numpy as np
 import rasterio
 import torch
 from torch import Tensor
-from torch.utils.data import Dataset, Subset, random_split
 from torchvision.datasets.utils import check_integrity, download_url
 from torchvision.utils import draw_segmentation_masks
 
@@ -48,7 +47,6 @@ __all__ = (
     "concat_samples",
     "merge_samples",
     "rasterio_loader",
-    "dataset_split",
     "sort_sentinel2_bands",
     "draw_semantic_segmentation_masks",
     "rgb_to_mask",
@@ -519,31 +517,6 @@ def rasterio_loader(path: str) -> np.ndarray:  # type: ignore[type-arg]
     return array
 
 
-def dataset_split(
-    dataset: Dataset[Any], val_pct: float, test_pct: Optional[float] = None
-) -> List[Subset[Any]]:
-    """Split a torch Dataset into train/val/test sets.
-
-    If ``test_pct`` is not set then only train and validation splits are returned.
-
-    Args:
-        dataset: dataset to be split into train/val or train/val/test subsets
-        val_pct: percentage of samples to be in validation set
-        test_pct: (Optional) percentage of samples to be in test set
-
-    Returns:
-        a list of the subset datasets. Either [train, val] or [train, val, test]
-    """
-    if test_pct is None:
-        val_length = int(len(dataset) * val_pct)
-        train_length = len(dataset) - val_length
-        return random_split(dataset, [train_length, val_length])
-    else:
-        val_length = int(len(dataset) * val_pct)
-        test_length = int(len(dataset) * test_pct)
-        train_length = len(dataset) - (val_length + test_length)
-        return random_split(dataset, [train_length, val_length, test_length])
-
-
 def sort_sentinel2_bands(x: str) -> str:
     """Sort Sentinel-2 band files in the correct order."""
     x = os.path.basename(x).split("_")[-1]
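dataset_split leaves torchgeo.datasets.utils here, and the Vaihingen and xView2 hunks below drop their imports of it, so it presumably moves alongside the DataModules. For reference, a minimal sketch of the same behavior built directly on torch.utils.data.random_split, not the relocated torchgeo code itself:

```python
# Sketch of the removed dataset_split helper: fractional train/val or
# train/val/test splits on top of torch.utils.data.random_split.
from typing import Any, List, Optional

import torch
from torch.utils.data import Dataset, Subset, TensorDataset, random_split

def dataset_split(
    dataset: Dataset[Any], val_pct: float, test_pct: Optional[float] = None
) -> List[Subset[Any]]:
    val_length = int(len(dataset) * val_pct)  # type: ignore[arg-type]
    if test_pct is None:
        train_length = len(dataset) - val_length  # type: ignore[arg-type]
        return random_split(dataset, [train_length, val_length])
    test_length = int(len(dataset) * test_pct)  # type: ignore[arg-type]
    train_length = len(dataset) - val_length - test_length  # type: ignore[arg-type]
    return random_split(dataset, [train_length, val_length, test_length])

# Train gets whatever remains after the (floored) val/test lengths.
ds = TensorDataset(torch.arange(100))
train, val, test = dataset_split(ds, val_pct=0.2, test_pct=0.1)
assert (len(train), len(val), len(test)) == (70, 20, 10)
```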
diff --git a/torchgeo/datasets/vaihingen.py b/torchgeo/datasets/vaihingen.py
index f95e8e72d..c7bb3e7f4 100644
--- a/torchgeo/datasets/vaihingen.py
+++ b/torchgeo/datasets/vaihingen.py
@@ -4,21 +4,22 @@
 """Vaihingen dataset."""
 
 import os
-from typing import Any, Callable, Dict, Optional
+from typing import Callable, Dict, Optional
 
 import matplotlib.pyplot as plt
 import numpy as np
-import pytorch_lightning as pl
 import torch
 from matplotlib.figure import Figure
 from PIL import Image
 from torch import Tensor
-from torch.utils.data import DataLoader
-from torchvision.transforms import Compose
 
-from ..datasets.utils import dataset_split, draw_semantic_segmentation_masks
 from .geo import VisionDataset
-from .utils import check_integrity, extract_archive, rgb_to_mask
+from .utils import (
+    check_integrity,
+    draw_semantic_segmentation_masks,
+    extract_archive,
+    rgb_to_mask,
+)
 
 
 class Vaihingen2D(VisionDataset):
@@ -293,111 +294,3 @@
             plt.suptitle(suptitle)
 
         return fig
-
-
-class Vaihingen2DDataModule(pl.LightningDataModule):
-    """LightningDataModule implementation for the Vaihingen2D dataset.
-
-    Uses the train/test splits from the dataset.
-
-    .. versionadded: 0.2
-    """
-
-    def __init__(
-        self,
-        root_dir: str,
-        batch_size: int = 64,
-        num_workers: int = 0,
-        val_split_pct: float = 0.2,
-        **kwargs: Any,
-    ) -> None:
-        """Initialize a LightningDataModule for Vaihingen2D based DataLoaders.
-
-        Args:
-            root_dir: The ``root`` argument to pass to the Vaihingen Dataset classes
-            batch_size: The batch size to use in all created DataLoaders
-            num_workers: The number of workers to use in all created DataLoaders
-            val_split_pct: What percentage of the dataset to use as a validation set
-        """
-        super().__init__()  # type: ignore[no-untyped-call]
-        self.root_dir = root_dir
-        self.batch_size = batch_size
-        self.num_workers = num_workers
-        self.val_split_pct = val_split_pct
-
-    def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]:
-        """Transform a single sample from the Dataset.
-
-        Args:
-            sample: input image dictionary
-
-        Returns:
-            preprocessed sample
-        """
-        sample["image"] = sample["image"].float()
-        sample["image"] /= 255.0
-        return sample
-
-    def setup(self, stage: Optional[str] = None) -> None:
-        """Initialize the main ``Dataset`` objects.
-
-        This method is called once per GPU per run.
-
-        Args:
-            stage: stage to set up
-        """
-        transforms = Compose([self.preprocess])
-
-        dataset = Vaihingen2D(self.root_dir, "train", transforms=transforms)
-
-        if self.val_split_pct > 0.0:
-            self.train_dataset, self.val_dataset, _ = dataset_split(
-                dataset, val_pct=self.val_split_pct, test_pct=0.0
-            )
-        else:
-            self.train_dataset = dataset  # type: ignore[assignment]
-            self.val_dataset = None  # type: ignore[assignment]
-
-        self.test_dataset = Vaihingen2D(self.root_dir, "test", transforms=transforms)
-
-    def train_dataloader(self) -> DataLoader[Any]:
-        """Return a DataLoader for training.
-
-        Returns:
-            training data loader
-        """
-        return DataLoader(
-            self.train_dataset,
-            batch_size=self.batch_size,
-            num_workers=self.num_workers,
-            shuffle=True,
-        )
-
-    def val_dataloader(self) -> DataLoader[Any]:
-        """Return a DataLoader for validation.
-
-        Returns:
-            validation data loader
-        """
-        if self.val_split_pct == 0.0:
-            return self.train_dataloader()
-        else:
-            return DataLoader(
-                self.val_dataset,
-                batch_size=self.batch_size,
-                num_workers=self.num_workers,
-                shuffle=False,
-            )
-
-    def test_dataloader(self) -> DataLoader[Any]:
-        """Return a DataLoader for testing.
-
-        Returns:
-            testing data loader
-        """
-        return DataLoader(
-            self.test_dataset,
-            batch_size=self.batch_size,
-            num_workers=self.num_workers,
-            shuffle=False,
-        )
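One subtlety in the removed setup() above: it calls dataset_split with test_pct=0.0 rather than None, so the three-way branch of the helper runs and a zero-length test subset is produced and discarded via the underscore. A toy sketch of that arithmetic:

```python
# Illustrates setup()'s split with val_split_pct=0.2 and test_pct=0.0:
# the third subset is empty and thrown away.
import torch
from torch.utils.data import TensorDataset, random_split

dataset = TensorDataset(torch.arange(10))
val_split_pct = 0.2
val_length = int(len(dataset) * val_split_pct)              # 2
test_length = int(len(dataset) * 0.0)                       # 0
train_length = len(dataset) - (val_length + test_length)    # 8

train, val, unused_test = random_split(
    dataset, [train_length, val_length, test_length]
)
assert (len(train), len(val), len(unused_test)) == (8, 2, 0)
```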
diff --git a/torchgeo/datasets/xview.py b/torchgeo/datasets/xview.py
index c4e7774e0..b7ff4d460 100644
--- a/torchgeo/datasets/xview.py
+++ b/torchgeo/datasets/xview.py
@@ -5,20 +5,16 @@
 
 import glob
 import os
-from typing import Any, Callable, Dict, List, Optional
+from typing import Callable, Dict, List, Optional
 
 import matplotlib.pyplot as plt
 import numpy as np
-import pytorch_lightning as pl
 import torch
 from PIL import Image
 from torch import Tensor
-from torch.utils.data import DataLoader
-from torchvision.transforms import Compose
 
-from ..datasets.utils import dataset_split, draw_semantic_segmentation_masks
 from .geo import VisionDataset
-from .utils import check_integrity, extract_archive
+from .utils import check_integrity, draw_semantic_segmentation_masks, extract_archive
 
 
 class XView2(VisionDataset):
@@ -282,111 +278,3 @@
             plt.suptitle(suptitle)
 
         return fig
-
-
-class XView2DataModule(pl.LightningDataModule):
-    """LightningDataModule implementation for the xView2 dataset.
-
-    Uses the train/val/test splits from the dataset.
-
-    .. versionadded: 0.2
-    """
-
-    def __init__(
-        self,
-        root_dir: str,
-        batch_size: int = 64,
-        num_workers: int = 0,
-        val_split_pct: float = 0.2,
-        **kwargs: Any,
-    ) -> None:
-        """Initialize a LightningDataModule for xView2 based DataLoaders.
-
-        Args:
-            root_dir: The ``root`` arugment to pass to the xView2 Dataset classes
-            batch_size: The batch size to use in all created DataLoaders
-            num_workers: The number of workers to use in all created DataLoaders
-            val_split_pct: What percentage of the dataset to use as a validation set
-        """
-        super().__init__()  # type: ignore[no-untyped-call]
-        self.root_dir = root_dir
-        self.batch_size = batch_size
-        self.num_workers = num_workers
-        self.val_split_pct = val_split_pct
-
-    def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]:
-        """Transform a single sample from the Dataset.
-
-        Args:
-            sample: input image dictionary
-
-        Returns:
-            preprocessed sample
-        """
-        sample["image"] = sample["image"].float()
-        sample["image"] /= 255.0
-        return sample
-
-    def setup(self, stage: Optional[str] = None) -> None:
-        """Initialize the main ``Dataset`` objects.
-
-        This method is called once per GPU per run.
-
-        Args:
-            stage: stage to set up
-        """
-        transforms = Compose([self.preprocess])
-
-        dataset = XView2(self.root_dir, "train", transforms=transforms)
-
-        if self.val_split_pct > 0.0:
-            self.train_dataset, self.val_dataset, _ = dataset_split(
-                dataset, val_pct=self.val_split_pct, test_pct=0.0
-            )
-        else:
-            self.train_dataset = dataset  # type: ignore[assignment]
-            self.val_dataset = None  # type: ignore[assignment]
-
-        self.test_dataset = XView2(self.root_dir, "test", transforms=transforms)
-
-    def train_dataloader(self) -> DataLoader[Any]:
-        """Return a DataLoader for training.
-
-        Returns:
-            training data loader
-        """
-        return DataLoader(
-            self.train_dataset,
-            batch_size=self.batch_size,
-            num_workers=self.num_workers,
-            shuffle=True,
-        )
-
-    def val_dataloader(self) -> DataLoader[Any]:
-        """Return a DataLoader for validation.
-
-        Returns:
-            validation data loader
-        """
-        if self.val_split_pct == 0.0:
-            return self.train_dataloader()
-        else:
-            return DataLoader(
-                self.val_dataset,
-                batch_size=self.batch_size,
-                num_workers=self.num_workers,
-                shuffle=False,
-            )
-
-    def test_dataloader(self) -> DataLoader[Any]:
-        """Return a DataLoader for testing.
-
-        Returns:
-            testing data loader
-        """
-        return DataLoader(
-            self.test_dataset,
-            batch_size=self.batch_size,
-            num_workers=self.num_workers,
-            shuffle=False,
-        )
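The removed Vaihingen2D and xView2 DataModules share a fallback worth keeping in mind: with val_split_pct=0.0, val_dataloader() returns the shuffled training loader, so validation metrics are computed on training data. A minimal, hypothetical illustration of the pattern; names here are mine, not torchgeo's:

```python
# Toy reproduction of the val_split_pct == 0.0 fallback: validation
# silently reuses the training loader.
import torch
from torch.utils.data import DataLoader, TensorDataset

class TinyDataModule:
    def __init__(self, val_split_pct: float = 0.0) -> None:
        self.val_split_pct = val_split_pct
        self.train_dataset = TensorDataset(torch.arange(8))

    def train_dataloader(self) -> DataLoader:
        return DataLoader(self.train_dataset, batch_size=4, shuffle=True)

    def val_dataloader(self) -> DataLoader:
        if self.val_split_pct == 0.0:
            # Same data, and still shuffled, since it is the train loader.
            return self.train_dataloader()
        raise NotImplementedError("split path omitted in this sketch")

dm = TinyDataModule()
assert dm.val_dataloader().dataset is dm.train_dataset
```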
diff --git a/torchgeo/samplers/batch.py b/torchgeo/samplers/batch.py
index 0dcd1a85a..3dc5dcc4b 100644
--- a/torchgeo/samplers/batch.py
+++ b/torchgeo/samplers/batch.py
@@ -10,9 +10,7 @@ from typing import Iterator, List, Optional, Tuple, Union
 from rtree.index import Index, Property
 from torch.utils.data import Sampler
 
-from torchgeo.datasets.geo import GeoDataset
-from torchgeo.datasets.utils import BoundingBox
-
+from ..datasets import BoundingBox, GeoDataset
 from .utils import _to_tuple, get_random_bounding_box
 
 # https://github.com/pytorch/pytorch/issues/60979
diff --git a/torchgeo/samplers/single.py b/torchgeo/samplers/single.py
index 1804d9a2d..d507f698e 100644
--- a/torchgeo/samplers/single.py
+++ b/torchgeo/samplers/single.py
@@ -10,9 +10,7 @@ from typing import Iterator, Optional, Tuple, Union
 from rtree.index import Index, Property
 from torch.utils.data import Sampler
 
-from torchgeo.datasets.geo import GeoDataset
-from torchgeo.datasets.utils import BoundingBox
-
+from ..datasets import BoundingBox, GeoDataset
 from .utils import _to_tuple, get_random_bounding_box
 
 # https://github.com/pytorch/pytorch/issues/60979
diff --git a/torchgeo/samplers/utils.py b/torchgeo/samplers/utils.py
index b8aecd85a..265859eeb 100644
--- a/torchgeo/samplers/utils.py
+++ b/torchgeo/samplers/utils.py
@@ -6,7 +6,7 @@
 import random
 from typing import Tuple, Union
 
-from torchgeo.datasets.utils import BoundingBox
+from ..datasets import BoundingBox
 
 
 def _to_tuple(value: Union[Tuple[float, float], float]) -> Tuple[float, float]:
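The sampler modules now reach BoundingBox and GeoDataset through the package-level relative import ..datasets instead of the internal geo and utils modules, so they depend only on the public API of torchgeo.datasets. User code addresses the same names absolutely; a sketch, with the sampler call left commented because it needs a concrete GeoDataset instance:

```python
# Sketch of addressing the same public names from user code. BoundingBox
# is a tuple-like index over a spatiotemporal extent; field names below
# match the torchgeo.datasets.BoundingBox fields.
from torchgeo.datasets import BoundingBox
from torchgeo.samplers import RandomGeoSampler

bbox = BoundingBox(minx=0.0, maxx=10.0, miny=0.0, maxy=10.0, mint=0.0, maxt=1.0)
print(bbox.minx, bbox.maxx)

# With a concrete GeoDataset (e.g. a raster dataset), one would sample
# random patches from it; signature assumed from the samplers package:
# sampler = RandomGeoSampler(dataset, size=256, length=100)
```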