зеркало из https://github.com/microsoft/torchgeo.git
extract_archive: support deflate64-compressed zip files (#282)
This commit is contained in:
Родитель
b7d35aab64
Коммит
c9520aa3f1
|
@ -9,7 +9,7 @@ experiment:
|
|||
learning_rate: 1e-3
|
||||
learning_rate_schedule_patience: 2
|
||||
in_channels: 4
|
||||
num_classes: 13
|
||||
num_classes: 14
|
||||
num_filters: 1
|
||||
ignore_zeros: False
|
||||
datamodule:
|
||||
|
|
|
@ -46,3 +46,4 @@ dependencies:
|
|||
- sphinx>=4
|
||||
- timm>=0.2.1
|
||||
- torchmetrics
|
||||
- zipfile-deflate64>=0.2
|
||||
|
|
|
@ -88,6 +88,9 @@ datasets =
|
|||
rarfile>=3
|
||||
# scipy 0.9+ required for scipy.io.wavfile.read
|
||||
scipy>=0.9
|
||||
# zipfile-deflate64 0.2+ required for extraction bugfix:
|
||||
# https://github.com/brianhelba/zipfile-deflate64/issues/19
|
||||
zipfile-deflate64>=0.2
|
||||
# Optional developer requirements
|
||||
style =
|
||||
# black 21+ required for Python 3.9 support
|
||||
|
|
Двоичные данные
tests/data/chesapeake/BAYWIDE/Baywide_13Class_20132014.tif
Двоичные данные
tests/data/chesapeake/BAYWIDE/Baywide_13Class_20132014.tif
Двоичный файл не отображается.
Двоичные данные
tests/data/chesapeake/BAYWIDE/Baywide_13Class_20132014.zip
Двоичные данные
tests/data/chesapeake/BAYWIDE/Baywide_13Class_20132014.zip
Двоичный файл не отображается.
|
@ -0,0 +1,95 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
import numpy as np
|
||||
import rasterio
|
||||
from rasterio.crs import CRS
|
||||
from rasterio.transform import Affine
|
||||
|
||||
# Script body: writes a small random single-band land-cover GeoTIFF, packs it
# into a DEFLATE64-compressed zip with 7z, and prints the MD5 of that zip.

SIZE = 128  # image width/height
NUM_CLASSES = 14

# Fixed seed so the generated pixel values are the same on every run
np.random.seed(0)

filename = "Baywide_13Class_20132014"
wkt = """
PROJCS["USA_Contiguous_Albers_Equal_Area_Conic_USGS_version",
GEOGCS["NAD83",
DATUM["North_American_Datum_1983",
SPHEROID["GRS 1980",6378137,298.257222101004,
AUTHORITY["EPSG","7019"]],
AUTHORITY["EPSG","6269"]],
PRIMEM["Greenwich",0],
UNIT["degree",0.0174532925199433,
AUTHORITY["EPSG","9122"]],
AUTHORITY["EPSG","4269"]],
PROJECTION["Albers_Conic_Equal_Area"],
PARAMETER["latitude_of_center",23],
PARAMETER["longitude_of_center",-96],
PARAMETER["standard_parallel_1",29.5],
PARAMETER["standard_parallel_2",45.5],
PARAMETER["false_easting",0],
PARAMETER["false_northing",0],
UNIT["metre",1,
AUTHORITY["EPSG","9001"]],
AXIS["Easting",EAST],
AXIS["Northing",NORTH]]
"""

# Colormap written into band 1: one RGBA tuple per class index (0..13)
cmap = {
    0: (0, 0, 0, 255),
    1: (0, 197, 255, 255),
    2: (0, 168, 132, 255),
    3: (38, 115, 0, 255),
    4: (76, 230, 0, 255),
    5: (163, 255, 115, 255),
    6: (255, 170, 0, 255),
    7: (255, 0, 0, 255),
    8: (156, 156, 156, 255),
    9: (0, 0, 0, 255),
    10: (115, 115, 0, 255),
    11: (230, 230, 0, 255),
    12: (255, 255, 115, 255),
    13: (197, 0, 255, 255),
}

# Keyword arguments forwarded to rasterio.open when creating the raster
meta = {
    "driver": "GTiff",
    "dtype": "uint8",
    "nodata": None,
    "width": SIZE,
    "height": SIZE,
    "count": 1,
    "crs": CRS.from_wkt(wkt),
    "transform": Affine(1.0, 0.0, 1303555.0000000005, 0.0, -1.0, 2535064.999999998),
}

# Remove old data
# The zip is removed as well: "7z a" updates an archive that already exists
# instead of creating a new one, so a stale zip would otherwise be modified
# in place rather than regenerated from scratch.
for stale in (f"{filename}.tif", f"{filename}.zip"):
    if os.path.exists(stale):
        os.remove(stale)

# Create raster file
with rasterio.open(f"{filename}.tif", "w", **meta) as f:
    # Random class indices in [0, NUM_CLASSES)
    data = np.random.randint(NUM_CLASSES, size=(SIZE, SIZE), dtype=np.uint8)
    f.write(data, 1)
    f.write_colormap(1, cmap)

# Create zip file
# 7z required to create a zip file using the proprietary DEFLATE64 compression algorithm
# https://github.com/brianhelba/zipfile-deflate64/issues/19#issuecomment-1006077294
subprocess.run(
    ["7z", "a", f"{filename}.zip", "-mm=DEFLATE64", f"{filename}.tif"],
    capture_output=True,
    check=True,  # raise CalledProcessError if 7z exits with a non-zero status
)

# Compute checksums
with open(f"{filename}.zip", "rb") as f:
    md5 = hashlib.md5(f.read()).hexdigest()
    print(repr(md5))
|
|
@ -33,10 +33,11 @@ class TestChesapeake13:
|
|||
def dataset(
|
||||
self, monkeypatch: Generator[MonkeyPatch, None, None], tmp_path: Path
|
||||
) -> Chesapeake13:
|
||||
pytest.importorskip("zipfile_deflate64")
|
||||
monkeypatch.setattr( # type: ignore[attr-defined]
|
||||
torchgeo.datasets.chesapeake, "download_url", download_url
|
||||
)
|
||||
md5 = "9557b609e614a1f79dec6eb1bb3f3a06"
|
||||
md5 = "fe35a615b8e749b21270472aa98bb42c"
|
||||
monkeypatch.setattr(Chesapeake13, "md5", md5) # type: ignore[attr-defined]
|
||||
url = os.path.join(
|
||||
"tests", "data", "chesapeake", "BAYWIDE", "Baywide_13Class_20132014.zip"
|
||||
|
|
|
@ -41,7 +41,7 @@ def mock_missing_module(monkeypatch: Generator[MonkeyPatch, None, None]) -> None
|
|||
import_orig = builtins.__import__
|
||||
|
||||
def mocked_import(name: str, *args: Any, **kwargs: Any) -> Any:
|
||||
if name in ["rarfile", "radiant_mlhub"]:
|
||||
if name in ["radiant_mlhub", "rarfile", "zipfile_deflate64"]:
|
||||
raise ImportError()
|
||||
return import_orig(name, *args, **kwargs)
|
||||
|
||||
|
@ -93,11 +93,15 @@ def test_mock_missing_module(mock_missing_module: None) -> None:
|
|||
os.path.join("cowc_detection", "COWC_test_list_detection.txt.bz2"),
|
||||
os.path.join("vhr10", "NWPU VHR-10 dataset.rar"),
|
||||
os.path.join("landcoverai", "landcover.ai.v1.zip"),
|
||||
os.path.join("chesapeake", "BAYWIDE", "Baywide_13Class_20132014.zip"),
|
||||
os.path.join("sen12ms", "ROIs1158_spring_lc.tar.gz"),
|
||||
],
|
||||
)
|
||||
def test_extract_archive(src: str, tmp_path: Path) -> None:
|
||||
if src.endswith(".rar"):
|
||||
pytest.importorskip("rarfile", minversion="3")
|
||||
if src.startswith("chesapeake"):
|
||||
pytest.importorskip("zipfile_deflate64")
|
||||
extract_archive(os.path.join("tests", "data", src), str(tmp_path))
|
||||
|
||||
|
||||
|
@ -111,6 +115,11 @@ def test_missing_rarfile(mock_missing_module: None) -> None:
|
|||
)
|
||||
|
||||
|
||||
def test_missing_zipfile_deflate64(mock_missing_module: None) -> None:
|
||||
# Should fall back on Python builtin zipfile
|
||||
extract_archive(os.path.join("tests", "data", "landcoverai", "landcover.ai.v1.zip"))
|
||||
|
||||
|
||||
def test_unsupported_scheme() -> None:
|
||||
with pytest.raises(
|
||||
RuntimeError, match="src file has unknown archival/compression scheme"
|
||||
|
|
|
@ -50,6 +50,9 @@ class TestSemanticSegmentationTask:
|
|||
name: str,
|
||||
classname: Type[LightningDataModule],
|
||||
) -> None:
|
||||
if name == "naipchesapeake":
|
||||
pytest.importorskip("zipfile_deflate64")
|
||||
|
||||
conf = OmegaConf.load(os.path.join("conf", "task_defaults", name + ".yaml"))
|
||||
conf_dict = OmegaConf.to_object(conf.experiment)
|
||||
conf_dict = cast(Dict[Any, Dict[Any, Any]], conf_dict)
|
||||
|
|
|
@ -233,7 +233,15 @@ class ChesapeakeDE(Chesapeake):
|
|||
|
||||
|
||||
class ChesapeakeMD(Chesapeake):
|
||||
"""This subset of the dataset contains data only for Maryland."""
|
||||
"""This subset of the dataset contains data only for Maryland.
|
||||
|
||||
.. note::
|
||||
|
||||
This dataset requires the following additional library to be installed:
|
||||
|
||||
* `zipfile-deflate64 <https://pypi.org/project/zipfile-deflate64/>`_ to extract
|
||||
the proprietary deflate64 compressed zip file.
|
||||
"""
|
||||
|
||||
base_folder = "MD"
|
||||
filename = "MD_STATEWIDE.tif"
|
||||
|
@ -242,7 +250,15 @@ class ChesapeakeMD(Chesapeake):
|
|||
|
||||
|
||||
class ChesapeakeNY(Chesapeake):
|
||||
"""This subset of the dataset contains data only for New York."""
|
||||
"""This subset of the dataset contains data only for New York.
|
||||
|
||||
.. note::
|
||||
|
||||
This dataset requires the following additional library to be installed:
|
||||
|
||||
* `zipfile-deflate64 <https://pypi.org/project/zipfile-deflate64/>`_ to extract
|
||||
the proprietary deflate64 compressed zip file.
|
||||
"""
|
||||
|
||||
base_folder = "NY"
|
||||
filename = "NY_STATEWIDE.tif"
|
||||
|
@ -260,7 +276,15 @@ class ChesapeakePA(Chesapeake):
|
|||
|
||||
|
||||
class ChesapeakeVA(Chesapeake):
|
||||
"""This subset of the dataset contains data only for Virginia."""
|
||||
"""This subset of the dataset contains data only for Virginia.
|
||||
|
||||
.. note::
|
||||
|
||||
This dataset requires the following additional library to be installed:
|
||||
|
||||
* `zipfile-deflate64 <https://pypi.org/project/zipfile-deflate64/>`_ to extract
|
||||
the proprietary deflate64 compressed zip file.
|
||||
"""
|
||||
|
||||
base_folder = "VA"
|
||||
filename = "CIC2014_VA_STATEWIDE.tif"
|
||||
|
|
|
@ -11,7 +11,6 @@ import lzma
|
|||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
import zipfile
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from typing import (
|
||||
|
@ -77,6 +76,27 @@ class _rarfile:
|
|||
pass
|
||||
|
||||
|
||||
class _zipfile:
    """Stand-in for the ``zipfile`` module that picks its backend lazily."""

    class ZipFile:
        """Context manager that opens a zip archive with the best available backend.

        ``zipfile_deflate64`` is used when it can be imported, otherwise the
        builtin ``zipfile`` module is used. The import happens when the context
        is entered rather than at module import time, so the optional
        dependency is only looked up when an archive is actually opened.

        Args:
            *args: positional arguments forwarded to ``ZipFile``
            **kwargs: keyword arguments forwarded to ``ZipFile``
        """

        def __init__(self, *args: Any, **kwargs: Any) -> None:
            self.args = args
            self.kwargs = kwargs
            # Archive opened by __enter__; kept so __exit__ can close it
            self._archive: Any = None

        def __enter__(self) -> Any:
            """Open the archive and return the underlying ``ZipFile`` object."""
            try:
                # Supports normal zip files, proprietary deflate64 compression algorithm
                import zipfile_deflate64 as zipfile
            except ImportError:
                # Only supports normal zip files
                # https://github.com/python/mypy/issues/1153
                import zipfile  # type: ignore[no-redef]

            self._archive = zipfile.ZipFile(*self.args, **self.kwargs)
            return self._archive

        def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
            """Close the archive opened by ``__enter__``.

            Returns ``None``, so an exception raised inside the ``with`` body
            is not suppressed.
            """
            archive, self._archive = self._archive, None
            if archive is not None:
                archive.close()
|
||||
|
||||
|
||||
def extract_archive(src: str, dst: Optional[str] = None) -> None:
|
||||
"""Extract an archive.
|
||||
|
||||
|
@ -96,7 +116,7 @@ def extract_archive(src: str, dst: Optional[str] = None) -> None:
|
|||
(".tar", ".tar.gz", ".tar.bz2", ".tar.xz", ".tgz", ".tbz2", ".tbz", ".txz"),
|
||||
tarfile.open,
|
||||
),
|
||||
(".zip", zipfile.ZipFile),
|
||||
(".zip", _zipfile.ZipFile),
|
||||
]
|
||||
|
||||
for suffix, extractor in suffix_and_extractor:
|
||||
|
|
Загрузка…
Ссылка в новой задаче