зеркало из https://github.com/microsoft/torchgeo.git
Add script to compress dataset files (#1326)
* Add script to compress dataset files * Fix type annotation * Add script for L5 L1
This commit is contained in:
Родитель
dddd723d8d
Коммит
777b68376f
|
@ -0,0 +1,68 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import rasterio as rio
|
||||
from tqdm import tqdm
|
||||
from tqdm.contrib.concurrent import thread_map
|
||||
|
||||
if __name__ == "__main__":
    # Command-line interface.
    parser = argparse.ArgumentParser()
    # Can be same directory for in-place compression
    parser.add_argument("src_dir", help="directory to recursively search for files")
    parser.add_argument("dst_dir", help="directory to save compressed files in")
    parser.add_argument("--suffix", default=".tif", help="file suffix")
    # Could be min/max, 2%/98%, mean ± 2 * std, etc.
    parser.add_argument(
        "--min", nargs="+", type=float, required=True, help="minimum range"
    )
    parser.add_argument(
        "--max", nargs="+", type=float, required=True, help="maximum range"
    )
    parser.add_argument("--num-workers", type=int, default=10, help="number of threads")
    args = parser.parse_args()

    # Reshape the limits to (N, 1, 1) so they broadcast against the 3-D array
    # returned by ``src.read()``; a single value is applied to every band.
    args.min = np.array(args.min)[:, np.newaxis, np.newaxis]
    args.max = np.array(args.max)[:, np.newaxis, np.newaxis]

    def compress(src_path: str) -> None:
        """Rescale, convert to uint8, and compress an image.

        The image is linearly rescaled so that ``--min`` maps to 0 and
        ``--max`` maps to 255 (values outside that range are clipped), then
        written with LZW compression to the same relative location under
        ``dst_dir``.

        Args:
            src_path: Path to an image file located under ``src_dir``.
        """
        # Mirror the layout of src_dir under dst_dir. A plain str.replace()
        # would rewrite every occurrence of src_dir inside the path and drops
        # the separator when src_dir is given with a trailing slash.
        rel_path = os.path.relpath(src_path, args.src_dir)
        dst_path = os.path.join(args.dst_dir, rel_path)
        dst_dir = os.path.dirname(dst_path)
        if dst_dir:
            os.makedirs(dst_dir, exist_ok=True)

        # Read everything we need and close the source before writing, so
        # that in-place compression (src_dir == dst_dir) never writes to a
        # file that is still open for reading.
        with rio.open(src_path, "r") as src:
            x = src.read()
            profile = src.profile

        # min-max -> 0-1
        x = (x - args.min) / (args.max - args.min)

        # 0-1 -> 0-255
        # Scale by 255, not 2**8: a clipped value of 256 does not fit in
        # uint8 and would wrap around to 0, turning saturated pixels black.
        x = np.clip(x * 255, 0, 255).astype(np.uint8)

        profile["dtype"] = "uint8"
        profile["compress"] = "lzw"
        # Horizontal differencing predictor for the LZW codec.
        profile["predictor"] = 2
        with rio.open(dst_path, "w", **profile) as dst:
            # ``x`` is (bands, rows, cols); write all bands in one call.
            dst.write(x)

    # "**" together with recursive=True matches src_dir itself and every
    # subdirectory below it.
    paths = glob.glob(
        os.path.join(args.src_dir, "**", f"*{args.suffix}"), recursive=True
    )

    if args.num_workers > 0:
        # Process files concurrently in a thread pool with a progress bar.
        thread_map(compress, paths, max_workers=args.num_workers)
    else:
        # Sequential fallback when --num-workers is 0 or negative.
        for path in tqdm(paths):
            compress(path)
|
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/env bash

# Rescale and compress the images under SRC_DIR into DST_DIR by calling
# compress_dataset.py, which must live in the same directory as this script.

set -euo pipefail

# User-specific parameters
ROOT_DIR=data
SRC_DIR="$ROOT_DIR/ssl4eo-l5-l1"
DST_DIR="$ROOT_DIR/ssl4eo-l5-l1-v2"
NUM_WORKERS=40

# Satellite-specific parameters
# https://www.usgs.gov/faqs/how-do-i-use-scale-factor-landsat-level-2-science-products
# https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LT05_C02_T1_TOA
# Range used for the non-thermal bands (presumably TOA reflectance).
R_MIN=0
R_MAX=0.4

# https://earthobservatory.nasa.gov/global-maps/MOD_LSTD_M
# Range used for the thermal band: 273.15 - 25 to 273.15 + 45
# (presumably kelvin, i.e. -25 °C to +45 °C).
T_MIN=$(echo "273.15 - 25" | bc -l)
T_MAX=$(echo "273.15 + 45" | bc -l)

# One limit per band; the order presumably follows the band order of the images.
MIN=("$R_MIN" "$R_MIN" "$R_MIN" "$R_MIN" "$R_MIN" "$T_MIN" "$R_MIN")
MAX=("$R_MAX" "$R_MAX" "$R_MAX" "$R_MAX" "$R_MAX" "$T_MAX" "$R_MAX")

# Generic parameters
# Quote the inner substitution so a script path containing spaces still works.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

time python3 "$SCRIPT_DIR/compress_dataset.py" \
    "$SRC_DIR" \
    "$DST_DIR" \
    --min "${MIN[@]}" \
    --max "${MAX[@]}" \
    --num-workers "$NUM_WORKERS"
|
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/env bash

# Rescale and compress the images under SRC_DIR into DST_DIR by calling
# compress_dataset.py, which must live in the same directory as this script.

set -euo pipefail

# User-specific parameters
ROOT_DIR=data
SRC_DIR="$ROOT_DIR/ssl4eo-l7-l1"
DST_DIR="$ROOT_DIR/ssl4eo-l7-l1-v2"
NUM_WORKERS=40

# Satellite-specific parameters
# https://www.usgs.gov/faqs/how-do-i-use-scale-factor-landsat-level-2-science-products
# https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LE07_C02_T1_TOA
# Range used for the non-thermal bands (presumably TOA reflectance).
R_MIN=0
R_MAX=0.4

# https://earthobservatory.nasa.gov/global-maps/MOD_LSTD_M
# Range used for the thermal bands: 273.15 - 25 to 273.15 + 45
# (presumably kelvin, i.e. -25 °C to +45 °C).
T_MIN=$(echo "273.15 - 25" | bc -l)
T_MAX=$(echo "273.15 + 45" | bc -l)

# One limit per band; the order presumably follows the band order of the images.
MIN=("$R_MIN" "$R_MIN" "$R_MIN" "$R_MIN" "$R_MIN" "$T_MIN" "$T_MIN" "$R_MIN" "$R_MIN")
MAX=("$R_MAX" "$R_MAX" "$R_MAX" "$R_MAX" "$R_MAX" "$T_MAX" "$T_MAX" "$R_MAX" "$R_MAX")

# Generic parameters
# Quote the inner substitution so a script path containing spaces still works.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

time python3 "$SCRIPT_DIR/compress_dataset.py" \
    "$SRC_DIR" \
    "$DST_DIR" \
    --min "${MIN[@]}" \
    --max "${MAX[@]}" \
    --num-workers "$NUM_WORKERS"
|
|
@ -0,0 +1,28 @@
|
|||
#!/usr/bin/env bash

# Rescale and compress the images under SRC_DIR into DST_DIR by calling
# compress_dataset.py, which must live in the same directory as this script.

set -euo pipefail

# User-specific parameters
ROOT_DIR=data
SRC_DIR="$ROOT_DIR/ssl4eo-l7-l2"
DST_DIR="$ROOT_DIR/ssl4eo-l7-l2-v2"
NUM_WORKERS=40

# Satellite-specific parameters
# https://www.usgs.gov/faqs/how-do-i-use-scale-factor-landsat-level-2-science-products
# https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LE07_C02_T1_L2
# (value + 0.2) / 0.0000275 for value = 0 and value = 0.3. This presumably
# inverts the scale factor and offset described at the links above, so the
# limits are expressed in the units stored in the files; verify against the data.
R_MIN=$(echo "(0 + 0.2) / 0.0000275" | bc -l)
R_MAX=$(echo "(0.3 + 0.2) / 0.0000275" | bc -l)

# A single limit is passed, to be applied to all bands.
MIN=("$R_MIN")
MAX=("$R_MAX")

# Generic parameters
# Quote the inner substitution so a script path containing spaces still works.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

time python3 "$SCRIPT_DIR/compress_dataset.py" \
    "$SRC_DIR" \
    "$DST_DIR" \
    --min "${MIN[@]}" \
    --max "${MAX[@]}" \
    --num-workers "$NUM_WORKERS"
|
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/env bash

# Rescale and compress the images under SRC_DIR into DST_DIR by calling
# compress_dataset.py, which must live in the same directory as this script.

set -euo pipefail

# User-specific parameters
ROOT_DIR=data
SRC_DIR="$ROOT_DIR/ssl4eo-l8-l1"
DST_DIR="$ROOT_DIR/ssl4eo-l8-l1-v2"
NUM_WORKERS=40

# Satellite-specific parameters
# https://www.usgs.gov/faqs/how-do-i-use-scale-factor-landsat-level-2-science-products
# https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LC08_C02_T1_TOA
# Range used for the non-thermal bands (presumably TOA reflectance).
R_MIN=0
R_MAX=0.4

# https://earthobservatory.nasa.gov/global-maps/MOD_LSTD_M
# Range used for the thermal bands: 273.15 - 25 to 273.15 + 45
# (presumably kelvin, i.e. -25 °C to +45 °C).
T_MIN=$(echo "273.15 - 25" | bc -l)
T_MAX=$(echo "273.15 + 45" | bc -l)

# One limit per band; the order presumably follows the band order of the images.
MIN=("$R_MIN" "$R_MIN" "$R_MIN" "$R_MIN" "$R_MIN" "$R_MIN" "$R_MIN" "$R_MIN" "$R_MIN" "$T_MIN" "$T_MIN")
MAX=("$R_MAX" "$R_MAX" "$R_MAX" "$R_MAX" "$R_MAX" "$R_MAX" "$R_MAX" "$R_MAX" "$R_MAX" "$T_MAX" "$T_MAX")

# Generic parameters
# Quote the inner substitution so a script path containing spaces still works.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

time python3 "$SCRIPT_DIR/compress_dataset.py" \
    "$SRC_DIR" \
    "$DST_DIR" \
    --min "${MIN[@]}" \
    --max "${MAX[@]}" \
    --num-workers "$NUM_WORKERS"
|
|
@ -0,0 +1,28 @@
|
|||
#!/usr/bin/env bash

# Rescale and compress the images under SRC_DIR into DST_DIR by calling
# compress_dataset.py, which must live in the same directory as this script.

set -euo pipefail

# User-specific parameters
ROOT_DIR=data
SRC_DIR="$ROOT_DIR/ssl4eo-l8-l2"
DST_DIR="$ROOT_DIR/ssl4eo-l8-l2-v2"
NUM_WORKERS=40

# Satellite-specific parameters
# https://www.usgs.gov/faqs/how-do-i-use-scale-factor-landsat-level-2-science-products
# https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LC08_C02_T1_L2
# (value + 0.2) / 0.0000275 for value = 0 and value = 0.3. This presumably
# inverts the scale factor and offset described at the links above, so the
# limits are expressed in the units stored in the files; verify against the data.
R_MIN=$(echo "(0 + 0.2) / 0.0000275" | bc -l)
R_MAX=$(echo "(0.3 + 0.2) / 0.0000275" | bc -l)

# A single limit is passed, to be applied to all bands.
MIN=("$R_MIN")
MAX=("$R_MAX")

# Generic parameters
# Quote the inner substitution so a script path containing spaces still works.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

time python3 "$SCRIPT_DIR/compress_dataset.py" \
    "$SRC_DIR" \
    "$DST_DIR" \
    --min "${MIN[@]}" \
    --max "${MAX[@]}" \
    --num-workers "$NUM_WORKERS"
|
Загрузка…
Ссылка в новой задаче