зеркало из https://github.com/microsoft/torchgeo.git
SSL4EO: Add script to delete excess files (#1425)
* SSL4EO: Add script to delete excess files * Placate black
This commit is contained in:
Родитель
b4758a732a
Коммит
6c09cace3a
|
@ -0,0 +1,30 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from tqdm import tqdm
|
||||
from tqdm.contrib.concurrent import thread_map
|
||||
|
||||
def find_excess(root: str, length: int) -> list:
    """Return the entries of *root* that fall beyond the first *length*.

    Entries are the non-hidden names matched by ``<root>/*``, ordered by
    ``sorted`` so the selection is deterministic across runs.

    Args:
        root: directory to search for scenes
        length: number of scenes to keep (must be >= 0)

    Returns:
        sorted paths of every entry after the first *length*; empty if the
        directory holds *length* entries or fewer

    Raises:
        ValueError: if *length* is negative
    """
    # A negative value would make the slice below select the *last* |length|
    # entries instead of "everything past the first N" -- refuse it outright.
    if length < 0:
        raise ValueError(f"length must be non-negative, got {length}")

    # Escape the root so glob metacharacters in the directory name itself
    # ('[', '*', '?') are matched literally.
    pattern = os.path.join(glob.escape(root), "*")
    return sorted(glob.glob(pattern))[length:]


def delete_paths(paths: list, num_workers: int) -> None:
    """Recursively delete every path in *paths* with ``shutil.rmtree``.

    Args:
        paths: directories to remove
        num_workers: number of threads; values <= 0 delete sequentially
    """
    if num_workers > 0:
        thread_map(shutil.rmtree, paths, max_workers=num_workers)
    else:
        for path in tqdm(paths):
            shutil.rmtree(path)


def main(argv=None) -> None:
    """Parse command-line arguments and delete the excess scenes.

    Args:
        argv: argument list to parse; defaults to ``sys.argv[1:]``
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("root", help="directory to search for scenes")
    parser.add_argument("--num-workers", type=int, default=10, help="number of threads")
    parser.add_argument(
        "--length", type=int, default=250000, help="number of scenes to keep"
    )
    args = parser.parse_args(argv)

    # Fail with a usage message before touching the filesystem.
    if args.length < 0:
        parser.error("--length must be non-negative")

    delete_paths(find_excess(args.root, args.length), args.num_workers)


if __name__ == "__main__":
    main()
|
|
@ -52,6 +52,12 @@ For each TOA and SR product, we want to create a parallel corpus. This can be do
|
|||
$ bash delete_mismatch.sh
|
||||
```
|
||||
|
||||
To chop this down to 250K locations, you can then run:
|
||||
|
||||
```console
|
||||
$ bash delete_excess.sh
|
||||
```
|
||||
|
||||
You may want to modify `ROOT_DIR` (and, if needed, `NUM_WORKERS` and `LENGTH`) at the top of `delete_excess.sh` before running it.
|
||||
|
||||
## Compression
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline.
set -euo pipefail

# User-specific parameters
ROOT_DIR=data
L5_L1="$ROOT_DIR/ssl4eo_l_tm_toa/imgs"
L7_L1="$ROOT_DIR/ssl4eo_l_etm_toa/imgs"
L7_L2="$ROOT_DIR/ssl4eo_l_etm_sr/imgs"
L8_L1="$ROOT_DIR/ssl4eo_l_oli_tirs_toa/imgs"
L8_L2="$ROOT_DIR/ssl4eo_l_oli_sr/imgs"
NUM_WORKERS=10
LENGTH=250000

# Generic parameters
# Directory that holds delete_excess.py: dirname applied twice to this
# script's path. Every substitution is quoted so paths containing spaces
# survive word splitting.
# NOTE(review): dirname of a bare filename is ".", and dirname of "." is
# still ".", so when invoked as `bash delete_excess.sh` this resolves to the
# current directory rather than the script's parent -- confirm that matches
# where delete_excess.py actually lives.
SCRIPT_DIR=$(cd "$(dirname "$(dirname "${BASH_SOURCE[0]}")")" && pwd)

# Trim each product directory down to the first $LENGTH scenes, timing every
# run separately.
for imgs_dir in "$L5_L1" "$L7_L1" "$L7_L2" "$L8_L1" "$L8_L2"; do
    time python3 "$SCRIPT_DIR/delete_excess.py" "$imgs_dir" --num-workers "$NUM_WORKERS" --length "$LENGTH"
done
|
Загрузка…
Ссылка в новой задаче