зеркало из https://github.com/microsoft/torchgeo.git
SSL4EO: Add script to delete excess files (#1425)
* SSL4EO: Add script to delete excess files * Placate black
This commit is contained in:
Родитель
b4758a732a
Коммит
6c09cace3a
|
@ -0,0 +1,30 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
# Licensed under the MIT License.
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
from tqdm.contrib.concurrent import thread_map
|
||||||
|
|
||||||
|
def excess_paths(root: str, length: int):
    """Return the entries of *root* that lie beyond the first *length*.

    Entries are whatever ``glob`` matches for ``root/*`` (so names starting
    with a dot are not considered), sorted so that the same directory contents
    always yield the same selection.

    Args:
        root: directory to search for scenes.
        length: number of scenes to keep; must be non-negative.

    Returns:
        Sorted list of paths that follow the first ``length`` entries. Empty
        if ``root`` holds ``length`` entries or fewer.

    Raises:
        ValueError: if ``length`` is negative. A negative slice start would
            select the *last* ``abs(length)`` entries rather than everything
            past the ones to keep.
    """
    if length < 0:
        raise ValueError(f"--length must be non-negative, got {length}")
    return sorted(glob.glob(os.path.join(root, "*")))[length:]


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("root", help="directory to search for scenes")
    parser.add_argument("--num-workers", type=int, default=10, help="number of threads")
    parser.add_argument(
        "--length", type=int, default=250000, help="number of scenes to keep"
    )
    args = parser.parse_args()

    try:
        paths = excess_paths(args.root, args.length)
    except ValueError as err:
        # Refuse to delete anything when the request is malformed.
        parser.error(str(err))

    if args.num_workers > 0:
        # Remove the excess scenes concurrently with a progress bar.
        thread_map(shutil.rmtree, paths, max_workers=args.num_workers)
    else:
        # Sequential fallback when no worker threads are requested.
        for path in tqdm(paths):
            shutil.rmtree(path)
|
|
@ -52,6 +52,12 @@ For each TOA and SR product, we want to create a parallel corpus. This can be do
|
||||||
$ bash delete_mismatch.sh
|
$ bash delete_mismatch.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
|
To chop this down to 250K locations, you can then run:
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ bash delete_excess.sh
|
||||||
|
```
|
||||||
|
|
||||||
You may want to modify `ROOT_DIR`.
|
You may want to modify `ROOT_DIR`.
|
||||||
|
|
||||||
## Compression
|
## Compression
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
# Licensed under the MIT License.
|
||||||
|
|
||||||
|
# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline.
set -euo pipefail

# User-specific parameters
ROOT_DIR=data
# Image directories to trim, one per dataset product.
L5_L1="$ROOT_DIR/ssl4eo_l_tm_toa/imgs"
L7_L1="$ROOT_DIR/ssl4eo_l_etm_toa/imgs"
L7_L2="$ROOT_DIR/ssl4eo_l_etm_sr/imgs"
L8_L1="$ROOT_DIR/ssl4eo_l_oli_tirs_toa/imgs"
L8_L2="$ROOT_DIR/ssl4eo_l_oli_sr/imgs"
# Number of threads passed to delete_excess.py.
NUM_WORKERS=10
# Number of scenes to keep in each directory.
LENGTH=250000

# Generic parameters
# Parent of the directory that contains this script. Resolved with `cd <dir>/..`
# instead of a nested `dirname`, because `dirname .` is `.`: running
# `bash delete_excess.sh` from the script's own directory would otherwise yield
# that directory rather than its parent. Quoted so paths with spaces survive.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)

# Trim every product to the same length, timing each run.
for imgs_dir in "$L5_L1" "$L7_L1" "$L7_L2" "$L8_L1" "$L8_L2"; do
    time python3 "$SCRIPT_DIR/delete_excess.py" "$imgs_dir" --num-workers "$NUM_WORKERS" --length "$LENGTH"
done
|
Загрузка…
Ссылка в новой задаче