first version of the pdns download script - not tested yet as the data
is not there
This commit is contained in:
Родитель
a0d3f95a28
Коммит
790375fa8e
|
@ -0,0 +1,109 @@
|
||||||
|
#!/usr/bin/bash
|
||||||
|
|
||||||
|
# ***** Datasets for ICASSP 2022 DNS Challenge 4 - Personalized DNS Track *****
|
||||||
|
|
||||||
|
# NOTE: Before downloading, make sure you have enough space
|
||||||
|
# on your local storage!
|
||||||
|
|
||||||
|
# In all, you will need about 360GB to store the UNPACKED data.
|
||||||
|
# Archived, the same data takes about 200GB total.
|
||||||
|
|
||||||
|
# Please comment out the files you don't need before launching
|
||||||
|
# the script.
|
||||||
|
|
||||||
|
# NOTE: By default, the script *DOES NOT* DOWNLOAD ANY FILES!
|
||||||
|
# Please scroll down and edit this script to pick the
|
||||||
|
# downloading method that works best for you.
|
||||||
|
|
||||||
|
# -------------------------------------------------------------
|
||||||
|
# The directory structure of the unpacked data is:
|
||||||
|
|
||||||
|
# . 358G
|
||||||
|
# +-- datasets_fullband 64G
|
||||||
|
# | +-- impulse_responses 5.9G
|
||||||
|
# | \-- noise_fullband 58G
|
||||||
|
# \-- pdns_training_set 294G
|
||||||
|
# +-- enrollment_embeddings 115M
|
||||||
|
# +-- enrollment_wav 42G
|
||||||
|
# +-- raw/clean 252G
|
||||||
|
# +-- english 168G
|
||||||
|
# +-- french 2.1G
|
||||||
|
# +-- german 53G
|
||||||
|
# +-- italian 17G
|
||||||
|
# +-- russian 6.8G
|
||||||
|
# \-- spanish 5.4G
|
||||||
|
|
||||||
|
# Relative paths of every archive in the Azure container. Each entry is
# appended to the base URL to download it and to the output directory to
# store it. Comment out the entries you do not need.
BLOB_NAMES=(

    # Clean speech (raw), split by language
    pdns_training_set/raw/pdns_training_set.raw.clean.english_000.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.english_001.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.english_002.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.english_003.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.english_004.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.english_005.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.english_006.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.english_007.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.english_008.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.french_000.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.german_000.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.german_001.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.german_002.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.german_003.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.italian_000.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.russian_000.tar.bz2
    pdns_training_set/raw/pdns_training_set.raw.clean.spanish_000.tar.bz2

    # Enrollment recordings, split by language
    pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_000.tar.bz2
    pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_001.tar.bz2
    pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_002.tar.bz2
    pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_003.tar.bz2
    pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.english_004.tar.bz2
    pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.french_000.tar.bz2
    pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.german_000.tar.bz2
    pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.german_001.tar.bz2
    pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.italian_000.tar.bz2
    pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.russian_000.tar.bz2
    pdns_training_set/enrollment_wav/pdns_training_set.enrollment_wav.spanish_000.tar.bz2

    # Enrollment embeddings
    pdns_training_set/pdns_training_set.enrollment_embeddings_000.tar.bz2

    # Fullband noise: audioset
    datasets_fullband/noise_fullband/datasets_fullband.noise_fullband.audioset_000.tar.bz2
    datasets_fullband/noise_fullband/datasets_fullband.noise_fullband.audioset_001.tar.bz2
    datasets_fullband/noise_fullband/datasets_fullband.noise_fullband.audioset_002.tar.bz2
    datasets_fullband/noise_fullband/datasets_fullband.noise_fullband.audioset_003.tar.bz2
    datasets_fullband/noise_fullband/datasets_fullband.noise_fullband.audioset_004.tar.bz2
    datasets_fullband/noise_fullband/datasets_fullband.noise_fullband.audioset_005.tar.bz2
    datasets_fullband/noise_fullband/datasets_fullband.noise_fullband.audioset_006.tar.bz2

    # Fullband noise: freesound
    datasets_fullband/noise_fullband/datasets_fullband.noise_fullband.freesound_000.tar.bz2
    datasets_fullband/noise_fullband/datasets_fullband.noise_fullband.freesound_001.tar.bz2

    # Impulse responses
    datasets_fullband/datasets_fullband.impulse_responses_000.tar.bz2
)
|
||||||
|
|
||||||
|
###############################################################
|
||||||
|
|
||||||
|
# Base URL that each BLOB_NAMES entry is appended to.
AZURE_URL="https://dns4public.blob.core.windows.net/dns4archive"

# Local directory the archives are written (or unpacked) into.
OUTPUT_PATH="."

# Pre-create the sub-directories used by the BLOB_NAMES paths so that the
# download commands can write to "$OUTPUT_PATH/$BLOB" directly.
# OUTPUT_PATH is quoted so that a path containing spaces or glob characters
# stays a single word; the brace expansion must remain outside the quotes
# so that it still expands.
mkdir -p "$OUTPUT_PATH"/{pdns_training_set/{raw,enrollment_wav},datasets_fullband/noise_fullband}
|
||||||
|
|
||||||
|
# Visit every selected archive. By default this is a dry run that only
# issues a HEAD request per archive; uncomment ONE of the download
# commands below to actually fetch the data.
# The array expansion is quoted so that each entry stays exactly one word.
for BLOB in "${BLOB_NAMES[@]}"
do
    URL="$AZURE_URL/$BLOB"
    echo "Download: $BLOB"

    # DRY RUN: print HTTP response and Content-Length
    # WITHOUT downloading the files
    curl -s -I "$URL" | head -n 2

    # Actually download the files: UNCOMMENT when ready to download
    # curl "$URL" -o "$OUTPUT_PATH/$BLOB"

    # Same as above, but using wget
    # wget "$URL" -O "$OUTPUT_PATH/$BLOB"

    # Same, + unpack files on the fly
    # curl "$URL" | tar -C "$OUTPUT_PATH" -f - -x -j
done
|
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/bash
|
#!/usr/bin/bash
|
||||||
|
|
||||||
# ***** Datasets for ICASSP 2022 DNS Challenge 4 *****
|
# ***** Datasets for ICASSP 2022 DNS Challenge 4 - Main (Real-Time) Track *****
|
||||||
|
|
||||||
# NOTE: Before downloading, make sure you have enough space
|
# NOTE: Before downloading, make sure you have enough space
|
||||||
# on your local storage!
|
# on your local storage!
|
||||||
|
|
Загрузка…
Ссылка в новой задаче