From 95382bd2b9eba895521a38f4d140a11633c91846 Mon Sep 17 00:00:00 2001
From: Hari Dubey <hadubey@microsoft.com>
Date: Sat, 12 Sep 2020 03:35:59 +0000
Subject: [PATCH] updated README, config, .gitignore

---
 .gitignore                  |  1 +
 README.md                   | 15 ++++++++++++++-
 noisyspeech_synthesizer.cfg |  8 ++++----
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index f8bb1feefda..9075122e0e6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ training_set5/
 logs/
 test_set2/
 training_set_sept11/
+training_set_sept12/
diff --git a/README.md b/README.md
index 7d16ac70eb9..9ac66d02e7e 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Deep Noise Suppression (DNS) Challenge - Interspeech 2020
 
-This repository contains the datasets and scripts required for the DNS challenge. For more details about the challenge, please visit https://dns-challenge.azurewebsites.net/ and refer to our [paper](https://arxiv.org/ftp/arxiv/papers/2001/2001.08662.pdf).
+This repository contains the datasets and scripts required for the DNS challenge. For more details about the challenge, please visit https://dns-challenge.azurewebsites.net/.
 
 ## Repo details:
 * The **datasets** directory contains the clean speech and noise clips.
@@ -101,11 +101,24 @@ The datasets used in this project are licensed as follows:
 * https://librivox.org/; License: https://librivox.org/pages/public-domain/
 * PTDB-TUG: Pitch Tracking Database from Graz University of Technology https://www.spsc.tugraz.at/databases-and-tools/ptdb-tug-pitch-tracking-database-from-graz-university-of-technology.html; License: http://opendatacommons.org/licenses/odbl/1.0/ 
 * Edinburgh 56 speaker dataset: https://datashare.is.ed.ac.uk/handle/10283/2791; License: https://datashare.is.ed.ac.uk/bitstream/handle/10283/2791/license_text?sequence=11&isAllowed=y 
+* VocalSet: A Singing Voice Dataset https://zenodo.org/record/1193957#.X1hkxYtlCHs; License: Creative Commons Attribution 4.0 International, https://creativecommons.org/licenses/by/4.0/
+* Emotion data corpus: CREMA-D (Crowd-sourced Emotional Multimodal Actors Dataset)
+https://github.com/CheyneyComputerScience/CREMA-D; License: http://opendatacommons.org/licenses/dbcl/1.0/
+* The VoxCeleb2 Dataset http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox2.html; License: http://www.robots.ox.ac.uk/~vgg/data/voxceleb/
+The VoxCeleb dataset is available to download for commercial/research purposes under a Creative Commons Attribution 4.0 International License. The copyright remains with the original owners of the video. A complete version of the license can be found at https://creativecommons.org/licenses/by/4.0/.
+* VCTK Dataset: https://homepages.inf.ed.ac.uk/jyamagis/page3/page58/page58.html; License: This corpus is licensed under Open Data Commons Attribution License (ODC-By) v1.0.
+http://opendatacommons.org/licenses/by/1.0/ 
+
 2. Noise:
 * Audioset: https://research.google.com/audioset/index.html; License: https://creativecommons.org/licenses/by/4.0/
 * Freesound: https://freesound.org/ Only files with CC0 licenses were selected; License: https://creativecommons.org/publicdomain/zero/1.0/
 * Demand: https://zenodo.org/record/1227121#.XRKKxYhKiUk; License: https://creativecommons.org/licenses/by-sa/3.0/deed.en_CA
 
+3. RIR datasets: OpenSLR26 and OpenSLR28:
+* http://www.openslr.org/26/
+* http://www.openslr.org/28/
+* License: Apache 2.0, https://www.apache.org/licenses/LICENSE-2.0
+
 ## Code license
 MIT License
 
diff --git a/noisyspeech_synthesizer.cfg b/noisyspeech_synthesizer.cfg
index 517a1d4b1af..1f44e84cd43 100644
--- a/noisyspeech_synthesizer.cfg
+++ b/noisyspeech_synthesizer.cfg
@@ -52,9 +52,9 @@ noise_dir: datasets\noise
 speech_dir: datasets\clean\read_speech
 noise_types_excluded: None
 
-noisy_destination: datasets\training_set_sept11\noisy
-clean_destination: datasets\training_set_sept11\clean
-noise_destination: datasets\training_set_sept11\noise
+noisy_destination: datasets\training_set_sept12\noisy
+clean_destination: datasets\training_set_sept12\clean
+noise_destination: datasets\training_set_sept12\noise
 log_dir: logs 
 
 # Config: add singing voice to clean speech
@@ -76,7 +76,7 @@ use_mandarin_data=1
 clean_mandarin: datasets\clean\mandarin_speech
 
 # Config: add reverb to clean speech
-rir_choice: 1
+rir_choice: 3
 # 1 for only real rir, 2 for only synthetic rir, 3 (default) use both real and synthetic
 lower_t60: 0.3 
 # lower bound of t60 range in seconds