some local changes

2020-03-14 23:41:32 +00:00 · 2020-03-14 23:41:32 +00:00 · 714091548d
--- a/NSNet-baseline/Readme.md
+++ b/NSNet-baseline/Readme.md
@ -1,6 +1,6 @@
 # Noise Suppression Net (NSNet) baseline inference script
-* As a baseline for Interspeech 2020 Deep Noise Suppression challenge, we will use the recently developed SE method based on Recurrent Neural Network (RNN). For ease of reference, we will call this method as Noise Suppression Net (NSNet).
+* As a baseline for the Interspeech 2020 Deep Noise Suppression challenge, we use a recently developed speech enhancement (SE) method based on a Recurrent Neural Network (RNN). For ease of reference, we refer to this method as Noise Suppression Net (NSNet). Details of the method can be found in the [published paper](https://arxiv.org/pdf/2001.10601.pdf).
 * This method uses log power spectra as input to predict the enhancement gain per frame using a learning machine based on Gated Recurrent Units (GRU) and fully connected layers. Please refer to the paper for more details of the method.
 * NSNet is computationally efficient. It takes only 0.16 ms to enhance a 20 ms frame on an Intel quad-core i5 machine using ONNX Runtime v1.1.
@ -22,3 +22,14 @@ From the NSNet-baseline directory, run nsnet_eval_local.py with the following re
 - --modelpath "Specify the path to the onnx model provided"
 Use default values for the rest. Run to enhance the clips.
 ## Citation:
 The baseline NSNet noise suppression:<br />  
@misc{xia2020weighted,<br />
    title={Weighted Speech Distortion Losses for Neural-network-based Real-time Speech Enhancement},<br />
    author={Yangyang Xia and Sebastian Braun and Chandan K. A. Reddy and Harishchandra Dubey and Ross Cutler and Ivan Tashev},<br />
    year={2020},<br />
    eprint={2001.10601},<br />
    archivePrefix={arXiv},<br />
    primaryClass={eess.AS}<br />
 }
--- a/NSNet-baseline/pycache/audiolib.cpython-36.pyc
+++ b/NSNet-baseline/pycache/audiolib.cpython-36.pyc
--- a/NSNet-baseline/pycache/onnx.cpython-36.pyc
+++ b/NSNet-baseline/pycache/onnx.cpython-36.pyc
--- a/NSNet-baseline/nsnet_eval_local.py
+++ b/NSNet-baseline/nsnet_eval_local.py
@ -62,10 +62,10 @@ def _main():
    logging.debug("NSNet local workers start with %d input files", len(input_filelist))
-    with concurrent.futures.ThreadPoolExecutor(max_workers=args.num_workers) as executor:
+#    with concurrent.futures.ThreadPoolExecutor(max_workers=args.num_workers) as executor:
-        executor.map(worker, input_filelist, chunksize=args.chunksize)
+#        executor.map(worker, input_filelist, chunksize=args.chunksize)
-#    for fname in input_filelist:
+    for fname in input_filelist:
-#        worker(fname)
+        worker(fname)
    logging.info("NSNet local workers complete")
--- a/NSNet-baseline/onnx.py
+++ b/NSNet-baseline/onnx.py
@ -100,7 +100,7 @@ class NSNetInference:
            mask = model_outputs[0].squeeze()
            x_enh = audiolib.istft(
                (xmag * mask) * xphs, sample_rate, self.wind, self.dft_size, zphase=False)
-
+            
            sout[frame_sampleindex:frame_sampleindex + hsize] = x_old + x_enh[0:hsize]
            x_old = x_enh[hsize:fsize]
--- a/README.md
+++ b/README.md
@ -21,15 +21,28 @@ This repository contains the datasets and scripts required for the DNS challenge
 * Run python **noisyspeech_synthesizer_singleprocess.py** to synthesize the data.
 ## Citation:
-@misc{ch2020interspeech,
+For the datasets and the DNS challenge:<br />  
-    title={The INTERSPEECH 2020 Deep Noise Suppression Challenge: Datasets, Subjective Speech Quality and Testing Framework},
+
@misc{ch2020interspeech,<br />
    title={The INTERSPEECH 2020 Deep Noise Suppression Challenge: Datasets, Subjective Speech Quality and Testing Framework},<br />
    author={Chandan K. A. Reddy and Ebrahim Beyrami and Harishchandra Dubey and Vishak Gopal and Roger Cheng and Ross Cutler and Sergiy Matusevych and Robert Aichner and Ashkan Aazami and Sebastian Braun and Puneet Rana and Sriram Srinivasan and Johannes Gehrke},<br />
-    year={2020},
+    year={2020},<br />
-    eprint={2001.08662},
+    eprint={2001.08662},<br />
-    archivePrefix={arXiv},
+    archivePrefix={arXiv},<br />
-    primaryClass={cs.SD}
+    primaryClass={cs.SD}<br />
 }
 The baseline NSNet noise suppression:<br />
@misc{xia2020weighted,<br />
    title={Weighted Speech Distortion Losses for Neural-network-based Real-time Speech Enhancement},<br />
    author={Yangyang Xia and Sebastian Braun and Chandan K. A. Reddy and Harishchandra Dubey and Ross Cutler and Ivan Tashev},<br />
    year={2020},<br />
    eprint={2001.10601},<br />
    archivePrefix={arXiv},<br />
    primaryClass={eess.AS}<br />
 }
 # Contributing
 This project welcomes contributions and suggestions.  Most contributions require you to agree to a
--- a/noisyspeech_synthesizer_multiprocessing.py
+++ b/noisyspeech_synthesizer_multiprocessing.py
@ -143,36 +143,48 @@ def main_gen(params, filenum):
    '''Calls gen_audio() to generate the audio signals, verifies that they meet
       the requirements, and writes the files to storage'''
-    # generate clean speech
+    print("Generating file #" + str(filenum))
    clean, clean_source_files, clean_clipped_files, clean_low_activity_files = \
        gen_audio(True, params, filenum)
    # generate noise
    noise, noise_source_files, noise_clipped_files, noise_low_activity_files = \
        gen_audio(False, params, filenum, len(clean))
-    # mix clean speech and noise
+    clean_clipped_files = []
-    # if specified, use specified SNR value
+    clean_low_activity_files = []
-    if not params['randomize_snr']:
+    noise_clipped_files = []
-        snr = params['snr']
+    noise_low_activity_files = []
-    # use a randomly sampled SNR value between the specified bounds
+
-    else:
+    while True:
-        snr = np.random.randint(params['snr_lower'], params['snr_upper'])
+        # generate clean speech
-        
+        clean, clean_source_files, clean_cf, clean_laf = \
-    clean_snr, noise_snr, noisy_snr, target_level = snr_mixer(params=params, 
+            gen_audio(True, params, filenum)
-                                                              clean=clean, 
+        # generate noise
-                                                              noise=noise, 
+        noise, noise_source_files, noise_cf, noise_laf = \
-                                                              snr=snr)
+            gen_audio(False, params, filenum, len(clean))
-    # Uncomment the below lines if you need segmental SNR and comment the above lines using snr_mixer
+
-    #clean_snr, noise_snr, noisy_snr, target_level = segmental_snr_mixer(params=params, 
+        clean_clipped_files += clean_cf
-    #                                                                    clean=clean, 
+        clean_low_activity_files += clean_laf
-    #                                                                    noise=noise, 
+        noise_clipped_files += noise_cf
-    #                                                                    snr=snr)
+        noise_low_activity_files += noise_laf
-    # unexpected clipping
+
-    if is_clipped(clean_snr) or is_clipped(noise_snr) or is_clipped(noisy_snr):
+        # mix clean speech and noise
-        print("Warning: File #" + str(filenum) + " has unexpected clipping, " + \
+        # if specified, use specified SNR value
-              "returning without writing audio to disk")            
+        if not params['randomize_snr']:
-        return [], clean_clipped_files, clean_low_activity_files, \
+            snr = params['snr']
-               [], noise_clipped_files, noise_low_activity_files
+        # use a randomly sampled SNR value between the specified bounds
        else:
            snr = np.random.randint(params['snr_lower'], params['snr_upper'])
        clean_snr, noise_snr, noisy_snr, target_level = snr_mixer(params=params, 
                                                                  clean=clean, 
                                                                  noise=noise, 
                                                                  snr=snr)
        # Uncomment the below lines if you need segmental SNR and comment the above lines using snr_mixer
        #clean_snr, noise_snr, noisy_snr, target_level = segmental_snr_mixer(params=params, 
        #                                                                    clean=clean, 
        #                                                                    noise=noise, 
        #                                                                    snr=snr)
        # unexpected clipping
        if is_clipped(clean_snr) or is_clipped(noise_snr) or is_clipped(noisy_snr):       
            continue
        else:
            break
    # write resultant audio streams to files
    hyphen = '-'
@ -252,7 +264,9 @@ def main_body():
    params['total_hours'] = float(cfg['total_hours'])
    if cfg['fileindex_start'] != 'None' and cfg['fileindex_start'] != 'None':
-        params['num_files'] = int(cfg['fileindex_end'])-int(cfg['fileindex_start'])
+        params['fileindex_start'] = int(cfg['fileindex_start'])
        params['fileindex_end'] = int(cfg['fileindex_end'])    
        params['num_files'] = int(params['fileindex_end'])-int(params['fileindex_start'])
    else:
        params['num_files'] = int((params['total_hours']*60*60)/params['audio_length'])
@ -262,8 +276,11 @@ def main_body():
    params['noise_activity_threshold'] = float(cfg['noise_activity_threshold'])
    params['snr_lower'] = int(cfg['snr_lower'])
    params['snr_upper'] = int(cfg['snr_upper'])
 <<<<<<< HEAD
 #    params['fileindex_start'] = int(cfg['fileindex_start'])
 #    params['fileindex_end'] = int(cfg['fileindex_end'])
 =======
 >>>>>>> f9fdbd480c5c3f8cc5fbaad00028ffecc3e5ea61
    params['randomize_snr'] = utils.str2bool(cfg['randomize_snr'])
    params['target_level_lower'] = int(cfg['target_level_lower'])
    params['target_level_upper'] = int(cfg['target_level_upper'])
--- a/noisyspeech_synthesizer_singleprocess.py
+++ b/noisyspeech_synthesizer_singleprocess.py
@ -23,6 +23,7 @@ MAXTRIES = 50
 MAXFILELEN = 100
 np.random.seed(2)
 random.seed(3)
 def build_audio(is_clean, params, index, audio_samples_length=-1):
    '''Construct an audio signal from source files'''
--- a/unit_tests_synthesizer.py
+++ b/unit_tests_synthesizer.py
@ -43,7 +43,11 @@ def test_zeros_beg_end(audio, num_zeros=16000, low_energy_thresh=LOW_ENERGY_THRE
    end_segment_energy = 20*np.log10(audio[-num_zeros:]**2).mean()**0.5
    return beg_segment_energy < low_energy_thresh or end_segment_energy < low_energy_thresh
-
+def adsp_filtering_test(adsp, without_adsp):
    diff = adsp - without_adsp
    if any(val >0.0001 for val in diff):
 if __name__=='__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', default='noisyspeech_synthesizer.cfg')