зеркало из https://github.com/microsoft/EdgeML.git
Added GPU Support for srnn
This commit is contained in:
Родитель
b20ebd47e4
Коммит
85f122053c
|
@ -1,30 +0,0 @@
|
||||||
# Pytorch Shallow RNN Examples
|
|
||||||
|
|
||||||
This directory includes an example [notebook](SRNN_Example.ipynb) of how to use
|
|
||||||
SRNN on the [Google Speech Commands
|
|
||||||
Dataset](https://ai.googleblog.com/2017/08/launching-speech-commands-dataset.html).
|
|
||||||
|
|
||||||
`pytorch_edgeml.graph.rnn.SRNN2` implements a 2 layer SRNN network. We will use
|
|
||||||
this with an LSTM cell on this dataset. The training routine for SRNN is
|
|
||||||
implemented in `pytorch_edgeml.trainer.srnnTrainer` and will be used as part of
|
|
||||||
this example.
|
|
||||||
|
|
||||||
**Tested With:** pytorch > 1.1.0 with Python 2 and Python 3
|
|
||||||
|
|
||||||
## Fetching Data
|
|
||||||
|
|
||||||
The script [fetch_google.sh](fetch_google.sh) can be used to automatically
|
|
||||||
download the data. You can also manually download and extract the data.
|
|
||||||
[process_google.py](process_google.py), will perform feature extraction on this
|
|
||||||
dataset and write numpy files that conform to the required format.
|
|
||||||
|
|
||||||
To run this script, please use:
|
|
||||||
|
|
||||||
./fetch_google.sh
|
|
||||||
python process_google.py
|
|
||||||
|
|
||||||
With the provided configuration, you can expect a validation accuracy of about
|
|
||||||
92%.
|
|
||||||
|
|
||||||
Copyright (c) Microsoft Corporation. All rights reserved.
|
|
||||||
Licensed under the MIT license.
|
|
|
@ -1,23 +0,0 @@
|
||||||
#!/bin/bash
# Download and extract the Google Speech Commands dataset (v0.01) into
# $OUT_DIR/Raw. Run process_google.py afterwards for feature extraction.
#
# BUGFIX: shebang was '#/bin/bash' (missing '!'), so the interpreter line
# was just a comment; also 'exit' on failure returned status 0 — now exit 1.

# If OUT_DIR is modified, please make sure it is reflected in process_google.py
# as well.
OUT_DIR='./GoogleSpeech/'
mkdir -pv $OUT_DIR
mkdir -pv $OUT_DIR/Raw
mkdir -pv $OUT_DIR/Extracted

echo "Downloading dataset."
echo ""
URL='http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz'
# Abort if we cannot enter the download directory.
cd $OUT_DIR/Raw || exit 1
wget $URL

if [ $? -eq 0 ]; then
    echo "Download complete. Extracting files . . ."
else
    echo "Fail"
    exit 1
fi
tar -xzf speech_commands_v0.01.tar.gz
echo "Done. Please run process_google.py for feature extraction"
|
|
|
@ -1,265 +0,0 @@
|
||||||
|
|
||||||
# Google Speech data feature extraction
|
|
||||||
|
|
||||||
# Note that the 'testing_list.txt' and 'validation_list.txt'
|
|
||||||
# that provided is used to create test and validation
|
|
||||||
# sets. Everything that is not in these sets is considered
|
|
||||||
# for training.
|
|
||||||
|
|
||||||
# The testing_list and validation_list and by extension
|
|
||||||
# the training set has the following property.
|
|
||||||
|
|
||||||
# If one audio sample of a user is in either one of these
|
|
||||||
# sets, then all audio samples of that user will also be
|
|
||||||
# in that set.
|
|
||||||
|
|
||||||
# As long as the same methodology of creating testing
|
|
||||||
# and validation set that google used - as outlined in
|
|
||||||
# their README is used, the testing and validation set
|
|
||||||
# will be consistent. That is, they will always contain
|
|
||||||
# the same set of examples
|
|
||||||
|
|
||||||
# Sampling is not supported yet.
|
|
||||||
|
|
||||||
from python_speech_features import fbank
|
|
||||||
import os
|
|
||||||
import glob
|
|
||||||
import numpy as np
|
|
||||||
import scipy.io.wavfile as r
|
|
||||||
import random
|
|
||||||
|
|
||||||
|
|
||||||
# Various versions can be created depending on which labels are chosen and
# which are moved to the negative (noise) set. We use LABELMAP13 for most of
# our experiments.
# All 30 keywords plus _background_noise_, each with its own distinct label
# (1-31). No word is mapped to the negative class here.
LABELMAP30 = {
    '_background_noise_': 1, 'bed': 2, 'bird': 3,
    'cat': 4, 'dog': 5, 'down': 6, 'eight': 7,
    'five': 8, 'four': 9, 'go': 10, 'happy': 11,
    'house': 12, 'left': 13, 'marvin': 14, 'nine': 15,
    'no': 16, 'off': 17, 'on': 18, 'one': 19,
    'right': 20, 'seven': 21, 'sheila': 22, 'six': 23,
    'stop': 24, 'three': 25, 'tree': 26, 'two': 27,
    'up': 28, 'wow': 29, 'yes': 30, 'zero': 31
}


# 12 keywords with labels 1-12; every remaining word (and background noise)
# is folded into the negative/noise class 0 — 13 distinct labels in total.
LABELMAP13 = {
    'go': 1, 'no': 2, 'on': 3, 'up': 4, 'bed': 5, 'cat': 6,
    'dog': 7, 'off': 8, 'one': 9, 'six': 10, 'two': 11,
    'yes': 12,
    'wow': 0, 'bird': 0, 'down': 0, 'five': 0, 'four': 0,
    'left': 0, 'nine': 0, 'stop': 0, 'tree': 0, 'zero': 0,
    'eight': 0, 'happy': 0, 'house': 0, 'right': 0, 'seven': 0,
    'three': 0, 'marvin': 0, 'sheila': 0, '_background_noise_': 0
}

# 10 keywords with labels 1-10; everything else is the negative class 0 —
# 11 distinct labels in total (used for the "google 12" configuration).
LABELMAP12 = {
    'yes': 1, 'no': 2, 'up': 3, 'down': 4, 'left': 5, 'right': 6,
    'on': 7, 'off': 8, 'stop': 9, 'go': 10,
    'bed':0, 'cat':0, 'dog':0, 'one':0, 'six':0, 'two':0,
    'wow':0, 'bird':0, 'five':0, 'four':0, 'nine':0, 'tree':0,
    'zero':0, 'eight':0, 'happy':0, 'house':0, 'seven':0, 'three':0,
    'marvin':0, 'sheila':0, '_background_noise_':0
}
|
|
||||||
|
|
||||||
def createFileList(audioFileDir, testingList,
                   validationList, outPrefix,
                   labelMap):
    '''
    Build and save the train/test/validation file lists for the Google
    Speech Commands dataset.

    audioFileDir: The directory containing the directories
        with audio files.
    testingList: the `testing_list.txt` file
    validationList: the `validation_list.txt` file
    outPrefix: prefix prepended to the output .npy file names
    labelMap: dict mapping each word-directory name to an integer label

    Reads all the files in audioFileDir and creates
    a list of files that are not part of testingList
    or validationList.

    WARNING: _background_noise_ is ignored

    The three lists are saved as numpy arrays of relative paths:
        outPrefix + 'file_train.npy'
        outPrefix + 'file_test.npy'
        outPrefix + 'file_val.npy'
    (The original docstring advertised '*_testList.npy' names that the
    code never produced.)
    '''
    dirs = os.listdir(audioFileDir)
    dirs = [x for x in dirs if os.path.isdir(os.path.join(audioFileDir, x))]
    # The v0.01 release ships 30 word folders plus _background_noise_.
    assert(len(dirs) == 31), (len(dirs))
    for x in dirs:
        msg = '%s found without label map' % x
        assert x in labelMap, msg

    allFileList = []
    for fol in dirs:
        if fol == '_background_noise_':
            print("Ignoring %s" % fol)
            continue
        path = audioFileDir + '/' + fol + '/'
        files = []
        for w in os.listdir(path):
            if not w.endswith('.wav'):
                print("Ignoring %s" % w)
                continue
            # Store paths relative to audioFileDir: '<word>/<file>.wav'.
            files.append(fol + '/' + w)
        allFileList.extend(files)
    # Relative paths must be unique across all folders.
    assert(len(allFileList) == len(set(allFileList)))

    # BUGFIX: the original opened the validation list without ever closing
    # the file handle; context managers close both files deterministically.
    with open(testingList, 'r') as fil:
        testingList = [x.strip() for x in fil.readlines()]
    with open(validationList, 'r') as fil:
        validationList = [x.strip() for x in fil.readlines()]

    # Remove the validation files from the training pool. The asserts
    # verify that every listed file was actually found on disk.
    originalLen = len(allFileList)
    allFileList = set(allFileList) - set(validationList)
    assert len(allFileList) < originalLen
    assert originalLen == len(allFileList) + len(validationList)
    # Remove the testing files from the training pool.
    originalLen = len(allFileList)
    allFileList = set(allFileList) - set(testingList)
    assert len(allFileList) < originalLen
    assert originalLen == len(allFileList) + len(testingList)

    trainingList = list(allFileList)
    testingList = list(testingList)
    validationList = list(validationList)
    np.save(outPrefix + 'file_train.npy', trainingList)
    np.save(outPrefix + 'file_test.npy', testingList)
    np.save(outPrefix + 'file_val.npy', validationList)
|
|
||||||
|
|
||||||
|
|
||||||
def extractFeatures(fileList, LABELMAP, maxlen, numFilt, samplerate, winlen,
                    winstep):
    '''
    Reads audio from files specified in fileList, extracts log filter-bank
    features and assigns one-hot labels to them.

    fileList: List of audio file names.
    LABELMAP: The label map to use (parent-directory name -> int label).
    maxlen: maximum length of the audio file, in samples. Every shorter
        file is zero padded on the left up to maxlen.
    numFilt: number of filters to use in the filter bank.
    samplerate: sample rate of the audio file. All files are
        assumed to be of same sample rate.
    winlen: window length to use for fbank, in seconds.
    winstep: window step for fbank, in seconds.

    Returns (x, y): x with shape [len(fileList), numSteps, numFilt] and y
    one-hot encoded with shape [len(fileList), numClasses].
    '''
    def __extractFeatures(stackedWav, numSteps, numFilt,
                          samplerate, winlen, winstep):
        '''
        stackedWav: [number of waves, len(wave)]
        returns [number of waves, numSteps, numFilt]
        All waves are assumed to be of fixed length
        '''
        assert stackedWav.ndim == 2, 'Should be [number of waves, len(wav)]'
        extractedList = []
        eps = 1e-10  # avoids log(0) on silent frames
        for sample in stackedWav:
            temp, _ = fbank(sample, samplerate=samplerate, winlen=winlen,
                            winstep=winstep, nfilt=numFilt,
                            winfunc=np.hamming)
            temp = np.log(temp + eps)
            assert temp.ndim == 2, 'Should be [numSteps, numFilt]'
            assert temp.shape[0] == numSteps, 'Should be [numSteps, numFilt]'
            extractedList.append(temp)
        return np.array(extractedList)

    fileList = np.array(fileList)
    assert(fileList.ndim == 1)
    allSamples = np.zeros((len(fileList), maxlen))
    # FIX: dropped the redundant manual counter ('i = 0' / 'i += 1') that
    # duplicated enumerate, and renamed 'file' so it no longer shadows the
    # builtin.
    for i, fname in enumerate(fileList):
        _, x = r.read(fname)
        assert(len(x) <= maxlen)
        # Left-pad with zeros so every clip is exactly maxlen samples.
        allSamples[i, maxlen - len(x):maxlen] += x
    assert allSamples.ndim == 2
    # Window parameters converted from seconds to samples; both must be
    # whole sample counts for the frame count below to be exact.
    winstepSamples = winstep * samplerate
    winlenSamples = winlen * samplerate
    assert(winstepSamples.is_integer())
    assert(winlenSamples.is_integer())
    numSteps = int(np.ceil((maxlen - winlenSamples) / winstepSamples) + 1)
    x = __extractFeatures(allSamples, numSteps, numFilt, samplerate, winlen,
                          winstep)
    # The label is derived from the parent directory name of each file.
    y_ = [t.split('/') for t in fileList]
    y_ = [t[-2] for t in y_]
    y = []
    for t in y_:
        assert t in LABELMAP
        y.append(LABELMAP[t])

    def to_onehot(indices, numClasses):
        # Convert a 1-D array of integer labels to a one-hot matrix.
        assert indices.ndim == 1
        n = max(indices) + 1
        # BUGFIX: the original asserted 'numClasses <= n', which is the
        # wrong direction — every index must fit inside numClasses or the
        # fancy assignment below raises IndexError.
        assert n <= numClasses
        b = np.zeros((len(indices), numClasses))
        b[np.arange(len(indices)), indices] = 1
        return b
    y = to_onehot(np.array(y), np.max(y) + 1)
    return x, y
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # ----------------------------------------- #
    # Configuration
    # ----------------------------------------- #
    seed = 42
    maxlen = 16000       # samples; clips are padded to 1 s at 16 kHz
    numFilt = 32         # filter-bank size
    # BUGFIX: samplerate was assigned twice (also further below); once.
    samplerate = 16000   # Hz
    winlen = 0.025       # fbank window length, seconds
    winstep = 0.010      # fbank window step, seconds
    # 13 for google 13, 11 for google 12
    numLabels = 13  # 0 not assigned
    # For creation of training file list, testing file list
    # and validation list.
    audioFileDir = './GoogleSpeech/Raw/'
    testingList = './GoogleSpeech/Raw/testing_list.txt'
    validationList = './GoogleSpeech/Raw/validation_list.txt'
    outDir = './GoogleSpeech/Extracted/'
    # ----------------------------------------- #
    np.random.seed(seed)
    random.seed(seed)
    assert(numLabels in [13, 11])
    if numLabels == 13:
        # Sanity check: LABELMAP13 must define exactly 13 distinct labels.
        values = set(LABELMAP13[x] for x in LABELMAP13)
        assert(len(values) == 13)
        LABELMAP = LABELMAP13
    if numLabels == 11:
        # Sanity check: LABELMAP12 must define exactly 11 distinct labels.
        values = set(LABELMAP12[x] for x in LABELMAP12)
        assert(len(values) == 11)
        LABELMAP = LABELMAP12

    # BUGFIX: fixed the "Peforming" typo in the status message.
    print("Performing file creation")
    createFileList(audioFileDir, testingList, validationList,
                   outDir, LABELMAP)
    trainFileList = np.load(outDir + 'file_train.npy')
    testFileList = np.load(outDir + 'file_test.npy')
    valFileList = np.load(outDir + 'file_val.npy')
    print("Number of train files:", len(trainFileList))
    print("Number of test files", len(testFileList))
    print("Number of val files", len(valFileList))
    print("Performing feature extraction")
    # Prefix the relative paths with the raw-data directory.
    trainFileList_ = [audioFileDir + x for x in trainFileList]
    valFileList_ = [audioFileDir + x for x in valFileList]
    testFileList_ = [audioFileDir + x for x in testFileList]
    x_test, y_test = extractFeatures(testFileList_, LABELMAP, maxlen, numFilt,
                                     samplerate, winlen, winstep)
    x_val, y_val = extractFeatures(valFileList_, LABELMAP, maxlen, numFilt,
                                   samplerate, winlen, winstep)
    x_train, y_train = extractFeatures(trainFileList_, LABELMAP, maxlen,
                                       numFilt, samplerate, winlen, winstep)
    # One statement per line (the original chained these with semicolons).
    np.save(outDir + 'x_train', x_train)
    np.save(outDir + 'y_train', y_train)
    np.save(outDir + 'x_test', x_test)
    np.save(outDir + 'y_test', y_test)
    np.save(outDir + 'x_val', x_val)
    np.save(outDir + 'y_val', y_val)
    print("Shape train", x_train.shape, y_train.shape)
    print("Shape test", x_test.shape, y_test.shape)
    print("Shape val", x_val.shape, y_val.shape)
|
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче