From 85f122053c91dce18477dd132f67ff58acc70191 Mon Sep 17 00:00:00 2001 From: SachinG007 Date: Mon, 15 Jul 2019 16:13:48 +0530 Subject: [PATCH] Added GPU Support for srnn --- pytorch/examples/SRNN/README.md | 30 --- pytorch/examples/SRNN/fetch_google.sh | 23 -- pytorch/examples/SRNN/process_google.py | 265 ------------------------ 3 files changed, 318 deletions(-) delete mode 100644 pytorch/examples/SRNN/README.md delete mode 100644 pytorch/examples/SRNN/fetch_google.sh delete mode 100644 pytorch/examples/SRNN/process_google.py diff --git a/pytorch/examples/SRNN/README.md b/pytorch/examples/SRNN/README.md deleted file mode 100644 index 28f36c15..00000000 --- a/pytorch/examples/SRNN/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# Pytorch Shallow RNN Examples - -This directory includes an example [notebook](SRNN_Example.ipynb) of how to use -SRNN on the [Google Speech Commands -Dataset](https://ai.googleblog.com/2017/08/launching-speech-commands-dataset.html). - -`pytorch_edgeml.graph.rnn.SRNN2` implements a 2 layer SRNN network. We will use -this with an LSTM cell on this dataset. The training routine for SRNN is -implemented in `pytorch_edgeml.trainer.srnnTrainer` and will be used as part of -this example. - -**Tested With:** pytorch > 1.1.0 with Python 2 and Python 3 - -## Fetching Data - -The script - [fetch_google.sh](fetch_google.py), can be used to automatically -download the data. You can also manually download and extract the data. -[process_google.py](process_google.py), will perform feature extraction on this -dataset and write numpy files that confirm to the required format. - - To run this script, please use: - - ./fetch_google.py - python process_google.py - -With the provided configuration, you can expect a validation accuracy of about -92%. - -Copyright (c) Microsoft Corporation. All rights reserved. -Licensed under the MIT license. diff --git a/pytorch/examples/SRNN/fetch_google.sh b/pytorch/examples/SRNN/fetch_google.sh deleted file mode 100644 index 61cf9a90..00000000 --- a/pytorch/examples/SRNN/fetch_google.sh +++ /dev/null @@ -1,23 +0,0 @@ -#/bin/bash - -# If OUT_DIR is modified, please make sure it is reflected in process_google.py -# as well. -OUT_DIR='./GoogleSpeech/' -mkdir -pv $OUT_DIR -mkdir -pv $OUT_DIR/Raw -mkdir -pv $OUT_DIR/Extracted - -echo "Downloading dataset." -echo "" -URL='http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz' -cd $OUT_DIR/Raw -wget $URL - -if [ $? -eq 0 ]; then - echo "Download complete. Extracting files . . ." -else - echo "Fail" - exit -fi -tar -xzf speech_commands_v0.01.tar.gz -echo "Done. Please run process_google.py for feature extraction" diff --git a/pytorch/examples/SRNN/process_google.py b/pytorch/examples/SRNN/process_google.py deleted file mode 100644 index afce89a3..00000000 --- a/pytorch/examples/SRNN/process_google.py +++ /dev/null @@ -1,265 +0,0 @@ - -# Google Speech data feature extraction - -# Note that the 'testing_list.txt' and 'validation_list.txt' -# that provided is used to create test and validation -# sets. Everything that is not in these sets is considered -# for training. - -# The testing_list and validation_list and by extension -# the training set has the following property. - -# If one audio sample of a user is in either one of these -# sets, then all audio samples of that user will also be -# in that set. - -# As long as the same methodology of creating testing -# and validation set that google used - as outlined in -# their README is used, the testing and validation set -# will be consistent. That is, the will always contain -# the same set of examples - -# Sampling is not supported yet. - -from python_speech_features import fbank -import os -import glob -import numpy as np -import scipy.io.wavfile as r -import random - - -# Various version can be created depending on which labels are chosen and which -# are moved to the negative (noise) set. We use LABELMAP13 for most of our -# experiments. -LABELMAP30 = { - '_background_noise_': 1, 'bed': 2, 'bird': 3, - 'cat': 4, 'dog': 5, 'down': 6, 'eight': 7, - 'five': 8, 'four': 9, 'go': 10, 'happy': 11, - 'house': 12, 'left': 13, 'marvin': 14, 'nine': 15, - 'no': 16, 'off': 17, 'on': 18, 'one': 19, - 'right': 20, 'seven': 21, 'sheila': 22, 'six': 23, - 'stop': 24, 'three': 25, 'tree': 26, 'two': 27, - 'up': 28, 'wow': 29, 'yes': 30, 'zero': 31 -} - - -LABELMAP13 = { - 'go': 1, 'no': 2, 'on': 3, 'up': 4, 'bed': 5, 'cat': 6, - 'dog': 7, 'off': 8, 'one': 9, 'six': 10, 'two': 11, - 'yes': 12, - 'wow': 0, 'bird': 0, 'down': 0, 'five': 0, 'four': 0, - 'left': 0, 'nine': 0, 'stop': 0, 'tree': 0, 'zero': 0, - 'eight': 0, 'happy': 0, 'house': 0, 'right': 0, 'seven': 0, - 'three': 0, 'marvin': 0, 'sheila': 0, '_background_noise_': 0 -} - -LABELMAP12 = { - 'yes': 1, 'no': 2, 'up': 3, 'down': 4, 'left': 5, 'right': 6, - 'on': 7, 'off': 8, 'stop': 9, 'go': 10, - 'bed':0, 'cat':0, 'dog':0, 'one':0, 'six':0, 'two':0, - 'wow':0, 'bird':0, 'five':0, 'four':0, 'nine':0, 'tree':0, - 'zero':0, 'eight':0, 'happy':0, 'house':0, 'seven':0, 'three':0, - 'marvin':0, 'sheila':0, '_background_noise_':0 -} - -def createFileList(audioFileDir, testingList, - validationList, outPrefix, - labelMap): - ''' - audioFileDir: The directory containing the directories - with audio files. - testingList: the `testing_list.txt` file - validationList: the `validation_list.txt` file - - Reads all the files in audioFileDir and creates - a list of files that are not part of testingList - or validationList. - - WARNING: _background_noise_ is ignored - - Then testingList, validationList and trainginList - are converted into numpy arrays with their labels - - This is written as - outPrefix + '_testList.npy' - outPrefix + '_trainList.npy' - outPrefix + '_validationList.npy' - ''' - dirs = os.listdir(audioFileDir) - dirs = [x for x in dirs if os.path.isdir(os.path.join(audioFileDir, x))] - assert(len(dirs) == 31), (len(dirs)) - for x in dirs: - msg = '%s found without label map' % x - assert x in labelMap, msg - - allFileList = [] - for fol in dirs: - if fol == '_background_noise_': - print("Ignoring %s" % fol) - continue - path = audioFileDir + '/' + fol + '/' - files = [] - for w in os.listdir(path): - if not w.endswith('.wav'): - print("Ignoring %s" % w) - continue - files.append(fol + '/' + w) - allFileList.extend(files) - assert(len(allFileList) == len(set(allFileList))) - - fil = open(testingList, 'r') - testingList = fil.readlines() - testingList = [x.strip() for x in testingList] - fil.close() - fil = open(validationList, 'r') - validationList = fil.readlines() - validationList = [x.strip() for x in validationList] - originalLen = len(allFileList) - allFileList = set(allFileList) - set(validationList) - assert len(allFileList) < originalLen - assert originalLen == len(allFileList) + len(validationList) - originalLen = len(allFileList) - allFileList = set(allFileList) - set(testingList) - assert len(allFileList) < originalLen - assert originalLen == len(allFileList) + len(testingList) - - trainingList = list(allFileList) - testingList = list(testingList) - validationList = list(validationList) - np.save(outPrefix + 'file_train.npy', trainingList) - np.save(outPrefix + 'file_test.npy', testingList) - np.save(outPrefix + 'file_val.npy', validationList) - - -def extractFeatures(fileList, LABELMAP, maxlen, numFilt, samplerate, winlen, - winstep): - ''' - Reads audio from files specified in fileList, extracts features and assigns - labels to them. - - fileList: List of audio file names. - LABELMAP: The label map to use. - maxlen: maximum length of the audio file. Every other - files is zero padded to maxlen - numFilt: number of filters to use in MFCC - samplerate: sample rate of the audio file. All files are - assumed to be of same sample rate - winLen: winLen to use for fbank in seconds - winstep: winstep for fbank in seconds - ''' - def __extractFeatures(stackedWav, numSteps, numFilt, - samplerate, winlen, winstep): - ''' - [number of waves, Len(wave)] - returns [number of waves, numSteps, numFilt] - All waves are assumed to be of fixed length - ''' - assert stackedWav.ndim == 2, 'Should be [number of waves, len(wav)]' - extractedList = [] - eps = 1e-10 - for sample in stackedWav: - temp, _ = fbank(sample, samplerate=samplerate, winlen=winlen, - winstep=winstep, nfilt=numFilt, - winfunc=np.hamming) - temp = np.log(temp + eps) - assert temp.ndim == 2, 'Should be [numSteps, numFilt]' - assert temp.shape[0] == numSteps, 'Should be [numSteps, numFilt]' - extractedList.append(temp) - return np.array(extractedList) - - fileList = np.array(fileList) - assert(fileList.ndim == 1) - allSamples = np.zeros((len(fileList), maxlen)) - i = 0 - for i,file in enumerate(fileList): - _, x = r.read(file) - assert(len(x) <= maxlen) - allSamples[i, maxlen - len(x):maxlen] += x - i += 1 - assert allSamples.ndim == 2 - winstepSamples = winstep * samplerate - winlenSamples = winlen * samplerate - assert(winstepSamples.is_integer()) - assert(winlenSamples.is_integer()) - numSteps = int(np.ceil((maxlen - winlenSamples)/winstepSamples) + 1) - x = __extractFeatures(allSamples, numSteps, numFilt, samplerate, winlen, - winstep) - y_ = [t.split('/') for t in fileList] - y_ = [t[-2] for t in y_] - y = [] - for t in y_: - assert t in LABELMAP - y.append(LABELMAP[t]) - - def to_onehot(indices, numClasses): - assert indices.ndim == 1 - n = max(indices) + 1 - assert numClasses <= n - b = np.zeros((len(indices), numClasses)) - b[np.arange(len(indices)), indices] = 1 - return b - y = to_onehot(np.array(y), np.max(y) + 1) - return x, y - -if __name__=='__main__': - # ----------------------------------------- # - # Configuration - # ----------------------------------------- # - seed = 42 - maxlen = 16000 - numFilt = 32 - samplerate = 16000 - winlen = 0.025 - winstep = 0.010 - # 13 for google 13, 11 for google 12 - numLabels = 13 # 0 not assigned - samplerate=16000 - # For creation of training file list, testing file list - # and validation list. - audioFileDir = './GoogleSpeech/Raw/' - testingList = './GoogleSpeech/Raw/testing_list.txt' - validationList = './GoogleSpeech/Raw/validation_list.txt' - outDir = './GoogleSpeech/Extracted/' - # ----------------------------------------- # - np.random.seed(seed) - random.seed(seed) - assert(numLabels in [13, 11]) - if numLabels == 13: - values = [LABELMAP13[x] for x in LABELMAP13] - values = set(values) - assert(len(values) == 13) - LABELMAP = LABELMAP13 - if numLabels == 11: - values = [LABELMAP12[x] for x in LABELMAP12] - values = set(values) - assert(len(values) == 11) - LABELMAP = LABELMAP12 - - print("Peforming file creation") - createFileList(audioFileDir, testingList, validationList, - outDir, LABELMAP) - trainFileList = np.load(outDir + 'file_train.npy') - testFileList = np.load(outDir + 'file_test.npy') - valFileList = np.load(outDir + 'file_val.npy') - print("Number of train files:", len(trainFileList)) - print("Number of test files", len(testFileList)) - print("Number of val files", len(valFileList)) - print("Performing feature extraction") - trainFileList_ = [audioFileDir + x for x in trainFileList] - valFileList_ = [audioFileDir + x for x in valFileList] - testFileList_ = [audioFileDir + x for x in testFileList] - x_test, y_test = extractFeatures(testFileList_, LABELMAP, maxlen, numFilt, - samplerate, winlen, winstep) - x_val, y_val = extractFeatures(valFileList_, LABELMAP, maxlen, numFilt, - samplerate, winlen, winstep) - x_train, y_train = extractFeatures(trainFileList_, LABELMAP, maxlen, - numFilt, samplerate, winlen, winstep) - np.save(outDir + 'x_train', x_train);np.save(outDir + 'y_train', y_train) - np.save(outDir + 'x_test', x_test);np.save(outDir + 'y_test', y_test) - np.save(outDir + 'x_val', x_val);np.save(outDir + 'y_val', y_val) - print("Shape train", x_train.shape, y_train.shape) - print("Shape test", x_test.shape, y_test.shape) - print("Shape val", x_val.shape, y_val.shape) - -