зеркало из https://github.com/microsoft/EdgeML.git
Added GPU Support for srnn
This commit is contained in:
Родитель
b20ebd47e4
Коммит
85f122053c
|
@ -1,30 +0,0 @@
|
||||||
# Pytorch Shallow RNN Examples
|
|
||||||
|
|
||||||
This directory includes an example [notebook](SRNN_Example.ipynb) of how to use
|
|
||||||
SRNN on the [Google Speech Commands
|
|
||||||
Dataset](https://ai.googleblog.com/2017/08/launching-speech-commands-dataset.html).
|
|
||||||
|
|
||||||
`pytorch_edgeml.graph.rnn.SRNN2` implements a 2 layer SRNN network. We will use
|
|
||||||
this with an LSTM cell on this dataset. The training routine for SRNN is
|
|
||||||
implemented in `pytorch_edgeml.trainer.srnnTrainer` and will be used as part of
|
|
||||||
this example.
|
|
||||||
|
|
||||||
**Tested With:** pytorch > 1.1.0 with Python 2 and Python 3
|
|
||||||
|
|
||||||
## Fetching Data
|
|
||||||
|
|
||||||
The script [fetch_google.sh](fetch_google.sh) can be used to automatically
|
|
||||||
download the data. You can also manually download and extract the data.
|
|
||||||
[process_google.py](process_google.py), will perform feature extraction on this
|
|
||||||
dataset and write numpy files that conform to the required format.
|
|
||||||
|
|
||||||
To run this script, please use:
|
|
||||||
|
|
||||||
./fetch_google.sh
|
|
||||||
python process_google.py
|
|
||||||
|
|
||||||
With the provided configuration, you can expect a validation accuracy of about
|
|
||||||
92%.
|
|
||||||
|
|
||||||
Copyright (c) Microsoft Corporation. All rights reserved.
|
|
||||||
Licensed under the MIT license.
|
|
|
@ -1,23 +0,0 @@
|
||||||
#!/bin/bash
# Download and extract the Google Speech Commands dataset (v0.01) into
# $OUT_DIR/Raw. Run process_google.py afterwards for feature extraction.
#
# BUGFIX: shebang was '#/bin/bash' (missing '!'), so the interpreter line
# was just a comment; also 'exit' on failure returned status 0 — now exit 1.

# If OUT_DIR is modified, please make sure it is reflected in process_google.py
# as well.
OUT_DIR='./GoogleSpeech/'
mkdir -pv $OUT_DIR
mkdir -pv $OUT_DIR/Raw
mkdir -pv $OUT_DIR/Extracted

echo "Downloading dataset."
echo ""
URL='http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz'
# Abort if we cannot enter the download directory.
cd $OUT_DIR/Raw || exit 1
wget $URL

if [ $? -eq 0 ]; then
    echo "Download complete. Extracting files . . ."
else
    echo "Fail"
    exit 1
fi
tar -xzf speech_commands_v0.01.tar.gz
echo "Done. Please run process_google.py for feature extraction"
|
|
|
@ -1,265 +0,0 @@
|
||||||
|
|
||||||
# Google Speech data feature extraction
|
|
||||||
|
|
||||||
# Note that the 'testing_list.txt' and 'validation_list.txt'
|
|
||||||
# that provided is used to create test and validation
|
|
||||||
# sets. Everything that is not in these sets is considered
|
|
||||||
# for training.
|
|
||||||
|
|
||||||
# The testing_list and validation_list and by extension
|
|
||||||
# the training set has the following property.
|
|
||||||
|
|
||||||
# If one audio sample of a user is in either one of these
|
|
||||||
# sets, then all audio samples of that user will also be
|
|
||||||
# in that set.
|
|
||||||
|
|
||||||
# As long as the same methodology of creating testing
|
|
||||||
# and validation set that google used - as outlined in
|
|
||||||
# their README is used, the testing and validation set
|
|
||||||
# will be consistent. That is, they will always contain
|
|
||||||
# the same set of examples
|
|
||||||
|
|
||||||
# Sampling is not supported yet.
|
|
||||||
|
|
||||||
from python_speech_features import fbank
|
|
||||||
import os
|
|
||||||
import glob
|
|
||||||
import numpy as np
|
|
||||||
import scipy.io.wavfile as r
|
|
||||||
import random
|
|
||||||
|
|
||||||
|
|
||||||
# Various versions can be created depending on which labels are chosen and
# which are moved to the negative (noise) set. We use LABELMAP13 for most of
# our experiments.
# All 30 keywords plus _background_noise_, each with its own distinct label
# (1-31). No word is mapped to the negative class here.
LABELMAP30 = {
    '_background_noise_': 1, 'bed': 2, 'bird': 3,
    'cat': 4, 'dog': 5, 'down': 6, 'eight': 7,
    'five': 8, 'four': 9, 'go': 10, 'happy': 11,
    'house': 12, 'left': 13, 'marvin': 14, 'nine': 15,
    'no': 16, 'off': 17, 'on': 18, 'one': 19,
    'right': 20, 'seven': 21, 'sheila': 22, 'six': 23,
    'stop': 24, 'three': 25, 'tree': 26, 'two': 27,
    'up': 28, 'wow': 29, 'yes': 30, 'zero': 31
}


# 12 keywords with labels 1-12; every remaining word (and background noise)
# is folded into the negative/noise class 0 — 13 distinct labels in total.
LABELMAP13 = {
    'go': 1, 'no': 2, 'on': 3, 'up': 4, 'bed': 5, 'cat': 6,
    'dog': 7, 'off': 8, 'one': 9, 'six': 10, 'two': 11,
    'yes': 12,
    'wow': 0, 'bird': 0, 'down': 0, 'five': 0, 'four': 0,
    'left': 0, 'nine': 0, 'stop': 0, 'tree': 0, 'zero': 0,
    'eight': 0, 'happy': 0, 'house': 0, 'right': 0, 'seven': 0,
    'three': 0, 'marvin': 0, 'sheila': 0, '_background_noise_': 0
}

# 10 keywords with labels 1-10; everything else is the negative class 0 —
# 11 distinct labels in total (used for the "google 12" configuration).
LABELMAP12 = {
    'yes': 1, 'no': 2, 'up': 3, 'down': 4, 'left': 5, 'right': 6,
    'on': 7, 'off': 8, 'stop': 9, 'go': 10,
    'bed':0, 'cat':0, 'dog':0, 'one':0, 'six':0, 'two':0,
    'wow':0, 'bird':0, 'five':0, 'four':0, 'nine':0, 'tree':0,
    'zero':0, 'eight':0, 'happy':0, 'house':0, 'seven':0, 'three':0,
    'marvin':0, 'sheila':0, '_background_noise_':0
}
|
|
||||||
|
|
||||||
def createFileList(audioFileDir, testingList,
                   validationList, outPrefix,
                   labelMap):
    '''
    Build and save the train/test/validation file lists for the Google
    Speech Commands dataset.

    audioFileDir: The directory containing the directories
        with audio files.
    testingList: the `testing_list.txt` file
    validationList: the `validation_list.txt` file
    outPrefix: prefix prepended to the output .npy file names
    labelMap: dict mapping each word-directory name to an integer label

    Reads all the files in audioFileDir and creates
    a list of files that are not part of testingList
    or validationList.

    WARNING: _background_noise_ is ignored

    The three lists are saved as numpy arrays of relative paths:
        outPrefix + 'file_train.npy'
        outPrefix + 'file_test.npy'
        outPrefix + 'file_val.npy'
    (The original docstring advertised '*_testList.npy' names that the
    code never produced.)
    '''
    dirs = os.listdir(audioFileDir)
    dirs = [x for x in dirs if os.path.isdir(os.path.join(audioFileDir, x))]
    # The v0.01 release ships 30 word folders plus _background_noise_.
    assert(len(dirs) == 31), (len(dirs))
    for x in dirs:
        msg = '%s found without label map' % x
        assert x in labelMap, msg

    allFileList = []
    for fol in dirs:
        if fol == '_background_noise_':
            print("Ignoring %s" % fol)
            continue
        path = audioFileDir + '/' + fol + '/'
        files = []
        for w in os.listdir(path):
            if not w.endswith('.wav'):
                print("Ignoring %s" % w)
                continue
            # Store paths relative to audioFileDir: '<word>/<file>.wav'.
            files.append(fol + '/' + w)
        allFileList.extend(files)
    # Relative paths must be unique across all folders.
    assert(len(allFileList) == len(set(allFileList)))

    # BUGFIX: the original opened the validation list without ever closing
    # the file handle; context managers close both files deterministically.
    with open(testingList, 'r') as fil:
        testingList = [x.strip() for x in fil.readlines()]
    with open(validationList, 'r') as fil:
        validationList = [x.strip() for x in fil.readlines()]

    # Remove the validation files from the training pool. The asserts
    # verify that every listed file was actually found on disk.
    originalLen = len(allFileList)
    allFileList = set(allFileList) - set(validationList)
    assert len(allFileList) < originalLen
    assert originalLen == len(allFileList) + len(validationList)
    # Remove the testing files from the training pool.
    originalLen = len(allFileList)
    allFileList = set(allFileList) - set(testingList)
    assert len(allFileList) < originalLen
    assert originalLen == len(allFileList) + len(testingList)

    trainingList = list(allFileList)
    testingList = list(testingList)
    validationList = list(validationList)
    np.save(outPrefix + 'file_train.npy', trainingList)
    np.save(outPrefix + 'file_test.npy', testingList)
    np.save(outPrefix + 'file_val.npy', validationList)
|
|
||||||
|
|
||||||
|
|
||||||
def extractFeatures(fileList, LABELMAP, maxlen, numFilt, samplerate, winlen,
                    winstep):
    '''
    Reads audio from files specified in fileList, extracts log filter-bank
    features and assigns one-hot labels to them.

    fileList: List of audio file names.
    LABELMAP: The label map to use (parent-directory name -> int label).
    maxlen: maximum length of the audio file, in samples. Every shorter
        file is zero padded on the left up to maxlen.
    numFilt: number of filters to use in the filter bank.
    samplerate: sample rate of the audio file. All files are
        assumed to be of same sample rate.
    winlen: window length to use for fbank, in seconds.
    winstep: window step for fbank, in seconds.

    Returns (x, y): x with shape [len(fileList), numSteps, numFilt] and y
    one-hot encoded with shape [len(fileList), numClasses].
    '''
    def __extractFeatures(stackedWav, numSteps, numFilt,
                          samplerate, winlen, winstep):
        '''
        stackedWav: [number of waves, len(wave)]
        returns [number of waves, numSteps, numFilt]
        All waves are assumed to be of fixed length
        '''
        assert stackedWav.ndim == 2, 'Should be [number of waves, len(wav)]'
        extractedList = []
        eps = 1e-10  # avoids log(0) on silent frames
        for sample in stackedWav:
            temp, _ = fbank(sample, samplerate=samplerate, winlen=winlen,
                            winstep=winstep, nfilt=numFilt,
                            winfunc=np.hamming)
            temp = np.log(temp + eps)
            assert temp.ndim == 2, 'Should be [numSteps, numFilt]'
            assert temp.shape[0] == numSteps, 'Should be [numSteps, numFilt]'
            extractedList.append(temp)
        return np.array(extractedList)

    fileList = np.array(fileList)
    assert(fileList.ndim == 1)
    allSamples = np.zeros((len(fileList), maxlen))
    # FIX: dropped the redundant manual counter ('i = 0' / 'i += 1') that
    # duplicated enumerate, and renamed 'file' so it no longer shadows the
    # builtin.
    for i, fname in enumerate(fileList):
        _, x = r.read(fname)
        assert(len(x) <= maxlen)
        # Left-pad with zeros so every clip is exactly maxlen samples.
        allSamples[i, maxlen - len(x):maxlen] += x
    assert allSamples.ndim == 2
    # Window parameters converted from seconds to samples; both must be
    # whole sample counts for the frame count below to be exact.
    winstepSamples = winstep * samplerate
    winlenSamples = winlen * samplerate
    assert(winstepSamples.is_integer())
    assert(winlenSamples.is_integer())
    numSteps = int(np.ceil((maxlen - winlenSamples) / winstepSamples) + 1)
    x = __extractFeatures(allSamples, numSteps, numFilt, samplerate, winlen,
                          winstep)
    # The label is derived from the parent directory name of each file.
    y_ = [t.split('/') for t in fileList]
    y_ = [t[-2] for t in y_]
    y = []
    for t in y_:
        assert t in LABELMAP
        y.append(LABELMAP[t])

    def to_onehot(indices, numClasses):
        # Convert a 1-D array of integer labels to a one-hot matrix.
        assert indices.ndim == 1
        n = max(indices) + 1
        # BUGFIX: the original asserted 'numClasses <= n', which is the
        # wrong direction — every index must fit inside numClasses or the
        # fancy assignment below raises IndexError.
        assert n <= numClasses
        b = np.zeros((len(indices), numClasses))
        b[np.arange(len(indices)), indices] = 1
        return b
    y = to_onehot(np.array(y), np.max(y) + 1)
    return x, y
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # ----------------------------------------- #
    # Configuration
    # ----------------------------------------- #
    seed = 42
    maxlen = 16000       # samples; clips are padded to 1 s at 16 kHz
    numFilt = 32         # filter-bank size
    # BUGFIX: samplerate was assigned twice (also further below); once.
    samplerate = 16000   # Hz
    winlen = 0.025       # fbank window length, seconds
    winstep = 0.010      # fbank window step, seconds
    # 13 for google 13, 11 for google 12
    numLabels = 13  # 0 not assigned
    # For creation of training file list, testing file list
    # and validation list.
    audioFileDir = './GoogleSpeech/Raw/'
    testingList = './GoogleSpeech/Raw/testing_list.txt'
    validationList = './GoogleSpeech/Raw/validation_list.txt'
    outDir = './GoogleSpeech/Extracted/'
    # ----------------------------------------- #
    np.random.seed(seed)
    random.seed(seed)
    assert(numLabels in [13, 11])
    if numLabels == 13:
        # Sanity check: LABELMAP13 must define exactly 13 distinct labels.
        values = set(LABELMAP13[x] for x in LABELMAP13)
        assert(len(values) == 13)
        LABELMAP = LABELMAP13
    if numLabels == 11:
        # Sanity check: LABELMAP12 must define exactly 11 distinct labels.
        values = set(LABELMAP12[x] for x in LABELMAP12)
        assert(len(values) == 11)
        LABELMAP = LABELMAP12

    # BUGFIX: fixed the "Peforming" typo in the status message.
    print("Performing file creation")
    createFileList(audioFileDir, testingList, validationList,
                   outDir, LABELMAP)
    trainFileList = np.load(outDir + 'file_train.npy')
    testFileList = np.load(outDir + 'file_test.npy')
    valFileList = np.load(outDir + 'file_val.npy')
    print("Number of train files:", len(trainFileList))
    print("Number of test files", len(testFileList))
    print("Number of val files", len(valFileList))
    print("Performing feature extraction")
    # Prefix the relative paths with the raw-data directory.
    trainFileList_ = [audioFileDir + x for x in trainFileList]
    valFileList_ = [audioFileDir + x for x in valFileList]
    testFileList_ = [audioFileDir + x for x in testFileList]
    x_test, y_test = extractFeatures(testFileList_, LABELMAP, maxlen, numFilt,
                                     samplerate, winlen, winstep)
    x_val, y_val = extractFeatures(valFileList_, LABELMAP, maxlen, numFilt,
                                   samplerate, winlen, winstep)
    x_train, y_train = extractFeatures(trainFileList_, LABELMAP, maxlen,
                                       numFilt, samplerate, winlen, winstep)
    # One statement per line (the original chained these with semicolons).
    np.save(outDir + 'x_train', x_train)
    np.save(outDir + 'y_train', y_train)
    np.save(outDir + 'x_test', x_test)
    np.save(outDir + 'y_test', y_test)
    np.save(outDir + 'x_val', x_val)
    np.save(outDir + 'y_val', y_val)
    print("Shape train", x_train.shape, y_train.shape)
    print("Shape test", x_test.shape, y_test.shape)
    print("Shape val", x_val.shape, y_val.shape)
|
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче