Merge branch 'master' into josh-readme

This commit is contained in:
Josh Meyer 2019-03-19 23:51:11 +06:00 коммит произвёл GitHub
Родитель 19d9210051 141f9df15a
Коммит 3a157319e4
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
81 изменённых файлов: 1983 добавлений и 242 удалений

Просмотреть файл

@ -2,11 +2,15 @@
set -xe
pip3 install -r <(grep -v tensorflow requirements.txt)
pip3 install tensorflow-gpu==1.13.0-rc2
apt-get install -y python3-venv
python3 -m venv /tmp/venv
source /tmp/venv/bin/activate
pip install -r <(grep -v tensorflow requirements.txt)
pip install tensorflow-gpu==1.13.0-rc2
# Install ds_ctcdecoder package from TaskCluster
pip3 install $(python3 util/taskcluster.py --decoder)
pip install $(python3 util/taskcluster.py --decoder)
mkdir -p ../keep/summaries
@ -15,7 +19,7 @@ fis="${data}/LDC/fisher"
swb="${data}/LDC/LDC97S62/swb"
lbs="${data}/OpenSLR/LibriSpeech/librivox"
python3 -u DeepSpeech.py \
python -u DeepSpeech.py \
--train_files "${fis}-train.csv","${swb}-train.csv","${lbs}-train-clean-100.csv","${lbs}-train-clean-360.csv","${lbs}-train-other-500.csv" \
--dev_files "${lbs}-dev-clean.csv"\
--test_files "${lbs}-test-clean.csv" \

Просмотреть файл

@ -26,6 +26,7 @@ tasks:
scopes: [
"queue:create-task:lowest:{{ taskcluster.docker.provisionerId }}/deepspeech-worker",
"queue:create-task:lowest:{{ taskcluster.docker.provisionerId }}/deepspeech-win",
"queue:create-task:lowest:{{ taskcluster.docker.provisionerId }}/deepspeech-kvm-worker",
"queue:create-task:lowest:deepspeech-provisioner/ds-macos-light",
"queue:create-task:lowest:deepspeech-provisioner/ds-scriptworker",

Просмотреть файл

@ -12,7 +12,6 @@ import evaluate
import numpy as np
import progressbar
import shutil
import tempfile
import tensorflow as tf
import traceback
@ -30,9 +29,9 @@ from util.text import Alphabet
#TODO: remove once fully switched to 1.13
try:
from tensorflow.contrib.lite.python import tflite_convert # 1.12
import tensorflow.lite as lite # 1.13
except ImportError:
from tensorflow.lite.python import tflite_convert # 1.13
import tensorflow.contrib.lite as lite # 1.12
# Graph Creation
@ -664,18 +663,23 @@ def test():
def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
batch_size = batch_size if batch_size > 0 else None
# Input tensor will be of shape [batch_size, n_steps, 2*n_context+1, n_input]
input_tensor = tf.placeholder(tf.float32, [batch_size, n_steps if n_steps > 0 else None, 2*Config.n_context+1, Config.n_input], name='input_node')
seq_length = tf.placeholder(tf.int32, [batch_size], name='input_lengths')
if not tflite:
previous_state_c = variable_on_worker_level('previous_state_c', [batch_size, Config.n_cell_dim], initializer=None)
previous_state_h = variable_on_worker_level('previous_state_h', [batch_size, Config.n_cell_dim], initializer=None)
if batch_size <= 0:
# no state management since n_step is expected to be dynamic too (see below)
previous_state = previous_state_c = previous_state_h = None
else:
previous_state_c = tf.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_c')
previous_state_h = tf.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_h')
if not tflite:
previous_state_c = variable_on_worker_level('previous_state_c', [batch_size, Config.n_cell_dim], initializer=None)
previous_state_h = variable_on_worker_level('previous_state_h', [batch_size, Config.n_cell_dim], initializer=None)
else:
previous_state_c = tf.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_c')
previous_state_h = tf.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_h')
previous_state = tf.contrib.rnn.LSTMStateTuple(previous_state_c, previous_state_h)
previous_state = tf.contrib.rnn.LSTMStateTuple(previous_state_c, previous_state_h)
no_dropout = [0.0] * 6
@ -696,9 +700,23 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
# Apply softmax for CTC decoder
logits = tf.nn.softmax(logits)
new_state_c, new_state_h = layers['rnn_output_state']
if batch_size <= 0:
if tflite:
raise NotImplementedError('dynamic batch_size does not support tflite nor streaming')
if n_steps > 0:
raise NotImplementedError('dynamic batch_size expect n_steps to be dynamic too')
return (
{
'input': input_tensor,
'input_lengths': seq_length,
},
{
'outputs': tf.identity(logits, name='logits'),
},
layers
)
# Initial zero state
new_state_c, new_state_h = layers['rnn_output_state']
if not tflite:
zero_state = tf.zeros([batch_size, Config.n_cell_dim], tf.float32)
initialize_c = tf.assign(previous_state_c, zero_state)
@ -749,7 +767,7 @@ def export():
tf.reset_default_graph()
session = tf.Session(config=Config.session_config)
inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=FLAGS.n_steps, tflite=FLAGS.export_tflite)
inputs, outputs, _ = create_inference_graph(batch_size=FLAGS.export_batch_size, n_steps=FLAGS.n_steps, tflite=FLAGS.export_tflite)
input_names = ",".join(tensor.op.name for tensor in inputs.values())
output_names_tensors = [ tensor.op.name for tensor in outputs.values() if isinstance(tensor, Tensor) ]
output_names_ops = [ tensor.name for tensor in outputs.values() if isinstance(tensor, Operation) ]
@ -785,7 +803,7 @@ def export():
os.makedirs(FLAGS.export_dir)
def do_graph_freeze(output_file=None, output_node_names=None, variables_blacklist=None):
freeze_graph.freeze_graph_with_def_protos(
return freeze_graph.freeze_graph_with_def_protos(
input_graph_def=session.graph_def,
input_saver_def=saver.as_saver_def(),
input_checkpoint=checkpoint_path,
@ -800,39 +818,16 @@ def export():
if not FLAGS.export_tflite:
do_graph_freeze(output_file=output_graph_path, output_node_names=output_names, variables_blacklist='previous_state_c,previous_state_h')
else:
temp_fd, temp_freeze = tempfile.mkstemp(dir=FLAGS.export_dir)
os.close(temp_fd)
do_graph_freeze(output_file=temp_freeze, output_node_names=output_names, variables_blacklist='')
frozen_graph = do_graph_freeze(output_node_names=output_names, variables_blacklist='')
output_tflite_path = os.path.join(FLAGS.export_dir, output_filename.replace('.pb', '.tflite'))
class TFLiteFlags():
def __init__(self):
self.graph_def_file = temp_freeze
self.inference_type = 'FLOAT'
self.input_arrays = input_names
self.input_shapes = input_shapes
self.output_arrays = output_names
self.output_file = output_tflite_path
self.output_format = 'TFLITE'
self.post_training_quantize = True
default_empty = [
'inference_input_type',
'mean_values',
'default_ranges_min', 'default_ranges_max',
'drop_control_dependency',
'reorder_across_fake_quant',
'change_concat_input_ranges',
'allow_custom_ops',
'converter_mode',
'dump_graphviz_dir',
'dump_graphviz_video'
]
for e in default_empty:
self.__dict__[e] = None
converter = lite.TFLiteConverter(frozen_graph, input_tensors=inputs.values(), output_tensors=outputs.values())
converter.post_training_quantize = True
tflite_model = converter.convert()
with open(output_tflite_path, 'wb') as fout:
fout.write(tflite_model)
flags = TFLiteFlags()
tflite_convert._convert_model(flags)
os.unlink(temp_freeze)
log_info('Exported model for TF Lite engine as {}'.format(os.path.basename(output_tflite_path)))
log_info('Models exported at %s' % (FLAGS.export_dir))
@ -857,7 +852,6 @@ def do_single_file_inference(input_file_path):
checkpoint_path = checkpoint.model_checkpoint_path
saver.restore(session, checkpoint_path)
session.run(outputs['initialize_state'])
features = audiofile_to_input_vector(input_file_path, Config.n_input, Config.n_context)

Просмотреть файл

@ -57,6 +57,7 @@ See the output of `deepspeech -h` for more information on the use of `deepspeech
* [Python 3.6](https://www.python.org/)
* [Git Large File Storage](https://git-lfs.github.com/)
* Mac or Linux environment
* Go to [build README](examples/net_framework/README.md) to start building DeepSpeech for Windows from source.
## Getting the code
@ -203,7 +204,7 @@ npm install deepspeech-gpu
See the [release notes](https://github.com/mozilla/DeepSpeech/releases) to find which GPUs are supported. Please ensure you have the required [CUDA dependency](#cuda-dependency).
See [nodejs_wav](examples/nodejs_wav) for an example of how to use the bindings.
See [client.js](native_client/javascript/client.js) for an example of how to use the bindings. Or download the [wav example](examples/nodejs_wav).
### Installing bindings from source

Просмотреть файл

@ -1 +1 @@
0.5.0-alpha.1
0.5.0-alpha.2

Просмотреть файл

@ -8,17 +8,17 @@ import sys
sys.path.insert(1, os.path.join(sys.path[0], '..'))
import csv
import sox
import tarfile
import subprocess
import progressbar
from glob import glob
from os import path
from sox import Transformer
from threading import RLock
from multiprocessing.dummy import Pool
from multiprocessing import cpu_count
from util.text import validate_label
from util.downloader import maybe_download, SIMPLE_BAR
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
@ -66,7 +66,7 @@ def _maybe_convert_set(extracted_dir, source_csv, target_csv):
samples.append((row['filename'], row['text']))
# Mutable counters for the concurrent embedded routine
counter = { 'all': 0, 'too_short': 0, 'too_long': 0 }
counter = { 'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0 }
lock = RLock()
num_samples = len(samples)
rows = []
@ -78,9 +78,19 @@ def _maybe_convert_set(extracted_dir, source_csv, target_csv):
wav_filename = path.splitext(mp3_filename)[0] + ".wav"
_maybe_convert_wav(mp3_filename, wav_filename)
frames = int(subprocess.check_output(['soxi', '-s', wav_filename], stderr=subprocess.STDOUT))
file_size = path.getsize(wav_filename)
file_size = -1
if path.exists(wav_filename):
file_size = path.getsize(wav_filename)
frames = int(subprocess.check_output(['soxi', '-s', wav_filename], stderr=subprocess.STDOUT))
label = validate_label(sample[1])
with lock:
if int(frames/SAMPLE_RATE*1000/10/2) < len(str(sample[1])):
if file_size == -1:
# Excluding samples that failed upon conversion
counter['failed'] += 1
elif label is None:
# Excluding samples that failed on label validation
counter['invalid_label'] += 1
elif int(frames/SAMPLE_RATE*1000/10/2) < len(str(label)):
# Excluding samples that are too short to fit the transcript
counter['too_short'] += 1
elif frames/SAMPLE_RATE > MAX_SECS:
@ -88,7 +98,7 @@ def _maybe_convert_set(extracted_dir, source_csv, target_csv):
counter['too_long'] += 1
else:
# This one is good - keep it for the target CSV
rows.append((wav_filename, file_size, sample[1]))
rows.append((wav_filename, file_size, label))
counter['all'] += 1
print('Importing mp3 files...')
@ -108,7 +118,11 @@ def _maybe_convert_set(extracted_dir, source_csv, target_csv):
for filename, file_size, transcript in bar(rows):
writer.writerow({ 'wav_filename': filename, 'wav_filesize': file_size, 'transcript': transcript })
print('Imported %d samples.' % (counter['all'] - counter['too_short'] - counter['too_long']))
print('Imported %d samples.' % (counter['all'] - counter['failed'] - counter['too_short'] - counter['too_long']))
if counter['failed'] > 0:
print('Skipped %d samples that failed upon conversion.' % counter['failed'])
if counter['invalid_label'] > 0:
print('Skipped %d samples that failed on transcript validation.' % counter['invalid_label'])
if counter['too_short'] > 0:
print('Skipped %d samples that were too short to match the transcript.' % counter['too_short'])
if counter['too_long'] > 0:
@ -116,9 +130,12 @@ def _maybe_convert_set(extracted_dir, source_csv, target_csv):
def _maybe_convert_wav(mp3_filename, wav_filename):
if not path.exists(wav_filename):
transformer = Transformer()
transformer = sox.Transformer()
transformer.convert(samplerate=SAMPLE_RATE)
transformer.build(mp3_filename, wav_filename)
try:
transformer.build(mp3_filename, wav_filename)
except sox.core.SoxError:
pass
if __name__ == "__main__":
_download_and_preprocess_data(sys.argv[1])

56
bin/import_cv2.py Normal file → Executable file
Просмотреть файл

@ -8,28 +8,29 @@ import sys
sys.path.insert(1, os.path.join(sys.path[0], '..'))
import csv
import sox
import subprocess
import progressbar
from os import path
from sox import Transformer
from threading import RLock
from multiprocessing.dummy import Pool
from multiprocessing import cpu_count
from util.downloader import SIMPLE_BAR
from util.text import validate_label
'''
Broadly speaking, this script takes the audio downloaded from Common Voice
for a certain language, in addition to the *.tsv files output by CorporaCeator,
for a certain language, in addition to the *.tsv files output by CorporaCreator,
and the script formats the data and transcripts to be in a state usable by
DeepSpeech.py
Usage:
$ python3 import_cv2.py /path/to/audio/data_dir /path/to/tsv_dir
Input:
Input:
(1) audio_dir (string) path to dir of audio downloaded from Common Voice
(2) tsv_dir (string) path to dir containing {train,test,dev}.tsv files
(2) tsv_dir (string) path to dir containing {train,test,dev}.tsv files
which were generated by CorporaCreator
Output:
@ -53,30 +54,41 @@ def _preprocess_data(audio_dir, tsv_dir):
def _maybe_convert_set(audio_dir, input_tsv):
output_csv = path.join(audio_dir,os.path.split(input_tsv)[-1].replace('tsv', 'csv'))
print("Saving new DeepSpeech-formatted CSV file to: ", output_csv)
# Get audiofile path and transcript for each sentence in tsv
samples = []
with open(input_tsv) as input_tsv_file:
reader = csv.DictReader(input_tsv_file, delimiter='\t')
for row in reader:
samples.append((row['path'], row['sentence']))
# Keep track of how many samples are good vs. problematic
counter = { 'all': 0, 'too_short': 0, 'too_long': 0 }
counter = { 'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0 }
lock = RLock()
num_samples = len(samples)
rows = []
def one_sample(sample):
""" Take an audio file, and optionally convert it to 16kHz WAV """
mp3_filename = path.join(audio_dir, sample[0])
if not path.splitext(mp3_filename.lower())[1] == '.mp3':
mp3_filename += ".mp3"
# Storing wav files next to the mp3 ones - just with a different suffix
wav_filename = path.splitext(mp3_filename)[0] + ".wav"
_maybe_convert_wav(mp3_filename, wav_filename)
frames = int(subprocess.check_output(['soxi', '-s', wav_filename], stderr=subprocess.STDOUT))
file_size = path.getsize(wav_filename)
file_size = -1
if path.exists(wav_filename):
file_size = path.getsize(wav_filename)
frames = int(subprocess.check_output(['soxi', '-s', wav_filename], stderr=subprocess.STDOUT))
label = validate_label(sample[1])
with lock:
if int(frames/SAMPLE_RATE*1000/10/2) < len(str(sample[1])):
if file_size == -1:
# Excluding samples that failed upon conversion
counter['failed'] += 1
elif label is None:
# Excluding samples that failed on label validation
counter['invalid_label'] += 1
elif int(frames/SAMPLE_RATE*1000/10/2) < len(str(label)):
# Excluding samples that are too short to fit the transcript
counter['too_short'] += 1
elif frames/SAMPLE_RATE > MAX_SECS:
@ -84,9 +96,9 @@ def _maybe_convert_set(audio_dir, input_tsv):
counter['too_long'] += 1
else:
# This one is good - keep it for the target CSV
rows.append((wav_filename, file_size, sample[1]))
rows.append((wav_filename, file_size, label))
counter['all'] += 1
print("Importing mp3 files...")
pool = Pool(cpu_count())
bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
@ -95,7 +107,7 @@ def _maybe_convert_set(audio_dir, input_tsv):
bar.update(num_samples)
pool.close()
pool.join()
with open(output_csv, 'w') as output_csv_file:
print('Writing CSV file for DeepSpeech.py as: ', output_csv)
writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES)
@ -103,8 +115,12 @@ def _maybe_convert_set(audio_dir, input_tsv):
bar = progressbar.ProgressBar(max_value=len(rows), widgets=SIMPLE_BAR)
for filename, file_size, transcript in bar(rows):
writer.writerow({ 'wav_filename': filename, 'wav_filesize': file_size, 'transcript': transcript })
print('Imported %d samples.' % (counter['all'] - counter['too_short'] - counter['too_long']))
print('Imported %d samples.' % (counter['all'] - counter['failed'] - counter['too_short'] - counter['too_long']))
if counter['failed'] > 0:
print('Skipped %d samples that failed upon conversion.' % counter['failed'])
if counter['invalid_label'] > 0:
print('Skipped %d samples that failed on transcript validation.' % counter['invalid_label'])
if counter['too_short'] > 0:
print('Skipped %d samples that were too short to match the transcript.' % counter['too_short'])
if counter['too_long'] > 0:
@ -112,9 +128,13 @@ def _maybe_convert_set(audio_dir, input_tsv):
def _maybe_convert_wav(mp3_filename, wav_filename):
if not path.exists(wav_filename):
transformer = Transformer()
transformer = sox.Transformer()
transformer.convert(samplerate=SAMPLE_RATE)
transformer.build(mp3_filename, wav_filename)
try:
transformer.build(mp3_filename, wav_filename)
except sox.core.SoxError:
pass
if __name__ == "__main__":
audio_dir = sys.argv[1]

Просмотреть файл

@ -23,7 +23,7 @@ python -u DeepSpeech.py \
--train_batch_size 1 \
--dev_batch_size 1 \
--test_batch_size 1 \
--n_hidden 494 \
--epoch 75 \
--n_hidden 100 \
--epoch 200 \
--checkpoint_dir "$checkpoint_dir" \
"$@"

9
data/README.md Normal file
Просмотреть файл

@ -0,0 +1,9 @@
# Language-Specific Data
This directory contains language-specific data files. Most importantly, you will find here:
1. A list of unique characters for the target language (e.g. English) in `data/alphabet.txt`
2. A binary n-gram language model compiled by `kenlm` in `data/lm/lm.binary`
3. A trie model compiled by `generate_trie.cpp` in `data/lm/trie`
For more information on how to create these resources, see `data/lm/README.md`

Просмотреть файл

@ -20,10 +20,49 @@ sudo apt-get install ffmpeg
Here is an example for a local audio file:
```bash
node ./index.js --audio <AUDIO_FILE> --model $HOME/models/output_graph.pbmm --alphabet $HOME/models/alphabet.txt
node ./index.js --audio <AUDIO_FILE> \
--model $HOME/models/output_graph.pbmm \
--alphabet $HOME/models/alphabet.txt
```
Here is an example for a remote RTMP-Stream:
```bash
node ./index.js --audio rtmp://<IP>:1935/live/teststream --model $HOME/models/output_graph.pbmm --alphabet $HOME/models/alphabet.txt
node ./index.js --audio rtmp://<IP>:1935/live/teststream \
--model $HOME/models/output_graph.pbmm \
--alphabet $HOME/models/alphabet.txt
```
## Examples
Real time streaming inference with DeepSpeech's example audio ([audio-0.4.1.tar.gz](https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz)).
```bash
node ./index.js --audio $HOME/audio/2830-3980-0043.wav \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm \
--alphabet $HOME/models/alphabet.txt
```
```bash
node ./index.js --audio $HOME/audio/4507-16021-0012.wav \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm \
--alphabet $HOME/models/alphabet.txt
```
```bash
node ./index.js --audio $HOME/audio/8455-210777-0068.wav \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm \
--alphabet $HOME/models/alphabet.txt
```
Real time streaming inference in combination with an RTMP server.
```bash
node ./index.js --audio rtmp://<HOST>/<APP>/<KEY> \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm \
--alphabet $HOME/models/alphabet.txt
```
## Notes
To get the best result mapped on to your own scenario, it might be helpful to adjust the parameters `VAD_MODE` and `DEBOUNCE_TIME`.

Просмотреть файл

@ -4,11 +4,12 @@ const VAD = require("node-vad");
const Ds = require('deepspeech');
const argparse = require('argparse');
const util = require('util');
const { spawn } = require('child_process');
// These constants control the beam search decoder
// Beam width used in the CTC decoder when building candidate transcriptions
const BEAM_WIDTH = 1024;
const BEAM_WIDTH = 500;
// The alpha hyperparameter of the CTC decoder. Language Model weight
const LM_ALPHA = 0.75;
@ -44,7 +45,7 @@ parser.addArgument(['--model'], {required: true, help: 'Path to the model (proto
parser.addArgument(['--alphabet'], {required: true, help: 'Path to the configuration file specifying the alphabet used by the network'});
parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'});
parser.addArgument(['--audio'], {required: true, help: 'Path to the audio source to run (ffmpeg supported formats)'});
parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exits'});
let args = parser.parseArgs();
@ -67,51 +68,71 @@ if (args['lm'] && args['trie']) {
console.error('Loaded language model in %ds.', totalTime(lm_load_end));
}
const vad = new VAD(VAD.Mode.NORMAL);
const voice = {START: true, STOP: false};
let sctx = model.setupStream(150, 16000);
let state = voice.STOP;
// Default initial allocation = 3 seconds := 150
const PRE_ALLOC_FRAMES = 150;
// Default is 16kHz
const AUDIO_SAMPLE_RATE = 16000;
// Defines different thresholds for voice detection
// NORMAL: Suitable for high bitrate, low-noise data. May classify noise as voice, too.
// LOW_BITRATE: Detection mode optimised for low-bitrate audio.
// AGGRESSIVE: Detection mode best suited for somewhat noisy, lower quality audio.
// VERY_AGGRESSIVE: Detection mode with lowest miss-rate. Works well for most inputs.
const VAD_MODE = VAD.Mode.NORMAL;
// const VAD_MODE = VAD.Mode.LOW_BITRATE;
// const VAD_MODE = VAD.Mode.AGGRESSIVE;
// const VAD_MODE = VAD.Mode.VERY_AGGRESSIVE;
// Time in milliseconds for debouncing speech active state
const DEBOUNCE_TIME = 20;
// Create voice activity stream
const VAD_STREAM = VAD.createStream({
mode: VAD_MODE,
audioFrequency: AUDIO_SAMPLE_RATE,
debounceTime: DEBOUNCE_TIME
});
// Spawn ffmpeg process
const ffmpeg = spawn('ffmpeg', [
'-hide_banner',
'-nostats',
'-loglevel', 'fatal',
'-i', args['audio'],
'-vn',
'-acodec', 'pcm_s16le',
'-ac', 1,
'-ar', AUDIO_SAMPLE_RATE,
'-f', 's16le',
'pipe:'
]);
let audioLength = 0;
let sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE);
function finishStream() {
const model_load_start = process.hrtime();
console.error('Running inference.');
console.log('Transcription: ', model.finishStream(sctx));
const model_load_end = process.hrtime(model_load_start);
console.error('Inference took %ds.', totalTime(model_load_end));
console.error('Inference took %ds for %ds audio file.', totalTime(model_load_end), audioLength.toPrecision(4));
audioLength = 0;
}
let ffmpeg = require('child_process').spawn('ffmpeg', [
'-hide_banner',
'-nostats',
'-loglevel', 'fatal',
'-i', args['audio'],
'-af', 'highpass=f=200,lowpass=f=3000',
'-vn',
'-acodec', 'pcm_s16le',
'-ac', 1,
'-ar', 16000,
'-f', 's16le',
'pipe:'
]);
ffmpeg.stdout.on('data', chunk => {
vad.processAudio(chunk, 16000).then(res => {
switch (res) {
case VAD.Event.SILENCE:
if (state === voice.START) {
state = voice.STOP;
finishStream();
sctx = model.setupStream(150,16000);
}
break;
case VAD.Event.VOICE:
state = voice.START;
model.feedAudioContent(sctx, chunk.slice(0, chunk.length / 2));
break;
}
});
});
ffmpeg.stdout.on('close', code => {
function intermediateDecode() {
finishStream();
});
sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE);
}
// Feed one chunk of audio into the current DeepSpeech stream (sctx) and add
// its duration to audioLength, which is reported when the stream is finished.
function feedAudioContent(chunk) {
// Duration in seconds = (bytes / 2) samples * (1 / sample rate); the divide
// by two assumes 16-bit samples, matching the pcm_s16le output requested
// from ffmpeg. NOTE(review): assumes chunk.length is a byte count — confirm.
audioLength += (chunk.length / 2) * ( 1 / AUDIO_SAMPLE_RATE);
// NOTE(review): only the first half of the buffer is passed to the model;
// presumably the binding counts 16-bit samples rather than bytes — confirm
// against the deepspeech Node.js binding before changing this.
model.feedAudioContent(sctx, chunk.slice(0, chunk.length / 2));
}
// Handler for 'data' events emitted by the VAD stream: forwards audio to the
// model while speech is flagged, and triggers a decode when speech ends.
function processVad(data) {
// speech.start / speech.state set (presumably: speech began or is ongoing):
// keep feeding audio into the current stream.
if (data.speech.start||data.speech.state) feedAudioContent(data.audioData)
// speech.end set: feed the final chunk, then call intermediateDecode().
else if (data.speech.end) { feedAudioContent(data.audioData); intermediateDecode() }
}
ffmpeg.stdout.pipe(VAD_STREAM).on('data', processVad);

86
examples/mic_vad_streaming/mic_vad_streaming.py Normal file → Executable file
Просмотреть файл

@ -1,12 +1,13 @@
import time, logging
from datetime import datetime
import threading, collections, queue, os, os.path
import wave
import pyaudio
import webrtcvad
from halo import Halo
import deepspeech
import numpy as np
import pyaudio
import wave
import webrtcvad
from halo import Halo
from scipy import signal
logging.basicConfig(level=20)
@ -14,28 +15,61 @@ class Audio(object):
"""Streams raw audio from microphone. Data is received in a separate thread, and stored in a buffer, to be read from."""
FORMAT = pyaudio.paInt16
RATE = 16000
# Network/VAD rate-space
RATE_PROCESS = 16000
CHANNELS = 1
BLOCKS_PER_SECOND = 50
BLOCK_SIZE = int(RATE / float(BLOCKS_PER_SECOND))
def __init__(self, callback=None):
def __init__(self, callback=None, device=None, input_rate=RATE_PROCESS):
def proxy_callback(in_data, frame_count, time_info, status):
callback(in_data)
return (None, pyaudio.paContinue)
if callback is None: callback = lambda in_data: self.buffer_queue.put(in_data)
self.buffer_queue = queue.Queue()
self.sample_rate = self.RATE
self.block_size = self.BLOCK_SIZE
self.device = device
self.input_rate = input_rate
self.sample_rate = self.RATE_PROCESS
self.block_size = int(self.RATE_PROCESS / float(self.BLOCKS_PER_SECOND))
self.block_size_input = int(self.input_rate / float(self.BLOCKS_PER_SECOND))
self.pa = pyaudio.PyAudio()
self.stream = self.pa.open(format=self.FORMAT,
channels=self.CHANNELS,
rate=self.sample_rate,
input=True,
frames_per_buffer=self.block_size,
stream_callback=proxy_callback)
kwargs = {
'format': self.FORMAT,
'channels': self.CHANNELS,
'rate': self.input_rate,
'input': True,
'frames_per_buffer': self.block_size_input,
'stream_callback': proxy_callback,
}
# if not default device
if self.device:
kwargs['input_device_index'] = self.device
self.stream = self.pa.open(**kwargs)
self.stream.start_stream()
def resample(self, data, input_rate):
    """
    Resample raw 16-bit PCM audio from input_rate to RATE_PROCESS.

    The microphone may not support the native processing sampling rate, so
    audio is resampled here before it is handed to webrtcvad and deepspeech.

    Args:
        data (bytes): Input audio stream, interpreted as int16 samples
        input_rate (int): Input audio rate to resample from

    Returns:
        bytes: The resampled audio as int16 samples at RATE_PROCESS.
    """
    # frombuffer replaces the deprecated binary mode of np.fromstring and
    # avoids copying the input buffer.
    data16 = np.frombuffer(data, dtype=np.int16)
    if data16.size == 0:
        # Nothing to resample; avoid asking scipy for a zero-length output.
        return b""
    # Honour the explicit input_rate argument rather than silently reading
    # the instance attribute (callers pass self.input_rate anyway).
    resample_size = int(len(data16) / input_rate * self.RATE_PROCESS)
    resampled = signal.resample(data16, resample_size)
    # Cast the float result back to int16; tobytes replaces the deprecated
    # ndarray.tostring.
    return np.array(resampled, dtype=np.int16).tobytes()
def read_resampled(self):
"""Return a block of audio data resampled to 16000hz, blocking if necessary."""
return self.resample(data=self.buffer_queue.get(),
input_rate=self.input_rate)
def read(self):
"""Return a block of audio data, blocking if necessary."""
return self.buffer_queue.get()
@ -58,17 +92,22 @@ class Audio(object):
wf.writeframes(data)
wf.close()
class VADAudio(Audio):
"""Filter & segment audio with voice activity detection."""
def __init__(self, aggressiveness=3):
super().__init__()
def __init__(self, aggressiveness=3, device=None, input_rate=None):
super().__init__(device=device, input_rate=input_rate)
self.vad = webrtcvad.Vad(aggressiveness)
def frame_generator(self):
"""Generator that yields all audio frames from microphone."""
while True:
yield self.read()
if self.input_rate == self.RATE_PROCESS:
while True:
yield self.read()
else:
while True:
yield self.read_resampled()
def vad_collector(self, padding_ms=300, ratio=0.75, frames=None):
"""Generator that yields series of consecutive audio frames comprising each utterance, separated by yielding a single None.
@ -121,7 +160,9 @@ def main(ARGS):
model.enableDecoderWithLM(ARGS.alphabet, ARGS.lm, ARGS.trie, ARGS.lm_alpha, ARGS.lm_beta)
# Start audio with VAD
vad_audio = VADAudio(aggressiveness=ARGS.vad_aggressiveness)
vad_audio = VADAudio(aggressiveness=ARGS.vad_aggressiveness,
device=ARGS.device,
input_rate=ARGS.rate)
print("Listening (ctrl-C to exit)...")
frames = vad_audio.vad_collector()
@ -148,6 +189,7 @@ def main(ARGS):
if __name__ == '__main__':
BEAM_WIDTH = 500
DEFAULT_SAMPLE_RATE = 16000
LM_ALPHA = 0.75
LM_BETA = 1.85
N_FEATURES = 26
@ -171,6 +213,10 @@ if __name__ == '__main__':
help="Path to the language model binary file. Default: lm.binary")
parser.add_argument('-t', '--trie', default='trie',
help="Path to the language model trie file created with native_client/generate_trie. Default: trie")
parser.add_argument('-d', '--device', type=int, default=None,
help="Device input index (Int) as listed by pyaudio.PyAudio.get_device_info_by_index(). If not provided, falls back to PyAudio.get_default_device()")
parser.add_argument('-r', '--rate', type=int, default=DEFAULT_SAMPLE_RATE,
help=f"Input device sample rate. Default: {DEFAULT_SAMPLE_RATE}. Your device may require 44100.")
parser.add_argument('-nf', '--n_features', type=int, default=N_FEATURES,
help=f"Number of MFCC features to use. Default: {N_FEATURES}")
parser.add_argument('-nc', '--n_context', type=int, default=N_CONTEXT,

Просмотреть файл

@ -11,34 +11,20 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechConsole", "DeepSp
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Debug|x64 = Debug|x64
Release|Any CPU = Release|Any CPU
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.ActiveCfg = Debug|Any CPU
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.Build.0 = Debug|Any CPU
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|Any CPU.Build.0 = Release|Any CPU
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.ActiveCfg = Debug|x64
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.Build.0 = Debug|x64
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|x64.ActiveCfg = Release|x64
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|x64.Build.0 = Release|x64
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|Any CPU.Build.0 = Debug|Any CPU
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|x64.ActiveCfg = Debug|x64
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|x64.Build.0 = Debug|x64
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|Any CPU.ActiveCfg = Release|Any CPU
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|Any CPU.Build.0 = Release|Any CPU
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|x64.ActiveCfg = Release|x64
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|x64.Build.0 = Release|x64
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Debug|x64.ActiveCfg = Debug|x64
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Debug|x64.Build.0 = Debug|x64
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Release|Any CPU.Build.0 = Release|Any CPU
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Release|x64.ActiveCfg = Release|x64
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Release|x64.Build.0 = Release|x64
EndGlobalSection

Просмотреть файл

@ -10,7 +10,7 @@ namespace DeepSpeechClient
/// <summary>
/// Client of the Mozilla's deepspeech implementation.
/// </summary>
public class DeepSpeech : IDeepSpeech, IDisposable
public class DeepSpeech : IDeepSpeech
{
private unsafe ModelState** _modelStatePP;
private unsafe ModelState* _modelStateP;

Просмотреть файл

@ -1,32 +1,91 @@
namespace DeepSpeechClient.Interfaces
using System;
namespace DeepSpeechClient.Interfaces
{
public interface IDeepSpeech
/// <summary>
/// Client interface of the Mozilla's deepspeech implementation.
/// </summary>
public interface IDeepSpeech : IDisposable
{
/// <summary>
/// Prints the versions of Tensorflow and DeepSpeech.
/// </summary>
void PrintVersions();
/// <summary>
/// Create an object providing an interface to a trained DeepSpeech model.
/// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param>
/// <param name="aNCep">The number of cepstrum the model was trained with.</param>
/// <param name="aNContext">The context window the model was trained with.</param>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
/// <returns>Zero on success, non-zero on failure.</returns>
unsafe int CreateModel(string aModelPath, uint aNCep,
uint aNContext,
string aAlphabetConfigPath,
uint aBeamWidth);
/// <summary>
/// Enable decoding using beam scoring with a KenLM language model.
/// </summary>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aLMPath">The path to the language model binary file.</param>
/// <param name="aTriePath">The path to the trie file built from the same vocabulary as the language model binary.</param>
/// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
/// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
/// <returns>Zero on success, non-zero on failure (invalid arguments).</returns>
unsafe int EnableDecoderWithLM(string aAlphabetConfigPath,
string aLMPath,
string aTriePath,
float aLMAlpha,
float aLMBeta);
/// <summary>
/// Use the DeepSpeech model to perform Speech-To-Text.
/// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <param name="aSampleRate">The sample-rate of the audio signal.</param>
/// <returns>The STT result. The user is responsible for freeing the string. Returns NULL on error.</returns>
unsafe string SpeechToText(short[] aBuffer,
uint aBufferSize,
uint aSampleRate);
/// <summary>
/// Destroy a streaming state without decoding the computed logits.
/// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation.
/// </summary>
unsafe void DiscardStream();
/// <summary>
/// Creates a new streaming inference state.
/// </summary>
/// <param name="aPreAllocFrames">Number of timestep frames to reserve.
/// One timestep is equivalent to two window lengths(20ms).
/// If set to 0 we reserve enough frames for 3 seconds of audio(150).</param>
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
/// <returns>Zero for success, non-zero on failure</returns>
unsafe int SetupStream(uint aPreAllocFrames, uint aSampleRate);
/// <summary>
/// Feeds audio samples to an ongoing streaming inference.
/// </summary>
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate.</param>
unsafe void FeedAudioContent(short[] aBuffer, uint aBufferSize);
/// <summary>
/// Computes the intermediate decoding of an ongoing streaming inference. This is an expensive process as the decoder implementation isn't
/// currently capable of streaming, so it always starts from the beginning of the audio.
/// </summary>
/// <returns>The STT intermediate result. The user is responsible for freeing the string.</returns>
unsafe string IntermediateDecode();
/// <summary>
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal.
/// </summary>
/// <returns>The STT result. The user is responsible for freeing the string.</returns>
unsafe string FinishStream();
}
}

Просмотреть файл

@ -1,4 +1,5 @@
using DeepSpeechClient;
using DeepSpeechClient.Interfaces;
using NAudio.Wave;
using System;
using System.Collections.Generic;
@ -43,7 +44,7 @@ namespace CSharpExamples
Stopwatch stopwatch = new Stopwatch();
using (DeepSpeech sttClient = new DeepSpeech())
using (IDeepSpeech sttClient = new DeepSpeech())
{
var result = 1;
Console.WriteLine("Loading model...");
@ -109,7 +110,6 @@ namespace CSharpExamples
Console.WriteLine("Error loding the model.");
}
}
Console.ReadKey();
}
}
}

Просмотреть файл

@ -2,6 +2,7 @@
using CSCore.CoreAudioAPI;
using CSCore.SoundIn;
using CSCore.Streams;
using DeepSpeechClient.Interfaces;
using Microsoft.Win32;
using System;
using System.Collections.Concurrent;
@ -19,7 +20,7 @@ namespace DeepSpeechWPF
/// </summary>
public partial class MainWindow : Window
{
private readonly DeepSpeechClient.DeepSpeech _sttClient;
private readonly IDeepSpeech _sttClient;
private const uint N_CEP = 26;
private const uint N_CONTEXT = 9;

Просмотреть файл

Просмотреть файл

@ -0,0 +1,9 @@
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<NativeLibs Include="$(MSBuildThisFileDirectory)\*.so" />
<None Include="@(NativeLibs)">
<Link>%(FileName)%(Extension)</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>

Просмотреть файл

@ -0,0 +1,21 @@
<?xml version="1.0"?>
<package>
<metadata>
<id>$NUPKG_ID</id>
<version>$NUPKG_VERSION</version>
<title>DeepSpeech</title>
<authors>Mozilla</authors>
<owners>Mozilla</owners>
<license type="expression">MPL-2.0</license>
<projectUrl>http://github.com/mozilla/DeepSpeech</projectUrl>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>A library for running inference with a DeepSpeech model</description>
<copyright>Copyright (c) 2019 Mozilla Corporation</copyright>
<tags>native speech speech_recognition</tags>
</metadata>
<files>
<file src="build\**" target="build/"/>
<file src="lib\**" target="lib/"/>
<file src="tools\**" target="tools/"/>
</files>
</package>

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -0,0 +1,134 @@
# Building DeepSpeech native client for Windows
Now we can build the native client of DeepSpeech and run inference on Windows using the C# client. To do that, we need to compile the `native_client`.
**Table of Contents**
- [Prerequisites](#prerequisites)
- [Getting the code](#getting-the-code)
- [Configuring the paths](#configuring-the-paths)
- [Adding environment variables](#adding-environment-variables)
- [MSYS2 paths](#msys2-paths)
- [BAZEL path](#bazel-path)
- [Python path](#python-path)
- [CUDA paths](#cuda-paths)
- [Building the native_client](#building-the-native_client)
- [Build for CPU](#cpu)
- [Build with CUDA support](#gpu-with-cuda)
- [Using the generated library](#using-the-generated-library)
## Prerequisites
* [Python 3.6](https://www.python.org/)
* [Git Large File Storage](https://git-lfs.github.com/)
* [MSYS2(x86_64)](https://www.msys2.org/)
* [Bazel v0.17.2](https://github.com/bazelbuild/bazel/releases)
* [Windows 10 SDK](https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk)
* Windows 10
* [Visual Studio 2017 Community](https://visualstudio.microsoft.com/vs/community/)
Inside the Visual Studio Installer enable `MS Build Tools` and `VC++ 2015.3 v14.00 (v140) toolset for desktop`.
If you want to enable CUDA support you need to install:
* [CUDA 9.0 and cuDNN 7.3.1](https://developer.nvidia.com/cuda-90-download-archive)
It may compile with other versions, but since we don't extensively test them, we highly recommend sticking to the recommended versions in order to avoid compilation errors caused by incompatible versions.
## Getting the code
We need to clone `mozilla/DeepSpeech` and `mozilla/tensorflow`.
```bash
git clone https://github.com/mozilla/DeepSpeech
```
```bash
git clone https://github.com/mozilla/tensorflow
```
## Configuring the paths
We need to create a symbolic link. For this example, let's suppose that we cloned into `D:\cloned` and the structure now looks like:
.
├── D:\
│ ├── cloned # Contains DeepSpeech and tensorflow side by side
│ │ ├── DeepSpeech # Root of the cloned DeepSpeech
│ │ ├── tensorflow # Root of the cloned Mozilla's tensorflow
└── ...
Adjust the paths to match your own directory structure. For the structure above, we are going to use the following command:
```bash
mklink /d "D:\cloned\tensorflow\native_client" "D:\cloned\DeepSpeech\native_client"
```
## Adding environment variables
After you have installed the requirements, there are a few environment variables that we need to add to the `PATH` variable of the system variables.
#### MSYS2 paths
For MSYS2 we need to add the `bin` directory. If you installed it in the default location, the path that we need to add should look like `C:\msys64\usr\bin`. Now we can run `pacman`:
```bash
pacman -Syu
```
```bash
pacman -Su
```
```bash
pacman -S patch unzip
```
#### BAZEL path
For BAZEL we need to add the path to the executable, make sure you rename the executable to `bazel`.
To check the version installed you can run:
```bash
bazel version
```
#### PYTHON path
Add your `python.exe` path to the `PATH` variable.
#### CUDA paths
If you run CUDA enabled `native_client` we need to add the following to the `PATH` variable.
```
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\bin
```
### Building the native_client
There's one last command to run before building, you need to run the [configure.py](https://github.com/mozilla/tensorflow/blob/master/configure.py) inside `tensorflow` cloned directory.
At this point we are ready to start building the `native_client`. Go to the `tensorflow` directory that you cloned; following our example, it should be `D:\cloned\tensorflow`.
#### CPU
We will add AVX/AVX2 support in the command, please make sure that your CPU supports these instructions before adding the flags, if not you can remove them.
```bash
bazel build -c opt --copt=/arch:AVX --copt=/arch:AVX2 //native_client:libdeepspeech.so
```
#### GPU with CUDA
If you enabled CUDA in [configure.py](https://github.com/mozilla/tensorflow/blob/master/configure.py) configuration command now you can add `--config=cuda` to compile with CUDA support.
```bash
bazel build -c opt --config=cuda --copt=/arch:AVX --copt=/arch:AVX2 //native_client:libdeepspeech.so
```
Be patient, if you enabled AVX/AVX2 and CUDA it will take a long time. Finally you should see it stops and shows the path to the generated `libdeepspeech.so`.
## Using the generated library
For now, the generated `libdeepspeech.so` can only be used with the C# clients. Go to [DeepSpeech/examples/net_framework/CSharpExamples/](https://github.com/mozilla/DeepSpeech/tree/master/examples/net_framework/CSharpExamples) in your DeepSpeech directory and open the Visual Studio solution, then build it in debug or release mode. Finally, copy `libdeepspeech.so` to the generated `x64/Debug` or `x64/Release` directory.

Просмотреть файл

@ -0,0 +1,59 @@
# NodeJS voice recognition example using Mozilla DeepSpeech
Download the pre-trained model (1.8GB):
```
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/deepspeech-0.4.1-models.tar.gz
tar xvfz deepspeech-0.4.1-models.tar.gz
```
Edit references to models path if necessary:
```
let modelPath = './models/output_graph.pbmm';
let alphabetPath = './models/alphabet.txt';
let lmPath = './models/lm.binary';
let triePath = './models/trie';
```
Install Sox (for .wav file loading):
```
brew install sox
```
Download test audio files:
```
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz
tar xfvz audio-0.4.1.tar.gz
```
Install NPM dependencies:
```
npm install
```
Run:
```
node index.js
```
Result should be something like:
```
audio length 1.975
result: experience proves this
```
Try other wav files with an argument:
```
node index.js audio/2830-3980-0043.wav
node index.js audio/8455-210777-0068.wav
node index.js audio/4507-16021-0012.wav
```

Просмотреть файл

@ -0,0 +1,72 @@
const DeepSpeech = require('deepspeech');
const Fs = require('fs');
const Sox = require('sox-stream');
const MemoryStream = require('memory-stream');
const Duplex = require('stream').Duplex;
const Wav = require('node-wav');
// Model construction parameters: decoder beam width, number of features and
// context window size handed to DeepSpeech.Model below.
const BEAM_WIDTH = 1024;
const N_FEATURES = 26;
const N_CONTEXT = 9;
let modelPath = './models/output_graph.pbmm';
let alphabetPath = './models/alphabet.txt';

let model = new DeepSpeech.Model(modelPath, N_FEATURES, N_CONTEXT, alphabetPath, BEAM_WIDTH);

// Language-model decoding parameters passed to enableDecoderWithLM().
const LM_ALPHA = 0.75;
const LM_BETA = 1.85;
let lmPath = './models/lm.binary';
let triePath = './models/trie';

model.enableDecoderWithLM(alphabetPath, lmPath, triePath, LM_ALPHA, LM_BETA);

// The audio file may be given as the first command-line argument; otherwise
// fall back to one of the bundled sample recordings.
let audioFile = process.argv[2] || './audio/2830-3980-0043.wav';

if (!Fs.existsSync(audioFile)) {
  // A missing input is a failure: report it on stderr and exit with a
  // non-zero status so that calling scripts can detect it.
  console.error('file missing:', audioFile);
  process.exit(1);
}

const buffer = Fs.readFileSync(audioFile);
// The decoded WAV header is only used to warn about low sample rates; the
// actual conversion to 16 kHz is done later from the raw file contents.
const result = Wav.decode(buffer);

if (result.sampleRate < 16000) {
  console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than 16kHz. Up-sampling might produce erratic speech recognition.');
}
function bufferToStream(buffer) {
  // Wrap an in-memory buffer in a stream that yields the buffer once and
  // then signals end-of-stream, so it can be piped like a file stream.
  const source = new Duplex();
  const END_OF_STREAM = null;
  source.push(buffer);
  source.push(END_OF_STREAM);
  return source;
}
// Collects the converted audio in memory so it can be read back as one buffer.
let audioStream = new MemoryStream();
// Pipe the file contents through Sox, converting them to raw 16-bit,
// 16 kHz, mono, little-endian signed-integer samples without dithering.
bufferToStream(buffer).
pipe(Sox({
global: {
'no-dither': true,
},
output: {
bits: 16,
rate: 16000,
channels: 1,
encoding: 'signed-integer',
endian: 'little',
compression: 0.0,
type: 'raw'
}
})).
pipe(audioStream);
// Run inference once the whole converted stream has been written.
audioStream.on('finish', () => {
let audioBuffer = audioStream.toBuffer();
// Duration in seconds: two bytes per 16-bit sample, 16000 samples per second.
const audioLength = (audioBuffer.length / 2) * ( 1 / 16000);
console.log('audio length', audioLength);
// NOTE(review): only the first half of the byte buffer is handed to stt().
// Presumably the binding interprets the buffer length as a count of 16-bit
// samples rather than bytes - confirm against the deepspeech 0.4.1 API.
let result = model.stt(audioBuffer.slice(0, audioBuffer.length / 2), 16000);
console.log('result:', result);
});

Просмотреть файл

@ -0,0 +1,17 @@
{
  "name": "deepspeech-nodejs_wav",
  "version": "1.0.0",
  "description": "Simple audio processing",
  "main": "index.js",
  "scripts": {
    "start": "node ./index.js"
  },
  "dependencies": {
    "argparse": "^1.0.10",
    "deepspeech": "^0.4.1",
    "memory-stream": "0.0.3",
    "node-wav": "0.0.2",
    "sox-stream": "^2.0.3",
    "util": "^0.11.1"
  },
  "license": "Public domain"
}

Просмотреть файл

@ -13,25 +13,25 @@
include definitions.mk
default: deepspeech
default: $(DEEPSPEECH_BIN)
clean:
clean: bindings-clean
rm -f deepspeech
deepspeech: client.cc
$(CXX) -std=c++11 -o deepspeech $(CFLAGS) $(SOX_CFLAGS) client.cc $(LDFLAGS) $(SOX_LDFLAGS)
$(DEEPSPEECH_BIN): client.cc
$(CXX) $(CFLAGS) $(CFLAGS_DEEPSPEECH) $(SOX_CFLAGS) client.cc $(LDFLAGS) $(SOX_LDFLAGS)
ifeq ($(OS),Darwin)
install_name_tool -change $$TASKCLUSTER_TASK_DIR/homebrew/opt/sox/lib/libsox.3.dylib @rpath/libsox.3.dylib deepspeech
install_name_tool -change bazel-out/local-opt/bin/native_client/libdeepspeech.so @rpath/libdeepspeech.so deepspeech
endif
run: deepspeech
run: $(DEEPSPEECH_BIN)
${META_LD_LIBRARY_PATH}=${TFDIR}/bazel-bin/native_client:${${META_LD_LIBRARY_PATH}} ./deepspeech ${ARGS}
debug: deepspeech
debug: $(DEEPSPEECH_BIN)
${META_LD_LIBRARY_PATH}=${TFDIR}/bazel-bin/native_client:${${META_LD_LIBRARY_PATH}} gdb --args ./deepspeech ${ARGS}
install: deepspeech
install: $(DEEPSPEECH_BIN)
install -d ${PREFIX}/lib
install -m 0644 ${TFDIR}/bazel-bin/native_client/libdeepspeech.so ${PREFIX}/lib/
install -d ${PREFIX}/bin

Просмотреть файл

@ -1,7 +1,11 @@
#ifndef __ARGS_H__
#define __ARGS_H__
#if defined(_MSC_VER)
#include "getopt_win.h"
#else
#include <getopt.h>
#endif
#include <iostream>
#include "deepspeech.h"

Просмотреть файл

@ -2,22 +2,33 @@
#include <stdio.h>
#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <math.h>
#include <string.h>
#ifndef __ANDROID__
#include <sox.h>
#endif // __ANDROID__
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sstream>
#include <string>
#if defined(__ANDROID__) || defined(_MSC_VER)
#define NO_SOX
#endif
#if defined(_MSC_VER)
#define NO_DIR
#endif
#ifndef NO_SOX
#include <sox.h>
#endif
#ifndef NO_DIR
#include <dirent.h>
#include <unistd.h>
#endif // NO_DIR
#include "deepspeech.h"
#include "args.h"
@ -61,7 +72,7 @@ GetAudioBuffer(const char* path)
{
ds_audio_buffer res = {0};
#ifndef __ANDROID__
#ifndef NO_SOX
sox_format_t* input = sox_open_read(path, NULL, NULL, NULL);
assert(input);
@ -150,9 +161,9 @@ GetAudioBuffer(const char* path)
// Close sox handles
sox_close(output);
sox_close(input);
#endif // __ANDROID__
#endif // NO_SOX
#ifdef __ANDROID__
#ifdef NO_SOX
// FIXME: Hack and support only 16kHz mono 16-bits PCM
FILE* wave = fopen(path, "r");
@ -160,19 +171,15 @@ GetAudioBuffer(const char* path)
unsigned short audio_format;
fseek(wave, 20, SEEK_SET); rv = fread(&audio_format, 2, 1, wave);
assert(rv == 2);
unsigned short num_channels;
fseek(wave, 22, SEEK_SET); rv = fread(&num_channels, 2, 1, wave);
assert(rv == 2);
unsigned int sample_rate;
fseek(wave, 24, SEEK_SET); rv = fread(&sample_rate, 4, 1, wave);
assert(rv == 2);
unsigned short bits_per_sample;
fseek(wave, 34, SEEK_SET); rv = fread(&bits_per_sample, 2, 1, wave);
assert(rv == 2);
assert(audio_format == 1); // 1 is PCM
assert(num_channels == 1); // MONO
@ -185,16 +192,14 @@ GetAudioBuffer(const char* path)
fprintf(stderr, "bits_per_sample=%d\n", bits_per_sample);
fseek(wave, 40, SEEK_SET); rv = fread(&res.buffer_size, 4, 1, wave);
assert(rv == 2);
fprintf(stderr, "res.buffer_size=%ld\n", res.buffer_size);
fseek(wave, 44, SEEK_SET);
res.buffer = (char*)malloc(sizeof(char) * res.buffer_size);
rv = fread(res.buffer, sizeof(char), res.buffer_size, wave);
assert(rv == res.buffer_size);
fclose(wave);
#endif // __ANDROID__
#endif // NO_SOX
#ifdef __APPLE__
res.buffer_size = (size_t)(output->olength * 2);
@ -261,8 +266,10 @@ main(int argc, char **argv)
}
}
#ifndef NO_SOX
// Initialise SOX
assert(sox_init() == SOX_SUCCESS);
#endif
struct stat wav_info;
if (0 != stat(audio, &wav_info)) {
@ -270,11 +277,14 @@ main(int argc, char **argv)
}
switch (wav_info.st_mode & S_IFMT) {
#ifndef _MSC_VER
case S_IFLNK:
#endif
case S_IFREG:
ProcessFile(ctx, audio, show_times);
break;
#ifndef NO_DIR
case S_IFDIR:
{
printf("Running on directory %s\n", audio);
@ -297,16 +307,17 @@ main(int argc, char **argv)
closedir(wav_dir);
}
break;
#endif
default:
printf("Unexpected type for %s: %d\n", audio, (wav_info.st_mode & S_IFMT));
break;
}
#ifndef __ANDROID__
#ifndef NO_SOX
// Deinitialise and quit
sox_quit();
#endif // __ANDROID__
#endif // NO_SOX
DS_DestroyModel(ctx);

Просмотреть файл

@ -5,6 +5,17 @@ TFDIR ?= $(abspath $(NC_DIR)/../../tensorflow)
PREFIX ?= /usr/local
SO_SEARCH ?= $(TFDIR)/bazel-bin/
TOOL_AS := as
TOOL_CC := gcc
TOOL_CXX := c++
TOOL_LD := ld
TOOL_LDD := ldd
DEEPSPEECH_BIN := deepspeech
CFLAGS_DEEPSPEECH := -std=c++11 -o $(DEEPSPEECH_BIN)
LINK_DEEPSPEECH := -ldeepspeech
LINK_PATH_DEEPSPEECH := -L${TFDIR}/bazel-bin/native_client
ifeq ($(TARGET),host)
TOOLCHAIN :=
CFLAGS :=
@ -18,6 +29,19 @@ PYTHON_PLATFORM_NAME := --plat-name manylinux1_x86_64
endif
endif
ifeq ($(TARGET),host-win)
DEEPSPEECH_BIN := deepspeech.exe
TOOLCHAIN := '$(VCINSTALLDIR)\bin\amd64\'
TOOL_CC := cl.exe
TOOL_CXX := cl.exe
TOOL_LD := link.exe
LINK_DEEPSPEECH := $(TFDIR)\bazel-bin\native_client\libdeepspeech.so.if.lib
LINK_PATH_DEEPSPEECH :=
CFLAGS_DEEPSPEECH := -nologo -Fe$(DEEPSPEECH_BIN)
SOX_CFLAGS :=
SOX_LDFLAGS :=
endif
ifeq ($(TARGET),rpi3)
TOOLCHAIN ?= ${TFDIR}/bazel-$(shell basename "${TFDIR}")/external/LinaroArmGcc72/bin/arm-linux-gnueabihf-
RASPBIAN ?= $(abspath $(NC_DIR)/../multistrap-raspbian-stretch)
@ -72,15 +96,15 @@ endif
CFLAGS += $(EXTRA_CFLAGS)
CXXFLAGS += $(EXTRA_CXXFLAGS)
LIBS := -ldeepspeech $(EXTRA_LIBS)
LDFLAGS_DIRS := -L${TFDIR}/bazel-bin/native_client $(EXTRA_LDFLAGS)
LIBS := $(LINK_DEEPSPEECH) $(EXTRA_LIBS)
LDFLAGS_DIRS := $(LINK_PATH_DEEPSPEECH) $(EXTRA_LDFLAGS)
LDFLAGS += $(LDFLAGS_NEEDED) $(LDFLAGS_RPATH) $(LDFLAGS_DIRS) $(LIBS)
AS := $(TOOLCHAIN)as
CC := $(TOOLCHAIN)gcc
CXX := $(TOOLCHAIN)c++
LD := $(TOOLCHAIN)ld
LDD := $(TOOLCHAIN)ldd $(TOOLCHAIN_LDD_OPTS)
AS := $(TOOLCHAIN)$(TOOL_AS)
CC := $(TOOLCHAIN)$(TOOL_CC)
CXX := $(TOOLCHAIN)$(TOOL_CXX)
LD := $(TOOLCHAIN)$(TOOL_LD)
LDD := $(TOOLCHAIN)$(TOOL_LDD) $(TOOLCHAIN_LDD_OPTS)
RPATH_PYTHON := '-Wl,-rpath,\$$ORIGIN/lib/' $(LDFLAGS_RPATH)
RPATH_NODEJS := '-Wl,-rpath,$$\$$ORIGIN/../'

Просмотреть файл

@ -4,6 +4,10 @@ if [ `uname` = "Darwin" ]; then
export PATH="/Users/build-user/TaskCluster/Workdir/tasks/tc-workdir/homebrew/opt/coreutils/libexec/gnubin:${PATH}"
fi
if [ `uname -o` = "Msys" ]; then
export PATH="/c/Program Files/Git/bin/:${PATH}"
fi
DS_GIT_DIR="$(realpath "$(dirname "$(realpath "$0")")/../.git/")"
if [ ! -d "${DS_GIT_DIR}" ]; then
return 1

653
native_client/getopt_win.h Normal file
Просмотреть файл

@ -0,0 +1,653 @@
#ifndef __GETOPT_H__
/**
* DISCLAIMER
* This file is part of the mingw-w64 runtime package.
*
* The mingw-w64 runtime package and its code is distributed in the hope that it
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
/*
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Sponsored in part by the Defense Advanced Research Projects
* Agency (DARPA) and Air Force Research Laboratory, Air Force
* Materiel Command, USAF, under agreement number F39502-99-1-0512.
*/
/*-
* Copyright (c) 2000 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Dieter Baron and Thomas Klausner.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#pragma warning(disable:4996);
#define __GETOPT_H__
/* All the headers include this file. */
#include <crtdefs.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
#include <windows.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Global getopt state. With REPLACE_GETOPT defined, this header itself
 * provides the variables that a system getopt(3) would normally export.
 *
 * NOTE(review): these are non-static definitions placed in a header, so
 * including this file from more than one translation unit of the same
 * program would define the same symbols twice - confirm it is only ever
 * included from a single source file.
 */
#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
#ifdef REPLACE_GETOPT
int opterr = 1; /* if error message should be printed */
int optind = 1; /* index into parent argv vector */
int optopt = '?'; /* character checked for validity */
#undef optreset /* see getopt.h */
#define optreset __mingw_optreset
int optreset; /* reset getopt */
char *optarg; /* argument associated with option */
#endif
//extern int optind; /* index of first non-option in argv */
//extern int optopt; /* single option character, as parsed */
//extern int opterr; /* flag to enable built-in diagnostics... */
// /* (user may set to zero, to suppress) */
//
//extern char *optarg; /* pointer to argument of current option */
#define PRINT_ERROR ((opterr) && (*options != ':'))
#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
/* return values */
#define BADCH (int)'?'
#define BADARG ((*options == ':') ? (int)':' : (int)'?')
#define INORDER (int)1
#ifndef __CYGWIN__
#define __progname __argv[0]
#else
extern char __declspec(dllimport) *__progname;
#endif
#ifdef __CYGWIN__
static char EMSG[] = "";
#else
#define EMSG ""
#endif
static int getopt_internal(int, char * const *, const char *,
const struct option *, int *, int);
static int parse_long_options(char * const *, const char *,
const struct option *, int *, int);
static int gcd(int, int);
static void permute_args(int, int, int, char * const *);
static char *place = EMSG; /* option letter processing */
/* XXX: set optreset to 1 rather than these two */
static int nonopt_start = -1; /* first non option argument (for permute) */
static int nonopt_end = -1; /* first option after non options (for permute) */
/* Error messages */
static const char recargchar[] = "option requires an argument -- %c";
static const char recargstring[] = "option requires an argument -- %s";
static const char ambig[] = "ambiguous option -- %.*s";
static const char noarg[] = "option doesn't take an argument -- %.*s";
static const char illoptchar[] = "unknown option -- %c";
static const char illoptstring[] = "unknown option -- %s";
static void
_vwarnx(const char *fmt,va_list ap)
{
	/* Every diagnostic goes to stderr as "<program name>: <message>\n". */
	(void)fprintf(stderr,"%s: ",__progname);

	/* A NULL format prints only the program-name prefix and the newline. */
	if (fmt) {
		(void)vfprintf(stderr,fmt,ap);
	}

	(void)fprintf(stderr,"\n");
}
static void
warnx(const char *fmt,...)
{
	/*
	 * printf-style front end for _vwarnx(): collect the variadic
	 * arguments and forward them together with the format string.
	 */
	va_list args;

	va_start(args,fmt);
	_vwarnx(fmt,args);
	va_end(args);
}
/*
 * Compute the greatest common divisor of a and b using Euclid's algorithm.
 *
 * If b is 0 the result is a (and if a is 0 the result is b), so a zero
 * argument no longer causes a division by zero in the first remainder
 * step. For every non-zero b the result is identical to the previous
 * implementation.
 */
static int
gcd(int a, int b)
{
	int c;

	/* Test the divisor before each remainder so that b == 0 is safe. */
	while (b != 0) {
		c = a % b;
		a = b;
		b = c;
	}
	return (a);
}
/*
 * Exchange the block from nonopt_start to nonopt_end with the block
 * from nonopt_end to opt_end (keeping the same order of arguments
 * in each block).
 *
 * panonopt_start: index of the first element of the non-option block.
 * panonopt_end:   index one past the non-option block, i.e. the first
 *                 element of the option block.
 * opt_end:        index one past the option block.
 * nargv:          argument vector; its elements are reordered in place
 *                 (the const qualifier is cast away for the writes).
 *
 * Both blocks are expected to be non-empty: the block lengths are passed
 * to gcd() and the result is used as a divisor.
 */
static void
permute_args(int panonopt_start, int panonopt_end, int opt_end,
char * const *nargv)
{
int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
char *swap;
/*
* compute lengths of blocks and number and size of cycles
*/
nnonopts = panonopt_end - panonopt_start;
nopts = opt_end - panonopt_end;
ncycle = gcd(nnonopts, nopts);
cyclelen = (opt_end - panonopt_start) / ncycle;
/* Process one cycle per iteration, each starting inside the option block. */
for (i = 0; i < ncycle; i++) {
cstart = panonopt_end+i;
pos = cstart;
for (j = 0; j < cyclelen; j++) {
/*
* Step to the next slot of this cycle: back by the non-option
* block length when inside the option block, otherwise forward
* by the option block length.
*/
if (pos >= panonopt_end)
pos -= nnonopts;
else
pos += nopts;
/* Swap the element at pos with the one held at the cycle start. */
swap = nargv[pos];
/* LINTED const cast */
((char **) nargv)[pos] = nargv[cstart];
/* LINTED const cast */
((char **)nargv)[cstart] = swap;
}
}
}
#ifdef REPLACE_GETOPT
/*
* getopt --
* Parse argc/argv argument vector.
*
* [eventually this will replace the BSD getopt]
*/
int
getopt(int nargc, char * const *nargv, const char *options)
{
	/*
	 * Short options only: no long-option table, no index output and no
	 * flags. In particular FLAG_PERMUTE is deliberately left out,
	 * because the BSD getopt(3) (unlike GNU) has never permuted its
	 * arguments.
	 *
	 * Many privileged programs call getopt() before dropping
	 * privileges, so this entry point is kept as simple (and bug-free)
	 * as possible.
	 */
	const int flags = 0;

	return getopt_internal(nargc, nargv, options, NULL, NULL, flags);
}
#endif /* REPLACE_GETOPT */
//extern int getopt(int nargc, char * const *nargv, const char *options);
#ifdef _BSD_SOURCE
/*
* BSD adds the non-standard `optreset' feature, for reinitialisation
* of `getopt' parsing. We support this feature, for applications which
* proclaim their BSD heritage, before including this header; however,
* to maintain portability, developers are advised to avoid it.
*/
# define optreset __mingw_optreset
extern int optreset;
#endif
#ifdef __cplusplus
}
#endif
/*
* POSIX requires the `getopt' API to be specified in `unistd.h';
* thus, `unistd.h' includes this header. However, we do not want
* to expose the `getopt_long' or `getopt_long_only' APIs, when
* included in this manner. Thus, close the standard __GETOPT_H__
* declarations block, and open an additional __GETOPT_LONG_H__
* specific block, only when *not* __UNISTD_H_SOURCED__, in which
* to declare the extended API.
*/
#endif /* !defined(__GETOPT_H__) */
#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
#define __GETOPT_LONG_H__
#ifdef __cplusplus
extern "C" {
#endif
/*
 * One entry of the long-option table scanned by parse_long_options().
 * The table is terminated by an entry whose name is NULL.
 */
struct option /* specification for a long form option... */
{
const char *name; /* option name, without leading hyphens */
int has_arg; /* does it take an argument? */
int *flag; /* where to save its status, or NULL */
int val; /* its associated status value */
};
/* Values compared against the has_arg field of struct option. */
enum /* permitted values for its `has_arg' field... */
{
no_argument = 0, /* option never takes an argument */
required_argument, /* option always requires an argument */
optional_argument /* option may take an argument */
};
/*
* parse_long_options --
* Parse long options in argc/argv argument vector.
* Returns -1 if short_too is set and the option does not match long_options.
*
* The option text is read from the file-level `place' pointer, which the
* caller sets up; optind, optarg and optopt are updated as side effects.
*
* nargv:        argument vector; only read to fetch a separate argument
*               for an option that requires one.
* options:      short-option string; only its first character is examined
*               (through PRINT_ERROR and BADARG) to decide whether to
*               print diagnostics and which error code to return.
* long_options: table of long options, terminated by a NULL name.
* idx:          if non-NULL, receives the index of the matched entry.
* short_too:    non-zero when the text may also be a short option; a
*               single-character abbreviation is then not accepted and an
*               unknown option yields -1 instead of an error.
*
* Other return values: BADCH for an ambiguous or unknown option, BADARG
* for an unexpected or missing argument, 0 when the matched entry has a
* non-NULL flag (its val is stored through flag), otherwise the entry's val.
*/
static int
parse_long_options(char * const *nargv, const char *options,
const struct option *long_options, int *idx, int short_too)
{
char *current_argv, *has_equal;
size_t current_argv_len;
int i, ambiguous, match;
/* Two table entries are interchangeable when has_arg, flag and val agree. */
#define IDENTICAL_INTERPRETATION(_x, _y) \
(long_options[(_x)].has_arg == long_options[(_y)].has_arg && \
long_options[(_x)].flag == long_options[(_y)].flag && \
long_options[(_x)].val == long_options[(_y)].val)
current_argv = place;
match = -1;
ambiguous = 0;
/* Move past the argv element holding this option. */
optind++;
if ((has_equal = strchr(current_argv, '=')) != NULL) {
/* argument found (--option=arg) */
current_argv_len = has_equal - current_argv;
has_equal++;
} else
current_argv_len = strlen(current_argv);
for (i = 0; long_options[i].name; i++) {
/* find matching long option */
if (strncmp(current_argv, long_options[i].name,
current_argv_len))
continue;
if (strlen(long_options[i].name) == current_argv_len) {
/* exact match */
match = i;
ambiguous = 0;
break;
}
/*
* If this is a known short option, don't allow
* a partial match of a single character.
*/
if (short_too && current_argv_len == 1)
continue;
if (match == -1) /* partial match */
match = i;
/* A second partial match is only ambiguous if it would behave differently. */
else if (!IDENTICAL_INTERPRETATION(i, match))
ambiguous = 1;
}
if (ambiguous) {
/* ambiguous abbreviation */
if (PRINT_ERROR)
warnx(ambig, (int)current_argv_len,
current_argv);
optopt = 0;
return (BADCH);
}
if (match != -1) { /* option found */
/* "--option=arg" given for an option that takes no argument. */
if (long_options[match].has_arg == no_argument
&& has_equal) {
if (PRINT_ERROR)
warnx(noarg, (int)current_argv_len,
current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
return (BADARG);
}
if (long_options[match].has_arg == required_argument ||
long_options[match].has_arg == optional_argument) {
if (has_equal)
optarg = has_equal;
else if (long_options[match].has_arg ==
required_argument) {
/*
* optional argument doesn't use next nargv
*/
optarg = nargv[optind++];
}
}
if ((long_options[match].has_arg == required_argument)
&& (optarg == NULL)) {
/*
* Missing argument; leading ':' indicates no error
* should be generated.
*/
if (PRINT_ERROR)
warnx(recargstring,
current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
/* Undo the increment made when fetching the missing argument. */
--optind;
return (BADARG);
}
} else { /* unknown option */
if (short_too) {
/* Restore optind and report "no long match" to the caller. */
--optind;
return (-1);
}
if (PRINT_ERROR)
warnx(illoptstring, current_argv);
optopt = 0;
return (BADCH);
}
if (idx)
*idx = match;
if (long_options[match].flag) {
*long_options[match].flag = long_options[match].val;
return (0);
} else
return (long_options[match].val);
#undef IDENTICAL_INTERPRETATION
}
/*
 * getopt_internal --
 * Parse argc/argv argument vector. Called by user level routines.
 *
 * Shared engine behind getopt_long() and getopt_long_only().
 *
 * Parameters:
 *   nargc, nargv  - argument count and vector being scanned.
 *   options       - short option string; NULL makes the call return -1.
 *                   A leading '-' turns on FLAG_ALLARGS, a leading '+'
 *                   (or POSIXLY_CORRECT in the environment) turns off
 *                   FLAG_PERMUTE; the leading character is then skipped.
 *   long_options  - long option table, or NULL to disable long options.
 *   idx           - forwarded unchanged to parse_long_options().
 *   flags         - bit mask of FLAG_PERMUTE / FLAG_ALLARGS / FLAG_LONGONLY.
 *
 * Returns:
 *   -1       when the argument vector is exhausted, when "--" is seen,
 *            when a non-option is met and permutation is off, or when a
 *            lone "-" is seen and '-' is not in the option string;
 *   INORDER  for a non-option argument when FLAG_ALLARGS is set (the
 *            argument itself is left in optarg);
 *   BADCH    for an unknown option character (or ':'), with optopt set;
 *   BADARG   when a required argument is missing, with optopt set;
 *   otherwise the matched option character, or whatever
 *   parse_long_options() returned for a long option.
 *
 * Scanning state is kept between calls in file-level variables that this
 * function reads and writes: place, optind, optreset, optarg, optopt,
 * nonopt_start and nonopt_end.
 */
static int
getopt_internal(int nargc, char * const *nargv, const char *options,
const struct option *long_options, int *idx, int flags)
{
char *oli; /* option letter list index */
int optchar, short_too;
/* -1 means "environment not consulted yet"; cached across calls */
static int posixly_correct = -1;
if (options == NULL)
return (-1);
/*
 * XXX Some GNU programs (like cvs) set optind to 0 instead of
 * XXX using optreset. Work around this braindamage.
 */
if (optind == 0)
optind = optreset = 1;
/*
 * Disable GNU extensions if POSIXLY_CORRECT is set or options
 * string begins with a '+'.
 *
 * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
 * optreset != 0 for GNU compatibility.
 */
if (posixly_correct == -1 || optreset != 0)
posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
if (*options == '-')
flags |= FLAG_ALLARGS;
else if (posixly_correct || *options == '+')
flags &= ~FLAG_PERMUTE;
/* step over the mode character so only option letters remain */
if (*options == '+' || *options == '-')
options++;
optarg = NULL;
/* a reset discards any pending block of skipped non-options */
if (optreset)
nonopt_start = nonopt_end = -1;
start:
if (optreset || !*place) { /* update scanning pointer */
optreset = 0;
if (optind >= nargc) { /* end of argument vector */
place = EMSG;
if (nonopt_end != -1) {
/* do permutation, if we have to */
permute_args(nonopt_start, nonopt_end,
optind, nargv);
optind -= nonopt_end - nonopt_start;
}
else if (nonopt_start != -1) {
/*
 * If we skipped non-options, set optind
 * to the first of them.
 */
optind = nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
/*
 * A non-option is anything not starting with '-', or a lone "-"
 * when '-' is not itself listed in the option string.
 */
if (*(place = nargv[optind]) != '-' ||
(place[1] == '\0' && strchr(options, '-') == NULL)) {
place = EMSG; /* found non-option */
if (flags & FLAG_ALLARGS) {
/*
 * GNU extension:
 * return non-option as argument to option 1
 */
optarg = nargv[optind++];
return (INORDER);
}
if (!(flags & FLAG_PERMUTE)) {
/*
 * If no permutation wanted, stop parsing
 * at first non-option.
 */
return (-1);
}
/* do permutation */
if (nonopt_start == -1)
nonopt_start = optind;
else if (nonopt_end != -1) {
/*
 * A second run of non-options begins: move the previous
 * run behind the options seen since, then reopen the block.
 */
permute_args(nonopt_start, nonopt_end,
optind, nargv);
nonopt_start = optind -
(nonopt_end - nonopt_start);
nonopt_end = -1;
}
optind++;
/* process next argument */
goto start;
}
/* an option follows skipped non-options: close the open block */
if (nonopt_start != -1 && nonopt_end == -1)
nonopt_end = optind;
/*
 * If we have "-" do nothing, if "--" we are done.
 */
/* note: *++place advances past the leading '-' as a side effect */
if (place[1] != '\0' && *++place == '-' && place[1] == '\0') {
optind++;
place = EMSG;
/*
 * We found an option (--), so if we skipped
 * non-options, we have to permute.
 */
if (nonopt_end != -1) {
permute_args(nonopt_start, nonopt_end,
optind, nargv);
optind -= nonopt_end - nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
}
/*
 * Check long options if:
 * 1) we were passed some
 * 2) the arg is not just "-"
 * 3) either the arg starts with -- or we are getopt_long_only()
 */
if (long_options != NULL && place != nargv[optind] &&
(*place == '-' || (flags & FLAG_LONGONLY))) {
short_too = 0;
if (*place == '-')
place++; /* --foo long option */
else if (*place != ':' && strchr(options, *place) != NULL)
short_too = 1; /* could be short option too */
optchar = parse_long_options(nargv, options, long_options,
idx, short_too);
/* -1 here means "not a long option"; fall through to short parsing */
if (optchar != -1) {
place = EMSG;
return (optchar);
}
}
/* fetch the next short option letter and look it up in options */
if ((optchar = (int)*place++) == (int)':' ||
(optchar == (int)'-' && *place != '\0') ||
(oli = (char*)strchr(options, optchar)) == NULL) {
/*
 * If the user specified "-" and '-' isn't listed in
 * options, return -1 (non-option) as per POSIX.
 * Otherwise, it is an unknown option character (or ':').
 */
if (optchar == (int)'-' && *place == '\0')
return (-1);
/* last letter of this argv element: move on to the next one */
if (!*place)
++optind;
if (PRINT_ERROR)
warnx(illoptchar, optchar);
optopt = optchar;
return (BADCH);
}
/* "W;" in the option string makes "-W foo" behave like "--foo" */
if (long_options != NULL && optchar == 'W' && oli[1] == ';') {
/* -W long-option */
if (*place) /* no space */
/* NOTHING */;
else if (++optind >= nargc) { /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
} else /* white space */
place = nargv[optind];
optchar = parse_long_options(nargv, options, long_options,
idx, 0);
place = EMSG;
return (optchar);
}
if (*++oli != ':') { /* doesn't take argument */
if (!*place)
++optind;
} else { /* takes (optional) argument */
optarg = NULL;
if (*place) /* no white space */
optarg = place;
else if (oli[1] != ':') { /* arg not optional */
if (++optind >= nargc) { /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
} else
optarg = nargv[optind];
}
/* the rest of this argv element was consumed (as the argument or not at all) */
place = EMSG;
++optind;
}
/* dump back option letter */
return (optchar);
}
/*
 * getopt_long --
 *	Public front end for long/short option parsing.  Delegates to
 *	getopt_internal() with only FLAG_PERMUTE requested.
 */
int
getopt_long(int nargc, char * const *nargv, const char *options,
    const struct option *long_options, int *idx)
{
	const int mode = FLAG_PERMUTE;

	return getopt_internal(nargc, nargv, options, long_options, idx, mode);
}
/*
 * getopt_long_only --
 *	Public front end that delegates to getopt_internal() with both
 *	FLAG_PERMUTE and FLAG_LONGONLY requested.
 */
int
getopt_long_only(int nargc, char * const *nargv, const char *options,
    const struct option *long_options, int *idx)
{
	const int mode = FLAG_PERMUTE | FLAG_LONGONLY;

	return getopt_internal(nargc, nargv, options, long_options, idx, mode);
}
//extern int getopt_long(int nargc, char * const *nargv, const char *options,
// const struct option *long_options, int *idx);
//extern int getopt_long_only(int nargc, char * const *nargv, const char *options,
// const struct option *long_options, int *idx);
/*
* Previous MinGW implementation had...
*/
#ifndef HAVE_DECL_GETOPT
/*
* ...for the long form API only; keep this for compatibility.
*/
# define HAVE_DECL_GETOPT 1
#endif
#ifdef __cplusplus
}
#endif
#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */

Просмотреть файл

@ -48,7 +48,6 @@ android {
dependencies {
implementation fileTree(dir: 'libs', include: ['*.jar'])
implementation 'com.android.support:appcompat-v7:27.1.1'
testImplementation 'junit:junit:4.12'
androidTestImplementation 'com.android.support.test:runner:1.0.2'

Просмотреть файл

@ -6,6 +6,10 @@ PROJECT_VERSION ?= $(shell cat ../../VERSION | tr -d '\n')
include ../definitions.mk
ifeq ($(TARGET),host-win)
LIBS := '$(shell cygpath -w $(subst .lib,,$(LIBS)))'
endif
default: build
clean:
@ -32,7 +36,7 @@ configure: deepspeech_wrap.cxx package.json
$(NODE_BUILD_TOOL) configure $(NODE_BUILD_VERBOSE)
build: configure deepspeech_wrap.cxx
AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS="$(LIBS)" $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_ABI_TARGET) rebuild $(NODE_BUILD_VERBOSE)
AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS=$(LIBS) $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_ABI_TARGET) rebuild $(NODE_BUILD_VERBOSE)
copy-deps: build
$(call copy_missing_libs,lib/binding/*/*/*/deepspeech.node,lib/binding/*/*/)

Просмотреть файл

@ -4,7 +4,7 @@
"target_name": "deepspeech",
"sources": [ "deepspeech_wrap.cxx" ],
"libraries": [
"${LIBS}"
"$(LIBS)"
],
"include_dirs": [
"../"

Просмотреть файл

@ -8,7 +8,7 @@ bindings-clean:
bindings-build:
pip install --quiet $(PYTHON_PACKAGES) wheel==0.31.0 setuptools==39.1.0
AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext $(PYTHON_PLATFORM_NAME)
PATH=$(TOOLCHAIN):$$PATH AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext $(PYTHON_PLATFORM_NAME)
MANIFEST.in: bindings-build
> $@

Просмотреть файл

@ -39,11 +39,27 @@ class BuildExtFirst(build):
('build_clib', build.has_c_libraries),
('build_scripts', build.has_scripts)]
# Properly pass arguments for linking, setuptools will perform some checks
def lib_dirs_split(a):
    """Return the library directories named in a linker-flags string.

    On Windows (``os.name == 'nt'``) nothing is extracted and an empty
    list is returned. On every other platform the string is split on
    ``-L`` and whatever precedes the first ``-L`` is dropped; entries are
    returned unstripped.

    A list is always returned. Previously any ``os.name`` other than
    ``'posix'`` or ``'nt'`` fell through and returned ``None``, which is
    not iterable.
    """
    if os.name == 'nt':
        return []
    # Treat every non-Windows platform like POSIX rather than returning None.
    return a.split('-L')[1:]
def libs_split(a):
    """Return the library names contained in a libraries string.

    On Windows (``os.name == 'nt'``) the text before the first ``.lib``
    is returned as a single-element list. On every other platform the
    string is split on ``-l`` and whatever precedes the first ``-l`` is
    dropped; entries are returned unstripped.

    A list is always returned. Previously any ``os.name`` other than
    ``'posix'`` or ``'nt'`` fell through and returned ``None``, which is
    not iterable.
    """
    if os.name == 'nt':
        return a.split('.lib')[0:1]
    # Treat every non-Windows platform like POSIX rather than returning None.
    return a.split('-l')[1:]
ds_ext = Extension('deepspeech._impl',
['impl.i'],
include_dirs = [ numpy_include, '../' ],
library_dirs = list(map(lambda x: x.strip(), os.getenv('MODEL_LDFLAGS', '').split('-L')[1:])),
libraries = list(map(lambda x: x.strip(), os.getenv('MODEL_LIBS', '').split('-l')[1:])))
library_dirs = list(map(lambda x: x.strip(), lib_dirs_split(os.getenv('MODEL_LDFLAGS', '')))),
libraries = list(map(lambda x: x.strip(), libs_split(os.getenv('MODEL_LIBS', ''))))
)
setup(name = project_name,
description = 'A library for running inference on a DeepSpeech model',

Просмотреть файл

@ -23,3 +23,4 @@ build:
tests_cmdline: ''
convert_graphdef: ''
benchmark_model_bin: ''
tensorflow_git_desc: 'TensorFlow: v1.12.0-21-ge763555'

5
taskcluster/README.md Normal file
Просмотреть файл

@ -0,0 +1,5 @@
# Taskcluster
This directory contains files associated with Taskcluster -- a task execution framework for Mozilla's Continuous Integration system.
Please consult the [existing Taskcluster documentation](https://docs.taskcluster.net/docs).

Просмотреть файл

@ -12,7 +12,7 @@ build:
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.android-arm64/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.android-arm64/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/android-build.sh arm64-v8a"
package: "taskcluster/android-package.sh arm64-v8a"

Просмотреть файл

@ -12,7 +12,7 @@ build:
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.android-armv7/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.android-armv7/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/android-build.sh armeabi-v7a"
package: "taskcluster/android-package.sh armeabi-v7a"

Просмотреть файл

@ -13,7 +13,7 @@ build:
system_setup:
>
apt-get -qq -y install curl && ${swig.packages.install_script}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.android-armv7/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.android-armv7/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/android-apk-build.sh"
package: "taskcluster/android-apk-package.sh"

Просмотреть файл

@ -12,7 +12,7 @@ build:
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.android-arm64/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.android-arm64/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/android-build.sh x86_64"
package: "taskcluster/android-package.sh x86_64"

Просмотреть файл

@ -6,7 +6,7 @@ build:
- "index.project.deepspeech.deepspeech.native_client.osx.${event.head.sha}"
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.osx/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.osx/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/host-build.sh"
package: "taskcluster/package.sh"

Просмотреть файл

@ -6,7 +6,7 @@ build:
- "index.project.deepspeech.deepspeech.native_client.osx-ctc.${event.head.sha}"
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.osx/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.osx/artifacts/public/home.tar.xz"
maxRunTime: 14400
scripts:
build: 'taskcluster/decoder-build.sh'

Просмотреть файл

@ -14,7 +14,7 @@ build:
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.cpu/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.cpu/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/host-build.sh"
package: "taskcluster/package.sh"

Просмотреть файл

@ -14,7 +14,7 @@ build:
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.cpu/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.cpu/artifacts/public/home.tar.xz"
scripts:
build: 'taskcluster/decoder-build.sh'
package: 'taskcluster/decoder-package.sh'

Просмотреть файл

@ -12,7 +12,7 @@ build:
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.gpu/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.gpu/artifacts/public/home.tar.xz"
maxRunTime: 14400
scripts:
build: "taskcluster/cuda-build.sh"

Просмотреть файл

@ -4,7 +4,7 @@ build:
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.arm64"
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.arm64"
- "index.project.deepspeech.deepspeech.native_client.arm64.${event.head.sha}"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.arm64/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.arm64/artifacts/public/home.tar.xz"
## multistrap 2.2.0-ubuntu1 is broken in 14.04: https://bugs.launchpad.net/ubuntu/+source/multistrap/+bug/1313787
system_setup:
>

Просмотреть файл

@ -4,7 +4,7 @@ build:
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.arm"
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.arm"
- "index.project.deepspeech.deepspeech.native_client.arm.${event.head.sha}"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.arm/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.arm/artifacts/public/home.tar.xz"
## multistrap 2.2.0-ubuntu1 is broken in 14.04: https://bugs.launchpad.net/ubuntu/+source/multistrap/+bug/1313787
system_setup:
>

Просмотреть файл

@ -16,7 +16,7 @@ build:
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.cpu/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.cpu/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/node-build.sh"
package: "taskcluster/node-package.sh"

Просмотреть файл

@ -11,6 +11,8 @@ build:
- "android-arm64-cpu-opt"
- "android-armv7-cpu-opt"
- "android-java-opt"
- "win-amd64-cpu-opt"
- "win-amd64-gpu-opt"
allowed:
- "tag"
ref_match: "refs/tags/"
@ -39,8 +41,13 @@ build:
- "linux-arm64-cpu-opt"
- "android-arm64-cpu-opt"
- "android-armv7-cpu-opt"
- "win-amd64-cpu-opt"
- "win-amd64-gpu-opt"
java_aar:
- "android-java-opt"
nuget:
- "win-amd64-cpu-opt"
- "win-amd64-gpu-opt"
metadata:
name: "DeepSpeech GitHub Packages"
description: "Trigger Uploading of DeepSpeech Packages to GitHub release page"

Просмотреть файл

@ -17,6 +17,7 @@ build:
javascript: []
java_aar:
- "android-java-opt"
nuget: []
metadata:
name: "DeepSpeech Android lib Packages"
description: "Trigger Uploading of DeepSpeech Android lib to JCenter registry"

Просмотреть файл

@ -21,6 +21,7 @@ build:
- "linux-amd64-gpu-opt"
# CPU package with all archs
- "node-package"
nuget: []
metadata:
name: "DeepSpeech NPM Packages"
description: "Trigger Uploading of DeepSpeech Packages to NPM registry"

Просмотреть файл

@ -0,0 +1,25 @@
build:
template_file: simple-task.tyml
dependencies:
# Make sure builds are ready
- "win-amd64-cpu-opt"
- "win-amd64-gpu-opt"
allowed:
- "tag"
ref_match: "refs/tags/"
routes:
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
upload_targets:
- "nuget"
artifacts_deps:
python: []
cpp: []
javascript: []
java_aar: []
nuget:
- "win-amd64-cpu-opt"
- "win-amd64-gpu-opt"
metadata:
name: "DeepSpeech NuGet Packages"
description: "Trigger Uploading of DeepSpeech .Net Framework bindings to NuGet"

Просмотреть файл

@ -25,6 +25,7 @@ build:
- "linux-amd64-gpu-opt"
- "linux-rpi3-cpu-opt"
# - "linux-arm64-cpu-opt" Aarch64 packages are refused by upload.pypi.org
nuget: []
metadata:
name: "DeepSpeech PyPi Packages"
description: "Trigger Uploading of DeepSpeech Packages to PyPi"

Просмотреть файл

@ -44,6 +44,10 @@ then:
$map: { $eval: build.artifacts_deps.cpp }
each(b):
$eval: as_slugid(b)
nuget:
$map: { $eval: build.artifacts_deps.nuget }
each(b):
$eval: as_slugid(b)
metadata:
name: ${build.metadata.name}

Просмотреть файл

@ -38,7 +38,7 @@ then:
DEEPSPEECH_ARTIFACTS_ROOT_ARM64: https://queue.taskcluster.net/v1/task/${android_arm64_build}/artifacts/public
DEEPSPEECH_ARTIFACTS_ROOT_ARMV7: https://queue.taskcluster.net/v1/task/${android_armv7_build}/artifacts/public
DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-14-g943a6c3"
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
command:
- "/bin/bash"

Просмотреть файл

@ -43,7 +43,7 @@ then:
PIP_DEFAULT_TIMEOUT: "60"
PIP_EXTRA_INDEX_URL: "https://lissyx.github.io/deepspeech-python-wheels/"
EXTRA_PYTHON_CONFIGURE_OPTS: "--with-fpectl" # Required by Debian Stretch
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-14-g943a6c3"
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
command:
- "/bin/bash"

Просмотреть файл

@ -0,0 +1,10 @@
build:
template_file: test-win-opt-base.tyml
dependencies:
- "win-amd64-cpu-opt"
- "test-training_upstream-linux-amd64-py27mu-opt"
args:
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-cppwin-ds-tests.sh"
metadata:
name: "DeepSpeech Windows AMD64 CPU C++ tests"
description: "Testing DeepSpeech C++ for Windows/AMD64, CPU only, optimized version"

Просмотреть файл

@ -41,7 +41,7 @@ then:
DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pb
DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pbmm
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-14-g943a6c3"
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
command:
- - "/bin/bash"

Просмотреть файл

@ -45,7 +45,7 @@ then:
DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pbmm
DECODER_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_amd64_ctc}/artifacts/public
PIP_DEFAULT_TIMEOUT: "60"
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-14-g943a6c3"
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
command:
- "/bin/bash"

Просмотреть файл

@ -4,7 +4,7 @@ build:
- "test-training_upstream-linux-amd64-py27mu-opt"
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/tc-lite_benchmark_model-ds-tests.sh"
benchmark_model_bin: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.cpu/artifacts/public/lite_benchmark_model"
benchmark_model_bin: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.cpu/artifacts/public/lite_benchmark_model"
metadata:
name: "DeepSpeech Linux AMD64 CPU TF Lite benchmark_model"
description: "Testing DeepSpeech TF Lite benchmark_model for Linux/AMD64, CPU only, optimized version"

Просмотреть файл

@ -0,0 +1,10 @@
build:
template_file: test-win-opt-base.tyml
dependencies:
- "win-amd64-cpu-opt"
- "test-training_upstream-linux-amd64-py27mu-opt"
args:
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-netframework-ds-tests.sh"
metadata:
name: "DeepSpeech Windows AMD64 CPU .Net Framework tests"
description: "Testing DeepSpeech .Net Framework for Windows/AMD64, CPU only, optimized version"

Просмотреть файл

@ -43,7 +43,7 @@ then:
PIP_DEFAULT_TIMEOUT: "60"
PIP_EXTRA_INDEX_URL: "https://www.piwheels.org/simple"
EXTRA_PYTHON_CONFIGURE_OPTS: "--with-fpectl" # Required by Raspbian Stretch / PiWheels
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-14-g943a6c3"
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
command:
- "/bin/bash"

Просмотреть файл

@ -7,7 +7,7 @@ build:
apt-get -qq -y install ${python.packages_trusty.apt}
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/tc-train-tests.sh 2.7.14:mu"
convert_graphdef: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.cpu/artifacts/public/convert_graphdef_memmapped_format"
convert_graphdef: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.cpu/artifacts/public/convert_graphdef_memmapped_format"
metadata:
name: "DeepSpeech Linux AMD64 CPU upstream training Py2.7 mu"
description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 using upstream TensorFlow Python 2.7 mu, CPU only, optimized version"

Просмотреть файл

@ -1,12 +0,0 @@
build:
template_file: test-linux-opt-base.tyml
dependencies:
- "linux-amd64-ctc-opt"
system_setup:
>
apt-get -qq -y install ${python.packages_trusty.apt}
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/tc-train-tests.sh 3.4.8:m"
metadata:
name: "DeepSpeech Linux AMD64 CPU upstream training Py3.4"
description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 using upstream TensorFlow Python 3.4, CPU only, optimized version"

Просмотреть файл

@ -0,0 +1,80 @@
$if: '(event.event != "push") && (event.event != "tag")'
then:
taskId: ${taskcluster.taskId}
provisionerId: ${taskcluster.docker.provisionerId}
workerType: ${taskcluster.docker.workerTypeWin}
taskGroupId: ${taskcluster.taskGroupId}
schedulerId: ${taskcluster.schedulerId}
dependencies:
$map: { $eval: build.dependencies }
each(b):
$eval: as_slugid(b)
created: { $fromNow: '0 sec' }
deadline: { $fromNow: '1 day' }
expires: { $fromNow: '7 days' }
extra:
github:
{ $eval: taskcluster.github_events.pull_request }
routes:
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
scopes: [
"queue:route:notify.irc-channel.*",
]
payload:
maxRunTime: { $eval: to_int(build.maxRunTime) }
mounts:
- file: msys2-base-x86_64.tar.xz
content:
sha256: 4e799b5c3efcf9efcb84923656b7bcff16f75a666911abd6620ea8e5e1e9870c
url: >-
https://sourceforge.net/projects/msys2/files/Base/x86_64/msys2-base-x86_64-20180531.tar.xz/download
env:
$let:
training: { $eval: as_slugid("test-training_upstream-linux-amd64-py27mu-opt") }
win_amd64_build: { $eval: as_slugid("win-amd64-cpu-opt") }
node_package: { $eval: as_slugid("node-package") }
in:
DEEPSPEECH_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${win_amd64_build}/artifacts/public
DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package}/artifacts/public
DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pb
DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pbmm
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
TC_MSYS_VERSION: 'MSYS_NT-6.3'
MSYS: 'winsymlinks:nativestrict'
command:
- >-
"C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
"C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
- .\msys64\usr\bin\bash.exe --login -cx "exit"
- .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu"
- .\msys64\usr\bin\bash.exe --login -cxe "
export LC_ALL=C &&
export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" &&
export TASKCLUSTER_ARTIFACTS=\"$USERPROFILE/public\" &&
export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" &&
export TASKCLUSTER_TMP_DIR="$TASKCLUSTER_TASK_DIR/tmp" &&
export PIP_DEFAULT_TIMEOUT=60 &&
(mkdir $TASKCLUSTER_TASK_DIR || rm -fr $TASKCLUSTER_TASK_DIR/*) && cd $TASKCLUSTER_TASK_DIR &&
env &&
ln -s $USERPROFILE/msys64 $TASKCLUSTER_TASK_DIR/msys64 &&
git clone --quiet ${event.head.repo.url} $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/ &&
cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} &&
cd $TASKCLUSTER_TASK_DIR &&
pacman --noconfirm -R bsdtar &&
pacman --noconfirm -S tar vim &&
/bin/bash ${build.args.tests_cmdline} ;
cd $TASKCLUSTER_TASK_DIR/../ && rm -fr tc-workdir/ && exit $TASKCLUSTER_TASK_EXIT_CODE"
metadata:
name: ${build.metadata.name}
description: ${build.metadata.description}
owner: ${event.head.user.email}
source: ${event.head.repo.url}

Просмотреть файл

@ -0,0 +1,17 @@
build:
template_file: win-opt-base.tyml
routes:
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.win"
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.win"
- "index.project.deepspeech.deepspeech.native_client.win.${event.head.sha}"
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.win/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/win-build.sh"
package: "taskcluster/win-package.sh"
nc_asset_name: "native_client.amd64.cpu.win.tar.xz"
maxRunTime: 14400
metadata:
name: "DeepSpeech Windows AMD64 CPU"
description: "Building DeepSpeech for Windows AMD64, CPU only, optimized version"

Просмотреть файл

@ -0,0 +1,17 @@
build:
template_file: win-opt-base.tyml
routes:
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.win-cuda"
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.win-cuda"
- "index.project.deepspeech.deepspeech.native_client.win-cuda.${event.head.sha}"
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.win-cuda/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/win-build.sh --cuda"
package: "taskcluster/win-package.sh"
nc_asset_name: "native_client.amd64.gpu.win.tar.xz"
maxRunTime: 14400
metadata:
name: "DeepSpeech Windows AMD64 CUDA"
description: "Building DeepSpeech for Windows AMD64, CUDA-enabled, optimized version"

42
taskcluster/win-build.sh Executable file
Просмотреть файл

@ -0,0 +1,42 @@
#!/bin/bash
# Build script for the host-win target: runs the Bazel build, then the
# DeepSpeech binary, .Net Framework and NuGet build steps.
# Usage: win-build.sh [--cuda]
#
# The do_* / shutdown_bazel helpers are not defined here; presumably they
# come from the two scripts sourced below -- confirm against those files.

set -xe

# Optional first argument; the literal "--cuda" selects the CUDA settings.
cuda=$1

# Paths are quoted so that whitespace or glob characters in them cannot
# split or expand the arguments.
source "$(dirname "$0")/../tc-tests-utils.sh"

source "${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh"

BAZEL_TARGETS="
//native_client:libdeepspeech.so
//native_client:generate_trie
"

if [ "${cuda}" = "--cuda" ]; then
    BAZEL_ENV_FLAGS="TF_NEED_CUDA=1 ${TF_CUDA_FLAGS}"
    BAZEL_BUILD_FLAGS="${BAZEL_CUDA_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BAZEL_OPT_FLAGS}"
    PROJECT_NAME="DeepSpeech-GPU"
else
    PROJECT_NAME="DeepSpeech"
    BAZEL_BUILD_FLAGS="${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}"
    BAZEL_ENV_FLAGS="TF_NEED_CUDA=0"
fi

SYSTEM_TARGET=host-win

do_bazel_build

if [ "${cuda}" = "--cuda" ]; then
    # CUDA builds only: copy the generated .ifso interface file to the
    # libdeepspeech.so.if.lib name.
    cp "${DS_ROOT_TASK}/DeepSpeech/tf/bazel-bin/native_client/liblibdeepspeech.so.ifso" \
       "${DS_ROOT_TASK}/DeepSpeech/tf/bazel-bin/native_client/libdeepspeech.so.if.lib"
fi

# Append Chocolatey's bin directory (converted by cygpath) to PATH. The
# inner expansion is quoted so a Windows-style path reaches cygpath as
# one unmodified argument.
export PATH="$PATH:$(cygpath "${ChocolateyInstall}")/bin"

do_deepspeech_binary_build

do_deepspeech_netframework_build

do_nuget_build "${PROJECT_NAME}"

shutdown_bazel

Просмотреть файл

@ -0,0 +1,89 @@
# JSON-e template for a Windows build task. It is scheduled on the
# `workerTypeWin` worker type and parameterised through the `build.*` values
# (dependencies, routes, maxRunTime, tensorflow, scripts, metadata).
taskId: ${taskcluster.taskId}
provisionerId: ${taskcluster.docker.provisionerId}
workerType: ${taskcluster.docker.workerTypeWin}
taskGroupId: ${taskcluster.taskGroupId}
schedulerId: ${taskcluster.schedulerId}
# Every entry of build.dependencies is passed through as_slugid().
dependencies:
$map: { $eval: build.dependencies }
each(b):
$eval: as_slugid(b)
created: { $fromNow: '0 sec' }
deadline: { $fromNow: '1 day' }
# Tasks created for push/tag events expire after 6 months, all others after 7 days.
expires:
$if: '(event.event == "push") || (event.event == "tag")'
then: { $fromNow: '6 months' }
else: { $fromNow: '7 days' }
extra:
nc_asset_name: { $eval: build.nc_asset_name }
github:
$if: '(event.event == "push") || (event.event == "tag")'
then: { $eval: taskcluster.github_events.merge }
else: { $eval: taskcluster.github_events.pull_request }
# Routes are only attached for push/tag events; there is no `else` branch.
routes:
$if: '(event.event == "push") || (event.event == "tag")'
then:
{ $eval: build.routes }
payload:
maxRunTime: { $eval: to_int(build.maxRunTime) }
# The MSYS2 base tarball is mounted as a file, pinned by its sha256.
mounts:
- file: msys2-base-x86_64.tar.xz
content:
sha256: 4e799b5c3efcf9efcb84923656b7bcff16f75a666911abd6620ea8e5e1e9870c
url: >-
https://sourceforge.net/projects/msys2/files/Base/x86_64/msys2-base-x86_64-20180531.tar.xz/download
# `training` is the slugid of the upstream Linux training task; its
# output_graph.pb artifact is exposed as DEEPSPEECH_TEST_MODEL.
env:
$let:
training: { $eval: as_slugid("test-training_upstream-linux-amd64-py27mu-opt") }
in:
TC_MSYS_VERSION: 'MSYS_NT-6.3'
MSYS: 'winsymlinks:nativestrict'
TENSORFLOW_BUILD_ARTIFACT: ${build.tensorflow}
DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
# Command sequence:
#   1. unpack the mounted tarball into %USERPROFILE% (xz stream piped into a tar extraction),
#   2. start a login bash that just exits (presumably to let MSYS2 finish its
#      first-run setup -- TODO confirm),
#   3. update MSYS2 packages with pacman,
#   4. echo the bash build command line into `cmd /k vcvarsall.bat x64`, so the
#      build script is started from a cmd session in which vcvarsall.bat has run,
#   5. re-read the saved exit code and exit with it.
command:
- >-
"C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
"C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
- .\msys64\usr\bin\bash.exe --login -cx "exit"
- .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu"
# The build and package scripts are followed by `;` (not `&&`), so the exit
# status of that chain is always written to $USERPROFILE/tc-exit.sh and the
# work directory is removed afterwards.
- echo .\msys64\usr\bin\bash.exe --login -cxe "
export LC_ALL=C &&
export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" &&
export TASKCLUSTER_ARTIFACTS=\"$USERPROFILE/public\" &&
export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" &&
(mkdir $TASKCLUSTER_TASK_DIR || rm -fr $TASKCLUSTER_TASK_DIR/*) && cd $TASKCLUSTER_TASK_DIR &&
env &&
ln -s $USERPROFILE/msys64 $TASKCLUSTER_TASK_DIR/msys64 &&
(wget -O - $TENSORFLOW_BUILD_ARTIFACT | 7z x -txz -si -so | 7z x -o$TASKCLUSTER_TASK_DIR -aoa -ttar -si ) &&
git clone --quiet ${event.head.repo.url} $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/ &&
cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} &&
ln -s $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/native_client/ $TASKCLUSTER_TASK_DIR/DeepSpeech/tf/native_client &&
cd $TASKCLUSTER_TASK_DIR &&
pacman --noconfirm -R bsdtar &&
pacman --noconfirm -S tar make &&
$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.build} &&
$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.package} ;
echo \"export TASKCLUSTER_TASK_EXIT_CODE=$?\" > $USERPROFILE/tc-exit.sh &&
cd $TASKCLUSTER_TASK_DIR/../ && rm -fr tc-workdir/ && exit $TASKCLUSTER_TASK_EXIT_CODE" | cmd
/k ""C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat"" x64
# Final step: source the saved exit code and exit with it.
- .\msys64\usr\bin\bash.exe --login -cxe "source $USERPROFILE/tc-exit.sh &&
exit $TASKCLUSTER_TASK_EXIT_CODE"
# Everything under public/ is published; expiry follows the same push/tag rule as the task.
artifacts:
- type: "directory"
path: "public/"
expires:
$if: '(event.event == "push") || (event.event == "tag")'
then: { $fromNow: '6 months' }
else: { $fromNow: '7 days' }
metadata:
name: ${build.metadata.name}
description: ${build.metadata.description}
owner: ${event.head.user.email}
source: ${event.head.repo.url}

17
taskcluster/win-package.sh Executable file
Просмотреть файл

@ -0,0 +1,17 @@
#!/bin/bash

# Collect the Windows build outputs into ${TASKCLUSTER_ARTIFACTS}:
#   - the bazel logs from the DeepSpeech/tf tree,
#   - the native client tarball (native_client.tar.xz),
#   - the NuGet package(s) and DeepSpeechConsole.exe from the C# examples.

set -xe

# NOTE(review): arm_flavor is not referenced in this script; it is kept in case
# the sourced tc-tests-utils.sh reads it -- confirm and drop if it does not.
arm_flavor=$1

source "$(dirname "$0")/../tc-tests-utils.sh"

# Paths are quoted so that Windows-style locations containing spaces survive
# word splitting; the globs themselves stay outside the quotes.
mkdir -p "${TASKCLUSTER_ARTIFACTS}" || true

cp "${DS_ROOT_TASK}"/DeepSpeech/tf/bazel*.log "${TASKCLUSTER_ARTIFACTS}/"

package_native_client "native_client.tar.xz"

cp "${DS_ROOT_TASK}"/DeepSpeech/ds/examples/net_framework/CSharpExamples/*.nupkg "${TASKCLUSTER_ARTIFACTS}/"

cp "${DS_ROOT_TASK}/DeepSpeech/ds/examples/net_framework/CSharpExamples/DeepSpeechConsole/bin/x64/Release/DeepSpeechConsole.exe" "${TASKCLUSTER_ARTIFACTS}/"

Просмотреть файл

@ -4,6 +4,7 @@ taskcluster:
provisionerId: aws-provisioner-v1
workerType: deepspeech-worker
workerTypeKvm: deepspeech-kvm-worker
workerTypeWin: deepspeech-win
dockerrpi3:
provisionerId: deepspeech-provisioner
workerType: ds-rpi3

13
tc-cppwin-ds-tests.sh Normal file
Просмотреть файл

@ -0,0 +1,13 @@
#!/bin/bash

# Windows C++ client test driver: download the test material into
# ${TASKCLUSTER_TMP_DIR}/ds, prepend that directory to PATH, then run the
# TensorFlow version check and the basic inference tests.

set -xe

# Quote the command substitution so a script location containing spaces is
# not word-split before being handed to `source`.
source "$(dirname "$0")/tc-tests-utils.sh"

download_material "${TASKCLUSTER_TMP_DIR}/ds"

export PATH="${TASKCLUSTER_TMP_DIR}/ds/:$PATH"

check_tensorflow_version

run_basic_inference_tests

Просмотреть файл

@ -0,0 +1,19 @@
#!/bin/bash

# .NET Framework test driver: download the test data, install the NuGet
# package selected by the first argument, and run the DeepSpeechConsole
# inference tests.
#
# $1: pass "--cuda" to test the DeepSpeech-GPU package; anything else
#     (including nothing) selects the DeepSpeech package.

set -xe

cuda=$1

source $(dirname "$0")/tc-tests-utils.sh

case "${cuda}" in
  --cuda)
    PROJECT_NAME="DeepSpeech-GPU"
    ;;
  *)
    PROJECT_NAME="DeepSpeech"
    ;;
esac

download_data

install_nuget "${PROJECT_NAME}"

run_netframework_inference_tests

Просмотреть файл

@ -7,6 +7,11 @@ if [ "${OS}" = "Linux" ]; then
export DS_ROOT_TASK=${HOME}
fi;
# Windows (MSYS) workers: OS equals TC_MSYS_VERSION. Work from the task
# directory and mark native binaries with the .exe suffix.
case "${OS}" in
  "${TC_MSYS_VERSION}")
    export DS_ROOT_TASK=${TASKCLUSTER_TASK_DIR}
    export PLATFORM_EXE_SUFFIX=.exe
    ;;
esac
if [ "${OS}" = "Darwin" ]; then
export DS_ROOT_TASK=${TASKCLUSTER_TASK_DIR}
export SWIG_LIB="$(find ${DS_ROOT_TASK}/homebrew/Cellar/swig/ -type f -name "swig.swg" | xargs dirname)"
@ -33,6 +38,16 @@ export DS_VERSION="$(cat ${DS_DSDIR}/VERSION)"
export ANDROID_SDK_HOME=${DS_ROOT_TASK}/DeepSpeech/Android/SDK/
export ANDROID_NDK_HOME=${DS_ROOT_TASK}/DeepSpeech/Android/android-ndk-r18b/
# Archive helpers used by the download/package functions. Each one keeps a
# non-empty value from the environment and otherwise falls back to the default.
: "${TAR:=tar}"
: "${XZ:=pixz -9}"
: "${UNXZ:=pixz -d}"

# On Windows (MSYS) workers the defaults are replaced unconditionally: use
# MSYS's own tar.exe and plain xz instead of pixz.
case "${OS}" in
  "${TC_MSYS_VERSION}")
    TAR=/usr/bin/tar.exe
    XZ="xz -9 -T0 -c -"
    UNXZ="xz -9 -T0 -d"
    ;;
esac
model_source="${DEEPSPEECH_TEST_MODEL}"
model_name="$(basename "${model_source}")"
model_name_mmap="$(basename -s ".pb" "${model_source}").pbmm"
@ -172,11 +187,21 @@ assert_correct_ldc93s1()
assert_correct_inference "$1" "she had your dark suit in greasy wash water all year"
}
assert_working_ldc93s1()
{
  # $1: inference output to check with assert_working_inference against the
  # LDC93S1 reference transcript.
  local ldc93s1_transcript="she had your dark suit in greasy wash water all year"
  assert_working_inference "$1" "${ldc93s1_transcript}"
}
assert_correct_ldc93s1_lm()
{
  # $1: inference output (language-model run) to check with
  # assert_correct_inference against the LDC93S1 reference transcript.
  local ldc93s1_transcript="she had your dark suit in greasy wash water all year"
  assert_correct_inference "$1" "${ldc93s1_transcript}"
}
assert_working_ldc93s1_lm()
{
  # $1: inference output (language-model run) to check with
  # assert_working_inference against the LDC93S1 reference transcript.
  local ldc93s1_transcript="she had your dark suit in greasy wash water all year"
  assert_working_inference "$1" "${ldc93s1_transcript}"
}
assert_correct_multi_ldc93s1()
{
assert_shows_something "$1" "/LDC93S1.wav%she had your dark suit in greasy wash water all year%"
@ -226,7 +251,19 @@ run_tflite_basic_inference_tests()
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
}
run_all_inference_tests()
run_netframework_inference_tests()
{
  # Run DeepSpeechConsole.exe on LDC93S1.wav three times (model_name without
  # LM, model_name_mmap without LM, model_name_mmap with LM + trie) and check
  # each transcript with the assert_working_* helpers.
  #
  # The option strings below are expanded unquoted on purpose, so they are
  # word-split exactly as if the options had been written inline.
  local ds_tmp=${TASKCLUSTER_TMP_DIR}
  local alphabet_opt="--alphabet ${ds_tmp}/alphabet.txt"
  local audio_opt="--audio ${ds_tmp}/LDC93S1.wav"
  local lm_opts="--lm ${ds_tmp}/lm.binary --trie ${ds_tmp}/trie"

  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${ds_tmp}/${model_name} ${alphabet_opt} ${audio_opt})
  assert_working_ldc93s1 "${phrase_pbmodel_nolm}"

  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${ds_tmp}/${model_name_mmap} ${alphabet_opt} ${audio_opt})
  assert_working_ldc93s1 "${phrase_pbmodel_nolm}"

  phrase_pbmodel_withlm=$(DeepSpeechConsole.exe --model ${ds_tmp}/${model_name_mmap} ${alphabet_opt} ${lm_opts} ${audio_opt})
  assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}"
}
run_basic_inference_tests()
{
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav)
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
@ -236,6 +273,11 @@ run_all_inference_tests()
phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav)
assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}"
}
run_all_inference_tests()
{
run_basic_inference_tests
phrase_pbmodel_nolm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav)
assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}"
@ -299,7 +341,7 @@ generic_download_tarxz()
mkdir -p ${target_dir} || true
wget ${url} -O - | pixz -d | tar -C ${target_dir} -xf -
wget ${url} -O - | ${UNXZ} | ${TAR} -C ${target_dir} -xf -
}
download_native_client_files()
@ -307,6 +349,45 @@ download_native_client_files()
generic_download_tarxz "$1" "${DEEPSPEECH_ARTIFACTS_ROOT}/native_client.tar.xz"
}
install_nuget()
{
  # Download the NuGet package and DeepSpeechConsole.exe from
  # ${DEEPSPEECH_ARTIFACTS_ROOT}, install the package (plus NAudio) through a
  # local NuGet source, and gather everything DeepSpeechConsole.exe is copied
  # alongside into ${TASKCLUSTER_TMP_DIR}/ds/, which is then prepended to PATH.
  #
  # $1: NuGet package id (PROJECT_NAME). The script exits with status 1 when
  #     it is missing.
  PROJECT_NAME=$1
  if [ -z "${PROJECT_NAME}" ]; then
    # Print the message, then fail; `exit` with a text argument would abort
    # with a "numeric argument required" error instead.
    echo "Please call with a valid PROJECT_NAME" >&2
    exit 1
  fi;

  nuget="${PROJECT_NAME}.${DS_VERSION}.nupkg"

  export PATH=$PATH:$(cygpath "${ChocolateyInstall}")/bin

  mkdir -p "${TASKCLUSTER_TMP_DIR}/repo/"
  mkdir -p "${TASKCLUSTER_TMP_DIR}/ds/"

  # Both downloads are piped through gunzip (presumably the artifacts are
  # stored gzip-compressed -- TODO confirm).
  wget -O - "${DEEPSPEECH_ARTIFACTS_ROOT}/${nuget}" | gunzip > "${TASKCLUSTER_TMP_DIR}/${nuget}"
  wget -O - "${DEEPSPEECH_ARTIFACTS_ROOT}/DeepSpeechConsole.exe" | gunzip > "${TASKCLUSTER_TMP_DIR}/ds/DeepSpeechConsole.exe"

  # Register the local directory as a NuGet source named "repo". The cygpath
  # output is quoted so a Windows path containing spaces stays one argument.
  nuget sources add -Name repo -Source "$(cygpath -w "${TASKCLUSTER_TMP_DIR}/repo/")"

  cd "${TASKCLUSTER_TMP_DIR}"
  nuget add "$(cygpath -w "${TASKCLUSTER_TMP_DIR}/${nuget}")" -source repo

  cd "${TASKCLUSTER_TMP_DIR}/ds/"
  nuget list -Source repo -Prerelease
  nuget install "${PROJECT_NAME}" -Source repo -Prerelease
  ls -halR "${PROJECT_NAME}.${DS_VERSION}"

  nuget install NAudio
  # The NAudio directory name carries a version, hence the (unquoted) glob.
  cp NAudio*/lib/net35/NAudio.dll "${TASKCLUSTER_TMP_DIR}/ds/"
  cp "${PROJECT_NAME}.${DS_VERSION}/build/libdeepspeech.so" "${TASKCLUSTER_TMP_DIR}/ds/"
  cp "${PROJECT_NAME}.${DS_VERSION}/lib/net46/DeepSpeechClient.dll" "${TASKCLUSTER_TMP_DIR}/ds/"

  ls -hal "${TASKCLUSTER_TMP_DIR}/ds/"

  export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
}
download_data()
{
wget -P "${TASKCLUSTER_TMP_DIR}" "${model_source}"
@ -390,6 +471,8 @@ is_patched_bazel()
{
bazel_version=$(bazel version | grep 'Build label:' | cut -d':' -f2)
bazel shutdown
if [ -z "${bazel_version}" ]; then
return 0;
else
@ -473,6 +556,12 @@ do_bazel_build()
verify_bazel_rebuild "${DS_ROOT_TASK}/DeepSpeech/tf/bazel_monolithic.log"
}
shutdown_bazel()
{
  # Run `bazel shutdown` from ${DS_ROOT_TASK}/DeepSpeech/tf. The working
  # directory is left changed for the caller.
  local bazel_workdir=${DS_ROOT_TASK}/DeepSpeech/tf
  cd ${bazel_workdir}

  # BAZEL_OUTPUT_USER_ROOT is expanded unquoted, so it may contribute zero or
  # more words placed before the `shutdown` command.
  bazel ${BAZEL_OUTPUT_USER_ROOT} shutdown
}
do_bazel_shared_build()
{
cd ${DS_ROOT_TASK}/DeepSpeech/tf
@ -491,7 +580,7 @@ do_deepspeech_binary_build()
EXTRA_CFLAGS="${EXTRA_LOCAL_CFLAGS}" \
EXTRA_LDFLAGS="${EXTRA_LOCAL_LDFLAGS}" \
EXTRA_LIBS="${EXTRA_LOCAL_LIBS}" \
deepspeech
deepspeech${PLATFORM_EXE_SUFFIX}
}
do_deepspeech_ndk_build()
@ -509,6 +598,83 @@ do_deepspeech_ndk_build()
TARGET_ARCH_ABI=${arch_abi}
}
do_deepspeech_netframework_build()
{
  # Build the C# examples with MSBuild: DeepSpeechClient once per targeted
  # .NET Framework version, then the DeepSpeechConsole and DeepSpeech.WPF
  # projects. MSBUILD is left set (global) after the call.
  cd ${DS_DSDIR}/examples/net_framework/CSharpExamples

  # Setup dependencies
  local pkg_config
  for pkg_config in DeepSpeechConsole/packages.config DeepSpeechWPF/packages.config; do
    nuget install ${pkg_config} -OutputDirectory packages/
  done

  MSBUILD="$(cygpath 'C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe')"

  # We need MSYS2_ARG_CONV_EXCL='/' otherwise the '/' of CLI parameters gets mangled and disappears
  # We build the .NET Client for .NET Framework v4.5,v4.6,v4.7
  local framework
  for framework in v4.5 v4.6 v4.7; do
    MSYS2_ARG_CONV_EXCL='/' "${MSBUILD}" \
      DeepSpeechClient/DeepSpeechClient.csproj \
      /p:Configuration=Release \
      /p:Platform=x64 \
      /p:TargetFrameworkVersion="${framework}" \
      /p:OutputPath=bin/nuget/x64/${framework}
  done

  # The example applications are built with their project defaults.
  local example_project
  for example_project in DeepSpeechConsole/DeepSpeechConsole.csproj DeepSpeechWPF/DeepSpeech.WPF.csproj; do
    MSYS2_ARG_CONV_EXCL='/' "${MSBUILD}" \
      "${example_project}" \
      /p:Configuration=Release \
      /p:Platform=x64
  done
}
do_nuget_build()
{
  # Assemble the nupkg/ tree (libdeepspeech.so plus the DeepSpeechClient.dll
  # built for each .NET Framework version), generate deepspeech.nuspec from
  # its template and run `nuget pack` on it.
  #
  # $1: NuGet package id substituted for $NUPKG_ID (PROJECT_NAME). The script
  #     exits with status 1 when it is missing.
  PROJECT_NAME=$1
  if [ -z "${PROJECT_NAME}" ]; then
    # Print the message, then fail; `exit` with a text argument would abort
    # with a "numeric argument required" error instead.
    echo "Please call with a valid PROJECT_NAME" >&2
    exit 1
  fi;

  cd "${DS_DSDIR}/examples/net_framework/CSharpExamples"

  cp "${DS_TFDIR}/bazel-bin/native_client/libdeepspeech.so" nupkg/build

  # We copy the generated clients for .NET into the Nuget framework dirs
  # (v4.5 -> net45, v4.6 -> net46, v4.7 -> net47).
  local fw
  for fw in 4.5 4.6 4.7; do
    mkdir -p "nupkg/lib/net${fw/./}/"
    cp "DeepSpeechClient/bin/nuget/x64/v${fw}/DeepSpeechClient.dll" "nupkg/lib/net${fw/./}/"
  done

  # Read the version straight from the VERSION file, stripping CR/LF.
  # ($(shell ...) is Makefile syntax; in bash it ran a non-existent `shell`
  # command and silently produced an empty version.)
  PROJECT_VERSION=$(tr -d '\n\r' < ../../../VERSION)
  sed \
    -e "s/\$NUPKG_ID/${PROJECT_NAME}/" \
    -e "s/\$NUPKG_VERSION/${PROJECT_VERSION}/" \
    nupkg/deepspeech.nuspec.in > nupkg/deepspeech.nuspec && cat nupkg/deepspeech.nuspec

  nuget pack nupkg/deepspeech.nuspec
}
# Hack to extract Ubuntu's 16.04 libssl 1.0.2 packages and use them during the
# local build of Python.
#
@ -533,8 +699,8 @@ maybe_ssl102_py37()
mkdir -p ${PY37_OPENSSL_DIR}
wget -P ${TASKCLUSTER_TMP_DIR} \
http://${TASKCLUSTER_WORKER_GROUP}.ec2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.0.2g-1ubuntu4.14_amd64.deb \
http://${TASKCLUSTER_WORKER_GROUP}.ec2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.0.0_1.0.2g-1ubuntu4.14_amd64.deb
http://${TASKCLUSTER_WORKER_GROUP}.ec2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.0.2g-1ubuntu4.15_amd64.deb \
http://${TASKCLUSTER_WORKER_GROUP}.ec2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.0.0_1.0.2g-1ubuntu4.15_amd64.deb
for deb in ${TASKCLUSTER_TMP_DIR}/libssl*.deb; do
dpkg -x ${deb} ${PY37_OPENSSL_DIR}
@ -778,13 +944,13 @@ package_native_client()
echo "Please specify artifact name."
fi;
tar -cf - \
-C ${tensorflow_dir}/bazel-bin/native_client/ generate_trie \
${TAR} -cf - \
-C ${tensorflow_dir}/bazel-bin/native_client/ generate_trie${PLATFORM_EXE_SUFFIX} \
-C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so \
-C ${deepspeech_dir}/ LICENSE \
-C ${deepspeech_dir}/native_client/ deepspeech \
-C ${deepspeech_dir}/native_client/ deepspeech${PLATFORM_EXE_SUFFIX} \
-C ${deepspeech_dir}/native_client/kenlm/ README.mozilla \
| pixz -9 > "${artifacts_dir}/${artifact_name}"
| ${XZ} > "${artifacts_dir}/${artifact_name}"
}
package_native_client_ndk()

Просмотреть файл

@ -37,6 +37,7 @@ for inFile in (inFiles):
with open(inFile, "r") as csvFile:
reader = csv.reader(csvFile)
try:
next(reader, None) # skip the file header (i.e. "transcript")
for row in reader:
allText |= set(str(row[2]))
except IndexError as ie:

Просмотреть файл

@ -133,6 +133,7 @@ def validate_label(label):
label = label.replace(".", "")
label = label.replace(",", "")
label = label.replace("?", "")
label = label.replace("\"", "")
label = label.strip()
return label.lower()