зеркало из https://github.com/mozilla/DeepSpeech.git
Merge branch 'master' into josh-readme
This commit is contained in:
Коммит
3a157319e4
12
.compute
12
.compute
|
@ -2,11 +2,15 @@
|
|||
|
||||
set -xe
|
||||
|
||||
pip3 install -r <(grep -v tensorflow requirements.txt)
|
||||
pip3 install tensorflow-gpu==1.13.0-rc2
|
||||
apt-get install -y python3-venv
|
||||
python3 -m venv /tmp/venv
|
||||
source /tmp/venv/bin/activate
|
||||
|
||||
pip install -r <(grep -v tensorflow requirements.txt)
|
||||
pip install tensorflow-gpu==1.13.0-rc2
|
||||
|
||||
# Install ds_ctcdecoder package from TaskCluster
|
||||
pip3 install $(python3 util/taskcluster.py --decoder)
|
||||
pip install $(python3 util/taskcluster.py --decoder)
|
||||
|
||||
mkdir -p ../keep/summaries
|
||||
|
||||
|
@ -15,7 +19,7 @@ fis="${data}/LDC/fisher"
|
|||
swb="${data}/LDC/LDC97S62/swb"
|
||||
lbs="${data}/OpenSLR/LibriSpeech/librivox"
|
||||
|
||||
python3 -u DeepSpeech.py \
|
||||
python -u DeepSpeech.py \
|
||||
--train_files "${fis}-train.csv","${swb}-train.csv","${lbs}-train-clean-100.csv","${lbs}-train-clean-360.csv","${lbs}-train-other-500.csv" \
|
||||
--dev_files "${lbs}-dev-clean.csv"\
|
||||
--test_files "${lbs}-test-clean.csv" \
|
||||
|
|
|
@ -26,6 +26,7 @@ tasks:
|
|||
|
||||
scopes: [
|
||||
"queue:create-task:lowest:{{ taskcluster.docker.provisionerId }}/deepspeech-worker",
|
||||
"queue:create-task:lowest:{{ taskcluster.docker.provisionerId }}/deepspeech-win",
|
||||
"queue:create-task:lowest:{{ taskcluster.docker.provisionerId }}/deepspeech-kvm-worker",
|
||||
"queue:create-task:lowest:deepspeech-provisioner/ds-macos-light",
|
||||
"queue:create-task:lowest:deepspeech-provisioner/ds-scriptworker",
|
||||
|
|
|
@ -12,7 +12,6 @@ import evaluate
|
|||
import numpy as np
|
||||
import progressbar
|
||||
import shutil
|
||||
import tempfile
|
||||
import tensorflow as tf
|
||||
import traceback
|
||||
|
||||
|
@ -30,9 +29,9 @@ from util.text import Alphabet
|
|||
|
||||
#TODO: remove once fully switched to 1.13
|
||||
try:
|
||||
from tensorflow.contrib.lite.python import tflite_convert # 1.12
|
||||
import tensorflow.lite as lite # 1.13
|
||||
except ImportError:
|
||||
from tensorflow.lite.python import tflite_convert # 1.13
|
||||
import tensorflow.contrib.lite as lite # 1.12
|
||||
|
||||
|
||||
# Graph Creation
|
||||
|
@ -664,18 +663,23 @@ def test():
|
|||
|
||||
|
||||
def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
|
||||
batch_size = batch_size if batch_size > 0 else None
|
||||
# Input tensor will be of shape [batch_size, n_steps, 2*n_context+1, n_input]
|
||||
input_tensor = tf.placeholder(tf.float32, [batch_size, n_steps if n_steps > 0 else None, 2*Config.n_context+1, Config.n_input], name='input_node')
|
||||
seq_length = tf.placeholder(tf.int32, [batch_size], name='input_lengths')
|
||||
|
||||
if not tflite:
|
||||
previous_state_c = variable_on_worker_level('previous_state_c', [batch_size, Config.n_cell_dim], initializer=None)
|
||||
previous_state_h = variable_on_worker_level('previous_state_h', [batch_size, Config.n_cell_dim], initializer=None)
|
||||
if batch_size <= 0:
|
||||
# no state management since n_step is expected to be dynamic too (see below)
|
||||
previous_state = previous_state_c = previous_state_h = None
|
||||
else:
|
||||
previous_state_c = tf.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_c')
|
||||
previous_state_h = tf.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_h')
|
||||
if not tflite:
|
||||
previous_state_c = variable_on_worker_level('previous_state_c', [batch_size, Config.n_cell_dim], initializer=None)
|
||||
previous_state_h = variable_on_worker_level('previous_state_h', [batch_size, Config.n_cell_dim], initializer=None)
|
||||
else:
|
||||
previous_state_c = tf.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_c')
|
||||
previous_state_h = tf.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_h')
|
||||
|
||||
previous_state = tf.contrib.rnn.LSTMStateTuple(previous_state_c, previous_state_h)
|
||||
previous_state = tf.contrib.rnn.LSTMStateTuple(previous_state_c, previous_state_h)
|
||||
|
||||
no_dropout = [0.0] * 6
|
||||
|
||||
|
@ -696,9 +700,23 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
|
|||
# Apply softmax for CTC decoder
|
||||
logits = tf.nn.softmax(logits)
|
||||
|
||||
new_state_c, new_state_h = layers['rnn_output_state']
|
||||
if batch_size <= 0:
|
||||
if tflite:
|
||||
raise NotImplementedError('dynamic batch_size does not support tflite nor streaming')
|
||||
if n_steps > 0:
|
||||
raise NotImplementedError('dynamic batch_size expect n_steps to be dynamic too')
|
||||
return (
|
||||
{
|
||||
'input': input_tensor,
|
||||
'input_lengths': seq_length,
|
||||
},
|
||||
{
|
||||
'outputs': tf.identity(logits, name='logits'),
|
||||
},
|
||||
layers
|
||||
)
|
||||
|
||||
# Initial zero state
|
||||
new_state_c, new_state_h = layers['rnn_output_state']
|
||||
if not tflite:
|
||||
zero_state = tf.zeros([batch_size, Config.n_cell_dim], tf.float32)
|
||||
initialize_c = tf.assign(previous_state_c, zero_state)
|
||||
|
@ -749,7 +767,7 @@ def export():
|
|||
tf.reset_default_graph()
|
||||
session = tf.Session(config=Config.session_config)
|
||||
|
||||
inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=FLAGS.n_steps, tflite=FLAGS.export_tflite)
|
||||
inputs, outputs, _ = create_inference_graph(batch_size=FLAGS.export_batch_size, n_steps=FLAGS.n_steps, tflite=FLAGS.export_tflite)
|
||||
input_names = ",".join(tensor.op.name for tensor in inputs.values())
|
||||
output_names_tensors = [ tensor.op.name for tensor in outputs.values() if isinstance(tensor, Tensor) ]
|
||||
output_names_ops = [ tensor.name for tensor in outputs.values() if isinstance(tensor, Operation) ]
|
||||
|
@ -785,7 +803,7 @@ def export():
|
|||
os.makedirs(FLAGS.export_dir)
|
||||
|
||||
def do_graph_freeze(output_file=None, output_node_names=None, variables_blacklist=None):
|
||||
freeze_graph.freeze_graph_with_def_protos(
|
||||
return freeze_graph.freeze_graph_with_def_protos(
|
||||
input_graph_def=session.graph_def,
|
||||
input_saver_def=saver.as_saver_def(),
|
||||
input_checkpoint=checkpoint_path,
|
||||
|
@ -800,39 +818,16 @@ def export():
|
|||
if not FLAGS.export_tflite:
|
||||
do_graph_freeze(output_file=output_graph_path, output_node_names=output_names, variables_blacklist='previous_state_c,previous_state_h')
|
||||
else:
|
||||
temp_fd, temp_freeze = tempfile.mkstemp(dir=FLAGS.export_dir)
|
||||
os.close(temp_fd)
|
||||
do_graph_freeze(output_file=temp_freeze, output_node_names=output_names, variables_blacklist='')
|
||||
frozen_graph = do_graph_freeze(output_node_names=output_names, variables_blacklist='')
|
||||
output_tflite_path = os.path.join(FLAGS.export_dir, output_filename.replace('.pb', '.tflite'))
|
||||
class TFLiteFlags():
|
||||
def __init__(self):
|
||||
self.graph_def_file = temp_freeze
|
||||
self.inference_type = 'FLOAT'
|
||||
self.input_arrays = input_names
|
||||
self.input_shapes = input_shapes
|
||||
self.output_arrays = output_names
|
||||
self.output_file = output_tflite_path
|
||||
self.output_format = 'TFLITE'
|
||||
self.post_training_quantize = True
|
||||
|
||||
default_empty = [
|
||||
'inference_input_type',
|
||||
'mean_values',
|
||||
'default_ranges_min', 'default_ranges_max',
|
||||
'drop_control_dependency',
|
||||
'reorder_across_fake_quant',
|
||||
'change_concat_input_ranges',
|
||||
'allow_custom_ops',
|
||||
'converter_mode',
|
||||
'dump_graphviz_dir',
|
||||
'dump_graphviz_video'
|
||||
]
|
||||
for e in default_empty:
|
||||
self.__dict__[e] = None
|
||||
converter = lite.TFLiteConverter(frozen_graph, input_tensors=inputs.values(), output_tensors=outputs.values())
|
||||
converter.post_training_quantize = True
|
||||
tflite_model = converter.convert()
|
||||
|
||||
with open(output_tflite_path, 'wb') as fout:
|
||||
fout.write(tflite_model)
|
||||
|
||||
flags = TFLiteFlags()
|
||||
tflite_convert._convert_model(flags)
|
||||
os.unlink(temp_freeze)
|
||||
log_info('Exported model for TF Lite engine as {}'.format(os.path.basename(output_tflite_path)))
|
||||
|
||||
log_info('Models exported at %s' % (FLAGS.export_dir))
|
||||
|
@ -857,7 +852,6 @@ def do_single_file_inference(input_file_path):
|
|||
|
||||
checkpoint_path = checkpoint.model_checkpoint_path
|
||||
saver.restore(session, checkpoint_path)
|
||||
|
||||
session.run(outputs['initialize_state'])
|
||||
|
||||
features = audiofile_to_input_vector(input_file_path, Config.n_input, Config.n_context)
|
||||
|
|
|
@ -57,6 +57,7 @@ See the output of `deepspeech -h` for more information on the use of `deepspeech
|
|||
* [Python 3.6](https://www.python.org/)
|
||||
* [Git Large File Storage](https://git-lfs.github.com/)
|
||||
* Mac or Linux environment
|
||||
* Go to [build README](examples/net_framework/README.md) to start building DeepSpeech for Windows from source.
|
||||
|
||||
## Getting the code
|
||||
|
||||
|
@ -203,7 +204,7 @@ npm install deepspeech-gpu
|
|||
|
||||
See the [release notes](https://github.com/mozilla/DeepSpeech/releases) to find which GPUs are supported. Please ensure you have the required [CUDA dependency](#cuda-dependency).
|
||||
|
||||
See [nodejs_wav](examples/nodejs_wav) for an example of how to use the bindings.
|
||||
See [client.js](native_client/javascript/client.js) for an example of how to use the bindings. Or download the [wav example](examples/nodejs_wav).
|
||||
|
||||
### Installing bindings from source
|
||||
|
||||
|
|
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
|||
0.5.0-alpha.1
|
||||
0.5.0-alpha.2
|
||||
|
|
|
@ -8,17 +8,17 @@ import sys
|
|||
sys.path.insert(1, os.path.join(sys.path[0], '..'))
|
||||
|
||||
import csv
|
||||
import sox
|
||||
import tarfile
|
||||
import subprocess
|
||||
import progressbar
|
||||
|
||||
from glob import glob
|
||||
from os import path
|
||||
from sox import Transformer
|
||||
from threading import RLock
|
||||
from multiprocessing.dummy import Pool
|
||||
from multiprocessing import cpu_count
|
||||
|
||||
from util.text import validate_label
|
||||
from util.downloader import maybe_download, SIMPLE_BAR
|
||||
|
||||
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
|
||||
|
@ -66,7 +66,7 @@ def _maybe_convert_set(extracted_dir, source_csv, target_csv):
|
|||
samples.append((row['filename'], row['text']))
|
||||
|
||||
# Mutable counters for the concurrent embedded routine
|
||||
counter = { 'all': 0, 'too_short': 0, 'too_long': 0 }
|
||||
counter = { 'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0 }
|
||||
lock = RLock()
|
||||
num_samples = len(samples)
|
||||
rows = []
|
||||
|
@ -78,9 +78,19 @@ def _maybe_convert_set(extracted_dir, source_csv, target_csv):
|
|||
wav_filename = path.splitext(mp3_filename)[0] + ".wav"
|
||||
_maybe_convert_wav(mp3_filename, wav_filename)
|
||||
frames = int(subprocess.check_output(['soxi', '-s', wav_filename], stderr=subprocess.STDOUT))
|
||||
file_size = path.getsize(wav_filename)
|
||||
file_size = -1
|
||||
if path.exists(wav_filename):
|
||||
file_size = path.getsize(wav_filename)
|
||||
frames = int(subprocess.check_output(['soxi', '-s', wav_filename], stderr=subprocess.STDOUT))
|
||||
label = validate_label(sample[1])
|
||||
with lock:
|
||||
if int(frames/SAMPLE_RATE*1000/10/2) < len(str(sample[1])):
|
||||
if file_size == -1:
|
||||
# Excluding samples that failed upon conversion
|
||||
counter['failed'] += 1
|
||||
elif label is None:
|
||||
# Excluding samples that failed on label validation
|
||||
counter['invalid_label'] += 1
|
||||
elif int(frames/SAMPLE_RATE*1000/10/2) < len(str(label)):
|
||||
# Excluding samples that are too short to fit the transcript
|
||||
counter['too_short'] += 1
|
||||
elif frames/SAMPLE_RATE > MAX_SECS:
|
||||
|
@ -88,7 +98,7 @@ def _maybe_convert_set(extracted_dir, source_csv, target_csv):
|
|||
counter['too_long'] += 1
|
||||
else:
|
||||
# This one is good - keep it for the target CSV
|
||||
rows.append((wav_filename, file_size, sample[1]))
|
||||
rows.append((wav_filename, file_size, label))
|
||||
counter['all'] += 1
|
||||
|
||||
print('Importing mp3 files...')
|
||||
|
@ -108,7 +118,11 @@ def _maybe_convert_set(extracted_dir, source_csv, target_csv):
|
|||
for filename, file_size, transcript in bar(rows):
|
||||
writer.writerow({ 'wav_filename': filename, 'wav_filesize': file_size, 'transcript': transcript })
|
||||
|
||||
print('Imported %d samples.' % (counter['all'] - counter['too_short'] - counter['too_long']))
|
||||
print('Imported %d samples.' % (counter['all'] - counter['failed'] - counter['too_short'] - counter['too_long']))
|
||||
if counter['failed'] > 0:
|
||||
print('Skipped %d samples that failed upon conversion.' % counter['failed'])
|
||||
if counter['invalid_label'] > 0:
|
||||
print('Skipped %d samples that failed on transcript validation.' % counter['invalid_label'])
|
||||
if counter['too_short'] > 0:
|
||||
print('Skipped %d samples that were too short to match the transcript.' % counter['too_short'])
|
||||
if counter['too_long'] > 0:
|
||||
|
@ -116,9 +130,12 @@ def _maybe_convert_set(extracted_dir, source_csv, target_csv):
|
|||
|
||||
def _maybe_convert_wav(mp3_filename, wav_filename):
|
||||
if not path.exists(wav_filename):
|
||||
transformer = Transformer()
|
||||
transformer = sox.Transformer()
|
||||
transformer.convert(samplerate=SAMPLE_RATE)
|
||||
transformer.build(mp3_filename, wav_filename)
|
||||
try:
|
||||
transformer.build(mp3_filename, wav_filename)
|
||||
except sox.core.SoxError:
|
||||
pass
|
||||
|
||||
if __name__ == "__main__":
|
||||
_download_and_preprocess_data(sys.argv[1])
|
||||
|
|
|
@ -8,28 +8,29 @@ import sys
|
|||
sys.path.insert(1, os.path.join(sys.path[0], '..'))
|
||||
|
||||
import csv
|
||||
import sox
|
||||
import subprocess
|
||||
import progressbar
|
||||
|
||||
from os import path
|
||||
from sox import Transformer
|
||||
from threading import RLock
|
||||
from multiprocessing.dummy import Pool
|
||||
from multiprocessing import cpu_count
|
||||
from util.downloader import SIMPLE_BAR
|
||||
from util.text import validate_label
|
||||
|
||||
'''
|
||||
Broadly speaking, this script takes the audio downloaded from Common Voice
|
||||
for a certain language, in addition to the *.tsv files output by CorporaCeator,
|
||||
for a certain language, in addition to the *.tsv files output by CorporaCreator,
|
||||
and the script formats the data and transcripts to be in a state usable by
|
||||
DeepSpeech.py
|
||||
|
||||
Usage:
|
||||
$ python3 import_cv2.py /path/to/audio/data_dir /path/to/tsv_dir
|
||||
|
||||
Input:
|
||||
Input:
|
||||
(1) audio_dir (string) path to dir of audio downloaded from Common Voice
|
||||
(2) tsv_dir (string) path to dir containing {train,test,dev}.tsv files
|
||||
(2) tsv_dir (string) path to dir containing {train,test,dev}.tsv files
|
||||
which were generated by CorporaCreator
|
||||
|
||||
Output:
|
||||
|
@ -53,30 +54,41 @@ def _preprocess_data(audio_dir, tsv_dir):
|
|||
def _maybe_convert_set(audio_dir, input_tsv):
|
||||
output_csv = path.join(audio_dir,os.path.split(input_tsv)[-1].replace('tsv', 'csv'))
|
||||
print("Saving new DeepSpeech-formatted CSV file to: ", output_csv)
|
||||
|
||||
|
||||
# Get audiofile path and transcript for each sentence in tsv
|
||||
samples = []
|
||||
with open(input_tsv) as input_tsv_file:
|
||||
reader = csv.DictReader(input_tsv_file, delimiter='\t')
|
||||
for row in reader:
|
||||
samples.append((row['path'], row['sentence']))
|
||||
|
||||
|
||||
# Keep track of how many samples are good vs. problematic
|
||||
counter = { 'all': 0, 'too_short': 0, 'too_long': 0 }
|
||||
counter = { 'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0 }
|
||||
lock = RLock()
|
||||
num_samples = len(samples)
|
||||
rows = []
|
||||
|
||||
|
||||
def one_sample(sample):
|
||||
""" Take a audio file, and optionally convert it to 16kHz WAV """
|
||||
mp3_filename = path.join(audio_dir, sample[0])
|
||||
if not path.splitext(mp3_filename.lower())[1] == '.mp3':
|
||||
mp3_filename += ".mp3"
|
||||
# Storing wav files next to the mp3 ones - just with a different suffix
|
||||
wav_filename = path.splitext(mp3_filename)[0] + ".wav"
|
||||
_maybe_convert_wav(mp3_filename, wav_filename)
|
||||
frames = int(subprocess.check_output(['soxi', '-s', wav_filename], stderr=subprocess.STDOUT))
|
||||
file_size = path.getsize(wav_filename)
|
||||
file_size = -1
|
||||
if path.exists(wav_filename):
|
||||
file_size = path.getsize(wav_filename)
|
||||
frames = int(subprocess.check_output(['soxi', '-s', wav_filename], stderr=subprocess.STDOUT))
|
||||
label = validate_label(sample[1])
|
||||
with lock:
|
||||
if int(frames/SAMPLE_RATE*1000/10/2) < len(str(sample[1])):
|
||||
if file_size == -1:
|
||||
# Excluding samples that failed upon conversion
|
||||
counter['failed'] += 1
|
||||
elif label is None:
|
||||
# Excluding samples that failed on label validation
|
||||
counter['invalid_label'] += 1
|
||||
elif int(frames/SAMPLE_RATE*1000/10/2) < len(str(label)):
|
||||
# Excluding samples that are too short to fit the transcript
|
||||
counter['too_short'] += 1
|
||||
elif frames/SAMPLE_RATE > MAX_SECS:
|
||||
|
@ -84,9 +96,9 @@ def _maybe_convert_set(audio_dir, input_tsv):
|
|||
counter['too_long'] += 1
|
||||
else:
|
||||
# This one is good - keep it for the target CSV
|
||||
rows.append((wav_filename, file_size, sample[1]))
|
||||
rows.append((wav_filename, file_size, label))
|
||||
counter['all'] += 1
|
||||
|
||||
|
||||
print("Importing mp3 files...")
|
||||
pool = Pool(cpu_count())
|
||||
bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
|
||||
|
@ -95,7 +107,7 @@ def _maybe_convert_set(audio_dir, input_tsv):
|
|||
bar.update(num_samples)
|
||||
pool.close()
|
||||
pool.join()
|
||||
|
||||
|
||||
with open(output_csv, 'w') as output_csv_file:
|
||||
print('Writing CSV file for DeepSpeech.py as: ', output_csv)
|
||||
writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES)
|
||||
|
@ -103,8 +115,12 @@ def _maybe_convert_set(audio_dir, input_tsv):
|
|||
bar = progressbar.ProgressBar(max_value=len(rows), widgets=SIMPLE_BAR)
|
||||
for filename, file_size, transcript in bar(rows):
|
||||
writer.writerow({ 'wav_filename': filename, 'wav_filesize': file_size, 'transcript': transcript })
|
||||
|
||||
print('Imported %d samples.' % (counter['all'] - counter['too_short'] - counter['too_long']))
|
||||
|
||||
print('Imported %d samples.' % (counter['all'] - counter['failed'] - counter['too_short'] - counter['too_long']))
|
||||
if counter['failed'] > 0:
|
||||
print('Skipped %d samples that failed upon conversion.' % counter['failed'])
|
||||
if counter['invalid_label'] > 0:
|
||||
print('Skipped %d samples that failed on transcript validation.' % counter['invalid_label'])
|
||||
if counter['too_short'] > 0:
|
||||
print('Skipped %d samples that were too short to match the transcript.' % counter['too_short'])
|
||||
if counter['too_long'] > 0:
|
||||
|
@ -112,9 +128,13 @@ def _maybe_convert_set(audio_dir, input_tsv):
|
|||
|
||||
def _maybe_convert_wav(mp3_filename, wav_filename):
|
||||
if not path.exists(wav_filename):
|
||||
transformer = Transformer()
|
||||
transformer = sox.Transformer()
|
||||
transformer.convert(samplerate=SAMPLE_RATE)
|
||||
transformer.build(mp3_filename, wav_filename)
|
||||
try:
|
||||
transformer.build(mp3_filename, wav_filename)
|
||||
except sox.core.SoxError:
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
audio_dir = sys.argv[1]
|
||||
|
|
|
@ -23,7 +23,7 @@ python -u DeepSpeech.py \
|
|||
--train_batch_size 1 \
|
||||
--dev_batch_size 1 \
|
||||
--test_batch_size 1 \
|
||||
--n_hidden 494 \
|
||||
--epoch 75 \
|
||||
--n_hidden 100 \
|
||||
--epoch 200 \
|
||||
--checkpoint_dir "$checkpoint_dir" \
|
||||
"$@"
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
# Language-Specific Data
|
||||
|
||||
This directory contains language-specific data files. Most importantly, you will find here:
|
||||
|
||||
1. A list of unique characters for the target language (e.g. English) in `data/alphabet.txt`
|
||||
2. A binary n-gram language model compiled by `kenlm` in `data/lm/lm.binary`
|
||||
3. A trie model compiled by `generate_trie.cpp` in `data/lm/trie`
|
||||
|
||||
For more information on how to create these resources, see `data/lm/README.md`
|
|
@ -20,10 +20,49 @@ sudo apt-get install ffmpeg
|
|||
|
||||
Here is an example for a local audio file:
|
||||
```bash
|
||||
node ./index.js --audio <AUDIO_FILE> --model $HOME/models/output_graph.pbmm --alphabet $HOME/models/alphabet.txt
|
||||
node ./index.js --audio <AUDIO_FILE> \
|
||||
--model $HOME/models/output_graph.pbmm \
|
||||
--alphabet $HOME/models/alphabet.txt
|
||||
```
|
||||
|
||||
Here is an example for a remote RTMP-Stream:
|
||||
```bash
|
||||
node ./index.js --audio rtmp://<IP>:1935/live/teststream --model $HOME/models/output_graph.pbmm --alphabet $HOME/models/alphabet.txt
|
||||
node ./index.js --audio rtmp://<IP>:1935/live/teststream \
|
||||
--model $HOME/models/output_graph.pbmm \
|
||||
--alphabet $HOME/models/alphabet.txt
|
||||
```
|
||||
|
||||
## Examples
|
||||
Real time streaming inference with DeepSpeech's example audio ([audio-0.4.1.tar.gz](https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz)).
|
||||
```bash
|
||||
node ./index.js --audio $HOME/audio/2830-3980-0043.wav \
|
||||
--lm $HOME/models/lm.binary \
|
||||
--trie $HOME/models/trie \
|
||||
--model $HOME/models/output_graph.pbmm \
|
||||
--alphabet $HOME/models/alphabet.txt
|
||||
```
|
||||
```bash
|
||||
node ./index.js --audio $HOME/audio/4507-16021-0012.wav \
|
||||
--lm $HOME/models/lm.binary \
|
||||
--trie $HOME/models/trie \
|
||||
--model $HOME/models/output_graph.pbmm \
|
||||
--alphabet $HOME/models/alphabet.txt
|
||||
```
|
||||
```bash
|
||||
node ./index.js --audio $HOME/audio/8455-210777-0068.wav \
|
||||
--lm $HOME/models/lm.binary \
|
||||
--trie $HOME/models/trie \
|
||||
--model $HOME/models/output_graph.pbmm \
|
||||
--alphabet $HOME/models/alphabet.txt
|
||||
```
|
||||
Real time streaming inference in combination with an RTMP server.
|
||||
```bash
|
||||
node ./index.js --audio rtmp://<HOST>/<APP>/<KEY> \
|
||||
--lm $HOME/models/lm.binary \
|
||||
--trie $HOME/models/trie \
|
||||
--model $HOME/models/output_graph.pbmm \
|
||||
--alphabet $HOME/models/alphabet.txt
|
||||
```
|
||||
|
||||
## Notes
|
||||
To get the best result mapped on to your own scenario, it might be helpful to adjust the parameters `VAD_MODE` and `DEBOUNCE_TIME`.
|
|
@ -4,11 +4,12 @@ const VAD = require("node-vad");
|
|||
const Ds = require('deepspeech');
|
||||
const argparse = require('argparse');
|
||||
const util = require('util');
|
||||
const { spawn } = require('child_process');
|
||||
|
||||
// These constants control the beam search decoder
|
||||
|
||||
// Beam width used in the CTC decoder when building candidate transcriptions
|
||||
const BEAM_WIDTH = 1024;
|
||||
const BEAM_WIDTH = 500;
|
||||
|
||||
// The alpha hyperparameter of the CTC decoder. Language Model weight
|
||||
const LM_ALPHA = 0.75;
|
||||
|
@ -44,7 +45,7 @@ parser.addArgument(['--model'], {required: true, help: 'Path to the model (proto
|
|||
parser.addArgument(['--alphabet'], {required: true, help: 'Path to the configuration file specifying the alphabet used by the network'});
|
||||
parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
|
||||
parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
|
||||
parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'});
|
||||
parser.addArgument(['--audio'], {required: true, help: 'Path to the audio source to run (ffmpeg supported formats)'});
|
||||
parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exits'});
|
||||
let args = parser.parseArgs();
|
||||
|
||||
|
@ -67,51 +68,71 @@ if (args['lm'] && args['trie']) {
|
|||
console.error('Loaded language model in %ds.', totalTime(lm_load_end));
|
||||
}
|
||||
|
||||
const vad = new VAD(VAD.Mode.NORMAL);
|
||||
const voice = {START: true, STOP: false};
|
||||
let sctx = model.setupStream(150, 16000);
|
||||
let state = voice.STOP;
|
||||
// Default initial allocation = 3 seconds := 150
|
||||
const PRE_ALLOC_FRAMES = 150;
|
||||
|
||||
// Default is 16kHz
|
||||
const AUDIO_SAMPLE_RATE = 16000;
|
||||
|
||||
// Defines different thresholds for voice detection
|
||||
// NORMAL: Suitable for high bitrate, low-noise data. May classify noise as voice, too.
|
||||
// LOW_BITRATE: Detection mode optimised for low-bitrate audio.
|
||||
// AGGRESSIVE: Detection mode best suited for somewhat noisy, lower quality audio.
|
||||
// VERY_AGGRESSIVE: Detection mode with lowest miss-rate. Works well for most inputs.
|
||||
const VAD_MODE = VAD.Mode.NORMAL;
|
||||
// const VAD_MODE = VAD.Mode.LOW_BITRATE;
|
||||
// const VAD_MODE = VAD.Mode.AGGRESSIVE;
|
||||
// const VAD_MODE = VAD.Mode.VERY_AGGRESSIVE;
|
||||
|
||||
// Time in milliseconds for debouncing speech active state
|
||||
const DEBOUNCE_TIME = 20;
|
||||
|
||||
// Create voice activity stream
|
||||
const VAD_STREAM = VAD.createStream({
|
||||
mode: VAD_MODE,
|
||||
audioFrequency: AUDIO_SAMPLE_RATE,
|
||||
debounceTime: DEBOUNCE_TIME
|
||||
});
|
||||
|
||||
// Spawn ffmpeg process
|
||||
const ffmpeg = spawn('ffmpeg', [
|
||||
'-hide_banner',
|
||||
'-nostats',
|
||||
'-loglevel', 'fatal',
|
||||
'-i', args['audio'],
|
||||
'-vn',
|
||||
'-acodec', 'pcm_s16le',
|
||||
'-ac', 1,
|
||||
'-ar', AUDIO_SAMPLE_RATE,
|
||||
'-f', 's16le',
|
||||
'pipe:'
|
||||
]);
|
||||
|
||||
let audioLength = 0;
|
||||
let sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE);
|
||||
|
||||
function finishStream() {
|
||||
const model_load_start = process.hrtime();
|
||||
console.error('Running inference.');
|
||||
console.log('Transcription: ', model.finishStream(sctx));
|
||||
const model_load_end = process.hrtime(model_load_start);
|
||||
console.error('Inference took %ds.', totalTime(model_load_end));
|
||||
console.error('Inference took %ds for %ds audio file.', totalTime(model_load_end), audioLength.toPrecision(4));
|
||||
audioLength = 0;
|
||||
}
|
||||
|
||||
let ffmpeg = require('child_process').spawn('ffmpeg', [
|
||||
'-hide_banner',
|
||||
'-nostats',
|
||||
'-loglevel', 'fatal',
|
||||
'-i', args['audio'],
|
||||
'-af', 'highpass=f=200,lowpass=f=3000',
|
||||
'-vn',
|
||||
'-acodec', 'pcm_s16le',
|
||||
'-ac', 1,
|
||||
'-ar', 16000,
|
||||
'-f', 's16le',
|
||||
'pipe:'
|
||||
]);
|
||||
|
||||
ffmpeg.stdout.on('data', chunk => {
|
||||
vad.processAudio(chunk, 16000).then(res => {
|
||||
switch (res) {
|
||||
case VAD.Event.SILENCE:
|
||||
if (state === voice.START) {
|
||||
state = voice.STOP;
|
||||
finishStream();
|
||||
sctx = model.setupStream(150,16000);
|
||||
}
|
||||
break;
|
||||
case VAD.Event.VOICE:
|
||||
state = voice.START;
|
||||
model.feedAudioContent(sctx, chunk.slice(0, chunk.length / 2));
|
||||
break;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
ffmpeg.stdout.on('close', code => {
|
||||
function intermediateDecode() {
|
||||
finishStream();
|
||||
});
|
||||
sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE);
|
||||
}
|
||||
|
||||
function feedAudioContent(chunk) {
|
||||
audioLength += (chunk.length / 2) * ( 1 / AUDIO_SAMPLE_RATE);
|
||||
model.feedAudioContent(sctx, chunk.slice(0, chunk.length / 2));
|
||||
}
|
||||
|
||||
function processVad(data) {
|
||||
if (data.speech.start||data.speech.state) feedAudioContent(data.audioData)
|
||||
else if (data.speech.end) { feedAudioContent(data.audioData); intermediateDecode() }
|
||||
}
|
||||
|
||||
ffmpeg.stdout.pipe(VAD_STREAM).on('data', processVad);
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
import time, logging
|
||||
from datetime import datetime
|
||||
import threading, collections, queue, os, os.path
|
||||
import wave
|
||||
import pyaudio
|
||||
import webrtcvad
|
||||
from halo import Halo
|
||||
import deepspeech
|
||||
import numpy as np
|
||||
import pyaudio
|
||||
import wave
|
||||
import webrtcvad
|
||||
from halo import Halo
|
||||
from scipy import signal
|
||||
|
||||
logging.basicConfig(level=20)
|
||||
|
||||
|
@ -14,28 +15,61 @@ class Audio(object):
|
|||
"""Streams raw audio from microphone. Data is received in a separate thread, and stored in a buffer, to be read from."""
|
||||
|
||||
FORMAT = pyaudio.paInt16
|
||||
RATE = 16000
|
||||
# Network/VAD rate-space
|
||||
RATE_PROCESS = 16000
|
||||
CHANNELS = 1
|
||||
BLOCKS_PER_SECOND = 50
|
||||
BLOCK_SIZE = int(RATE / float(BLOCKS_PER_SECOND))
|
||||
|
||||
def __init__(self, callback=None):
|
||||
def __init__(self, callback=None, device=None, input_rate=RATE_PROCESS):
|
||||
def proxy_callback(in_data, frame_count, time_info, status):
|
||||
callback(in_data)
|
||||
return (None, pyaudio.paContinue)
|
||||
if callback is None: callback = lambda in_data: self.buffer_queue.put(in_data)
|
||||
self.buffer_queue = queue.Queue()
|
||||
self.sample_rate = self.RATE
|
||||
self.block_size = self.BLOCK_SIZE
|
||||
self.device = device
|
||||
self.input_rate = input_rate
|
||||
self.sample_rate = self.RATE_PROCESS
|
||||
self.block_size = int(self.RATE_PROCESS / float(self.BLOCKS_PER_SECOND))
|
||||
self.block_size_input = int(self.input_rate / float(self.BLOCKS_PER_SECOND))
|
||||
self.pa = pyaudio.PyAudio()
|
||||
self.stream = self.pa.open(format=self.FORMAT,
|
||||
channels=self.CHANNELS,
|
||||
rate=self.sample_rate,
|
||||
input=True,
|
||||
frames_per_buffer=self.block_size,
|
||||
stream_callback=proxy_callback)
|
||||
|
||||
kwargs = {
|
||||
'format': self.FORMAT,
|
||||
'channels': self.CHANNELS,
|
||||
'rate': self.input_rate,
|
||||
'input': True,
|
||||
'frames_per_buffer': self.block_size_input,
|
||||
'stream_callback': proxy_callback,
|
||||
}
|
||||
|
||||
# if not default device
|
||||
if self.device:
|
||||
kwargs['input_device_index'] = self.device
|
||||
|
||||
self.stream = self.pa.open(**kwargs)
|
||||
self.stream.start_stream()
|
||||
|
||||
def resample(self, data, input_rate):
|
||||
"""
|
||||
Microphone may not support our native processing sampling rate, so
|
||||
resample from input_rate to RATE_PROCESS here for webrtcvad and
|
||||
deepspeech
|
||||
|
||||
Args:
|
||||
data (binary): Input audio stream
|
||||
input_rate (int): Input audio rate to resample from
|
||||
"""
|
||||
data16 = np.fromstring(string=data, dtype=np.int16)
|
||||
resample_size = int(len(data16) / self.input_rate * self.RATE_PROCESS)
|
||||
resample = signal.resample(data16, resample_size)
|
||||
resample16 = np.array(resample, dtype=np.int16)
|
||||
return resample16.tostring()
|
||||
|
||||
def read_resampled(self):
|
||||
"""Return a block of audio data resampled to 16000hz, blocking if necessary."""
|
||||
return self.resample(data=self.buffer_queue.get(),
|
||||
input_rate=self.input_rate)
|
||||
|
||||
def read(self):
|
||||
"""Return a block of audio data, blocking if necessary."""
|
||||
return self.buffer_queue.get()
|
||||
|
@ -58,17 +92,22 @@ class Audio(object):
|
|||
wf.writeframes(data)
|
||||
wf.close()
|
||||
|
||||
|
||||
class VADAudio(Audio):
|
||||
"""Filter & segment audio with voice activity detection."""
|
||||
|
||||
def __init__(self, aggressiveness=3):
|
||||
super().__init__()
|
||||
def __init__(self, aggressiveness=3, device=None, input_rate=None):
|
||||
super().__init__(device=device, input_rate=input_rate)
|
||||
self.vad = webrtcvad.Vad(aggressiveness)
|
||||
|
||||
def frame_generator(self):
|
||||
"""Generator that yields all audio frames from microphone."""
|
||||
while True:
|
||||
yield self.read()
|
||||
if self.input_rate == self.RATE_PROCESS:
|
||||
while True:
|
||||
yield self.read()
|
||||
else:
|
||||
while True:
|
||||
yield self.read_resampled()
|
||||
|
||||
def vad_collector(self, padding_ms=300, ratio=0.75, frames=None):
|
||||
"""Generator that yields series of consecutive audio frames comprising each utterence, separated by yielding a single None.
|
||||
|
@ -121,7 +160,9 @@ def main(ARGS):
|
|||
model.enableDecoderWithLM(ARGS.alphabet, ARGS.lm, ARGS.trie, ARGS.lm_alpha, ARGS.lm_beta)
|
||||
|
||||
# Start audio with VAD
|
||||
vad_audio = VADAudio(aggressiveness=ARGS.vad_aggressiveness)
|
||||
vad_audio = VADAudio(aggressiveness=ARGS.vad_aggressiveness,
|
||||
device=ARGS.device,
|
||||
input_rate=ARGS.rate)
|
||||
print("Listening (ctrl-C to exit)...")
|
||||
frames = vad_audio.vad_collector()
|
||||
|
||||
|
@ -148,6 +189,7 @@ def main(ARGS):
|
|||
|
||||
if __name__ == '__main__':
|
||||
BEAM_WIDTH = 500
|
||||
DEFAULT_SAMPLE_RATE = 16000
|
||||
LM_ALPHA = 0.75
|
||||
LM_BETA = 1.85
|
||||
N_FEATURES = 26
|
||||
|
@ -171,6 +213,10 @@ if __name__ == '__main__':
|
|||
help="Path to the language model binary file. Default: lm.binary")
|
||||
parser.add_argument('-t', '--trie', default='trie',
|
||||
help="Path to the language model trie file created with native_client/generate_trie. Default: trie")
|
||||
parser.add_argument('-d', '--device', type=int, default=None,
|
||||
help="Device input index (Int) as listed by pyaudio.PyAudio.get_device_info_by_index(). If not provided, falls back to PyAudio.get_default_device()")
|
||||
parser.add_argument('-r', '--rate', type=int, default=DEFAULT_SAMPLE_RATE,
|
||||
help=f"Input device sample rate. Default: {DEFAULT_SAMPLE_RATE}. Your device may require 44100.")
|
||||
parser.add_argument('-nf', '--n_features', type=int, default=N_FEATURES,
|
||||
help=f"Number of MFCC features to use. Default: {N_FEATURES}")
|
||||
parser.add_argument('-nc', '--n_context', type=int, default=N_CONTEXT,
|
||||
|
|
|
@ -11,34 +11,20 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechConsole", "DeepSp
|
|||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Debug|x64 = Debug|x64
|
||||
Release|Any CPU = Release|Any CPU
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.ActiveCfg = Debug|Any CPU
|
||||
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.Build.0 = Debug|Any CPU
|
||||
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.Build.0 = Debug|x64
|
||||
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|x64.ActiveCfg = Release|x64
|
||||
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|x64.Build.0 = Release|x64
|
||||
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|x64.Build.0 = Debug|x64
|
||||
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|x64.ActiveCfg = Release|x64
|
||||
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|x64.Build.0 = Release|x64
|
||||
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Debug|x64.Build.0 = Debug|x64
|
||||
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Release|x64.ActiveCfg = Release|x64
|
||||
{312965E5-C4F6-4D95-BA64-79906B8BC7AC}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
|
|
|
@ -10,7 +10,7 @@ namespace DeepSpeechClient
|
|||
/// <summary>
|
||||
/// Client of the Mozilla's deepspeech implementation.
|
||||
/// </summary>
|
||||
public class DeepSpeech : IDeepSpeech, IDisposable
|
||||
public class DeepSpeech : IDeepSpeech
|
||||
{
|
||||
private unsafe ModelState** _modelStatePP;
|
||||
private unsafe ModelState* _modelStateP;
|
||||
|
|
|
@ -1,32 +1,91 @@
|
|||
namespace DeepSpeechClient.Interfaces
|
||||
using System;
|
||||
|
||||
namespace DeepSpeechClient.Interfaces
|
||||
{
|
||||
public interface IDeepSpeech
|
||||
/// <summary>
|
||||
/// Client interface of the Mozilla's deepspeech implementation.
|
||||
/// </summary>
|
||||
public interface IDeepSpeech : IDisposable
|
||||
{
|
||||
/// <summary>
|
||||
/// Prints the versions of Tensorflow and DeepSpeech.
|
||||
/// </summary>
|
||||
void PrintVersions();
|
||||
|
||||
/// <summary>
|
||||
/// Create an object providing an interface to a trained DeepSpeech model.
|
||||
/// </summary>
|
||||
/// <param name="aModelPath">The path to the frozen model graph.</param>
|
||||
/// <param name="aNCep">The number of cepstrum the model was trained with.</param>
|
||||
/// <param name="aNContext">The context window the model was trained with.</param>
|
||||
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
|
||||
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
|
||||
/// <returns>Zero on success, non-zero on failure.</returns>
|
||||
unsafe int CreateModel(string aModelPath, uint aNCep,
|
||||
uint aNContext,
|
||||
string aAlphabetConfigPath,
|
||||
uint aBeamWidth);
|
||||
|
||||
/// <summary>
|
||||
/// Enable decoding using beam scoring with a KenLM language model.
|
||||
/// </summary>
|
||||
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
|
||||
/// <param name="aLMPath">The path to the language model binary file.</param>
|
||||
/// <param name="aTriePath">The path to the trie file built from the same vocabulary as the language model binary.</param>
|
||||
/// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
|
||||
/// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
|
||||
/// <returns>Zero on success, non-zero on failure (invalid arguments).</returns>
|
||||
unsafe int EnableDecoderWithLM(string aAlphabetConfigPath,
|
||||
string aLMPath,
|
||||
string aTriePath,
|
||||
float aLMAlpha,
|
||||
float aLMBeta);
|
||||
|
||||
/// <summary>
|
||||
/// Use the DeepSpeech model to perform Speech-To-Text.
|
||||
/// </summary>
|
||||
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param>
|
||||
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
|
||||
/// <param name="aSampleRate">The sample-rate of the audio signal.</param>
|
||||
/// <returns>The STT result. The user is responsible for freeing the string. Returns NULL on error.</returns>
|
||||
unsafe string SpeechToText(short[] aBuffer,
|
||||
uint aBufferSize,
|
||||
uint aSampleRate);
|
||||
|
||||
/// <summary>
|
||||
/// Destroy a streaming state without decoding the computed logits.
|
||||
/// This can be used if you no longer need the result of an ongoing streaming
|
||||
/// inference and don't want to perform a costly decode operation.
|
||||
/// </summary>
|
||||
unsafe void DiscardStream();
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new streaming inference state.
|
||||
/// </summary>
|
||||
/// <param name="aPreAllocFrames">Number of timestep frames to reserve.
|
||||
/// One timestep is equivalent to two window lengths(20ms).
|
||||
/// If set to 0 we reserve enough frames for 3 seconds of audio(150).</param>
|
||||
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
|
||||
/// <returns>Zero for success, non-zero on failure</returns>
|
||||
unsafe int SetupStream(uint aPreAllocFrames, uint aSampleRate);
|
||||
|
||||
/// <summary>
|
||||
/// Feeds audio samples to an ongoing streaming inference.
|
||||
/// </summary>
|
||||
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate.</param>
|
||||
unsafe void FeedAudioContent(short[] aBuffer, uint aBufferSize);
|
||||
|
||||
/// <summary>
|
||||
/// Computes the intermediate decoding of an ongoing streaming inference. This is an expensive process as the decoder implementation isn't
|
||||
/// currently capable of streaming, so it always starts from the beginning of the audio.
|
||||
/// </summary>
|
||||
/// <returns>The STT intermediate result. The user is responsible for freeing the string.</returns>
|
||||
unsafe string IntermediateDecode();
|
||||
|
||||
/// <summary>
|
||||
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal.
|
||||
/// </summary>
|
||||
/// <returns>The STT result. The user is responsible for freeing the string.</returns>
|
||||
unsafe string FinishStream();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
using DeepSpeechClient;
|
||||
using DeepSpeechClient.Interfaces;
|
||||
using NAudio.Wave;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
@ -43,7 +44,7 @@ namespace CSharpExamples
|
|||
|
||||
Stopwatch stopwatch = new Stopwatch();
|
||||
|
||||
using (DeepSpeech sttClient = new DeepSpeech())
|
||||
using (IDeepSpeech sttClient = new DeepSpeech())
|
||||
{
|
||||
var result = 1;
|
||||
Console.WriteLine("Loading model...");
|
||||
|
@ -109,7 +110,6 @@ namespace CSharpExamples
|
|||
Console.WriteLine("Error loding the model.");
|
||||
}
|
||||
}
|
||||
Console.ReadKey();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
using CSCore.CoreAudioAPI;
|
||||
using CSCore.SoundIn;
|
||||
using CSCore.Streams;
|
||||
using DeepSpeechClient.Interfaces;
|
||||
using Microsoft.Win32;
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
|
@ -19,7 +20,7 @@ namespace DeepSpeechWPF
|
|||
/// </summary>
|
||||
public partial class MainWindow : Window
|
||||
{
|
||||
private readonly DeepSpeechClient.DeepSpeech _sttClient;
|
||||
private readonly IDeepSpeech _sttClient;
|
||||
|
||||
private const uint N_CEP = 26;
|
||||
private const uint N_CONTEXT = 9;
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<NativeLibs Include="$(MSBuildThisFileDirectory)\*.so" />
|
||||
<None Include="@(NativeLibs)">
|
||||
<Link>%(FileName)%(Extension)</Link>
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -0,0 +1,21 @@
|
|||
<?xml version="1.0"?>
|
||||
<package>
|
||||
<metadata>
|
||||
<id>$NUPKG_ID</id>
|
||||
<version>$NUPKG_VERSION</version>
|
||||
<title>DeepSpeech</title>
|
||||
<authors>Mozilla</authors>
|
||||
<owners>Mozilla</owners>
|
||||
<license type="expression">MPL-2.0</license>
|
||||
<projectUrl>http://github.com/mozilla/DeepSpeech</projectUrl>
|
||||
<requireLicenseAcceptance>false</requireLicenseAcceptance>
|
||||
<description>A library for running inference with a DeepSpeech model</description>
|
||||
<copyright>Copyright (c) 2019 Mozilla Corporation</copyright>
|
||||
<tags>native speech speech_recognition</tags>
|
||||
</metadata>
|
||||
<files>
|
||||
<file src="build\**" target="build/"/>
|
||||
<file src="lib\**" target="lib/"/>
|
||||
<file src="tools\**" target="tools/"/>
|
||||
</files>
|
||||
</package>
|
|
@ -0,0 +1,134 @@
|
|||
# Building DeepSpeech native client for Windows
|
||||
|
||||
We can now build the DeepSpeech native client and run inference on Windows using the C# client. To do that, we need to compile the `native_client`.
|
||||
|
||||
**Table of Contents**
|
||||
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Getting the code](#getting-the-code)
|
||||
- [Configuring the paths](#configuring-the-paths)
|
||||
- [Adding environment variables](#adding-environment-variables)
|
||||
- [MSYS2 paths](#msys2-paths)
|
||||
- [BAZEL path](#bazel-path)
|
||||
- [Python path](#python-path)
|
||||
- [CUDA paths](#cuda-paths)
|
||||
- [Building the native_client](#building-the-native_client)
|
||||
- [Build for CPU](#cpu)
|
||||
- [Build with CUDA support](#gpu-with-cuda)
|
||||
- [Using the generated library](#using-the-generated-library)
|
||||
## Prerequisites
|
||||
|
||||
* [Python 3.6](https://www.python.org/)
|
||||
* [Git Large File Storage](https://git-lfs.github.com/)
|
||||
* [MSYS2(x86_64)](https://www.msys2.org/)
|
||||
* [Bazel v0.17.2](https://github.com/bazelbuild/bazel/releases)
|
||||
* [Windows 10 SDK](https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk)
|
||||
* Windows 10
|
||||
* [Visual Studio 2017 Community](https://visualstudio.microsoft.com/vs/community/)
|
||||
|
||||
Inside the Visual Studio Installer enable `MS Build Tools` and `VC++ 2015.3 v14.00 (v140) toolset for desktop`.
|
||||
|
||||
If you want to enable CUDA support you need to install:
|
||||
|
||||
* [CUDA 9.0 and cuDNN 7.3.1](https://developer.nvidia.com/cuda-90-download-archive)
|
||||
|
||||
It may compile with other versions, but we don't extensively test them, so we highly recommend sticking to the versions listed above in order to avoid compilation errors caused by incompatible versions.
|
||||
|
||||
## Getting the code
|
||||
|
||||
We need to clone `mozilla/DeepSpeech` and `mozilla/tensorflow`.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/mozilla/DeepSpeech
|
||||
```
|
||||
|
||||
```bash
|
||||
git clone https://github.com/mozilla/tensorflow
|
||||
```
|
||||
|
||||
## Configuring the paths
|
||||
|
||||
We need to create a symbolic link. For this example, let's suppose that we cloned into `D:\cloned`, so the structure now looks like:
|
||||
|
||||
.
|
||||
├── D:\
|
||||
│ ├── cloned # Contains DeepSpeech and tensorflow side by side
|
||||
│ │ ├── DeepSpeech # Root of the cloned DeepSpeech
|
||||
│ │ ├── tensorflow # Root of the cloned Mozilla's tensorflow
|
||||
└── ...
|
||||
|
||||
Adjust the paths according to your own directory structure. For the structure above, we are going to use the following command:
|
||||
|
||||
```bash
|
||||
mklink /d "D:\cloned\tensorflow\native_client" "D:\cloned\DeepSpeech\native_client"
|
||||
```
|
||||
|
||||
## Adding environment variables
|
||||
|
||||
After you have installed the requirements, there are a few paths that we need to add to the system `PATH` environment variable.
|
||||
|
||||
#### MSYS2 paths
|
||||
|
||||
For MSYS2 we need to add its `bin` directory. If you installed it in the default location, the path that we need to add should look like `C:\msys64\usr\bin`. Now we can run `pacman`:
|
||||
|
||||
```bash
|
||||
pacman -Syu
|
||||
```
|
||||
|
||||
```bash
|
||||
pacman -Su
|
||||
```
|
||||
|
||||
```bash
|
||||
pacman -S patch unzip
|
||||
```
|
||||
|
||||
#### BAZEL path
|
||||
|
||||
For BAZEL we need to add the path to the executable; make sure you rename the executable to `bazel`.
|
||||
|
||||
To check the version installed you can run:
|
||||
|
||||
```bash
|
||||
bazel version
|
||||
```
|
||||
|
||||
#### PYTHON path
|
||||
|
||||
Add your `python.exe` path to the `PATH` variable.
|
||||
|
||||
|
||||
#### CUDA paths
|
||||
|
||||
If you run a CUDA-enabled `native_client`, you need to add the following to the `PATH` variable.
|
||||
|
||||
```
|
||||
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\bin
|
||||
```
|
||||
|
||||
## Building the native_client
|
||||
|
||||
There's one last command to run before building: you need to run [configure.py](https://github.com/mozilla/tensorflow/blob/master/configure.py) inside the cloned `tensorflow` directory.
|
||||
|
||||
At this point we are ready to start building the `native_client`. Go to the `tensorflow` directory that you cloned; following our example, it should be `D:\cloned\tensorflow`.
|
||||
|
||||
#### CPU
|
||||
We will add AVX/AVX2 support in the command. Please make sure that your CPU supports these instructions before adding the flags; if it does not, you can remove them.
|
||||
|
||||
```bash
|
||||
bazel build -c opt --copt=/arch:AVX --copt=/arch:AVX2 //native_client:libdeepspeech.so
|
||||
```
|
||||
|
||||
#### GPU with CUDA
|
||||
If you enabled CUDA when running [configure.py](https://github.com/mozilla/tensorflow/blob/master/configure.py), you can now add `--config=cuda` to compile with CUDA support.
|
||||
|
||||
```bash
|
||||
bazel build -c opt --config=cuda --copt=/arch:AVX --copt=/arch:AVX2 //native_client:libdeepspeech.so
|
||||
```
|
||||
|
||||
Be patient: if you enabled AVX/AVX2 and CUDA, the build will take a long time. When it finishes, it shows the path to the generated `libdeepspeech.so`.
|
||||
|
||||
|
||||
## Using the generated library
|
||||
|
||||
For now, the generated `libdeepspeech.so` can only be used with the C# clients. Go to [DeepSpeech/examples/net_framework/CSharpExamples/](https://github.com/mozilla/DeepSpeech/tree/master/examples/net_framework/CSharpExamples) in your DeepSpeech directory and open the Visual Studio solution, then build it in debug or release mode. Finally, copy `libdeepspeech.so` to the generated `x64/Debug` or `x64/Release` directory.
|
|
@ -0,0 +1,59 @@
|
|||
# NodeJS voice recognition example using Mozilla DeepSpeech
|
||||
|
||||
Download the pre-trained model (1.8GB):
|
||||
|
||||
```
|
||||
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/deepspeech-0.4.1-models.tar.gz
|
||||
tar xvfz deepspeech-0.4.1-models.tar.gz
|
||||
```
|
||||
|
||||
Edit the model paths in `index.js` if necessary:
|
||||
|
||||
```
|
||||
let modelPath = './models/output_graph.pbmm';
|
||||
let alphabetPath = './models/alphabet.txt';
|
||||
let lmPath = './models/lm.binary';
|
||||
let triePath = './models/trie';
|
||||
```
|
||||
|
||||
Install Sox (for .wav file loading):
|
||||
|
||||
```
|
||||
brew install sox
|
||||
```
|
||||
|
||||
Download test audio files:
|
||||
|
||||
```
|
||||
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz
|
||||
tar xfvz audio-0.4.1.tar.gz
|
||||
```
|
||||
|
||||
Install NPM dependencies:
|
||||
|
||||
```
|
||||
npm install
|
||||
```
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
node index.js
|
||||
```
|
||||
|
||||
Result should be something like:
|
||||
|
||||
```
|
||||
audio length 1.975
|
||||
result: experience proves this
|
||||
|
||||
```
|
||||
|
||||
Try other wav files with an argument:
|
||||
|
||||
```
|
||||
node index.js audio/2830-3980-0043.wav
|
||||
node index.js audio/8455-210777-0068.wav
|
||||
node index.js audio/4507-16021-0012.wav
|
||||
```
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
// Example script: transcribe a WAV file with a DeepSpeech model.
// The input file is converted with Sox to raw 16-bit, 16 kHz, mono,
// little-endian PCM, buffered in memory, and passed to the model.
const DeepSpeech = require('deepspeech');
const Fs = require('fs');
const Sox = require('sox-stream');
const MemoryStream = require('memory-stream');
const Duplex = require('stream').Duplex;
const Wav = require('node-wav');

const BEAM_WIDTH = 1024;
const N_FEATURES = 26;
const N_CONTEXT = 9;
let modelPath = './models/output_graph.pbmm';
let alphabetPath = './models/alphabet.txt';

let model = new DeepSpeech.Model(modelPath, N_FEATURES, N_CONTEXT, alphabetPath, BEAM_WIDTH);

const LM_ALPHA = 0.75;
const LM_BETA = 1.85;
let lmPath = './models/lm.binary';
let triePath = './models/trie';

model.enableDecoderWithLM(alphabetPath, lmPath, triePath, LM_ALPHA, LM_BETA);

// The audio file may be given as the first command-line argument.
let audioFile = process.argv[2] || './audio/2830-3980-0043.wav';

if (!Fs.existsSync(audioFile)) {
  // Report on stderr and exit non-zero so callers can detect the failure.
  console.error('file missing:', audioFile);
  process.exit(1);
}

const buffer = Fs.readFileSync(audioFile);
// Decoded here only to inspect the original sample rate; the audio that is
// fed to the model comes from the Sox pipeline below.
const decoded = Wav.decode(buffer);

if (decoded.sampleRate < 16000) {
  console.error('Warning: original sample rate (' + decoded.sampleRate + ') is lower than 16kHz. Up-sampling might produce erratic speech recognition.');
}

// Wrap an in-memory buffer in a stream so it can be piped into Sox.
function bufferToStream(buffer) {
  let stream = new Duplex();
  stream.push(buffer);
  stream.push(null); // signal end of data
  return stream;
}

let audioStream = new MemoryStream();
bufferToStream(buffer).
  pipe(Sox({
    global: {
      'no-dither': true,
    },
    output: {
      bits: 16,
      rate: 16000,
      channels: 1,
      encoding: 'signed-integer',
      endian: 'little',
      compression: 0.0,
      type: 'raw'
    }
  })).
  pipe(audioStream);

audioStream.on('finish', () => {

  let audioBuffer = audioStream.toBuffer();

  // Two bytes per 16-bit sample, 16000 samples per second.
  const audioLength = (audioBuffer.length / 2) * ( 1 / 16000);
  console.log('audio length', audioLength);

  // NOTE(review): only the first half of the buffer's bytes is passed to
  // stt(); presumably the 0.4.1 binding expects this - confirm before changing.
  let transcript = model.stt(audioBuffer.slice(0, audioBuffer.length / 2), 16000);

  console.log('result:', transcript);
});
|
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
"name": "deepspeech-nodejs_wav",
|
||||
"version": "1.0.0",
|
||||
"description": "Simple audio processing",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"start": "node ./index.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"argparse": "^1.0.10",
|
||||
"deepspeech": "^0.4.1",
|
||||
"node-wav": "0.0.2",
|
||||
"sox-stream": "^2.0.3",
|
||||
"util": "^0.11.1"
|
||||
},
|
||||
"license": "Public domain"
|
||||
}
|
|
@ -13,25 +13,25 @@
|
|||
|
||||
include definitions.mk
|
||||
|
||||
default: deepspeech
|
||||
default: $(DEEPSPEECH_BIN)
|
||||
|
||||
clean:
|
||||
clean: bindings-clean
|
||||
rm -f deepspeech
|
||||
|
||||
deepspeech: client.cc
|
||||
$(CXX) -std=c++11 -o deepspeech $(CFLAGS) $(SOX_CFLAGS) client.cc $(LDFLAGS) $(SOX_LDFLAGS)
|
||||
$(DEEPSPEECH_BIN): client.cc
|
||||
$(CXX) $(CFLAGS) $(CFLAGS_DEEPSPEECH) $(SOX_CFLAGS) client.cc $(LDFLAGS) $(SOX_LDFLAGS)
|
||||
ifeq ($(OS),Darwin)
|
||||
install_name_tool -change $$TASKCLUSTER_TASK_DIR/homebrew/opt/sox/lib/libsox.3.dylib @rpath/libsox.3.dylib deepspeech
|
||||
install_name_tool -change bazel-out/local-opt/bin/native_client/libdeepspeech.so @rpath/libdeepspeech.so deepspeech
|
||||
endif
|
||||
|
||||
run: deepspeech
|
||||
run: $(DEEPSPEECH_BIN)
|
||||
${META_LD_LIBRARY_PATH}=${TFDIR}/bazel-bin/native_client:${${META_LD_LIBRARY_PATH}} ./deepspeech ${ARGS}
|
||||
|
||||
debug: deepspeech
|
||||
debug: $(DEEPSPEECH_BIN)
|
||||
${META_LD_LIBRARY_PATH}=${TFDIR}/bazel-bin/native_client:${${META_LD_LIBRARY_PATH}} gdb --args ./deepspeech ${ARGS}
|
||||
|
||||
install: deepspeech
|
||||
install: $(DEEPSPEECH_BIN)
|
||||
install -d ${PREFIX}/lib
|
||||
install -m 0644 ${TFDIR}/bazel-bin/native_client/libdeepspeech.so ${PREFIX}/lib/
|
||||
install -d ${PREFIX}/bin
|
||||
|
|
|
@ -1,7 +1,11 @@
|
|||
#ifndef __ARGS_H__
|
||||
#define __ARGS_H__
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include "getopt_win.h"
|
||||
#else
|
||||
#include <getopt.h>
|
||||
#endif
|
||||
#include <iostream>
|
||||
|
||||
#include "deepspeech.h"
|
||||
|
|
|
@ -2,22 +2,33 @@
|
|||
#include <stdio.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <dirent.h>
|
||||
#include <errno.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#ifndef __ANDROID__
|
||||
#include <sox.h>
|
||||
#endif // __ANDROID__
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#if defined(__ANDROID__) || defined(_MSC_VER)
|
||||
#define NO_SOX
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define NO_DIR
|
||||
#endif
|
||||
|
||||
#ifndef NO_SOX
|
||||
#include <sox.h>
|
||||
#endif
|
||||
|
||||
#ifndef NO_DIR
|
||||
#include <dirent.h>
|
||||
#include <unistd.h>
|
||||
#endif // NO_DIR
|
||||
|
||||
#include "deepspeech.h"
|
||||
#include "args.h"
|
||||
|
||||
|
@ -61,7 +72,7 @@ GetAudioBuffer(const char* path)
|
|||
{
|
||||
ds_audio_buffer res = {0};
|
||||
|
||||
#ifndef __ANDROID__
|
||||
#ifndef NO_SOX
|
||||
sox_format_t* input = sox_open_read(path, NULL, NULL, NULL);
|
||||
assert(input);
|
||||
|
||||
|
@ -150,9 +161,9 @@ GetAudioBuffer(const char* path)
|
|||
// Close sox handles
|
||||
sox_close(output);
|
||||
sox_close(input);
|
||||
#endif // __ANDROID__
|
||||
#endif // NO_SOX
|
||||
|
||||
#ifdef __ANDROID__
|
||||
#ifdef NO_SOX
|
||||
// FIXME: Hack and support only 16kHz mono 16-bits PCM
|
||||
FILE* wave = fopen(path, "r");
|
||||
|
||||
|
@ -160,19 +171,15 @@ GetAudioBuffer(const char* path)
|
|||
|
||||
unsigned short audio_format;
|
||||
fseek(wave, 20, SEEK_SET); rv = fread(&audio_format, 2, 1, wave);
|
||||
assert(rv == 2);
|
||||
|
||||
unsigned short num_channels;
|
||||
fseek(wave, 22, SEEK_SET); rv = fread(&num_channels, 2, 1, wave);
|
||||
assert(rv == 2);
|
||||
|
||||
unsigned int sample_rate;
|
||||
fseek(wave, 24, SEEK_SET); rv = fread(&sample_rate, 4, 1, wave);
|
||||
assert(rv == 2);
|
||||
|
||||
unsigned short bits_per_sample;
|
||||
fseek(wave, 34, SEEK_SET); rv = fread(&bits_per_sample, 2, 1, wave);
|
||||
assert(rv == 2);
|
||||
|
||||
assert(audio_format == 1); // 1 is PCM
|
||||
assert(num_channels == 1); // MONO
|
||||
|
@ -185,16 +192,14 @@ GetAudioBuffer(const char* path)
|
|||
fprintf(stderr, "bits_per_sample=%d\n", bits_per_sample);
|
||||
|
||||
fseek(wave, 40, SEEK_SET); rv = fread(&res.buffer_size, 4, 1, wave);
|
||||
assert(rv == 2);
|
||||
fprintf(stderr, "res.buffer_size=%ld\n", res.buffer_size);
|
||||
|
||||
fseek(wave, 44, SEEK_SET);
|
||||
res.buffer = (char*)malloc(sizeof(char) * res.buffer_size);
|
||||
rv = fread(res.buffer, sizeof(char), res.buffer_size, wave);
|
||||
assert(rv == res.buffer_size);
|
||||
|
||||
fclose(wave);
|
||||
#endif // __ANDROID__
|
||||
#endif // NO_SOX
|
||||
|
||||
#ifdef __APPLE__
|
||||
res.buffer_size = (size_t)(output->olength * 2);
|
||||
|
@ -261,8 +266,10 @@ main(int argc, char **argv)
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef NO_SOX
|
||||
// Initialise SOX
|
||||
assert(sox_init() == SOX_SUCCESS);
|
||||
#endif
|
||||
|
||||
struct stat wav_info;
|
||||
if (0 != stat(audio, &wav_info)) {
|
||||
|
@ -270,11 +277,14 @@ main(int argc, char **argv)
|
|||
}
|
||||
|
||||
switch (wav_info.st_mode & S_IFMT) {
|
||||
#ifndef _MSC_VER
|
||||
case S_IFLNK:
|
||||
#endif
|
||||
case S_IFREG:
|
||||
ProcessFile(ctx, audio, show_times);
|
||||
break;
|
||||
|
||||
#ifndef NO_DIR
|
||||
case S_IFDIR:
|
||||
{
|
||||
printf("Running on directory %s\n", audio);
|
||||
|
@ -297,16 +307,17 @@ main(int argc, char **argv)
|
|||
closedir(wav_dir);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
printf("Unexpected type for %s: %d\n", audio, (wav_info.st_mode & S_IFMT));
|
||||
break;
|
||||
}
|
||||
|
||||
#ifndef __ANDROID__
|
||||
#ifndef NO_SOX
|
||||
// Deinitialise and quit
|
||||
sox_quit();
|
||||
#endif // __ANDROID__
|
||||
#endif // NO_SOX
|
||||
|
||||
DS_DestroyModel(ctx);
|
||||
|
||||
|
|
|
@ -5,6 +5,17 @@ TFDIR ?= $(abspath $(NC_DIR)/../../tensorflow)
|
|||
PREFIX ?= /usr/local
|
||||
SO_SEARCH ?= $(TFDIR)/bazel-bin/
|
||||
|
||||
TOOL_AS := as
|
||||
TOOL_CC := gcc
|
||||
TOOL_CXX := c++
|
||||
TOOL_LD := ld
|
||||
TOOL_LDD := ldd
|
||||
|
||||
DEEPSPEECH_BIN := deepspeech
|
||||
CFLAGS_DEEPSPEECH := -std=c++11 -o $(DEEPSPEECH_BIN)
|
||||
LINK_DEEPSPEECH := -ldeepspeech
|
||||
LINK_PATH_DEEPSPEECH := -L${TFDIR}/bazel-bin/native_client
|
||||
|
||||
ifeq ($(TARGET),host)
|
||||
TOOLCHAIN :=
|
||||
CFLAGS :=
|
||||
|
@ -18,6 +29,19 @@ PYTHON_PLATFORM_NAME := --plat-name manylinux1_x86_64
|
|||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET),host-win)
|
||||
DEEPSPEECH_BIN := deepspeech.exe
|
||||
TOOLCHAIN := '$(VCINSTALLDIR)\bin\amd64\'
|
||||
TOOL_CC := cl.exe
|
||||
TOOL_CXX := cl.exe
|
||||
TOOL_LD := link.exe
|
||||
LINK_DEEPSPEECH := $(TFDIR)\bazel-bin\native_client\libdeepspeech.so.if.lib
|
||||
LINK_PATH_DEEPSPEECH :=
|
||||
CFLAGS_DEEPSPEECH := -nologo -Fe$(DEEPSPEECH_BIN)
|
||||
SOX_CFLAGS :=
|
||||
SOX_LDFLAGS :=
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET),rpi3)
|
||||
TOOLCHAIN ?= ${TFDIR}/bazel-$(shell basename "${TFDIR}")/external/LinaroArmGcc72/bin/arm-linux-gnueabihf-
|
||||
RASPBIAN ?= $(abspath $(NC_DIR)/../multistrap-raspbian-stretch)
|
||||
|
@ -72,15 +96,15 @@ endif
|
|||
|
||||
CFLAGS += $(EXTRA_CFLAGS)
|
||||
CXXFLAGS += $(EXTRA_CXXFLAGS)
|
||||
LIBS := -ldeepspeech $(EXTRA_LIBS)
|
||||
LDFLAGS_DIRS := -L${TFDIR}/bazel-bin/native_client $(EXTRA_LDFLAGS)
|
||||
LIBS := $(LINK_DEEPSPEECH) $(EXTRA_LIBS)
|
||||
LDFLAGS_DIRS := $(LINK_PATH_DEEPSPEECH) $(EXTRA_LDFLAGS)
|
||||
LDFLAGS += $(LDFLAGS_NEEDED) $(LDFLAGS_RPATH) $(LDFLAGS_DIRS) $(LIBS)
|
||||
|
||||
AS := $(TOOLCHAIN)as
|
||||
CC := $(TOOLCHAIN)gcc
|
||||
CXX := $(TOOLCHAIN)c++
|
||||
LD := $(TOOLCHAIN)ld
|
||||
LDD := $(TOOLCHAIN)ldd $(TOOLCHAIN_LDD_OPTS)
|
||||
AS := $(TOOLCHAIN)$(TOOL_AS)
|
||||
CC := $(TOOLCHAIN)$(TOOL_CC)
|
||||
CXX := $(TOOLCHAIN)$(TOOL_CXX)
|
||||
LD := $(TOOLCHAIN)$(TOOL_LD)
|
||||
LDD := $(TOOLCHAIN)$(TOOL_LDD) $(TOOLCHAIN_LDD_OPTS)
|
||||
|
||||
RPATH_PYTHON := '-Wl,-rpath,\$$ORIGIN/lib/' $(LDFLAGS_RPATH)
|
||||
RPATH_NODEJS := '-Wl,-rpath,$$\$$ORIGIN/../'
|
||||
|
|
|
@ -4,6 +4,10 @@ if [ `uname` = "Darwin" ]; then
|
|||
export PATH="/Users/build-user/TaskCluster/Workdir/tasks/tc-workdir/homebrew/opt/coreutils/libexec/gnubin:${PATH}"
|
||||
fi
|
||||
|
||||
if [ `uname -o` = "Msys" ]; then
|
||||
export PATH="/c/Program Files/Git/bin/:${PATH}"
|
||||
fi
|
||||
|
||||
DS_GIT_DIR="$(realpath "$(dirname "$(realpath "$0")")/../.git/")"
|
||||
if [ ! -d "${DS_GIT_DIR}" ]; then
|
||||
return 1
|
||||
|
|
|
@ -0,0 +1,653 @@
|
|||
#ifndef __GETOPT_H__
|
||||
/**
|
||||
* DISCLAIMER
|
||||
* This file is part of the mingw-w64 runtime package.
|
||||
*
|
||||
* The mingw-w64 runtime package and its code is distributed in the hope that it
|
||||
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
|
||||
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
|
||||
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
* Sponsored in part by the Defense Advanced Research Projects
|
||||
* Agency (DARPA) and Air Force Research Laboratory, Air Force
|
||||
* Materiel Command, USAF, under agreement number F39502-99-1-0512.
|
||||
*/
|
||||
/*-
|
||||
* Copyright (c) 2000 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Dieter Baron and Thomas Klausner.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma warning(disable:4996);
|
||||
|
||||
#define __GETOPT_H__
|
||||
|
||||
/* All the headers include this file. */
|
||||
#include <crtdefs.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <windows.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
|
||||
|
||||
#ifdef REPLACE_GETOPT
|
||||
int opterr = 1; /* if error message should be printed */
|
||||
int optind = 1; /* index into parent argv vector */
|
||||
int optopt = '?'; /* character checked for validity */
|
||||
#undef optreset /* see getopt.h */
|
||||
#define optreset __mingw_optreset
|
||||
int optreset; /* reset getopt */
|
||||
char *optarg; /* argument associated with option */
|
||||
#endif
|
||||
|
||||
//extern int optind; /* index of first non-option in argv */
|
||||
//extern int optopt; /* single option character, as parsed */
|
||||
//extern int opterr; /* flag to enable built-in diagnostics... */
|
||||
// /* (user may set to zero, to suppress) */
|
||||
//
|
||||
//extern char *optarg; /* pointer to argument of current option */
|
||||
|
||||
#define PRINT_ERROR ((opterr) && (*options != ':'))
|
||||
|
||||
#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
|
||||
#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
|
||||
#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
|
||||
|
||||
/* return values */
|
||||
#define BADCH (int)'?'
|
||||
#define BADARG ((*options == ':') ? (int)':' : (int)'?')
|
||||
#define INORDER (int)1
|
||||
|
||||
#ifndef __CYGWIN__
|
||||
#define __progname __argv[0]
|
||||
#else
|
||||
extern char __declspec(dllimport) *__progname;
|
||||
#endif
|
||||
|
||||
#ifdef __CYGWIN__
|
||||
static char EMSG[] = "";
|
||||
#else
|
||||
#define EMSG ""
|
||||
#endif
|
||||
|
||||
static int getopt_internal(int, char * const *, const char *,
|
||||
const struct option *, int *, int);
|
||||
static int parse_long_options(char * const *, const char *,
|
||||
const struct option *, int *, int);
|
||||
static int gcd(int, int);
|
||||
static void permute_args(int, int, int, char * const *);
|
||||
|
||||
static char *place = EMSG; /* option letter processing */
|
||||
|
||||
/* XXX: set optreset to 1 rather than these two */
|
||||
static int nonopt_start = -1; /* first non option argument (for permute) */
|
||||
static int nonopt_end = -1; /* first option after non options (for permute) */
|
||||
|
||||
/* Error messages */
|
||||
static const char recargchar[] = "option requires an argument -- %c";
|
||||
static const char recargstring[] = "option requires an argument -- %s";
|
||||
static const char ambig[] = "ambiguous option -- %.*s";
|
||||
static const char noarg[] = "option doesn't take an argument -- %.*s";
|
||||
static const char illoptchar[] = "unknown option -- %c";
|
||||
static const char illoptstring[] = "unknown option -- %s";
|
||||
|
||||
static void
|
||||
_vwarnx(const char *fmt,va_list ap)
|
||||
{
|
||||
(void)fprintf(stderr,"%s: ",__progname);
|
||||
if (fmt != NULL)
|
||||
(void)vfprintf(stderr,fmt,ap);
|
||||
(void)fprintf(stderr,"\n");
|
||||
}
|
||||
|
||||
/*
 * printf-style front end for _vwarnx(): collects the variadic
 * arguments and forwards them together with fmt.
 */
static void
warnx(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	_vwarnx(fmt, args);
	va_end(args);
}
|
||||
|
||||
/*
 * Compute the greatest common divisor of a and b using the Euclidean
 * algorithm.
 *
 * Returns a when b is 0 (gcd(a, 0) == a); without that guard the first
 * "a % b" would divide by zero, which is undefined behaviour.
 */
static int
gcd(int a, int b)
{
	int c;

	if (b == 0)
		return (a);

	c = a % b;
	while (c != 0) {
		/* shift the pair down: (a, b) <- (b, a mod b) */
		a = b;
		b = c;
		c = a % b;
	}

	return (b);
}
|
||||
|
||||
/*
 * Swap two adjacent runs of nargv in place: the non-options in
 * [panonopt_start, panonopt_end) and the options in
 * [panonopt_end, opt_end).  The relative order inside each run is kept.
 * The exchange is done cycle by cycle; the number of cycles is the gcd
 * of the two run lengths.
 */
static void
permute_args(int panonopt_start, int panonopt_end, int opt_end,
	char * const *nargv)
{
	/* LINTED const cast */
	char **argv = (char **)nargv;
	const int nnonopts = panonopt_end - panonopt_start;
	const int nopts = opt_end - panonopt_end;
	const int ncycle = gcd(nnonopts, nopts);
	const int cyclelen = (opt_end - panonopt_start) / ncycle;
	int cycle, step;

	for (cycle = 0; cycle < ncycle; cycle++) {
		const int anchor = panonopt_end + cycle;
		int pos = anchor;

		for (step = 0; step < cyclelen; step++) {
			char *held;

			/* walk to the next slot of this cycle */
			pos += (pos >= panonopt_end) ? -nnonopts : nopts;

			/* exchange it with the cycle's anchor slot */
			held = argv[pos];
			argv[pos] = argv[anchor];
			argv[anchor] = held;
		}
	}
}
|
||||
|
||||
#ifdef REPLACE_GETOPT
|
||||
/*
 * getopt --
 *	Short-option parser over an argc/argv vector.
 *
 * [eventually this will replace the BSD getopt]
 */
int
getopt(int nargc, char * const *nargv, const char *options)
{
	/*
	 * No FLAG_PERMUTE here: unlike GNU, the BSD getopt(3) has never
	 * reordered argv.
	 *
	 * Many privileged programs call getopt() before dropping
	 * privileges, so this path is kept as simple (and bug-free) as
	 * possible: no long options, no index output, no flags.
	 */
	const int no_flags = 0;

	return (getopt_internal(nargc, nargv, options, NULL, NULL, no_flags));
}
|
||||
#endif /* REPLACE_GETOPT */
|
||||
|
||||
//extern int getopt(int nargc, char * const *nargv, const char *options);
|
||||
|
||||
#ifdef _BSD_SOURCE
|
||||
/*
|
||||
* BSD adds the non-standard `optreset' feature, for reinitialisation
|
||||
* of `getopt' parsing. We support this feature, for applications which
|
||||
* proclaim their BSD heritage, before including this header; however,
|
||||
* to maintain portability, developers are advised to avoid it.
|
||||
*/
|
||||
# define optreset __mingw_optreset
|
||||
extern int optreset;
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* POSIX requires the `getopt' API to be specified in `unistd.h';
|
||||
* thus, `unistd.h' includes this header. However, we do not want
|
||||
* to expose the `getopt_long' or `getopt_long_only' APIs, when
|
||||
* included in this manner. Thus, close the standard __GETOPT_H__
|
||||
* declarations block, and open an additional __GETOPT_LONG_H__
|
||||
* specific block, only when *not* __UNISTD_H_SOURCED__, in which
|
||||
* to declare the extended API.
|
||||
*/
|
||||
#endif /* !defined(__GETOPT_H__) */
|
||||
|
||||
#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
|
||||
#define __GETOPT_LONG_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Description of one long option.  parse_long_options() walks an array
 * of these until it reaches an entry whose name is NULL.
 *
 * On a successful match: if flag is non-NULL, *flag is set to val and
 * the parser returns 0; otherwise the parser returns val.
 */
struct option		/* specification for a long form option... */
{
	const char *name;	/* option name, without leading hyphens */
	int has_arg;		/* does it take an argument? */
	int *flag;		/* where to save its status, or NULL */
	int val;		/* its associated status value */
};
|
||||
|
||||
/*
 * Values for struct option's has_arg field, as interpreted by
 * parse_long_options():
 *   no_argument       - a "--name=value" form is rejected;
 *   required_argument - the value comes from "--name=value" or, failing
 *                       that, from the next argv element;
 *   optional_argument - the value is taken only from "--name=value";
 *                       the next argv element is never consumed.
 */
enum			/* permitted values for its `has_arg' field... */
{
	no_argument = 0,	/* option never takes an argument */
	required_argument,	/* option always requires an argument */
	optional_argument	/* option may take an argument */
};
|
||||
|
||||
/*
 * parse_long_options --
 *	Parse long options in argc/argv argument vector.
 *
 * The candidate option text is read from the file-static `place'
 * pointer (already advanced past the leading dashes by the caller).
 *
 * nargv        - argument vector; only read here to fetch the value of a
 *                required argument from nargv[optind].
 * options      - short-option string; used in this function only through
 *                the PRINT_ERROR and BADARG macros (leading ':' handling).
 * long_options - table of long options, terminated by a NULL name.
 * idx          - if non-NULL, receives the index of the matched entry.
 * short_too    - non-zero when the text could also be a short option.
 *
 * Returns:
 *   -1      if short_too is set and the option does not match
 *           long_options (optind is restored in that case);
 *   BADCH   for an ambiguous abbreviation or an unknown option;
 *   BADARG  for an argument given to a no_argument option, or a missing
 *           required argument;
 *   0       on a match whose flag pointer is non-NULL (*flag = val);
 *   val     on a match whose flag pointer is NULL.
 *
 * Side effects: advances optind, may set optarg and optopt, may print a
 * diagnostic through warnx().
 */
static int
parse_long_options(char * const *nargv, const char *options,
	const struct option *long_options, int *idx, int short_too)
{
	char *current_argv, *has_equal;
	size_t current_argv_len;
	int i, ambiguous, match;

/*
 * Two table entries are interchangeable when has_arg, flag and val all
 * agree; a prefix matching several such entries is not ambiguous.
 */
#define IDENTICAL_INTERPRETATION(_x, _y)				\
	(long_options[(_x)].has_arg == long_options[(_y)].has_arg &&	\
	 long_options[(_x)].flag == long_options[(_y)].flag &&		\
	 long_options[(_x)].val == long_options[(_y)].val)

	current_argv = place;
	match = -1;
	ambiguous = 0;

	/* the argv element holding this option is consumed up front */
	optind++;

	if ((has_equal = strchr(current_argv, '=')) != NULL) {
		/* argument found (--option=arg) */
		current_argv_len = has_equal - current_argv;
		/* step past '=' so has_equal points at the value text */
		has_equal++;
	} else
		current_argv_len = strlen(current_argv);

	for (i = 0; long_options[i].name; i++) {
		/* find matching long option */
		if (strncmp(current_argv, long_options[i].name,
		    current_argv_len))
			continue;

		if (strlen(long_options[i].name) == current_argv_len) {
			/* exact match */
			/* an exact match overrides any earlier ambiguity */
			match = i;
			ambiguous = 0;
			break;
		}
		/*
		 * If this is a known short option, don't allow
		 * a partial match of a single character.
		 */
		if (short_too && current_argv_len == 1)
			continue;

		if (match == -1)	/* partial match */
			match = i;
		else if (!IDENTICAL_INTERPRETATION(i, match))
			ambiguous = 1;
	}
	if (ambiguous) {
		/* ambiguous abbreviation */
		if (PRINT_ERROR)
			warnx(ambig, (int)current_argv_len,
			     current_argv);
		optopt = 0;
		return (BADCH);
	}
	if (match != -1) {		/* option found */
		/* "--name=value" given to an option that takes no value */
		if (long_options[match].has_arg == no_argument
		    && has_equal) {
			if (PRINT_ERROR)
				warnx(noarg, (int)current_argv_len,
				     current_argv);
			/*
			 * XXX: GNU sets optopt to val regardless of flag
			 */
			if (long_options[match].flag == NULL)
				optopt = long_options[match].val;
			else
				optopt = 0;
			return (BADARG);
		}
		if (long_options[match].has_arg == required_argument ||
		    long_options[match].has_arg == optional_argument) {
			if (has_equal)
				optarg = has_equal;
			else if (long_options[match].has_arg ==
			    required_argument) {
				/*
				 * optional argument doesn't use next nargv
				 */
				optarg = nargv[optind++];
			}
		}
		if ((long_options[match].has_arg == required_argument)
		    && (optarg == NULL)) {
			/*
			 * Missing argument; leading ':' indicates no error
			 * should be generated.
			 */
			if (PRINT_ERROR)
				warnx(recargstring,
				    current_argv);
			/*
			 * XXX: GNU sets optopt to val regardless of flag
			 */
			if (long_options[match].flag == NULL)
				optopt = long_options[match].val;
			else
				optopt = 0;
			/* undo the optind++ done while fetching the value */
			--optind;
			return (BADARG);
		}
	} else {			/* unknown option */
		if (short_too) {
			/* give the element back so the caller can try it as short options */
			--optind;
			return (-1);
		}
		if (PRINT_ERROR)
			warnx(illoptstring, current_argv);
		optopt = 0;
		return (BADCH);
	}
	if (idx)
		*idx = match;
	if (long_options[match].flag) {
		*long_options[match].flag = long_options[match].val;
		return (0);
	} else
		return (long_options[match].val);
#undef IDENTICAL_INTERPRETATION
}
|
||||
|
||||
/*
 * getopt_internal --
 *	Parse argc/argv argument vector.  Called by user level routines.
 *
 * nargc, nargv - argument count and vector.
 * options      - short-option string.  A leading '-' turns on
 *                FLAG_ALLARGS, a leading '+' turns off FLAG_PERMUTE;
 *                either prefix is then skipped.
 * long_options - long-option table, or NULL for short options only.
 * idx          - passed through to parse_long_options().
 * flags        - combination of FLAG_PERMUTE / FLAG_ALLARGS /
 *                FLAG_LONGONLY.
 *
 * Returns:
 *   -1       when options is NULL, at the end of the argument vector, at
 *            "--", at the first non-option when not permuting, or for a
 *            lone "-" that is not listed in options;
 *   INORDER  for a non-option when FLAG_ALLARGS is set (optarg points
 *            at it);
 *   BADCH    for an unknown option character (or ':');
 *   BADARG   when a required argument is missing;
 *   otherwise the matched option character, or whatever
 *   parse_long_options() returned for a long option.
 *
 * State kept between calls: the file-static `place' (scan position
 * inside the current argv element), nonopt_start / nonopt_end (run of
 * skipped non-options awaiting permutation) and the function-static
 * posixly_correct cache, plus the globals optind, optarg, optopt and
 * optreset.
 */
static int
getopt_internal(int nargc, char * const *nargv, const char *options,
	const struct option *long_options, int *idx, int flags)
{
	char *oli;				/* option letter list index */
	int optchar, short_too;
	static int posixly_correct = -1;	/* -1: environment not checked yet */

	if (options == NULL)
		return (-1);

	/*
	 * XXX Some GNU programs (like cvs) set optind to 0 instead of
	 * XXX using optreset.  Work around this braindamage.
	 */
	if (optind == 0)
		optind = optreset = 1;

	/*
	 * Disable GNU extensions if POSIXLY_CORRECT is set or options
	 * string begins with a '+'.
	 *
	 * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
	 *                 optreset != 0 for GNU compatibility.
	 */
	if (posixly_correct == -1 || optreset != 0)
		posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
	if (*options == '-')
		flags |= FLAG_ALLARGS;
	else if (posixly_correct || *options == '+')
		flags &= ~FLAG_PERMUTE;
	if (*options == '+' || *options == '-')
		options++;

	optarg = NULL;
	if (optreset)
		nonopt_start = nonopt_end = -1;
start:
	if (optreset || !*place) {		/* update scanning pointer */
		optreset = 0;
		if (optind >= nargc) {          /* end of argument vector */
			place = EMSG;
			if (nonopt_end != -1) {
				/* do permutation, if we have to */
				permute_args(nonopt_start, nonopt_end,
				    optind, nargv);
				optind -= nonopt_end - nonopt_start;
			}
			else if (nonopt_start != -1) {
				/*
				 * If we skipped non-options, set optind
				 * to the first of them.
				 */
				optind = nonopt_start;
			}
			nonopt_start = nonopt_end = -1;
			return (-1);
		}
		/*
		 * A non-option is anything not starting with '-', or a
		 * lone "-" when '-' is not listed in options.
		 */
		if (*(place = nargv[optind]) != '-' ||
		    (place[1] == '\0' && strchr(options, '-') == NULL)) {
			place = EMSG;		/* found non-option */
			if (flags & FLAG_ALLARGS) {
				/*
				 * GNU extension:
				 * return non-option as argument to option 1
				 */
				optarg = nargv[optind++];
				return (INORDER);
			}
			if (!(flags & FLAG_PERMUTE)) {
				/*
				 * If no permutation wanted, stop parsing
				 * at first non-option.
				 */
				return (-1);
			}
			/* do permutation */
			if (nonopt_start == -1)
				nonopt_start = optind;
			else if (nonopt_end != -1) {
				permute_args(nonopt_start, nonopt_end,
				    optind, nargv);
				nonopt_start = optind -
				    (nonopt_end - nonopt_start);
				nonopt_end = -1;
			}
			optind++;
			/* process next argument */
			goto start;
		}
		/* first option after a run of non-options closes that run */
		if (nonopt_start != -1 && nonopt_end == -1)
			nonopt_end = optind;

		/*
		 * If we have "-" do nothing, if "--" we are done.
		 */
		if (place[1] != '\0' && *++place == '-' && place[1] == '\0') {
			optind++;
			place = EMSG;
			/*
			 * We found an option (--), so if we skipped
			 * non-options, we have to permute.
			 */
			if (nonopt_end != -1) {
				permute_args(nonopt_start, nonopt_end,
				    optind, nargv);
				optind -= nonopt_end - nonopt_start;
			}
			nonopt_start = nonopt_end = -1;
			return (-1);
		}
	}

	/*
	 * Check long options if:
	 *  1) we were passed some
	 *  2) the arg is not just "-"
	 *  3) either the arg starts with -- or we are getopt_long_only()
	 */
	if (long_options != NULL && place != nargv[optind] &&
	    (*place == '-' || (flags & FLAG_LONGONLY))) {
		short_too = 0;
		if (*place == '-')
			place++;		/* --foo long option */
		else if (*place != ':' && strchr(options, *place) != NULL)
			short_too = 1;		/* could be short option too */

		optchar = parse_long_options(nargv, options, long_options,
		    idx, short_too);
		/* -1 means "not a long option": fall through to short parsing */
		if (optchar != -1) {
			place = EMSG;
			return (optchar);
		}
	}

	/* fetch the next option letter and look it up in options */
	if ((optchar = (int)*place++) == (int)':' ||
	    (optchar == (int)'-' && *place != '\0') ||
	    (oli = (char*)strchr(options, optchar)) == NULL) {
		/*
		 * If the user specified "-" and '-' isn't listed in
		 * options, return -1 (non-option) as per POSIX.
		 * Otherwise, it is an unknown option character (or ':').
		 */
		if (optchar == (int)'-' && *place == '\0')
			return (-1);
		/* last letter of this element: move on to the next one */
		if (!*place)
			++optind;
		if (PRINT_ERROR)
			warnx(illoptchar, optchar);
		optopt = optchar;
		return (BADCH);
	}
	/* "W;" in options: -W foo is handled as the long option --foo */
	if (long_options != NULL && optchar == 'W' && oli[1] == ';') {
		/* -W long-option */
		if (*place)			/* no space */
			/* NOTHING */;
		else if (++optind >= nargc) {	/* no arg */
			place = EMSG;
			if (PRINT_ERROR)
				warnx(recargchar, optchar);
			optopt = optchar;
			return (BADARG);
		} else				/* white space */
			place = nargv[optind];
		optchar = parse_long_options(nargv, options, long_options,
		    idx, 0);
		place = EMSG;
		return (optchar);
	}
	if (*++oli != ':') {			/* doesn't take argument */
		if (!*place)
			++optind;
	} else {				/* takes (optional) argument */
		optarg = NULL;
		if (*place)			/* no white space */
			optarg = place;
		else if (oli[1] != ':') {	/* arg not optional */
			if (++optind >= nargc) {	/* no arg */
				place = EMSG;
				if (PRINT_ERROR)
					warnx(recargchar, optchar);
				optopt = optchar;
				return (BADARG);
			} else
				optarg = nargv[optind];
		}
		/* the rest of this element (if any) was the argument */
		place = EMSG;
		++optind;
	}
	/* dump back option letter */
	return (optchar);
}
|
||||
|
||||
/*
|
||||
* getopt_long --
|
||||
* Parse argc/argv argument vector.
|
||||
*/
|
||||
int
|
||||
getopt_long(int nargc, char * const *nargv, const char *options,
|
||||
const struct option *long_options, int *idx)
|
||||
{
|
||||
|
||||
return (getopt_internal(nargc, nargv, options, long_options, idx,
|
||||
FLAG_PERMUTE));
|
||||
}
|
||||
|
||||
/*
|
||||
* getopt_long_only --
|
||||
* Parse argc/argv argument vector.
|
||||
*/
|
||||
int
|
||||
getopt_long_only(int nargc, char * const *nargv, const char *options,
|
||||
const struct option *long_options, int *idx)
|
||||
{
|
||||
|
||||
return (getopt_internal(nargc, nargv, options, long_options, idx,
|
||||
FLAG_PERMUTE|FLAG_LONGONLY));
|
||||
}
|
||||
|
||||
//extern int getopt_long(int nargc, char * const *nargv, const char *options,
|
||||
// const struct option *long_options, int *idx);
|
||||
//extern int getopt_long_only(int nargc, char * const *nargv, const char *options,
|
||||
// const struct option *long_options, int *idx);
|
||||
/*
|
||||
* Previous MinGW implementation had...
|
||||
*/
|
||||
#ifndef HAVE_DECL_GETOPT
|
||||
/*
|
||||
* ...for the long form API only; keep this for compatibility.
|
||||
*/
|
||||
# define HAVE_DECL_GETOPT 1
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */
|
|
@ -48,7 +48,6 @@ android {
|
|||
dependencies {
|
||||
implementation fileTree(dir: 'libs', include: ['*.jar'])
|
||||
|
||||
implementation 'com.android.support:appcompat-v7:27.1.1'
|
||||
testImplementation 'junit:junit:4.12'
|
||||
|
||||
androidTestImplementation 'com.android.support.test:runner:1.0.2'
|
||||
|
|
|
@ -6,6 +6,10 @@ PROJECT_VERSION ?= $(shell cat ../../VERSION | tr -d '\n')
|
|||
|
||||
include ../definitions.mk
|
||||
|
||||
ifeq ($(TARGET),host-win)
|
||||
LIBS := '$(shell cygpath -w $(subst .lib,,$(LIBS)))'
|
||||
endif
|
||||
|
||||
default: build
|
||||
|
||||
clean:
|
||||
|
@ -32,7 +36,7 @@ configure: deepspeech_wrap.cxx package.json
|
|||
$(NODE_BUILD_TOOL) configure $(NODE_BUILD_VERBOSE)
|
||||
|
||||
build: configure deepspeech_wrap.cxx
|
||||
AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS="$(LIBS)" $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_ABI_TARGET) rebuild $(NODE_BUILD_VERBOSE)
|
||||
AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS=$(LIBS) $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_ABI_TARGET) rebuild $(NODE_BUILD_VERBOSE)
|
||||
|
||||
copy-deps: build
|
||||
$(call copy_missing_libs,lib/binding/*/*/*/deepspeech.node,lib/binding/*/*/)
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
"target_name": "deepspeech",
|
||||
"sources": [ "deepspeech_wrap.cxx" ],
|
||||
"libraries": [
|
||||
"${LIBS}"
|
||||
"$(LIBS)"
|
||||
],
|
||||
"include_dirs": [
|
||||
"../"
|
||||
|
|
|
@ -8,7 +8,7 @@ bindings-clean:
|
|||
|
||||
bindings-build:
|
||||
pip install --quiet $(PYTHON_PACKAGES) wheel==0.31.0 setuptools==39.1.0
|
||||
AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext $(PYTHON_PLATFORM_NAME)
|
||||
PATH=$(TOOLCHAIN):$$PATH AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext $(PYTHON_PLATFORM_NAME)
|
||||
|
||||
MANIFEST.in: bindings-build
|
||||
> $@
|
||||
|
|
|
@ -39,11 +39,27 @@ class BuildExtFirst(build):
|
|||
('build_clib', build.has_c_libraries),
|
||||
('build_scripts', build.has_scripts)]
|
||||
|
||||
# Properly pass arguments for linking, setuptools will perform some checks
|
||||
def lib_dirs_split(a):
    """Split a linker-flags string into a list of library directories.

    On Windows (``os.name == 'nt'``) no directories are extracted and an
    empty list is returned.  Everywhere else the string is cut at each
    ``-L`` marker and whatever precedes the first marker is discarded.
    Entries are returned unstripped; the caller strips them.

    Previously any ``os.name`` other than ``'posix'`` or ``'nt'`` fell off
    the end of the function and returned ``None``, which made the caller's
    ``map()`` raise ``TypeError``.  Such platforms now use the ``-L``
    convention.
    """
    if os.name == 'nt':
        return []

    return a.split('-L')[1:]
|
||||
|
||||
def libs_split(a):
    """Split a linker-flags string into a list of library names.

    On Windows (``os.name == 'nt'``) only the text before the first
    ``.lib`` is kept, so at most one entry is returned.  Everywhere else
    the string is cut at each ``-l`` marker and whatever precedes the
    first marker is discarded.  Entries are returned unstripped; the
    caller strips them.

    Previously any ``os.name`` other than ``'posix'`` or ``'nt'`` fell off
    the end of the function and returned ``None``, which made the caller's
    ``map()`` raise ``TypeError``.  Such platforms now use the ``-l``
    convention.
    """
    if os.name == 'nt':
        return a.split('.lib')[0:1]

    return a.split('-l')[1:]
|
||||
|
||||
ds_ext = Extension('deepspeech._impl',
|
||||
['impl.i'],
|
||||
include_dirs = [ numpy_include, '../' ],
|
||||
library_dirs = list(map(lambda x: x.strip(), os.getenv('MODEL_LDFLAGS', '').split('-L')[1:])),
|
||||
libraries = list(map(lambda x: x.strip(), os.getenv('MODEL_LIBS', '').split('-l')[1:])))
|
||||
library_dirs = list(map(lambda x: x.strip(), lib_dirs_split(os.getenv('MODEL_LDFLAGS', '')))),
|
||||
libraries = list(map(lambda x: x.strip(), libs_split(os.getenv('MODEL_LIBS', ''))))
|
||||
)
|
||||
|
||||
setup(name = project_name,
|
||||
description = 'A library for running inference on a DeepSpeech model',
|
||||
|
|
|
@ -23,3 +23,4 @@ build:
|
|||
tests_cmdline: ''
|
||||
convert_graphdef: ''
|
||||
benchmark_model_bin: ''
|
||||
tensorflow_git_desc: 'TensorFlow: v1.12.0-21-ge763555'
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
# Taskcluster
|
||||
|
||||
This directory contains files associated with Taskcluster -- a task execution framework for Mozilla's Continuous Integration system.
|
||||
|
||||
Please consult the [existing Taskcluster documentation](https://docs.taskcluster.net/docs).
|
|
@ -12,7 +12,7 @@ build:
|
|||
system_config:
|
||||
>
|
||||
${swig.patch_nodejs.linux}
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.android-arm64/artifacts/public/home.tar.xz"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.android-arm64/artifacts/public/home.tar.xz"
|
||||
scripts:
|
||||
build: "taskcluster/android-build.sh arm64-v8a"
|
||||
package: "taskcluster/android-package.sh arm64-v8a"
|
||||
|
|
|
@ -12,7 +12,7 @@ build:
|
|||
system_config:
|
||||
>
|
||||
${swig.patch_nodejs.linux}
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.android-armv7/artifacts/public/home.tar.xz"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.android-armv7/artifacts/public/home.tar.xz"
|
||||
scripts:
|
||||
build: "taskcluster/android-build.sh armeabi-v7a"
|
||||
package: "taskcluster/android-package.sh armeabi-v7a"
|
||||
|
|
|
@ -13,7 +13,7 @@ build:
|
|||
system_setup:
|
||||
>
|
||||
apt-get -qq -y install curl && ${swig.packages.install_script}
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.android-armv7/artifacts/public/home.tar.xz"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.android-armv7/artifacts/public/home.tar.xz"
|
||||
scripts:
|
||||
build: "taskcluster/android-apk-build.sh"
|
||||
package: "taskcluster/android-apk-package.sh"
|
||||
|
|
|
@ -12,7 +12,7 @@ build:
|
|||
system_config:
|
||||
>
|
||||
${swig.patch_nodejs.linux}
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.android-arm64/artifacts/public/home.tar.xz"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.android-arm64/artifacts/public/home.tar.xz"
|
||||
scripts:
|
||||
build: "taskcluster/android-build.sh x86_64"
|
||||
package: "taskcluster/android-package.sh x86_64"
|
||||
|
|
|
@ -6,7 +6,7 @@ build:
|
|||
- "index.project.deepspeech.deepspeech.native_client.osx.${event.head.sha}"
|
||||
- "notify.irc-channel.${notifications.irc}.on-exception"
|
||||
- "notify.irc-channel.${notifications.irc}.on-failed"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.osx/artifacts/public/home.tar.xz"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.osx/artifacts/public/home.tar.xz"
|
||||
scripts:
|
||||
build: "taskcluster/host-build.sh"
|
||||
package: "taskcluster/package.sh"
|
||||
|
|
|
@ -6,7 +6,7 @@ build:
|
|||
- "index.project.deepspeech.deepspeech.native_client.osx-ctc.${event.head.sha}"
|
||||
- "notify.irc-channel.${notifications.irc}.on-exception"
|
||||
- "notify.irc-channel.${notifications.irc}.on-failed"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.osx/artifacts/public/home.tar.xz"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.osx/artifacts/public/home.tar.xz"
|
||||
maxRunTime: 14400
|
||||
scripts:
|
||||
build: 'taskcluster/decoder-build.sh'
|
||||
|
|
|
@ -14,7 +14,7 @@ build:
|
|||
system_config:
|
||||
>
|
||||
${swig.patch_nodejs.linux}
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.cpu/artifacts/public/home.tar.xz"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.cpu/artifacts/public/home.tar.xz"
|
||||
scripts:
|
||||
build: "taskcluster/host-build.sh"
|
||||
package: "taskcluster/package.sh"
|
||||
|
|
|
@ -14,7 +14,7 @@ build:
|
|||
system_config:
|
||||
>
|
||||
${swig.patch_nodejs.linux}
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.cpu/artifacts/public/home.tar.xz"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.cpu/artifacts/public/home.tar.xz"
|
||||
scripts:
|
||||
build: 'taskcluster/decoder-build.sh'
|
||||
package: 'taskcluster/decoder-package.sh'
|
||||
|
|
|
@ -12,7 +12,7 @@ build:
|
|||
system_config:
|
||||
>
|
||||
${swig.patch_nodejs.linux}
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.gpu/artifacts/public/home.tar.xz"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.gpu/artifacts/public/home.tar.xz"
|
||||
maxRunTime: 14400
|
||||
scripts:
|
||||
build: "taskcluster/cuda-build.sh"
|
||||
|
|
|
@ -4,7 +4,7 @@ build:
|
|||
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.arm64"
|
||||
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.arm64"
|
||||
- "index.project.deepspeech.deepspeech.native_client.arm64.${event.head.sha}"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.arm64/artifacts/public/home.tar.xz"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.arm64/artifacts/public/home.tar.xz"
|
||||
## multistrap 2.2.0-ubuntu1 is broken in 14.04: https://bugs.launchpad.net/ubuntu/+source/multistrap/+bug/1313787
|
||||
system_setup:
|
||||
>
|
||||
|
|
|
@ -4,7 +4,7 @@ build:
|
|||
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.arm"
|
||||
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.arm"
|
||||
- "index.project.deepspeech.deepspeech.native_client.arm.${event.head.sha}"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.arm/artifacts/public/home.tar.xz"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.arm/artifacts/public/home.tar.xz"
|
||||
## multistrap 2.2.0-ubuntu1 is broken in 14.04: https://bugs.launchpad.net/ubuntu/+source/multistrap/+bug/1313787
|
||||
system_setup:
|
||||
>
|
||||
|
|
|
@ -16,7 +16,7 @@ build:
|
|||
system_config:
|
||||
>
|
||||
${swig.patch_nodejs.linux}
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.cpu/artifacts/public/home.tar.xz"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.cpu/artifacts/public/home.tar.xz"
|
||||
scripts:
|
||||
build: "taskcluster/node-build.sh"
|
||||
package: "taskcluster/node-package.sh"
|
||||
|
|
|
@ -11,6 +11,8 @@ build:
|
|||
- "android-arm64-cpu-opt"
|
||||
- "android-armv7-cpu-opt"
|
||||
- "android-java-opt"
|
||||
- "win-amd64-cpu-opt"
|
||||
- "win-amd64-gpu-opt"
|
||||
allowed:
|
||||
- "tag"
|
||||
ref_match: "refs/tags/"
|
||||
|
@ -39,8 +41,13 @@ build:
|
|||
- "linux-arm64-cpu-opt"
|
||||
- "android-arm64-cpu-opt"
|
||||
- "android-armv7-cpu-opt"
|
||||
- "win-amd64-cpu-opt"
|
||||
- "win-amd64-gpu-opt"
|
||||
java_aar:
|
||||
- "android-java-opt"
|
||||
nuget:
|
||||
- "win-amd64-cpu-opt"
|
||||
- "win-amd64-gpu-opt"
|
||||
metadata:
|
||||
name: "DeepSpeech GitHub Packages"
|
||||
description: "Trigger Uploading of DeepSpeech Packages to GitHub release page"
|
||||
|
|
|
@ -17,6 +17,7 @@ build:
|
|||
javascript: []
|
||||
java_aar:
|
||||
- "android-java-opt"
|
||||
nuget: []
|
||||
metadata:
|
||||
name: "DeepSpeech Android lib Packages"
|
||||
description: "Trigger Uploading of DeepSpeech Android lib to JCenter registry"
|
||||
|
|
|
@ -21,6 +21,7 @@ build:
|
|||
- "linux-amd64-gpu-opt"
|
||||
# CPU package with all archs
|
||||
- "node-package"
|
||||
nuget: []
|
||||
metadata:
|
||||
name: "DeepSpeech NPM Packages"
|
||||
description: "Trigger Uploading of DeepSpeech Packages to NPM registry"
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
build:
|
||||
template_file: simple-task.tyml
|
||||
dependencies:
|
||||
# Make sure builds are ready
|
||||
- "win-amd64-cpu-opt"
|
||||
- "win-amd64-gpu-opt"
|
||||
allowed:
|
||||
- "tag"
|
||||
ref_match: "refs/tags/"
|
||||
routes:
|
||||
- "notify.irc-channel.${notifications.irc}.on-exception"
|
||||
- "notify.irc-channel.${notifications.irc}.on-failed"
|
||||
upload_targets:
|
||||
- "nuget"
|
||||
artifacts_deps:
|
||||
python: []
|
||||
cpp: []
|
||||
javascript: []
|
||||
java_aar: []
|
||||
nuget:
|
||||
- "win-amd64-cpu-opt"
|
||||
- "win-amd64-gpu-opt"
|
||||
metadata:
|
||||
name: "DeepSpeech NuGet Packages"
|
||||
description: "Trigger Uploading of DeepSpeech .Net Framework bindings to NuGet"
|
|
@ -25,6 +25,7 @@ build:
|
|||
- "linux-amd64-gpu-opt"
|
||||
- "linux-rpi3-cpu-opt"
|
||||
# - "linux-arm64-cpu-opt" Aarch64 packages are refused by upload.pypi.org
|
||||
nuget: []
|
||||
metadata:
|
||||
name: "DeepSpeech PyPi Packages"
|
||||
description: "Trigger Uploading of DeepSpeech Packages to PyPi"
|
||||
|
|
|
@ -44,6 +44,10 @@ then:
|
|||
$map: { $eval: build.artifacts_deps.cpp }
|
||||
each(b):
|
||||
$eval: as_slugid(b)
|
||||
nuget:
|
||||
$map: { $eval: build.artifacts_deps.nuget }
|
||||
each(b):
|
||||
$eval: as_slugid(b)
|
||||
|
||||
metadata:
|
||||
name: ${build.metadata.name}
|
||||
|
|
|
@ -38,7 +38,7 @@ then:
|
|||
DEEPSPEECH_ARTIFACTS_ROOT_ARM64: https://queue.taskcluster.net/v1/task/${android_arm64_build}/artifacts/public
|
||||
DEEPSPEECH_ARTIFACTS_ROOT_ARMV7: https://queue.taskcluster.net/v1/task/${android_armv7_build}/artifacts/public
|
||||
DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
|
||||
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-14-g943a6c3"
|
||||
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
|
||||
|
||||
command:
|
||||
- "/bin/bash"
|
||||
|
|
|
@ -43,7 +43,7 @@ then:
|
|||
PIP_DEFAULT_TIMEOUT: "60"
|
||||
PIP_EXTRA_INDEX_URL: "https://lissyx.github.io/deepspeech-python-wheels/"
|
||||
EXTRA_PYTHON_CONFIGURE_OPTS: "--with-fpectl" # Required by Debian Stretch
|
||||
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-14-g943a6c3"
|
||||
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
|
||||
|
||||
command:
|
||||
- "/bin/bash"
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
build:
|
||||
template_file: test-win-opt-base.tyml
|
||||
dependencies:
|
||||
- "win-amd64-cpu-opt"
|
||||
- "test-training_upstream-linux-amd64-py27mu-opt"
|
||||
args:
|
||||
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-cppwin-ds-tests.sh"
|
||||
metadata:
|
||||
name: "DeepSpeech Windows AMD64 CPU C++ tests"
|
||||
description: "Testing DeepSpeech C++ for Windows/AMD64, CPU only, optimized version"
|
|
@ -41,7 +41,7 @@ then:
|
|||
DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
|
||||
DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pb
|
||||
DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pbmm
|
||||
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-14-g943a6c3"
|
||||
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
|
||||
|
||||
command:
|
||||
- - "/bin/bash"
|
||||
|
|
|
@ -45,7 +45,7 @@ then:
|
|||
DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pbmm
|
||||
DECODER_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_amd64_ctc}/artifacts/public
|
||||
PIP_DEFAULT_TIMEOUT: "60"
|
||||
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-14-g943a6c3"
|
||||
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
|
||||
|
||||
command:
|
||||
- "/bin/bash"
|
||||
|
|
|
@ -4,7 +4,7 @@ build:
|
|||
- "test-training_upstream-linux-amd64-py27mu-opt"
|
||||
args:
|
||||
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/tc-lite_benchmark_model-ds-tests.sh"
|
||||
benchmark_model_bin: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.cpu/artifacts/public/lite_benchmark_model"
|
||||
benchmark_model_bin: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.cpu/artifacts/public/lite_benchmark_model"
|
||||
metadata:
|
||||
name: "DeepSpeech Linux AMD64 CPU TF Lite benchmark_model"
|
||||
description: "Testing DeepSpeech TF Lite benchmark_model for Linux/AMD64, CPU only, optimized version"
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
build:
|
||||
template_file: test-win-opt-base.tyml
|
||||
dependencies:
|
||||
- "win-amd64-cpu-opt"
|
||||
- "test-training_upstream-linux-amd64-py27mu-opt"
|
||||
args:
|
||||
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-netframework-ds-tests.sh"
|
||||
metadata:
|
||||
name: "DeepSpeech Windows AMD64 CPU .Net Framework tests"
|
||||
description: "Testing DeepSpeech .Net Framework for Windows/AMD64, CPU only, optimized version"
|
|
@ -43,7 +43,7 @@ then:
|
|||
PIP_DEFAULT_TIMEOUT: "60"
|
||||
PIP_EXTRA_INDEX_URL: "https://www.piwheels.org/simple"
|
||||
EXTRA_PYTHON_CONFIGURE_OPTS: "--with-fpectl" # Required by Raspbian Stretch / PiWheels
|
||||
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-14-g943a6c3"
|
||||
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
|
||||
|
||||
command:
|
||||
- "/bin/bash"
|
||||
|
|
|
@ -7,7 +7,7 @@ build:
|
|||
apt-get -qq -y install ${python.packages_trusty.apt}
|
||||
args:
|
||||
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/tc-train-tests.sh 2.7.14:mu"
|
||||
convert_graphdef: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.943a6c332331c0ceeba981b51c24abfed2cd6ffa.cpu/artifacts/public/convert_graphdef_memmapped_format"
|
||||
convert_graphdef: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.cpu/artifacts/public/convert_graphdef_memmapped_format"
|
||||
metadata:
|
||||
name: "DeepSpeech Linux AMD64 CPU upstream training Py2.7 mu"
|
||||
description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 using upstream TensorFlow Python 2.7 mu, CPU only, optimized version"
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
build:
|
||||
template_file: test-linux-opt-base.tyml
|
||||
dependencies:
|
||||
- "linux-amd64-ctc-opt"
|
||||
system_setup:
|
||||
>
|
||||
apt-get -qq -y install ${python.packages_trusty.apt}
|
||||
args:
|
||||
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/tc-train-tests.sh 3.4.8:m"
|
||||
metadata:
|
||||
name: "DeepSpeech Linux AMD64 CPU upstream training Py3.4"
|
||||
description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 using upstream TensorFlow Python 3.4, CPU only, optimized version"
|
|
@ -0,0 +1,80 @@
|
|||
$if: '(event.event != "push") && (event.event != "tag")'
|
||||
then:
|
||||
taskId: ${taskcluster.taskId}
|
||||
provisionerId: ${taskcluster.docker.provisionerId}
|
||||
workerType: ${taskcluster.docker.workerTypeWin}
|
||||
taskGroupId: ${taskcluster.taskGroupId}
|
||||
schedulerId: ${taskcluster.schedulerId}
|
||||
dependencies:
|
||||
$map: { $eval: build.dependencies }
|
||||
each(b):
|
||||
$eval: as_slugid(b)
|
||||
created: { $fromNow: '0 sec' }
|
||||
deadline: { $fromNow: '1 day' }
|
||||
expires: { $fromNow: '7 days' }
|
||||
|
||||
extra:
|
||||
github:
|
||||
{ $eval: taskcluster.github_events.pull_request }
|
||||
|
||||
routes:
|
||||
- "notify.irc-channel.${notifications.irc}.on-exception"
|
||||
- "notify.irc-channel.${notifications.irc}.on-failed"
|
||||
|
||||
scopes: [
|
||||
"queue:route:notify.irc-channel.*",
|
||||
]
|
||||
|
||||
payload:
|
||||
maxRunTime: { $eval: to_int(build.maxRunTime) }
|
||||
mounts:
|
||||
- file: msys2-base-x86_64.tar.xz
|
||||
content:
|
||||
sha256: 4e799b5c3efcf9efcb84923656b7bcff16f75a666911abd6620ea8e5e1e9870c
|
||||
url: >-
|
||||
https://sourceforge.net/projects/msys2/files/Base/x86_64/msys2-base-x86_64-20180531.tar.xz/download
|
||||
|
||||
env:
|
||||
$let:
|
||||
training: { $eval: as_slugid("test-training_upstream-linux-amd64-py27mu-opt") }
|
||||
win_amd64_build: { $eval: as_slugid("win-amd64-cpu-opt") }
|
||||
node_package: { $eval: as_slugid("node-package") }
|
||||
in:
|
||||
DEEPSPEECH_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${win_amd64_build}/artifacts/public
|
||||
DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package}/artifacts/public
|
||||
DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
|
||||
DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pb
|
||||
DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pbmm
|
||||
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
|
||||
TC_MSYS_VERSION: 'MSYS_NT-6.3'
|
||||
MSYS: 'winsymlinks:nativestrict'
|
||||
|
||||
command:
|
||||
- >-
|
||||
"C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
|
||||
"C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
|
||||
- .\msys64\usr\bin\bash.exe --login -cx "exit"
|
||||
- .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu"
|
||||
- .\msys64\usr\bin\bash.exe --login -cxe "
|
||||
export LC_ALL=C &&
|
||||
export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" &&
|
||||
export TASKCLUSTER_ARTIFACTS=\"$USERPROFILE/public\" &&
|
||||
export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" &&
|
||||
export TASKCLUSTER_TMP_DIR="$TASKCLUSTER_TASK_DIR/tmp" &&
|
||||
export PIP_DEFAULT_TIMEOUT=60 &&
|
||||
(mkdir $TASKCLUSTER_TASK_DIR || rm -fr $TASKCLUSTER_TASK_DIR/*) && cd $TASKCLUSTER_TASK_DIR &&
|
||||
env &&
|
||||
ln -s $USERPROFILE/msys64 $TASKCLUSTER_TASK_DIR/msys64 &&
|
||||
git clone --quiet ${event.head.repo.url} $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/ &&
|
||||
cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} &&
|
||||
cd $TASKCLUSTER_TASK_DIR &&
|
||||
pacman --noconfirm -R bsdtar &&
|
||||
pacman --noconfirm -S tar vim &&
|
||||
/bin/bash ${build.args.tests_cmdline} ;
|
||||
cd $TASKCLUSTER_TASK_DIR/../ && rm -fr tc-workdir/ && exit $TASKCLUSTER_TASK_EXIT_CODE"
|
||||
|
||||
metadata:
|
||||
name: ${build.metadata.name}
|
||||
description: ${build.metadata.description}
|
||||
owner: ${event.head.user.email}
|
||||
source: ${event.head.repo.url}
|
|
@ -0,0 +1,17 @@
|
|||
build:
|
||||
template_file: win-opt-base.tyml
|
||||
routes:
|
||||
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.win"
|
||||
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.win"
|
||||
- "index.project.deepspeech.deepspeech.native_client.win.${event.head.sha}"
|
||||
- "notify.irc-channel.${notifications.irc}.on-exception"
|
||||
- "notify.irc-channel.${notifications.irc}.on-failed"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.win/artifacts/public/home.tar.xz"
|
||||
scripts:
|
||||
build: "taskcluster/win-build.sh"
|
||||
package: "taskcluster/win-package.sh"
|
||||
nc_asset_name: "native_client.amd64.cpu.win.tar.xz"
|
||||
maxRunTime: 14400
|
||||
metadata:
|
||||
name: "DeepSpeech Windows AMD64 CPU"
|
||||
description: "Building DeepSpeech for Windows AMD64, CPU only, optimized version"
|
|
@ -0,0 +1,17 @@
|
|||
build:
|
||||
template_file: win-opt-base.tyml
|
||||
routes:
|
||||
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.win-cuda"
|
||||
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.win-cuda"
|
||||
- "index.project.deepspeech.deepspeech.native_client.win-cuda.${event.head.sha}"
|
||||
- "notify.irc-channel.${notifications.irc}.on-exception"
|
||||
- "notify.irc-channel.${notifications.irc}.on-failed"
|
||||
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e76355516a0c417cfd3fa8a122405477fcd1af0d.win-cuda/artifacts/public/home.tar.xz"
|
||||
scripts:
|
||||
build: "taskcluster/win-build.sh --cuda"
|
||||
package: "taskcluster/win-package.sh"
|
||||
nc_asset_name: "native_client.amd64.gpu.win.tar.xz"
|
||||
maxRunTime: 14400
|
||||
metadata:
|
||||
name: "DeepSpeech Windows AMD64 CUDA"
|
||||
description: "Building DeepSpeech for Windows AMD64, CUDA-enabled, optimized version"
|
|
@ -0,0 +1,42 @@
|
|||
#!/bin/bash
#
# Windows build driver: builds the native client with Bazel, then the
# deepspeech binary, the .NET Framework projects and the NuGet package.
#
# Usage: win-build.sh [--cuda]
#   --cuda   build with TF_NEED_CUDA=1 and name the package "DeepSpeech-GPU";
#            anything else builds CPU-only and names it "DeepSpeech".

set -xe

cuda=$1

# Shared helpers (do_bazel_build, do_nuget_build, shutdown_bazel, ...).
source $(dirname "$0")/../tc-tests-utils.sh

# Variables coming from the TensorFlow checkout (BAZEL_*_FLAGS, TF_CUDA_FLAGS).
source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh

BAZEL_TARGETS="
//native_client:libdeepspeech.so
//native_client:generate_trie
"

SYSTEM_TARGET=host-win

# Pick package name and Bazel flags according to the requested flavor.
case "${cuda}" in
    --cuda)
        PROJECT_NAME="DeepSpeech-GPU"
        BAZEL_ENV_FLAGS="TF_NEED_CUDA=1 ${TF_CUDA_FLAGS}"
        BAZEL_BUILD_FLAGS="${BAZEL_CUDA_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BAZEL_OPT_FLAGS}"
        ;;
    *)
        PROJECT_NAME="DeepSpeech"
        BAZEL_ENV_FLAGS="TF_NEED_CUDA=0"
        BAZEL_BUILD_FLAGS="${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}"
        ;;
esac

do_bazel_build

if [ "${cuda}" = "--cuda" ]; then
    # CUDA builds only: duplicate the .ifso file produced by Bazel under the
    # libdeepspeech.so.if.lib name (presumably the name the later link steps
    # look for -- confirm against the native_client Makefile).
    native_client_bin=${DS_ROOT_TASK}/DeepSpeech/tf/bazel-bin/native_client
    cp ${native_client_bin}/liblibdeepspeech.so.ifso ${native_client_bin}/libdeepspeech.so.if.lib
fi

# Append the Chocolatey bin directory to PATH for the steps below.
export PATH=$PATH:$(cygpath ${ChocolateyInstall})/bin

do_deepspeech_binary_build

do_deepspeech_netframework_build

do_nuget_build "${PROJECT_NAME}"

shutdown_bazel
|
|
@ -0,0 +1,89 @@
|
|||
taskId: ${taskcluster.taskId}
|
||||
provisionerId: ${taskcluster.docker.provisionerId}
|
||||
workerType: ${taskcluster.docker.workerTypeWin}
|
||||
taskGroupId: ${taskcluster.taskGroupId}
|
||||
schedulerId: ${taskcluster.schedulerId}
|
||||
dependencies:
|
||||
$map: { $eval: build.dependencies }
|
||||
each(b):
|
||||
$eval: as_slugid(b)
|
||||
created: { $fromNow: '0 sec' }
|
||||
deadline: { $fromNow: '1 day' }
|
||||
expires:
|
||||
$if: '(event.event == "push") || (event.event == "tag")'
|
||||
then: { $fromNow: '6 months' }
|
||||
else: { $fromNow: '7 days' }
|
||||
|
||||
extra:
|
||||
nc_asset_name: { $eval: build.nc_asset_name }
|
||||
github:
|
||||
$if: '(event.event == "push") || (event.event == "tag")'
|
||||
then: { $eval: taskcluster.github_events.merge }
|
||||
else: { $eval: taskcluster.github_events.pull_request }
|
||||
|
||||
routes:
|
||||
$if: '(event.event == "push") || (event.event == "tag")'
|
||||
then:
|
||||
{ $eval: build.routes }
|
||||
|
||||
payload:
|
||||
maxRunTime: { $eval: to_int(build.maxRunTime) }
|
||||
mounts:
|
||||
- file: msys2-base-x86_64.tar.xz
|
||||
content:
|
||||
sha256: 4e799b5c3efcf9efcb84923656b7bcff16f75a666911abd6620ea8e5e1e9870c
|
||||
url: >-
|
||||
https://sourceforge.net/projects/msys2/files/Base/x86_64/msys2-base-x86_64-20180531.tar.xz/download
|
||||
|
||||
env:
|
||||
$let:
|
||||
training: { $eval: as_slugid("test-training_upstream-linux-amd64-py27mu-opt") }
|
||||
in:
|
||||
TC_MSYS_VERSION: 'MSYS_NT-6.3'
|
||||
MSYS: 'winsymlinks:nativestrict'
|
||||
TENSORFLOW_BUILD_ARTIFACT: ${build.tensorflow}
|
||||
DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
|
||||
|
||||
command:
|
||||
- >-
|
||||
"C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
|
||||
"C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
|
||||
- .\msys64\usr\bin\bash.exe --login -cx "exit"
|
||||
- .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu"
|
||||
- echo .\msys64\usr\bin\bash.exe --login -cxe "
|
||||
export LC_ALL=C &&
|
||||
export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" &&
|
||||
export TASKCLUSTER_ARTIFACTS=\"$USERPROFILE/public\" &&
|
||||
export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" &&
|
||||
(mkdir $TASKCLUSTER_TASK_DIR || rm -fr $TASKCLUSTER_TASK_DIR/*) && cd $TASKCLUSTER_TASK_DIR &&
|
||||
env &&
|
||||
ln -s $USERPROFILE/msys64 $TASKCLUSTER_TASK_DIR/msys64 &&
|
||||
(wget -O - $TENSORFLOW_BUILD_ARTIFACT | 7z x -txz -si -so | 7z x -o$TASKCLUSTER_TASK_DIR -aoa -ttar -si ) &&
|
||||
git clone --quiet ${event.head.repo.url} $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/ &&
|
||||
cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} &&
|
||||
ln -s $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/native_client/ $TASKCLUSTER_TASK_DIR/DeepSpeech/tf/native_client &&
|
||||
cd $TASKCLUSTER_TASK_DIR &&
|
||||
pacman --noconfirm -R bsdtar &&
|
||||
pacman --noconfirm -S tar make &&
|
||||
$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.build} &&
|
||||
$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.package} ;
|
||||
echo \"export TASKCLUSTER_TASK_EXIT_CODE=$?\" > $USERPROFILE/tc-exit.sh &&
|
||||
cd $TASKCLUSTER_TASK_DIR/../ && rm -fr tc-workdir/ && exit $TASKCLUSTER_TASK_EXIT_CODE" | cmd
|
||||
/k ""C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat"" x64
|
||||
|
||||
- .\msys64\usr\bin\bash.exe --login -cxe "source $USERPROFILE/tc-exit.sh &&
|
||||
exit $TASKCLUSTER_TASK_EXIT_CODE"
|
||||
|
||||
artifacts:
|
||||
- type: "directory"
|
||||
path: "public/"
|
||||
expires:
|
||||
$if: '(event.event == "push") || (event.event == "tag")'
|
||||
then: { $fromNow: '6 months' }
|
||||
else: { $fromNow: '7 days' }
|
||||
|
||||
metadata:
|
||||
name: ${build.metadata.name}
|
||||
description: ${build.metadata.description}
|
||||
owner: ${event.head.user.email}
|
||||
source: ${event.head.repo.url}
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
#
# Windows packaging step: collects the Bazel logs, the native client tarball,
# the NuGet package(s) and DeepSpeechConsole.exe into the TaskCluster
# artifacts directory.

set -xe

# First positional argument; not referenced anywhere else in this script.
arm_flavor=$1

source $(dirname "$0")/../tc-tests-utils.sh

csharp_examples=${DS_ROOT_TASK}/DeepSpeech/ds/examples/net_framework/CSharpExamples

# Best effort: never fail the task just because the directory cannot be made.
mkdir -p ${TASKCLUSTER_ARTIFACTS} || true

# Keep the Bazel build logs alongside the binaries.
cp ${DS_ROOT_TASK}/DeepSpeech/tf/bazel*.log ${TASKCLUSTER_ARTIFACTS}/

package_native_client "native_client.tar.xz"

# .NET deliverables: every generated .nupkg, then the console client.
cp ${csharp_examples}/*.nupkg ${TASKCLUSTER_ARTIFACTS}/

cp ${csharp_examples}/DeepSpeechConsole/bin/x64/Release/DeepSpeechConsole.exe ${TASKCLUSTER_ARTIFACTS}/
|
|
@ -4,6 +4,7 @@ taskcluster:
|
|||
provisionerId: aws-provisioner-v1
|
||||
workerType: deepspeech-worker
|
||||
workerTypeKvm: deepspeech-kvm-worker
|
||||
workerTypeWin: deepspeech-win
|
||||
dockerrpi3:
|
||||
provisionerId: deepspeech-provisioner
|
||||
workerType: ds-rpi3
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
#!/bin/bash
#
# Windows C++ client tests: download the test material, put it on PATH,
# then run the TensorFlow version check and the basic inference tests.

set -xe

source $(dirname "$0")/tc-tests-utils.sh

ds_material_dir="${TASKCLUSTER_TMP_DIR}/ds"

download_material "${ds_material_dir}"

# Downloaded binaries take precedence over anything already on PATH.
export PATH=${ds_material_dir}/:$PATH

check_tensorflow_version

run_basic_inference_tests
|
|
@ -0,0 +1,19 @@
|
|||
#!/bin/bash
#
# .NET Framework tests: download the test data, install the NuGet package
# and run the DeepSpeechConsole.exe inference tests.
#
# Usage: tc-netframework-ds-tests.sh [--cuda]
#   --cuda   test the "DeepSpeech-GPU" package instead of "DeepSpeech".

set -xe

cuda=$1

source $(dirname "$0")/tc-tests-utils.sh

# CPU package by default; switch to the GPU one only on explicit request.
PROJECT_NAME="DeepSpeech"
if [ "${cuda}" = "--cuda" ]; then
    PROJECT_NAME="DeepSpeech-GPU"
fi

download_data

install_nuget "${PROJECT_NAME}"

run_netframework_inference_tests
|
|
@ -7,6 +7,11 @@ if [ "${OS}" = "Linux" ]; then
|
|||
export DS_ROOT_TASK=${HOME}
|
||||
fi;
|
||||
|
||||
if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then
|
||||
export DS_ROOT_TASK=${TASKCLUSTER_TASK_DIR}
|
||||
export PLATFORM_EXE_SUFFIX=.exe
|
||||
fi;
|
||||
|
||||
if [ "${OS}" = "Darwin" ]; then
|
||||
export DS_ROOT_TASK=${TASKCLUSTER_TASK_DIR}
|
||||
export SWIG_LIB="$(find ${DS_ROOT_TASK}/homebrew/Cellar/swig/ -type f -name "swig.swg" | xargs dirname)"
|
||||
|
@ -33,6 +38,16 @@ export DS_VERSION="$(cat ${DS_DSDIR}/VERSION)"
|
|||
export ANDROID_SDK_HOME=${DS_ROOT_TASK}/DeepSpeech/Android/SDK/
|
||||
export ANDROID_NDK_HOME=${DS_ROOT_TASK}/DeepSpeech/Android/android-ndk-r18b/
|
||||
|
||||
TAR=${TAR:-"tar"}
|
||||
XZ=${XZ:-"pixz -9"}
|
||||
UNXZ=${UNXZ:-"pixz -d"}
|
||||
|
||||
if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then
|
||||
TAR=/usr/bin/tar.exe
|
||||
XZ="xz -9 -T0 -c -"
|
||||
UNXZ="xz -9 -T0 -d"
|
||||
fi
|
||||
|
||||
model_source="${DEEPSPEECH_TEST_MODEL}"
|
||||
model_name="$(basename "${model_source}")"
|
||||
model_name_mmap="$(basename -s ".pb" "${model_source}").pbmm"
|
||||
|
@ -172,11 +187,21 @@ assert_correct_ldc93s1()
|
|||
assert_correct_inference "$1" "she had your dark suit in greasy wash water all year"
|
||||
}
|
||||
|
||||
# Run assert_working_inference on the phrase in $1 against the LDC93S1
# reference sentence.
assert_working_ldc93s1()
{
    local phrase="$1"
    local expected="she had your dark suit in greasy wash water all year"

    assert_working_inference "${phrase}" "${expected}"
}
|
||||
|
||||
# Run assert_correct_inference on the phrase in $1 (decoded with a language
# model, going by the function name) against the LDC93S1 reference sentence.
assert_correct_ldc93s1_lm()
{
    local phrase="$1"
    local expected="she had your dark suit in greasy wash water all year"

    assert_correct_inference "${phrase}" "${expected}"
}
|
||||
|
||||
# Run assert_working_inference on the phrase in $1 (decoded with a language
# model, going by the function name) against the LDC93S1 reference sentence.
assert_working_ldc93s1_lm()
{
    local phrase="$1"
    local expected="she had your dark suit in greasy wash water all year"

    assert_working_inference "${phrase}" "${expected}"
}
|
||||
|
||||
assert_correct_multi_ldc93s1()
|
||||
{
|
||||
assert_shows_something "$1" "/LDC93S1.wav%she had your dark suit in greasy wash water all year%"
|
||||
|
@ -226,7 +251,19 @@ run_tflite_basic_inference_tests()
|
|||
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
|
||||
}
|
||||
|
||||
run_all_inference_tests()
|
||||
# Run DeepSpeechConsole.exe on LDC93S1.wav three times -- plain model,
# memory-mapped model, memory-mapped model with language model and trie --
# and hand each output to the matching assert_working_* helper.
# All inputs are read from ${TASKCLUSTER_TMP_DIR}.
run_netframework_inference_tests()
{
    local data_dir=${TASKCLUSTER_TMP_DIR}

    # Plain model, no language model.
    phrase_pbmodel_nolm=$(DeepSpeechConsole.exe \
        --model ${data_dir}/${model_name} \
        --alphabet ${data_dir}/alphabet.txt \
        --audio ${data_dir}/LDC93S1.wav)
    assert_working_ldc93s1 "${phrase_pbmodel_nolm}"

    # Memory-mapped model, no language model.
    phrase_pbmodel_nolm=$(DeepSpeechConsole.exe \
        --model ${data_dir}/${model_name_mmap} \
        --alphabet ${data_dir}/alphabet.txt \
        --audio ${data_dir}/LDC93S1.wav)
    assert_working_ldc93s1 "${phrase_pbmodel_nolm}"

    # Memory-mapped model with language model and trie.
    phrase_pbmodel_withlm=$(DeepSpeechConsole.exe \
        --model ${data_dir}/${model_name_mmap} \
        --alphabet ${data_dir}/alphabet.txt \
        --lm ${data_dir}/lm.binary \
        --trie ${data_dir}/trie \
        --audio ${data_dir}/LDC93S1.wav)
    assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}"
}
|
||||
|
||||
run_basic_inference_tests()
|
||||
{
|
||||
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav)
|
||||
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
|
||||
|
@ -236,6 +273,11 @@ run_all_inference_tests()
|
|||
|
||||
phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav)
|
||||
assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}"
|
||||
}
|
||||
|
||||
run_all_inference_tests()
|
||||
{
|
||||
run_basic_inference_tests
|
||||
|
||||
phrase_pbmodel_nolm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav)
|
||||
assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}"
|
||||
|
@ -299,7 +341,7 @@ generic_download_tarxz()
|
|||
|
||||
mkdir -p ${target_dir} || true
|
||||
|
||||
wget ${url} -O - | pixz -d | tar -C ${target_dir} -xf -
|
||||
wget ${url} -O - | ${UNXZ} | ${TAR} -C ${target_dir} -xf -
|
||||
}
|
||||
|
||||
download_native_client_files()
|
||||
|
@ -307,6 +349,45 @@ download_native_client_files()
|
|||
generic_download_tarxz "$1" "${DEEPSPEECH_ARTIFACTS_ROOT}/native_client.tar.xz"
|
||||
}
|
||||
|
||||
# Download the NuGet package and DeepSpeechConsole.exe produced by the build
# task, install the package from a local NuGet source and gather everything
# DeepSpeechConsole.exe needs into ${TASKCLUSTER_TMP_DIR}/ds/.
#
# Arguments:
#   $1  NuGet package id (callers in this repo pass "DeepSpeech" or
#       "DeepSpeech-GPU").
# Reads:   DS_VERSION, DEEPSPEECH_ARTIFACTS_ROOT, TASKCLUSTER_TMP_DIR,
#          ChocolateyInstall.
# Effects: sets PROJECT_NAME and nuget, extends PATH, changes the working
#          directory to ${TASKCLUSTER_TMP_DIR}/ds/.
# Exits with status 1 when no package id is given.
install_nuget()
{
    PROJECT_NAME=$1
    if [ -z "${PROJECT_NAME}" ]; then
        # `exit` only accepts a numeric status: print the message ourselves.
        echo "Please call with a valid PROJECT_NAME" >&2
        exit 1
    fi;

    nuget="${PROJECT_NAME}.${DS_VERSION}.nupkg"

    export PATH=$PATH:$(cygpath "${ChocolateyInstall}")/bin

    mkdir -p "${TASKCLUSTER_TMP_DIR}/repo/"
    mkdir -p "${TASKCLUSTER_TMP_DIR}/ds/"

    # Both artifacts are piped through gunzip before being written to disk.
    wget -O - "${DEEPSPEECH_ARTIFACTS_ROOT}/${nuget}" | gunzip > "${TASKCLUSTER_TMP_DIR}/${nuget}"
    wget -O - "${DEEPSPEECH_ARTIFACTS_ROOT}/DeepSpeechConsole.exe" | gunzip > "${TASKCLUSTER_TMP_DIR}/ds/DeepSpeechConsole.exe"

    # Register a local directory as NuGet source "repo" ...
    nuget sources add -Name repo -Source "$(cygpath -w "${TASKCLUSTER_TMP_DIR}/repo/")"

    # ... and publish the downloaded package into it.
    cd "${TASKCLUSTER_TMP_DIR}"
    nuget add "$(cygpath -w "${TASKCLUSTER_TMP_DIR}/${nuget}")" -source repo

    cd "${TASKCLUSTER_TMP_DIR}/ds/"
    nuget list -Source repo -Prerelease
    nuget install "${PROJECT_NAME}" -Source repo -Prerelease

    ls -halR "${PROJECT_NAME}.${DS_VERSION}"

    # Collect the DLLs and the native library next to DeepSpeechConsole.exe.
    nuget install NAudio
    cp NAudio*/lib/net35/NAudio.dll "${TASKCLUSTER_TMP_DIR}/ds/"
    cp "${PROJECT_NAME}.${DS_VERSION}/build/libdeepspeech.so" "${TASKCLUSTER_TMP_DIR}/ds/"
    cp "${PROJECT_NAME}.${DS_VERSION}/lib/net46/DeepSpeechClient.dll" "${TASKCLUSTER_TMP_DIR}/ds/"

    ls -hal "${TASKCLUSTER_TMP_DIR}/ds/"

    export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
}
|
||||
|
||||
download_data()
|
||||
{
|
||||
wget -P "${TASKCLUSTER_TMP_DIR}" "${model_source}"
|
||||
|
@ -390,6 +471,8 @@ is_patched_bazel()
|
|||
{
|
||||
bazel_version=$(bazel version | grep 'Build label:' | cut -d':' -f2)
|
||||
|
||||
bazel shutdown
|
||||
|
||||
if [ -z "${bazel_version}" ]; then
|
||||
return 0;
|
||||
else
|
||||
|
@ -473,6 +556,12 @@ do_bazel_build()
|
|||
verify_bazel_rebuild "${DS_ROOT_TASK}/DeepSpeech/tf/bazel_monolithic.log"
|
||||
}
|
||||
|
||||
# Run `bazel shutdown` from the TensorFlow checkout, passing along
# ${BAZEL_OUTPUT_USER_ROOT} as startup option(s).
shutdown_bazel()
{
    local tf_checkout=${DS_ROOT_TASK}/DeepSpeech/tf

    cd ${tf_checkout}
    # Deliberately unquoted: an empty value must expand to no argument at all.
    bazel ${BAZEL_OUTPUT_USER_ROOT} shutdown
}
|
||||
|
||||
do_bazel_shared_build()
|
||||
{
|
||||
cd ${DS_ROOT_TASK}/DeepSpeech/tf
|
||||
|
@ -491,7 +580,7 @@ do_deepspeech_binary_build()
|
|||
EXTRA_CFLAGS="${EXTRA_LOCAL_CFLAGS}" \
|
||||
EXTRA_LDFLAGS="${EXTRA_LOCAL_LDFLAGS}" \
|
||||
EXTRA_LIBS="${EXTRA_LOCAL_LIBS}" \
|
||||
deepspeech
|
||||
deepspeech${PLATFORM_EXE_SUFFIX}
|
||||
}
|
||||
|
||||
do_deepspeech_ndk_build()
|
||||
|
@ -509,6 +598,83 @@ do_deepspeech_ndk_build()
|
|||
TARGET_ARCH_ABI=${arch_abi}
|
||||
}
|
||||
|
||||
# Build the C# examples with MSBuild: DeepSpeechClient once per targeted
# .NET Framework version (output under bin/nuget/x64/<version>), then the
# DeepSpeechConsole and DeepSpeechWPF applications.
#
# Reads:   DS_DSDIR.
# Effects: sets MSBUILD and changes the working directory to
#          ${DS_DSDIR}/examples/net_framework/CSharpExamples.
do_deepspeech_netframework_build()
{
    local app
    local framework
    local csproj

    cd ${DS_DSDIR}/examples/net_framework/CSharpExamples

    # Setup dependencies
    for app in DeepSpeechConsole DeepSpeechWPF; do
        nuget install ${app}/packages.config -OutputDirectory packages/
    done

    MSBUILD="$(cygpath 'C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe')"

    # We need MSYS2_ARG_CONV_EXCL='/' otherwise the '/' of CLI parameters gets mangled and disappears
    # We build the .NET Client for .NET Framework v4.5,v4.6,v4.7
    for framework in v4.5 v4.6 v4.7; do
        MSYS2_ARG_CONV_EXCL='/' "${MSBUILD}" \
            DeepSpeechClient/DeepSpeechClient.csproj \
            /p:Configuration=Release \
            /p:Platform=x64 \
            /p:TargetFrameworkVersion="${framework}" \
            /p:OutputPath=bin/nuget/x64/${framework}
    done

    # The two applications are built with their project defaults.
    for csproj in DeepSpeechConsole/DeepSpeechConsole.csproj DeepSpeechWPF/DeepSpeech.WPF.csproj; do
        MSYS2_ARG_CONV_EXCL='/' "${MSBUILD}" \
            ${csproj} \
            /p:Configuration=Release \
            /p:Platform=x64
    done
}
|
||||
|
||||
# Assemble and pack the NuGet package: copies libdeepspeech.so and the
# per-framework DeepSpeechClient.dll builds into nupkg/, generates
# nupkg/deepspeech.nuspec from its .in template and runs `nuget pack`.
#
# Arguments:
#   $1  NuGet package id substituted for $NUPKG_ID in the template.
# Reads:   DS_DSDIR, DS_TFDIR.
# Effects: sets PROJECT_NAME and PROJECT_VERSION, changes the working
#          directory to ${DS_DSDIR}/examples/net_framework/CSharpExamples.
# Exits with status 1 when no package id is given.
do_nuget_build()
{
    local minor

    PROJECT_NAME=$1
    if [ -z "${PROJECT_NAME}" ]; then
        # `exit` only accepts a numeric status: print the message ourselves.
        echo "Please call with a valid PROJECT_NAME" >&2
        exit 1
    fi;

    cd ${DS_DSDIR}/examples/net_framework/CSharpExamples

    cp ${DS_TFDIR}/bazel-bin/native_client/libdeepspeech.so nupkg/build

    # We copy the generated clients for .NET into the Nuget framework dirs
    # (v4.5 -> net45, v4.6 -> net46, v4.7 -> net47).
    for minor in 5 6 7; do
        mkdir -p nupkg/lib/net4${minor}/
        cp DeepSpeechClient/bin/nuget/x64/v4.${minor}/DeepSpeechClient.dll nupkg/lib/net4${minor}/
    done

    # Read the version with CR/LF stripped. `$(shell ...)` is GNU make syntax,
    # not bash: it silently produced an empty version here.
    PROJECT_VERSION=$(tr -d '\r\n' < ../../../VERSION)

    sed \
        -e "s/\$NUPKG_ID/${PROJECT_NAME}/" \
        -e "s/\$NUPKG_VERSION/${PROJECT_VERSION}/" \
        nupkg/deepspeech.nuspec.in > nupkg/deepspeech.nuspec && cat nupkg/deepspeech.nuspec

    nuget pack nupkg/deepspeech.nuspec
}
|
||||
|
||||
# Hack to extract Ubuntu's 16.04 libssl 1.0.2 packages and use them during the
|
||||
# local build of Python.
|
||||
#
|
||||
|
@ -533,8 +699,8 @@ maybe_ssl102_py37()
|
|||
|
||||
mkdir -p ${PY37_OPENSSL_DIR}
|
||||
wget -P ${TASKCLUSTER_TMP_DIR} \
|
||||
http://${TASKCLUSTER_WORKER_GROUP}.ec2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.0.2g-1ubuntu4.14_amd64.deb \
|
||||
http://${TASKCLUSTER_WORKER_GROUP}.ec2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.0.0_1.0.2g-1ubuntu4.14_amd64.deb
|
||||
http://${TASKCLUSTER_WORKER_GROUP}.ec2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.0.2g-1ubuntu4.15_amd64.deb \
|
||||
http://${TASKCLUSTER_WORKER_GROUP}.ec2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.0.0_1.0.2g-1ubuntu4.15_amd64.deb
|
||||
|
||||
for deb in ${TASKCLUSTER_TMP_DIR}/libssl*.deb; do
|
||||
dpkg -x ${deb} ${PY37_OPENSSL_DIR}
|
||||
|
@ -778,13 +944,13 @@ package_native_client()
|
|||
echo "Please specify artifact name."
|
||||
fi;
|
||||
|
||||
tar -cf - \
|
||||
-C ${tensorflow_dir}/bazel-bin/native_client/ generate_trie \
|
||||
${TAR} -cf - \
|
||||
-C ${tensorflow_dir}/bazel-bin/native_client/ generate_trie${PLATFORM_EXE_SUFFIX} \
|
||||
-C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so \
|
||||
-C ${deepspeech_dir}/ LICENSE \
|
||||
-C ${deepspeech_dir}/native_client/ deepspeech \
|
||||
-C ${deepspeech_dir}/native_client/ deepspeech${PLATFORM_EXE_SUFFIX} \
|
||||
-C ${deepspeech_dir}/native_client/kenlm/ README.mozilla \
|
||||
| pixz -9 > "${artifacts_dir}/${artifact_name}"
|
||||
| ${XZ} > "${artifacts_dir}/${artifact_name}"
|
||||
}
|
||||
|
||||
package_native_client_ndk()
|
||||
|
|
|
@ -37,6 +37,7 @@ for inFile in (inFiles):
|
|||
with open(inFile, "r") as csvFile:
|
||||
reader = csv.reader(csvFile)
|
||||
try:
|
||||
next(reader, None) # skip the file header (i.e. "transcript")
|
||||
for row in reader:
|
||||
allText |= set(str(row[2]))
|
||||
except IndexError as ie:
|
||||
|
|
|
@ -133,6 +133,7 @@ def validate_label(label):
|
|||
label = label.replace(".", "")
|
||||
label = label.replace(",", "")
|
||||
label = label.replace("?", "")
|
||||
label = label.replace("\"", "")
|
||||
label = label.strip()
|
||||
|
||||
return label.lower()
|
||||
|
|
Загрузка…
Ссылка в новой задаче