This commit is contained in:
Reuben Morais 2019-12-04 16:38:56 +01:00
Parent 1cd56b53de
Commit 80cafe6bfb
36 changed files with 3016 additions and 0 deletions

@@ -0,0 +1,62 @@
# FFmpeg VAD Streaming
Streaming inference from arbitrary source (FFmpeg input) to DeepSpeech, using VAD (voice activity detection). A fairly simple example demonstrating the DeepSpeech streaming API in Node.js.
This example was successfully tested with a mobile phone streaming a live feed to an RTMP server (nginx-rtmp), which this script could then consume for near-real-time speech recognition.
## Installation
```bash
npm install
```
FFmpeg must also be installed:
```bash
sudo apt-get install ffmpeg
```
## Usage
Here is an example for a local audio file:
```bash
node ./index.js --audio <AUDIO_FILE> \
--model $HOME/models/output_graph.pbmm
```
Here is an example for a remote RTMP stream:
```bash
node ./index.js --audio rtmp://<IP>:1935/live/teststream \
--model $HOME/models/output_graph.pbmm
```
## Examples
Real-time streaming inference with DeepSpeech's example audio ([audio-0.4.1.tar.gz](https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz)).
```bash
node ./index.js --audio $HOME/audio/2830-3980-0043.wav \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm
```
```bash
node ./index.js --audio $HOME/audio/4507-16021-0012.wav \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm
```
```bash
node ./index.js --audio $HOME/audio/8455-210777-0068.wav \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm
```
Real-time streaming inference in combination with an RTMP server.
```bash
node ./index.js --audio rtmp://<HOST>/<APP>/<KEY> \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm
```
## Notes
To get the best results for your own scenario, it might be helpful to adjust the `VAD_MODE` and `DEBOUNCE_TIME` parameters.

@@ -0,0 +1,123 @@
#!/usr/bin/env node
const VAD = require("node-vad");
const Ds = require('deepspeech');
const argparse = require('argparse');
const util = require('util');
const { spawn } = require('child_process');
// These constants control the beam search decoder
// Beam width used in the CTC decoder when building candidate transcriptions
const BEAM_WIDTH = 500;
// The alpha hyperparameter of the CTC decoder. Language Model weight
const LM_ALPHA = 0.75;
// The beta hyperparameter of the CTC decoder. Word insertion bonus.
const LM_BETA = 1.85;
let VersionAction = function VersionAction(options) {
options = options || {};
options.nargs = 0;
argparse.Action.call(this, options);
};
util.inherits(VersionAction, argparse.Action);
VersionAction.prototype.call = function(parser) {
Ds.printVersions();
process.exit(0);
};
let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
parser.addArgument(['--audio'], {required: true, help: 'Path to the audio source to run (ffmpeg supported formats)'});
parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exit'});
let args = parser.parseArgs();
function totalTime(hrtimeValue) {
return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4);
}
console.error('Loading model from file %s', args['model']);
const model_load_start = process.hrtime();
let model = new Ds.Model(args['model'], BEAM_WIDTH);
const model_load_end = process.hrtime(model_load_start);
console.error('Loaded model in %ds.', totalTime(model_load_end));
if (args['lm'] && args['trie']) {
console.error('Loading language model from files %s %s', args['lm'], args['trie']);
const lm_load_start = process.hrtime();
model.enableDecoderWithLM(args['lm'], args['trie'], LM_ALPHA, LM_BETA);
const lm_load_end = process.hrtime(lm_load_start);
console.error('Loaded language model in %ds.', totalTime(lm_load_end));
}
// Default is 16kHz
const AUDIO_SAMPLE_RATE = 16000;
// Defines different thresholds for voice detection
// NORMAL: Suitable for high bitrate, low-noise data. May classify noise as voice, too.
// LOW_BITRATE: Detection mode optimised for low-bitrate audio.
// AGGRESSIVE: Detection mode best suited for somewhat noisy, lower quality audio.
// VERY_AGGRESSIVE: Detection mode with lowest miss-rate. Works well for most inputs.
const VAD_MODE = VAD.Mode.NORMAL;
// const VAD_MODE = VAD.Mode.LOW_BITRATE;
// const VAD_MODE = VAD.Mode.AGGRESSIVE;
// const VAD_MODE = VAD.Mode.VERY_AGGRESSIVE;
// Time in milliseconds for debouncing speech active state
const DEBOUNCE_TIME = 20;
// Create voice activity stream
const VAD_STREAM = VAD.createStream({
mode: VAD_MODE,
audioFrequency: AUDIO_SAMPLE_RATE,
debounceTime: DEBOUNCE_TIME
});
// Spawn ffmpeg process
const ffmpeg = spawn('ffmpeg', [
'-hide_banner',
'-nostats',
'-loglevel', 'fatal',
'-i', args['audio'],
'-vn',
'-acodec', 'pcm_s16le',
'-ac', 1,
'-ar', AUDIO_SAMPLE_RATE,
'-f', 's16le',
'pipe:'
]);
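// Seconds of audio fed to the decoder so far, plus the initial DeepSpeech streaming context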
let audioLength = 0;
let sctx = model.createStream();
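// Finish the current stream, print its transcription, and report how long inference
// took relative to the amount of audio that was fed in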
function finishStream() {
const model_load_start = process.hrtime();
console.error('Running inference.');
console.log('Transcription: ', model.finishStream(sctx));
const model_load_end = process.hrtime(model_load_start);
console.error('Inference took %ds for %ds audio file.', totalTime(model_load_end), audioLength.toPrecision(4));
audioLength = 0;
}
function intermediateDecode() {
finishStream();
sctx = model.createStream();
}
function feedAudioContent(chunk) {
audioLength += (chunk.length / 2) * ( 1 / AUDIO_SAMPLE_RATE);
model.feedAudioContent(sctx, chunk.slice(0, chunk.length / 2));
}
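// node-vad emits speech start/active/end events: keep feeding audio while speech is
// active, and finalize the stream (printing a transcription) once the segment ends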
function processVad(data) {
  if (data.speech.start || data.speech.state) feedAudioContent(data.audioData);
  else if (data.speech.end) { feedAudioContent(data.audioData); intermediateDecode(); }
}
ffmpeg.stdout.pipe(VAD_STREAM).on('data', processVad);

@@ -0,0 +1,16 @@
{
"name": "ffmpeg-vad-streaming",
"version": "1.0.0",
"description": "Streaming inference from arbitrary source with VAD and FFmpeg",
"main": "index.js",
"scripts": {
"start": "node ./index.js"
},
"dependencies": {
"argparse": "^1.0.10",
"deepspeech": "0.6.0",
"node-vad": "^1.1.1",
"util": "^0.11.1"
},
"license" : "MIT"
}

ffmpeg_vad_streaming/test.sh (executable file, 27 lines)
@@ -0,0 +1,27 @@
#!/bin/bash
set -xe
THIS=$(dirname "$0")
pushd ${THIS}
source ../tests.sh
npm install $(get_npm_package_url)
npm install
node ./index.js --audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \
--lm $HOME/DeepSpeech/models/lm.binary \
--trie $HOME/DeepSpeech/models/trie \
--model $HOME/DeepSpeech/models/output_graph.pbmm
node ./index.js --audio $HOME/DeepSpeech/audio/4507-16021-0012.wav \
--lm $HOME/DeepSpeech/models/lm.binary \
--trie $HOME/DeepSpeech/models/trie \
--model $HOME/DeepSpeech/models/output_graph.pbmm
node ./index.js --audio $HOME/DeepSpeech/audio/8455-210777-0068.wav \
--lm $HOME/DeepSpeech/models/lm.binary \
--trie $HOME/DeepSpeech/models/trie \
--model $HOME/DeepSpeech/models/output_graph.pbmm
popd

@@ -0,0 +1,69 @@
Microphone VAD Streaming
========================

Stream from microphone to DeepSpeech, using VAD (voice activity detection). A fairly simple example demonstrating the DeepSpeech streaming API in Python. Also useful for quick, real-time testing of models and decoding parameters.

Installation
------------

.. code-block:: bash

    pip install -r requirements.txt

Uses portaudio for microphone access, so on Linux you may need to install its header files to compile the ``pyaudio`` package:

.. code-block:: bash

    sudo apt install portaudio19-dev

Installation on macOS may fail due to portaudio; use Homebrew to install it:

.. code-block:: bash

    brew install portaudio

Usage
-----

.. code-block::

    usage: mic_vad_streaming.py [-h] [-v VAD_AGGRESSIVENESS] [--nospinner]
                                [-w SAVEWAV] -m MODEL [-l LM]
                                [-t TRIE] [-nf N_FEATURES] [-nc N_CONTEXT]
                                [-la LM_ALPHA] [-lb LM_BETA]
                                [-bw BEAM_WIDTH]

    Stream from microphone to DeepSpeech using VAD

    optional arguments:
      -h, --help            show this help message and exit
      -v VAD_AGGRESSIVENESS, --vad_aggressiveness VAD_AGGRESSIVENESS
                            Set aggressiveness of VAD: an integer between 0 and 3,
                            0 being the least aggressive about filtering out non-
                            speech, 3 the most aggressive. Default: 3
      --nospinner           Disable spinner
      -w SAVEWAV, --savewav SAVEWAV
                            Save .wav files of utterances to given directory
      -m MODEL, --model MODEL
                            Path to the model (protocol buffer binary file, or
                            entire directory containing all standard-named files
                            for model)
      -l LM, --lm LM        Path to the language model binary file. Default:
                            lm.binary
      -t TRIE, --trie TRIE  Path to the language model trie file created with
                            native_client/generate_trie. Default: trie
      -nf N_FEATURES, --n_features N_FEATURES
                            Number of MFCC features to use. Default: 26
      -nc N_CONTEXT, --n_context N_CONTEXT
                            Size of the context window used for producing
                            timesteps in the input vector. Default: 9
      -la LM_ALPHA, --lm_alpha LM_ALPHA
                            The alpha hyperparameter of the CTC decoder. Language
                            Model weight. Default: 0.75
      -lb LM_BETA, --lm_beta LM_BETA
                            The beta hyperparameter of the CTC decoder. Word insertion
                            bonus. Default: 1.85
      -bw BEAM_WIDTH, --beam_width BEAM_WIDTH
                            Beam width used in the CTC decoder when building
                            candidate transcriptions. Default: 500
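
Under the hood the script drives the DeepSpeech streaming API. For reference, a minimal sketch of that flow on a prerecorded file (the ``output_graph.pbmm`` and ``audio.wav`` paths are illustrative placeholders, not part of this example):

.. code-block:: python

    import wave
    import numpy as np
    import deepspeech

    # Load the acoustic model (DeepSpeech 0.6 API); 500 is the beam width.
    model = deepspeech.Model('output_graph.pbmm', 500)

    # Read a 16 kHz, 16-bit mono WAV file and feed it through a stream.
    with wave.open('audio.wav', 'rb') as wav:
        audio = np.frombuffer(wav.readframes(wav.getnframes()), np.int16)

    stream = model.createStream()
    model.feedAudioContent(stream, audio)
    print(model.finishStream(stream))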

@@ -0,0 +1,237 @@
import time, logging
from datetime import datetime
import threading, collections, queue, os, os.path
import deepspeech
import numpy as np
import pyaudio
import wave
import webrtcvad
from halo import Halo
from scipy import signal
logging.basicConfig(level=20)
class Audio(object):
"""Streams raw audio from microphone. Data is received in a separate thread, and stored in a buffer, to be read from."""
FORMAT = pyaudio.paInt16
# Network/VAD rate-space
RATE_PROCESS = 16000
CHANNELS = 1
BLOCKS_PER_SECOND = 50
def __init__(self, callback=None, device=None, input_rate=RATE_PROCESS, file=None):
def proxy_callback(in_data, frame_count, time_info, status):
#pylint: disable=unused-argument
if self.chunk is not None:
in_data = self.wf.readframes(self.chunk)
callback(in_data)
return (None, pyaudio.paContinue)
if callback is None: callback = lambda in_data: self.buffer_queue.put(in_data)
self.buffer_queue = queue.Queue()
self.device = device
self.input_rate = input_rate
self.sample_rate = self.RATE_PROCESS
self.block_size = int(self.RATE_PROCESS / float(self.BLOCKS_PER_SECOND))
self.block_size_input = int(self.input_rate / float(self.BLOCKS_PER_SECOND))
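        # BLOCKS_PER_SECOND = 50 gives 20 ms blocks (320 samples at 16 kHz),
        # one of the frame durations webrtcvad accepts.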
self.pa = pyaudio.PyAudio()
kwargs = {
'format': self.FORMAT,
'channels': self.CHANNELS,
'rate': self.input_rate,
'input': True,
'frames_per_buffer': self.block_size_input,
'stream_callback': proxy_callback,
}
self.chunk = None
# if not default device
if self.device:
kwargs['input_device_index'] = self.device
elif file is not None:
self.chunk = 320
self.wf = wave.open(file, 'rb')
self.stream = self.pa.open(**kwargs)
self.stream.start_stream()
def resample(self, data, input_rate):
"""
Microphone may not support our native processing sampling rate, so
resample from input_rate to RATE_PROCESS here for webrtcvad and
deepspeech
Args:
data (binary): Input audio stream
input_rate (int): Input audio rate to resample from
"""
        data16 = np.frombuffer(data, dtype=np.int16)
resample_size = int(len(data16) / self.input_rate * self.RATE_PROCESS)
resample = signal.resample(data16, resample_size)
resample16 = np.array(resample, dtype=np.int16)
        return resample16.tobytes()
def read_resampled(self):
"""Return a block of audio data resampled to 16000hz, blocking if necessary."""
return self.resample(data=self.buffer_queue.get(),
input_rate=self.input_rate)
def read(self):
"""Return a block of audio data, blocking if necessary."""
return self.buffer_queue.get()
def destroy(self):
self.stream.stop_stream()
self.stream.close()
self.pa.terminate()
frame_duration_ms = property(lambda self: 1000 * self.block_size // self.sample_rate)
def write_wav(self, filename, data):
logging.info("write wav %s", filename)
wf = wave.open(filename, 'wb')
wf.setnchannels(self.CHANNELS)
# wf.setsampwidth(self.pa.get_sample_size(FORMAT))
assert self.FORMAT == pyaudio.paInt16
wf.setsampwidth(2)
wf.setframerate(self.sample_rate)
wf.writeframes(data)
wf.close()
class VADAudio(Audio):
"""Filter & segment audio with voice activity detection."""
def __init__(self, aggressiveness=3, device=None, input_rate=None, file=None):
super().__init__(device=device, input_rate=input_rate, file=file)
self.vad = webrtcvad.Vad(aggressiveness)
def frame_generator(self):
"""Generator that yields all audio frames from microphone."""
if self.input_rate == self.RATE_PROCESS:
while True:
yield self.read()
else:
while True:
yield self.read_resampled()
def vad_collector(self, padding_ms=300, ratio=0.75, frames=None):
"""Generator that yields series of consecutive audio frames comprising each utterence, separated by yielding a single None.
Determines voice activity by ratio of frames in padding_ms. Uses a buffer to include padding_ms prior to being triggered.
Example: (frame, ..., frame, None, frame, ..., frame, None, ...)
|---utterence---| |---utterence---|
"""
if frames is None: frames = self.frame_generator()
num_padding_frames = padding_ms // self.frame_duration_ms
ring_buffer = collections.deque(maxlen=num_padding_frames)
triggered = False
for frame in frames:
if len(frame) < 640:
return
is_speech = self.vad.is_speech(frame, self.sample_rate)
if not triggered:
ring_buffer.append((frame, is_speech))
num_voiced = len([f for f, speech in ring_buffer if speech])
if num_voiced > ratio * ring_buffer.maxlen:
triggered = True
for f, s in ring_buffer:
yield f
ring_buffer.clear()
else:
yield frame
ring_buffer.append((frame, is_speech))
num_unvoiced = len([f for f, speech in ring_buffer if not speech])
if num_unvoiced > ratio * ring_buffer.maxlen:
triggered = False
yield None
ring_buffer.clear()
def main(ARGS):
# Load DeepSpeech model
if os.path.isdir(ARGS.model):
model_dir = ARGS.model
ARGS.model = os.path.join(model_dir, 'output_graph.pb')
ARGS.lm = os.path.join(model_dir, ARGS.lm)
ARGS.trie = os.path.join(model_dir, ARGS.trie)
print('Initializing model...')
logging.info("ARGS.model: %s", ARGS.model)
model = deepspeech.Model(ARGS.model, ARGS.beam_width)
if ARGS.lm and ARGS.trie:
logging.info("ARGS.lm: %s", ARGS.lm)
logging.info("ARGS.trie: %s", ARGS.trie)
model.enableDecoderWithLM(ARGS.lm, ARGS.trie, ARGS.lm_alpha, ARGS.lm_beta)
# Start audio with VAD
vad_audio = VADAudio(aggressiveness=ARGS.vad_aggressiveness,
device=ARGS.device,
input_rate=ARGS.rate,
file=ARGS.file)
print("Listening (ctrl-C to exit)...")
frames = vad_audio.vad_collector()
# Stream from microphone to DeepSpeech using VAD
spinner = None
if not ARGS.nospinner:
spinner = Halo(spinner='line')
stream_context = model.createStream()
wav_data = bytearray()
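    # vad_collector() yields audio frames while speech is detected, plus a single
    # None to mark the end of each utterance.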
for frame in frames:
if frame is not None:
if spinner: spinner.start()
logging.debug("streaming frame")
model.feedAudioContent(stream_context, np.frombuffer(frame, np.int16))
if ARGS.savewav: wav_data.extend(frame)
else:
if spinner: spinner.stop()
logging.debug("end utterence")
if ARGS.savewav:
vad_audio.write_wav(os.path.join(ARGS.savewav, datetime.now().strftime("savewav_%Y-%m-%d_%H-%M-%S_%f.wav")), wav_data)
wav_data = bytearray()
text = model.finishStream(stream_context)
print("Recognized: %s" % text)
stream_context = model.createStream()
if __name__ == '__main__':
BEAM_WIDTH = 500
DEFAULT_SAMPLE_RATE = 16000
LM_ALPHA = 0.75
LM_BETA = 1.85
import argparse
parser = argparse.ArgumentParser(description="Stream from microphone to DeepSpeech using VAD")
parser.add_argument('-v', '--vad_aggressiveness', type=int, default=3,
help="Set aggressiveness of VAD: an integer between 0 and 3, 0 being the least aggressive about filtering out non-speech, 3 the most aggressive. Default: 3")
parser.add_argument('--nospinner', action='store_true',
help="Disable spinner")
parser.add_argument('-w', '--savewav',
help="Save .wav files of utterences to given directory")
parser.add_argument('-f', '--file',
help="Read from .wav file instead of microphone")
parser.add_argument('-m', '--model', required=True,
help="Path to the model (protocol buffer binary file, or entire directory containing all standard-named files for model)")
parser.add_argument('-l', '--lm', default='lm.binary',
help="Path to the language model binary file. Default: lm.binary")
parser.add_argument('-t', '--trie', default='trie',
help="Path to the language model trie file created with native_client/generate_trie. Default: trie")
parser.add_argument('-d', '--device', type=int, default=None,
help="Device input index (Int) as listed by pyaudio.PyAudio.get_device_info_by_index(). If not provided, falls back to PyAudio.get_default_device().")
parser.add_argument('-r', '--rate', type=int, default=DEFAULT_SAMPLE_RATE,
help=f"Input device sample rate. Default: {DEFAULT_SAMPLE_RATE}. Your device may require 44100.")
parser.add_argument('-la', '--lm_alpha', type=float, default=LM_ALPHA,
help=f"The alpha hyperparameter of the CTC decoder. Language Model weight. Default: {LM_ALPHA}")
parser.add_argument('-lb', '--lm_beta', type=float, default=LM_BETA,
help=f"The beta hyperparameter of the CTC decoder. Word insertion bonus. Default: {LM_BETA}")
parser.add_argument('-bw', '--beam_width', type=int, default=BEAM_WIDTH,
help=f"Beam width used in the CTC decoder when building candidate transcriptions. Default: {BEAM_WIDTH}")
ARGS = parser.parse_args()
if ARGS.savewav: os.makedirs(ARGS.savewav, exist_ok=True)
main(ARGS)

@@ -0,0 +1,6 @@
deepspeech==0.6.0
pyaudio~=0.2.11
webrtcvad~=2.0.10
halo~=0.0.18
numpy>=1.15.1
scipy>=1.1.0

mic_vad_streaming/test.sh (executable file, 20 lines)
@@ -0,0 +1,20 @@
#!/bin/bash
set -xe
THIS=$(dirname "$0")
pushd ${THIS}
source ../tests.sh
pip install --user $(get_python_wheel_url "$1")
pip install --user -r requirements.txt
pulseaudio &
python mic_vad_streaming.py \
--model $HOME/DeepSpeech/models/output_graph.pbmm \
--lm $HOME/DeepSpeech/models/lm.binary \
--trie $HOME/DeepSpeech/models/trie \
--file $HOME/DeepSpeech/audio/2830-3980-0043.wav
popd

net_framework/.gitignore (vendored file, 330 lines)
@@ -0,0 +1,330 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUNIT
*.VisualState.xml
TestResult.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
**/Properties/launchSettings.json
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_i.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# JustCode is a .NET coding add-in
.JustCode
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# JetBrains Rider
.idea/
*.sln.iml
# CodeRush
.cr/
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8" ?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6.2" />
</startup>
</configuration>

@@ -0,0 +1,8 @@
<Application
x:Class="DeepSpeechWPF.App"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:local="clr-namespace:DeepSpeechWPF"
StartupUri="MainWindow.xaml">
<Application.Resources />
</Application>

@@ -0,0 +1,44 @@
using CommonServiceLocator;
using DeepSpeech.WPF.ViewModels;
using DeepSpeechClient.Interfaces;
using GalaSoft.MvvmLight.Ioc;
using System.Windows;
namespace DeepSpeechWPF
{
/// <summary>
/// Interaction logic for App.xaml
/// </summary>
public partial class App : Application
{
protected override void OnStartup(StartupEventArgs e)
{
base.OnStartup(e);
ServiceLocator.SetLocatorProvider(() => SimpleIoc.Default);
const int BEAM_WIDTH = 500;
//Register instance of DeepSpeech
DeepSpeechClient.DeepSpeech deepSpeechClient = new DeepSpeechClient.DeepSpeech();
try
{
deepSpeechClient.CreateModel("output_graph.pbmm", BEAM_WIDTH);
}
catch (System.Exception ex)
{
MessageBox.Show(ex.Message);
Current.Shutdown();
}
SimpleIoc.Default.Register<IDeepSpeech>(() => deepSpeechClient);
SimpleIoc.Default.Register<MainWindowViewModel>();
}
protected override void OnExit(ExitEventArgs e)
{
base.OnExit(e);
//Dispose instance of DeepSpeech
ServiceLocator.Current.GetInstance<IDeepSpeech>()?.Dispose();
}
}
}

@@ -0,0 +1,140 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{54BFD766-4305-4F4C-BA59-AF45505DF3C1}</ProjectGuid>
<OutputType>WinExe</OutputType>
<RootNamespace>DeepSpeech.WPF</RootNamespace>
<AssemblyName>DeepSpeech.WPF</AssemblyName>
<TargetFrameworkVersion>v4.6.2</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<ProjectTypeGuids>{60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
<WarningLevel>4</WarningLevel>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<Deterministic>true</Deterministic>
<NuGetPackageImportStamp>
</NuGetPackageImportStamp>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x64\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>AnyCPU</PlatformTarget>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>false</Prefer32Bit>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
<OutputPath>bin\x64\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
<Reference Include="AsyncAwaitBestPractices, Version=1.0.0.0, Culture=neutral, processorArchitecture=MSIL">
<HintPath>packages\AsyncAwaitBestPractices.3.1.0\lib\netstandard1.0\AsyncAwaitBestPractices.dll</HintPath>
</Reference>
<Reference Include="AsyncAwaitBestPractices.MVVM, Version=1.0.0.0, Culture=neutral, processorArchitecture=MSIL">
<HintPath>packages\AsyncAwaitBestPractices.MVVM.3.1.0\lib\netstandard1.0\AsyncAwaitBestPractices.MVVM.dll</HintPath>
</Reference>
<Reference Include="CommonServiceLocator, Version=2.0.2.0, Culture=neutral, PublicKeyToken=489b6accfaf20ef0, processorArchitecture=MSIL">
<HintPath>packages\CommonServiceLocator.2.0.2\lib\net45\CommonServiceLocator.dll</HintPath>
</Reference>
<Reference Include="CSCore, Version=1.2.1.2, Culture=neutral, PublicKeyToken=5a08f2b6f4415dea, processorArchitecture=MSIL">
<HintPath>packages\CSCore.1.2.1.2\lib\net35-client\CSCore.dll</HintPath>
</Reference>
<Reference Include="GalaSoft.MvvmLight, Version=5.4.1.0, Culture=neutral, PublicKeyToken=e7570ab207bcb616, processorArchitecture=MSIL">
<HintPath>packages\MvvmLightLibs.5.4.1.1\lib\net45\GalaSoft.MvvmLight.dll</HintPath>
</Reference>
<Reference Include="GalaSoft.MvvmLight.Extras, Version=5.4.1.0, Culture=neutral, PublicKeyToken=669f0b5e8f868abf, processorArchitecture=MSIL">
<HintPath>packages\MvvmLightLibs.5.4.1.1\lib\net45\GalaSoft.MvvmLight.Extras.dll</HintPath>
</Reference>
<Reference Include="GalaSoft.MvvmLight.Platform, Version=5.4.1.0, Culture=neutral, PublicKeyToken=5f873c45e98af8a1, processorArchitecture=MSIL">
<HintPath>packages\MvvmLightLibs.5.4.1.1\lib\net45\GalaSoft.MvvmLight.Platform.dll</HintPath>
</Reference>
<Reference Include="NAudio, Version=1.9.0.0, Culture=neutral, processorArchitecture=MSIL">
<HintPath>packages\NAudio.1.9.0\lib\net35\NAudio.dll</HintPath>
</Reference>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Windows.Forms" />
<Reference Include="System.Windows.Interactivity, Version=4.5.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
<HintPath>packages\MvvmLightLibs.5.4.1.1\lib\net45\System.Windows.Interactivity.dll</HintPath>
</Reference>
<Reference Include="System.Xml" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Xaml">
<RequiredTargetFramework>4.0</RequiredTargetFramework>
</Reference>
<Reference Include="WindowsBase" />
<Reference Include="PresentationCore" />
<Reference Include="PresentationFramework" />
</ItemGroup>
<ItemGroup>
<ApplicationDefinition Include="App.xaml">
<Generator>MSBuild:Compile</Generator>
<SubType>Designer</SubType>
</ApplicationDefinition>
<Compile Include="ViewModels\MainWindowViewModel.cs" />
<Page Include="MainWindow.xaml">
<Generator>MSBuild:Compile</Generator>
<SubType>Designer</SubType>
</Page>
<Compile Include="App.xaml.cs">
<DependentUpon>App.xaml</DependentUpon>
<SubType>Code</SubType>
</Compile>
<Compile Include="ViewModels\BindableBase.cs" />
<Compile Include="MainWindow.xaml.cs">
<DependentUpon>MainWindow.xaml</DependentUpon>
<SubType>Code</SubType>
</Compile>
</ItemGroup>
<ItemGroup>
<Compile Include="Properties\AssemblyInfo.cs">
<SubType>Code</SubType>
</Compile>
<Compile Include="Properties\Resources.Designer.cs">
<AutoGen>True</AutoGen>
<DesignTime>True</DesignTime>
<DependentUpon>Resources.resx</DependentUpon>
</Compile>
<Compile Include="Properties\Settings.Designer.cs">
<AutoGen>True</AutoGen>
<DependentUpon>Settings.settings</DependentUpon>
<DesignTimeSharedInput>True</DesignTimeSharedInput>
</Compile>
<EmbeddedResource Include="Properties\Resources.resx">
<Generator>ResXFileCodeGenerator</Generator>
<LastGenOutput>Resources.Designer.cs</LastGenOutput>
</EmbeddedResource>
<None Include="packages.config" />
<None Include="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
</None>
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\native_client\dotnet\DeepSpeechClient\DeepSpeechClient.csproj">
<Project>{56de4091-bbbe-47e4-852d-7268b33b971f}</Project>
<Name>DeepSpeechClient</Name>
</ProjectReference>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>

@@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.28307.421
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeech.WPF", "DeepSpeech.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechClient", "..\..\..\native_client\dotnet\DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|x64.ActiveCfg = Debug|x64
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|x64.Build.0 = Debug|x64
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|x64.ActiveCfg = Release|x64
{54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|x64.Build.0 = Release|x64
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.ActiveCfg = Debug|x64
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.Build.0 = Debug|x64
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|x64.ActiveCfg = Release|x64
{56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {19C58802-CCEC-4FD1-8D17-A6EB766116F7}
EndGlobalSection
EndGlobal

@@ -0,0 +1,102 @@
<Window
x:Class="DeepSpeechWPF.MainWindow"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
Title="Deepspeech client"
Width="800"
Height="600"
Loaded="Window_Loaded"
WindowStartupLocation="CenterScreen"
mc:Ignorable="d">
<Grid>
<Grid.RowDefinitions>
<RowDefinition Height="222" />
<RowDefinition />
</Grid.RowDefinitions>
<TextBox
Grid.Row="1"
Margin="10,36,10,10"
FontSize="16px"
Text="{Binding Transcription, Mode=OneWay}"
TextWrapping="Wrap" />
<Label
Grid.Row="1"
Height="26"
Margin="10,5,10,0"
VerticalAlignment="Top"
Content="Results:" />
<Label
Height="26"
Margin="10,10,10,0"
VerticalAlignment="Top"
Content="Select an audio file to transcript:" />
<TextBox
Height="23"
Margin="10,41,10,0"
VerticalAlignment="Top"
Text="{Binding AudioFilePath, Mode=TwoWay}"
TextWrapping="Wrap" />
<Button
Width="80"
Height="25"
Margin="10,69,0,0"
HorizontalAlignment="Left"
VerticalAlignment="Top"
Command="{Binding SelectFileCommand}"
Content="Open file" />
<Button
Width="82"
Height="25"
Margin="95,69,0,0"
HorizontalAlignment="Left"
VerticalAlignment="Top"
Command="{Binding EnableLanguageModelCommand}"
Content="Enable LM" />
<Button
Width="75"
Height="25"
Margin="182,69,0,0"
HorizontalAlignment="Left"
VerticalAlignment="Top"
Command="{Binding InferenceFromFileCommand}"
Content="Transcript" />
<Label
Height="30"
Margin="10,99,10,0"
VerticalAlignment="Top"
Content="{Binding StatusMessage, Mode=OneWay}" />
<Label
Height="26"
Margin="10,158,10,0"
VerticalAlignment="Top"
Content="Select an audio input:" />
<ComboBox
Height="23"
Margin="20,189,186,0"
VerticalAlignment="Top"
DisplayMemberPath="FriendlyName"
ItemsSource="{Binding AvailableRecordDevices, Mode=TwoWay}"
SelectedIndex="0"
SelectedItem="{Binding SelectedDevice, Mode=TwoWay}" />
<Button
Width="91"
Height="23"
Margin="0,0,90,10"
HorizontalAlignment="Right"
VerticalAlignment="Bottom"
Command="{Binding StartRecordingCommand}"
Content="Record"
IsEnabled="{Binding EnableStartRecord, Mode=OneWay}" />
<Button
Width="75"
Height="23"
Margin="0,0,10,10"
HorizontalAlignment="Right"
VerticalAlignment="Bottom"
Command="{Binding StopRecordingCommand}"
Content="Stop"
IsEnabled="{Binding EnableStopRecord, Mode=OneWay}" />
</Grid>
</Window>

@@ -0,0 +1,17 @@
using CommonServiceLocator;
using DeepSpeech.WPF.ViewModels;
using System.Windows;
namespace DeepSpeechWPF
{
/// <summary>
/// Interaction logic for MainWindow.xaml
/// </summary>
public partial class MainWindow : Window
{
public MainWindow() => InitializeComponent();
private void Window_Loaded(object sender, RoutedEventArgs e) =>
DataContext = ServiceLocator.Current.GetInstance<MainWindowViewModel>();
}
}

@@ -0,0 +1,55 @@
using System.Reflection;
using System.Resources;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Windows;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("DeepSpeech.WPF")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("DeepSpeech.WPF.SingleFiles")]
[assembly: AssemblyCopyright("Copyright © 2018")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
//In order to begin building localizable applications, set
//<UICulture>CultureYouAreCodingWith</UICulture> in your .csproj file
//inside a <PropertyGroup>. For example, if you are using US english
//in your source files, set the <UICulture> to en-US. Then uncomment
//the NeutralResourceLanguage attribute below. Update the "en-US" in
//the line below to match the UICulture setting in the project file.
//[assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.Satellite)]
[assembly: ThemeInfo(
ResourceDictionaryLocation.None, //where theme specific resource dictionaries are located
//(used if a resource is not found in the page,
// or application resource dictionaries)
ResourceDictionaryLocation.SourceAssembly //where the generic resource dictionary is located
//(used if a resource is not found in the page,
// app, or any theme specific resource dictionaries)
)]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

net_framework/DeepSpeechWPF/Properties/Resources.Designer.cs (generated file, 63 lines)
@@ -0,0 +1,63 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace DeepSpeech.WPF.Properties {
using System;
/// <summary>
/// A strongly-typed resource class, for looking up localized strings, etc.
/// </summary>
// This class was auto-generated by the StronglyTypedResourceBuilder
// class via a tool like ResGen or Visual Studio.
// To add or remove a member, edit your .ResX file then rerun ResGen
// with the /str option, or rebuild your VS project.
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "15.0.0.0")]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
internal class Resources {
private static global::System.Resources.ResourceManager resourceMan;
private static global::System.Globalization.CultureInfo resourceCulture;
[global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
internal Resources() {
}
/// <summary>
/// Returns the cached ResourceManager instance used by this class.
/// </summary>
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
internal static global::System.Resources.ResourceManager ResourceManager {
get {
if (object.ReferenceEquals(resourceMan, null)) {
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("DeepSpeech.WPF.Properties.Resources", typeof(Resources).Assembly);
resourceMan = temp;
}
return resourceMan;
}
}
/// <summary>
/// Overrides the current thread's CurrentUICulture property for all
/// resource lookups using this strongly typed resource class.
/// </summary>
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
internal static global::System.Globalization.CultureInfo Culture {
get {
return resourceCulture;
}
set {
resourceCulture = value;
}
}
}
}

@@ -0,0 +1,117 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goals of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that support
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

net_framework/DeepSpeechWPF/Properties/Settings.Designer.cs (generated file, 26 lines)
@@ -0,0 +1,26 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace DeepSpeech.WPF.Properties {
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "15.9.0.0")]
internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase {
private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings())));
public static Settings Default {
get {
return defaultInstance;
}
}
}
}

@@ -0,0 +1,7 @@
<?xml version='1.0' encoding='utf-8'?>
<SettingsFile xmlns="uri:settings" CurrentProfile="(Default)">
<Profiles>
<Profile Name="(Default)" />
</Profiles>
<Settings />
</SettingsFile>

@@ -0,0 +1,49 @@
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Runtime.CompilerServices;
namespace DeepSpeech.WPF.ViewModels
{
/// <summary>
/// Implementation of <see cref="INotifyPropertyChanged"/> to simplify models.
/// </summary>
public abstract class BindableBase : INotifyPropertyChanged
{
/// <summary>
/// Checks if a property already matches a desired value. Sets the property and
/// notifies listeners only when necessary.
/// </summary>
/// <typeparam name="T">Type of the property.</typeparam>
/// <param name="storage">Reference to a property with both getter and setter.</param>
/// <param name="value">Desired value for the property.</param>
/// <param name="propertyName">Name of the property used to notify listeners. This
/// value is optional and can be provided automatically when invoked from compilers that
/// support CallerMemberName.</param>
/// <returns>True if the value was changed, false if the existing value matched the
/// desired value.</returns>
protected bool SetProperty<T>(ref T backingStore, T value,
[CallerMemberName]string propertyName = "",
Action onChanged = null)
{
if (EqualityComparer<T>.Default.Equals(backingStore, value))
return false;
backingStore = value;
onChanged?.Invoke();
OnPropertyChanged(propertyName);
return true;
}
#region INotifyPropertyChanged
/// <summary>
/// Notifies listeners that a property value has changed.
/// </summary>
/// <param name="propertyName">Name of the property used to notify listeners. This
/// value is optional and can be provided automatically when invoked from compilers
/// that support <see cref="CallerMemberNameAttribute"/>.</param>
public event PropertyChangedEventHandler PropertyChanged;
protected void OnPropertyChanged([CallerMemberName] string propertyName = "")
=> PropertyChanged?.Invoke(this, new PropertyChangedEventArgs(propertyName));
#endregion
}
}

@@ -0,0 +1,422 @@
using AsyncAwaitBestPractices.MVVM;
using CSCore;
using CSCore.CoreAudioAPI;
using CSCore.SoundIn;
using CSCore.Streams;
using DeepSpeechClient.Interfaces;
using GalaSoft.MvvmLight.CommandWpf;
using Microsoft.Win32;
using System;
using System.Collections.Concurrent;
using System.Collections.ObjectModel;
using System.Diagnostics;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace DeepSpeech.WPF.ViewModels
{
/// <summary>
/// View model of the MainWindow View.
/// </summary>
public class MainWindowViewModel : BindableBase
{
#region Constants
private const int SampleRate = 16000;
private const string LMPath = "lm.binary";
private const string TriePath = "trie";
#endregion
private readonly IDeepSpeech _sttClient;
#region Commands
/// <summary>
/// Gets or sets the command that enables the language model.
/// </summary>
public IAsyncCommand EnableLanguageModelCommand { get; private set; }
/// <summary>
/// Gets or sets the command that runs inference using an audio file.
/// </summary>
public IAsyncCommand InferenceFromFileCommand { get; private set; }
/// <summary>
/// Gets or sets the command that opens a dialog to select an audio file.
/// </summary>
public RelayCommand SelectFileCommand { get; private set; }
/// <summary>
/// Gets or sets the command that starts to record.
/// </summary>
public RelayCommand StartRecordingCommand { get; private set; }
/// <summary>
/// Gets or sets the command that stops the recording and gets the result.
/// </summary>
public IAsyncCommand StopRecordingCommand { get; private set; }
#endregion
#region Streaming
/// <summary>
/// Records the audio of the selected device.
/// </summary>
private WasapiCapture _audioCapture;
/// <summary>
/// Converts the device source into a wavesource.
/// </summary>
private SoundInSource _soundInSource;
/// <summary>
        /// Target wave source (16 kHz, mono, 16-bit, as required by DeepSpeech).
/// </summary>
private IWaveSource _convertedSource;
/// <summary>
/// Queue that prevents feeding data to the inference engine if it is busy.
/// </summary>
private ConcurrentQueue<short[]> _bufferQueue = new ConcurrentQueue<short[]>();
private int _threadSafeBoolBackValue = 0;
/// <summary>
        /// Lock to process items in the queue one at a time.
/// </summary>
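        // Interlocked operations make this flag safe to read and write from the
        // audio-capture callback, the queue-draining tasks, and the UI thread.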
public bool StreamingIsBusy
{
get => (Interlocked.CompareExchange(ref _threadSafeBoolBackValue, 1, 1) == 1);
set
{
if (value) Interlocked.CompareExchange(ref _threadSafeBoolBackValue, 1, 0);
else Interlocked.CompareExchange(ref _threadSafeBoolBackValue, 0, 1);
}
}
#endregion
#region ViewProperties
private bool _enableStartRecord;
/// <summary>
/// Gets or sets record status to control the record command.
/// </summary>
public bool EnableStartRecord
{
get => _enableStartRecord;
set => SetProperty(ref _enableStartRecord, value);
}
        private bool _enableStopRecord;
        /// <summary>
        /// Gets or sets record status to control the stop command.
        /// </summary>
        public bool EnableStopRecord
        {
            get => _enableStopRecord;
            set => SetProperty(ref _enableStopRecord, value,
                onChanged: () => ((AsyncCommand)StopRecordingCommand).RaiseCanExecuteChanged());
}
private MMDevice _selectedDevice;
/// <summary>
/// Gets or sets the selected recording device.
/// </summary>
public MMDevice SelectedDevice
{
get => _selectedDevice;
set => SetProperty(ref _selectedDevice, value,
onChanged: UpdateSelectedDevice);
}
private string _statusMessage;
/// <summary>
/// Gets or sets status message.
/// </summary>
public string StatusMessage
{
get => _statusMessage;
set => SetProperty(ref _statusMessage, value);
}
private bool _languageModelEnabled;
/// <summary>
/// Gets or sets the language model status.
/// </summary>
private bool LanguageModelEnabled
{
get => _languageModelEnabled;
set => SetProperty(ref _languageModelEnabled, value,
onChanged: () => ((AsyncCommand)EnableLanguageModelCommand).RaiseCanExecuteChanged());
}
private bool _isRunningInference;
/// <summary>
        /// Gets or sets whether the model is running inference.
/// </summary>
private bool IsRunningInference
{
get => _isRunningInference;
set => SetProperty(ref _isRunningInference, value,
onChanged: () => ((AsyncCommand)InferenceFromFileCommand).RaiseCanExecuteChanged());
}
private string _transcription;
/// <summary>
/// Gets or sets the current transcription.
/// </summary>
public string Transcription
{
get => _transcription;
set => SetProperty(ref _transcription, value);
}
        private string _audioFilePath;
        /// <summary>
        /// Gets or sets the selected audio file path.
        /// </summary>
        public string AudioFilePath
        {
            get => _audioFilePath;
            set => SetProperty(ref _audioFilePath, value);
}
private ObservableCollection<MMDevice> _deviceNames;
/// <summary>
/// Gets or sets the available recording devices.
/// </summary>
public ObservableCollection<MMDevice> AvailableRecordDevices
{
get => _deviceNames;
set => SetProperty(ref _deviceNames, value);
}
#endregion
#region Ctors
public MainWindowViewModel(IDeepSpeech sttClient)
{
_sttClient = sttClient;
EnableLanguageModelCommand = new AsyncCommand(()=>EnableLanguageModelAsync(LMPath,TriePath),
_ => !LanguageModelEnabled);
InferenceFromFileCommand = new AsyncCommand(ExecuteInferenceFromFileAsync,
_ => !IsRunningInference);
SelectFileCommand = new RelayCommand(SelectAudioFile);
StartRecordingCommand = new RelayCommand(StartRecording,
canExecute: CanExecuteStartRecording);
StopRecordingCommand = new AsyncCommand(StopRecordingAsync,
_ => EnableStopRecord);
LoadAvailableCaptureDevices();
}
#endregion
/// <summary>
/// Releases the current capture device and initializes the selected one.
/// </summary>
private void UpdateSelectedDevice()
{
ReleaseCapture();
InitializeAudioCapture();
}
/// <summary>
/// Releases the capture device.
/// </summary>
private void ReleaseCapture()
{
if (_audioCapture != null)
{
_audioCapture.DataAvailable -= Capture_DataAvailable;
_audioCapture.Dispose();
}
}
/// <summary>
        /// Used by the start-recording command to determine whether recording can start.
        /// </summary>
        /// <returns>True if a recording device is selected.</returns>
private bool CanExecuteStartRecording() =>
SelectedDevice != null;
/// <summary>
/// Loads all the available audio capture devices.
/// </summary>
private void LoadAvailableCaptureDevices()
{
AvailableRecordDevices = new ObservableCollection<MMDevice>(
MMDeviceEnumerator.EnumerateDevices(DataFlow.All, DeviceState.Active)); //we get only enabled devices
EnableStartRecord = true;
if (AvailableRecordDevices?.Count != 0)
SelectedDevice = AvailableRecordDevices[0];
}
/// <summary>
/// Initializes the capture source.
/// </summary>
private void InitializeAudioCapture()
{
if (SelectedDevice != null)
{
_audioCapture = SelectedDevice.DataFlow == DataFlow.Capture ?
new WasapiCapture() : new WasapiLoopbackCapture();
_audioCapture.Device = SelectedDevice;
_audioCapture.Initialize();
_audioCapture.DataAvailable += Capture_DataAvailable;
_soundInSource = new SoundInSource(_audioCapture) { FillWithZeros = false };
                //create a source that converts the data provided by the
                //soundInSource to the required format
_convertedSource = _soundInSource
.ChangeSampleRate(SampleRate) // sample rate
.ToSampleSource()
.ToWaveSource(16); //bits per sample
_convertedSource = _convertedSource.ToMono();
}
}
private void Capture_DataAvailable(object sender, DataAvailableEventArgs e)
{
            //read data from the convertedSource
            //important: don't use e.Data here; it contains the raw data provided by the
            //soundInSource, which is not in the audio format DeepSpeech requires
byte[] buffer = new byte[_convertedSource.WaveFormat.BytesPerSecond / 2];
int read;
//keep reading as long as we still get some data
while ((read = _convertedSource.Read(buffer, 0, buffer.Length)) > 0)
{
short[] sdata = new short[(int)Math.Ceiling(Convert.ToDecimal(read / 2))];
Buffer.BlockCopy(buffer, 0, sdata, 0, read);
_bufferQueue.Enqueue(sdata);
Task.Run(() => OnNewData());
}
}
/// <summary>
/// Starts processing data from the queue.
/// </summary>
private void OnNewData()
{
while (!StreamingIsBusy && !_bufferQueue.IsEmpty)
{
if (_bufferQueue.TryDequeue(out short[] buffer))
{
StreamingIsBusy = true;
_sttClient.FeedAudioContent(buffer, Convert.ToUInt32(buffer.Length));
StreamingIsBusy = false;
}
}
}
/// <summary>
/// Enables the language model.
/// </summary>
/// <param name="lmPath">Language model path.</param>
/// <param name="triePath">Trie path.</param>
/// <returns>A Task to await.</returns>
public async Task EnableLanguageModelAsync(string lmPath, string triePath)
{
try
{
StatusMessage = "Loading language model...";
const float LM_ALPHA = 0.75f;
const float LM_BETA = 1.85f;
await Task.Run(() => _sttClient.EnableDecoderWithLM(lmPath, triePath, LM_ALPHA, LM_BETA));
LanguageModelEnabled = true;
StatusMessage = "Language model loaded.";
}
catch (Exception ex)
{
StatusMessage = ex.Message;
}
}
/// <summary>
/// Runs inference and sets the transcription of an audio file.
/// </summary>
/// <returns>A Task to await.</returns>
public async Task ExecuteInferenceFromFileAsync()
{
try
{
IsRunningInference = true;
Transcription = string.Empty;
StatusMessage = "Running inference...";
Stopwatch watch = new Stopwatch();
var waveBuffer = new NAudio.Wave.WaveBuffer(File.ReadAllBytes(AudioFilePath));
using (var waveInfo = new NAudio.Wave.WaveFileReader(AudioFilePath))
{
watch.Start();
string speechResult = await Task.Run(() => _sttClient.SpeechToText(
waveBuffer.ShortBuffer,
Convert.ToUInt32(waveBuffer.MaxSize / 2)));
watch.Stop();
Transcription = $"Audio duration: {waveInfo.TotalTime.ToString()} {Environment.NewLine}" +
$"Inference took: {watch.Elapsed.ToString()} {Environment.NewLine}" +
$"Recognized text: {speechResult}";
}
waveBuffer.Clear();
StatusMessage = string.Empty;
}
catch (Exception ex)
{
StatusMessage = ex.Message;
}
finally
{
IsRunningInference = false;
}
}
/// <summary>
/// Stops the recording and sets the transcription of the closed stream.
/// </summary>
/// <returns>A Task to await.</returns>
private async Task StopRecordingAsync()
{
EnableStopRecord = false;
_audioCapture.Stop();
while (!_bufferQueue.IsEmpty && StreamingIsBusy) //we wait for all the queued buffers to be processed
{
await Task.Delay(90);
}
Transcription = _sttClient.FinishStream();
EnableStartRecord = true;
}
/// <summary>
/// Creates a new stream and starts the recording.
/// </summary>
private void StartRecording()
{
_sttClient.CreateStream();
_audioCapture.Start();
EnableStartRecord = false;
EnableStopRecord = true;
}
/// <summary>
/// Opens a dialog to select an audio file.
/// </summary>
private void SelectAudioFile()
{
OpenFileDialog dialog = new OpenFileDialog
{
Filter = "wav Files |*.wav",
Multiselect = false,
Title = "Please select a wav file."
};
if (dialog.ShowDialog() == true)
{
AudioFilePath = dialog.FileName;
}
}
}
}

View file

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="AsyncAwaitBestPractices" version="3.1.0" targetFramework="net462" />
<package id="AsyncAwaitBestPractices.MVVM" version="3.1.0" targetFramework="net462" />
<package id="CommonServiceLocator" version="2.0.2" targetFramework="net462" />
<package id="CSCore" version="1.2.1.2" targetFramework="net462" />
<package id="MvvmLightLibs" version="5.4.1.1" targetFramework="net462" />
<package id="NAudio" version="1.9.0" targetFramework="net462" />
</packages>

58
nodejs_wav/Readme.md Normal file
View file

@ -0,0 +1,58 @@
# NodeJS voice recognition example using Mozilla DeepSpeech
Download the pre-trained model (1.8GB):
```
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.6.0/deepspeech-0.6.0-models.tar.gz
tar xvfz deepspeech-0.6.0-models.tar.gz
```
Edit references to models path if necessary:
```
let modelPath = './models/output_graph.pbmm';
let lmPath = './models/lm.binary';
let triePath = './models/trie';
```
Install Sox (for .wav file loading):
```
brew install sox
```
Download test audio files:
```
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz
tar xfvz audio-0.4.1.tar.gz
```
Install NPM dependencies:
```
npm install
```
Run:
```
node index.js
```
Result should be something like:
```
audio length 1.975
result: experience proves this
```
Try other wav files with an argument:
```
node index.js audio/2830-3980-0043.wav
node index.js audio/8455-210777-0068.wav
node index.js audio/4507-16021-0012.wav
```

70
nodejs_wav/index.js Normal file
View file

@ -0,0 +1,70 @@
const DeepSpeech = require('deepspeech');
const Fs = require('fs');
const Sox = require('sox-stream');
const MemoryStream = require('memory-stream');
const Duplex = require('stream').Duplex;
const Wav = require('node-wav');
const BEAM_WIDTH = 1024;
let modelPath = './models/output_graph.pbmm';
let model = new DeepSpeech.Model(modelPath, BEAM_WIDTH);
let desiredSampleRate = model.sampleRate();
const LM_ALPHA = 0.75;
const LM_BETA = 1.85;
let lmPath = './models/lm.binary';
let triePath = './models/trie';
model.enableDecoderWithLM(lmPath, triePath, LM_ALPHA, LM_BETA);
let audioFile = process.argv[2] || './audio/2830-3980-0043.wav';
if (!Fs.existsSync(audioFile)) {
console.log('file missing:', audioFile);
process.exit();
}
const buffer = Fs.readFileSync(audioFile);
const result = Wav.decode(buffer);
if (result.sampleRate < desiredSampleRate) {
console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than ' + desiredSampleRate + 'Hz. Up-sampling might produce erratic speech recognition.');
}
function bufferToStream(buffer) {
let stream = new Duplex();
stream.push(buffer);
stream.push(null);
return stream;
}
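// Pipe the wav buffer through SoX to convert it to raw, 16-bit, signed, mono PCM at the model's sample rate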
let audioStream = new MemoryStream();
bufferToStream(buffer).
pipe(Sox({
global: {
'no-dither': true,
},
output: {
bits: 16,
rate: desiredSampleRate,
channels: 1,
encoding: 'signed-integer',
endian: 'little',
compression: 0.0,
type: 'raw'
}
})).
pipe(audioStream);
audioStream.on('finish', () => {
let audioBuffer = audioStream.toBuffer();
const audioLength = (audioBuffer.length / 2) * (1 / desiredSampleRate);
console.log('audio length', audioLength);
let result = model.stt(audioBuffer.slice(0, audioBuffer.length / 2));
console.log('result:', result);
});

17
nodejs_wav/package.json Normal file
View file

@ -0,0 +1,17 @@
{
"name": "deepspeech-nodejs_wav",
"version": "1.0.0",
"description": "Simple audio processing",
"main": "index.js",
"scripts": {
"start": "node ./index.js"
},
"dependencies": {
"argparse": "^1.0.10",
"deepspeech": "0.6.0",
"node-wav": "0.0.2",
"sox-stream": "^2.0.3",
"util": "^0.11.1"
},
"license": "Public domain"
}

18
nodejs_wav/test.sh Executable file
View file

@ -0,0 +1,18 @@
#!/bin/bash
set -xe
THIS=$(dirname "$0")
pushd ${THIS}
source ../tests.sh
npm install $(get_npm_package_url)
npm install
ln -s $HOME/DeepSpeech/models models
node index.js $HOME/DeepSpeech/audio/2830-3980-0043.wav
node index.js $HOME/DeepSpeech/audio/8455-210777-0068.wav
node index.js $HOME/DeepSpeech/audio/4507-16021-0012.wav
popd

23
tests.sh Executable file
View file

@ -0,0 +1,23 @@
#!/bin/bash
set -xe
THIS=$(dirname "$0")
source ../../taskcluster/tc-tests-utils.sh
DEP_TASK_ID=$(curl -s https://community-tc.services.mozilla.com/api/queue/v1/task/${TASK_ID} | python -c 'import json; import sys; print(" ".join(json.loads(sys.stdin.read())["dependencies"]));')
get_python_wheel_url()
{
local this_python_version=$1
extract_python_versions "${this_python_version}" "pyver" "pyver_pkg" "py_unicode_type" "pyconf" "pyalias"
echo "$(get_python_pkg_url "${pyver_pkg}" "${py_unicode_type}" "deepspeech" https://community-tc.services.mozilla.com/api/queue/v1/task/${DEP_TASK_ID}/artifacts/public)"
}
get_npm_package_url()
{
echo "https://community-tc.services.mozilla.com/api/queue/v1/task/${DEP_TASK_ID}/artifacts/public/deepspeech-${DS_VERSION}.tgz"
}

View file

@ -0,0 +1,92 @@
import sys
import os
import logging
import argparse
import subprocess
import shlex
import numpy as np
import wavTranscriber
# Debug helpers
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
def main(args):
parser = argparse.ArgumentParser(description='Transcribe long audio files using webRTC VAD or use the streaming interface')
parser.add_argument('--aggressive', type=int, choices=range(4), required=False,
help='Determines how aggressively non-speech is filtered out. (Integer between 0 and 3)')
parser.add_argument('--audio', required=False,
help='Path to the audio file to run (WAV format)')
parser.add_argument('--model', required=True,
help='Path to directory that contains all model files (output_graph, lm and trie)')
parser.add_argument('--stream', required=False, action='store_true',
help='To use deepspeech streaming interface')
args = parser.parse_args()
if args.stream is True:
print("Opening mic for streaming")
elif args.audio is not None:
logging.debug("Transcribing audio file @ %s" % args.audio)
else:
parser.print_help()
parser.exit()
# Point to a path containing the pre-trained models & resolve ~ if used
dirName = os.path.expanduser(args.model)
# Resolve all the paths of model files
output_graph, lm, trie = wavTranscriber.resolve_models(dirName)
# Load output_graph, alphabet, lm and trie
model_retval = wavTranscriber.load_model(output_graph, lm, trie)
if args.audio is not None:
title_names = ['Filename', 'Duration(s)', 'Inference Time(s)', 'Model Load Time(s)', 'LM Load Time(s)']
print("\n%-30s %-20s %-20s %-20s %s" % (title_names[0], title_names[1], title_names[2], title_names[3], title_names[4]))
inference_time = 0.0
# Run VAD on the input file
waveFile = args.audio
segments, sample_rate, audio_length = wavTranscriber.vad_segment_generator(waveFile, args.aggressive)
f = open(waveFile.rstrip(".wav") + ".txt", 'w')
logging.debug("Saving Transcript @: %s" % waveFile.rstrip(".wav") + ".txt")
for i, segment in enumerate(segments):
# Run deepspeech on the chunk that just completed VAD
logging.debug("Processing chunk %002d" % (i,))
audio = np.frombuffer(segment, dtype=np.int16)
output = wavTranscriber.stt(model_retval[0], audio, sample_rate)
inference_time += output[1]
logging.debug("Transcript: %s" % output[0])
f.write(output[0] + " ")
# Summary of the files processed
f.close()
# Extract filename from the full file path
filename, ext = os.path.splitext(os.path.basename(waveFile))
logging.debug("************************************************************************************************************")
logging.debug("%-30s %-20s %-20s %-20s %s" % (title_names[0], title_names[1], title_names[2], title_names[3], title_names[4]))
logging.debug("%-30s %-20.3f %-20.3f %-20.3f %-0.3f" % (filename + ext, audio_length, inference_time, model_retval[1], model_retval[2]))
logging.debug("************************************************************************************************************")
print("%-30s %-20.3f %-20.3f %-20.3f %-0.3f" % (filename + ext, audio_length, inference_time, model_retval[1], model_retval[2]))
else:
sctx = model_retval[0].createStream()
subproc = subprocess.Popen(shlex.split('rec -q -V0 -e signed -L -c 1 -b 16 -r 16k -t raw - gain -2'),
stdout=subprocess.PIPE,
bufsize=0)
print('You can start speaking now. Press Control-C to stop recording.')
try:
while True:
data = subproc.stdout.read(512)
model_retval[0].feedAudioContent(sctx, np.frombuffer(data, np.int16))
except KeyboardInterrupt:
print('Transcription: ', model_retval[0].finishStream(sctx))
subproc.terminate()
subproc.wait()
if __name__ == '__main__':
main(sys.argv[1:])

View file

@ -0,0 +1,388 @@
import sys
import os
import time
import logging
import traceback
import numpy as np
import wavTranscriber
from PyQt5.QtWidgets import *
from PyQt5.QtGui import *
from PyQt5.QtCore import *
import shlex
import subprocess
# Debug helpers
logging.basicConfig(stream=sys.stderr,
level=logging.DEBUG,
format='%(filename)s - %(funcName)s@%(lineno)d %(name)s:%(levelname)s %(message)s')
class WorkerSignals(QObject):
'''
Defines the signals available from a running worker thread.
Supported signals are:
finished:
No data
error
'tuple' (exctype, value, traceback.format_exc())
result
'object' data returned from processing, anything
progress
'object' indicating the transcribed result
'''
finished = pyqtSignal()
error = pyqtSignal(tuple)
result = pyqtSignal(object)
progress = pyqtSignal(object)
class Worker(QRunnable):
'''
Worker Thread
Inherits from QRunnable to handle worker thread setup, signals and wrap-up
@param callback:
The function callback to run on this worker thread.
Supplied args and kwargs will be passed through the runner.
@type callback: function
@param args: Arguments to pass to the callback function
@param kwargs: Keywords to pass to the callback function
'''
def __init__(self, fn, *args, **kwargs):
super(Worker, self).__init__()
# Store the constructor arguments (re-used for processing)
self.fn = fn
self.args = args
self.kwargs = kwargs
self.signals = WorkerSignals()
# Add the callback to our kwargs
self.kwargs['progress_callback'] = self.signals.progress
@pyqtSlot()
def run(self):
'''
Initialise the runner function with the passed args, kwargs
'''
# Retrieve args/kwargs here; and fire up the processing using them
try:
transcript = self.fn(*self.args, **self.kwargs)
except:
traceback.print_exc()
exctype, value = sys.exc_info()[:2]
self.signals.error.emit((exctype, value, traceback.format_exc()))
else:
# Return the result of the processing
self.signals.result.emit(transcript)
finally:
# Done
self.signals.finished.emit()
class App(QMainWindow):
dirName = ""
def __init__(self):
super().__init__()
self.title = 'Deepspeech Transcriber'
self.left = 10
self.top = 10
self.width = 480
self.height = 400
self.initUI()
def initUI(self):
self.setWindowTitle(self.title)
self.setGeometry(self.left, self.top, self.width, self.height)
layout = QGridLayout()
layout.setSpacing(10)
self.microphone = QRadioButton("Microphone")
self.fileUpload = QRadioButton("File Upload")
self.browseBox = QLineEdit(self, placeholderText="Wave File, Mono @ 16 kHz, 16bit Little-Endian")
self.modelsBox = QLineEdit(self, placeholderText="Directory path for output_graph, lm & trie")
self.textboxTranscript = QPlainTextEdit(self, placeholderText="Transcription")
self.browseButton = QPushButton('Browse', self)
self.browseButton.setToolTip('Select a wav file')
self.modelsButton = QPushButton('Browse', self)
self.modelsButton.setToolTip('Select deepspeech models folder')
self.transcribeWav = QPushButton('Transcribe Wav', self)
self.transcribeWav.setToolTip('Start Wav Transcription')
self.openMicrophone = QPushButton('Start Speaking', self)
self.openMicrophone.setToolTip('Open Microphone')
layout.addWidget(self.microphone, 0, 1, 1, 2)
layout.addWidget(self.fileUpload, 0, 3, 1, 2)
layout.addWidget(self.browseBox, 1, 0, 1, 4)
layout.addWidget(self.browseButton, 1, 4)
layout.addWidget(self.modelsBox, 2, 0, 1, 4)
layout.addWidget(self.modelsButton, 2, 4)
layout.addWidget(self.transcribeWav, 3, 1, 1, 1)
layout.addWidget(self.openMicrophone, 3, 3, 1, 1)
layout.addWidget(self.textboxTranscript, 5, 0, -1, 0)
w = QWidget()
w.setLayout(layout)
self.setCentralWidget(w)
# Microphone
self.microphone.clicked.connect(self.mic_activate)
# File Upload
self.fileUpload.clicked.connect(self.wav_activate)
# Connect Browse Button to Function on_click
self.browseButton.clicked.connect(self.browse_on_click)
# Connect the Models Button
self.modelsButton.clicked.connect(self.models_on_click)
# Connect Transcription button to threadpool
self.transcribeWav.clicked.connect(self.transcriptionStart_on_click)
# Connect Microphone button to threadpool
self.openMicrophone.clicked.connect(self.openMicrophone_on_click)
self.openMicrophone.setCheckable(True)
self.openMicrophone.toggle()
self.browseButton.setEnabled(False)
self.browseBox.setEnabled(False)
self.modelsBox.setEnabled(False)
self.modelsButton.setEnabled(False)
self.transcribeWav.setEnabled(False)
self.openMicrophone.setEnabled(False)
self.show()
# Setup Threadpool
self.threadpool = QThreadPool()
logging.debug("Multithreading with maximum %d threads" % self.threadpool.maxThreadCount())
@pyqtSlot()
def mic_activate(self):
logging.debug("Enable streaming widgets")
self.en_mic = True
self.browseButton.setEnabled(False)
self.browseBox.setEnabled(False)
self.modelsBox.setEnabled(True)
self.modelsButton.setEnabled(True)
self.transcribeWav.setEnabled(False)
self.openMicrophone.setStyleSheet('QPushButton {background-color: #70cc7c; color: black;}')
self.openMicrophone.setEnabled(True)
@pyqtSlot()
def wav_activate(self):
logging.debug("Enable wav transcription widgets")
self.en_mic = False
self.openMicrophone.setStyleSheet('QPushButton {background-color: #f7f7f7; color: black;}')
self.openMicrophone.setEnabled(False)
self.browseButton.setEnabled(True)
self.browseBox.setEnabled(True)
self.modelsBox.setEnabled(True)
self.modelsButton.setEnabled(True)
@pyqtSlot()
def browse_on_click(self):
logging.debug('Browse button clicked')
options = QFileDialog.Options()
options |= QFileDialog.DontUseNativeDialog
self.fileName, _ = QFileDialog.getOpenFileName(self, "Select wav file to be Transcribed", "","All Files (*.wav)")
if self.fileName:
self.browseBox.setText(self.fileName)
self.transcribeWav.setEnabled(True)
logging.debug(self.fileName)
@pyqtSlot()
def models_on_click(self):
logging.debug('Models Browse Button clicked')
self.dirName = QFileDialog.getExistingDirectory(self, "Select deepspeech models directory")
if self.dirName:
self.modelsBox.setText(self.dirName)
logging.debug(self.dirName)
# Threaded signal passing worker functions
worker = Worker(self.modelWorker, self.dirName)
worker.signals.result.connect(self.modelResult)
worker.signals.finished.connect(self.modelFinish)
worker.signals.progress.connect(self.modelProgress)
# Execute
self.threadpool.start(worker)
else:
logging.critical("*****************************************************")
logging.critical("Model path not specified..")
logging.critical("*****************************************************")
return "Transcription Failed, models path not specified"
def modelWorker(self, dirName, progress_callback):
self.textboxTranscript.setPlainText("Loading Models...")
self.openMicrophone.setStyleSheet('QPushButton {background-color: #f7f7f7; color: black;}')
self.openMicrophone.setEnabled(False)
self.show()
time.sleep(1)
return dirName
def modelProgress(self, s):
# FixMe: Write code to show progress here
pass
def modelResult(self, dirName):
# Fetch and Resolve all the paths of model files
output_graph, lm, trie = wavTranscriber.resolve_models(dirName)
# Load output_graph, alphabet, lm and trie
self.model = wavTranscriber.load_model(output_graph, lm, trie)
def modelFinish(self):
# self.timer.stop()
self.textboxTranscript.setPlainText("Loaded Models, start transcribing")
if self.en_mic is True:
self.openMicrophone.setStyleSheet('QPushButton {background-color: #70cc7c; color: black;}')
self.openMicrophone.setEnabled(True)
self.show()
@pyqtSlot()
def transcriptionStart_on_click(self):
logging.debug('Transcription Start button clicked')
# Clear out older data
self.textboxTranscript.setPlainText("")
self.show()
# Threaded signal passing worker functions
worker = Worker(self.wavWorker, self.fileName)
worker.signals.progress.connect(self.progress)
worker.signals.result.connect(self.transcription)
worker.signals.finished.connect(self.wavFinish)
# Execute
self.threadpool.start(worker)
@pyqtSlot()
def openMicrophone_on_click(self):
logging.debug('Preparing to open microphone...')
# Clear out older data
self.textboxTranscript.setPlainText("")
self.show()
# Threaded signal passing worker functions
# Prepare env for capturing from microphone and offload work to micWorker worker thread
if (not self.openMicrophone.isChecked()):
self.openMicrophone.setStyleSheet('QPushButton {background-color: #C60000; color: black;}')
self.openMicrophone.setText("Stop")
logging.debug("Start Recording pressed")
logging.debug("Preparing for transcription...")
sctx = self.model[0].createStream()
subproc = subprocess.Popen(shlex.split('rec -q -V0 -e signed -L -c 1 -b 16 -r 16k -t raw - gain -2'),
stdout=subprocess.PIPE,
bufsize=0)
self.textboxTranscript.insertPlainText('You can start speaking now\n\n')
self.show()
logging.debug('You can start speaking now')
context = (sctx, subproc, self.model[0])
# Pass the state to streaming worker
worker = Worker(self.micWorker, context)
worker.signals.progress.connect(self.progress)
worker.signals.result.connect(self.transcription)
worker.signals.finished.connect(self.micFinish)
# Execute
self.threadpool.start(worker)
else:
logging.debug("Stop Recording")
'''
Capture the audio stream from the microphone.
The context is prepared by openMicrophone_on_click().
@param context: A tuple containing three objects:
1. The DeepSpeech streaming context (sctx)
2. The recording subprocess handle
3. The DeepSpeech model object
'''
def micWorker(self, context, progress_callback):
# Deepspeech Streaming will be run from this method
logging.debug("Recording from your microphone")
while (not self.openMicrophone.isChecked()):
data = context[1].stdout.read(512)
context[2].feedAudioContent(context[0], np.frombuffer(data, np.int16))
else:
transcript = context[2].finishStream(context[0])
context[1].terminate()
context[1].wait()
self.show()
progress_callback.emit(transcript)
return "\n*********************\nTranscription Done..."
def micFinish(self):
self.openMicrophone.setText("Start Speaking")
self.openMicrophone.setStyleSheet('QPushButton {background-color: #70cc7c; color: black;}')
def transcription(self, out):
logging.debug("%s" % out)
self.textboxTranscript.insertPlainText(out)
self.show()
def wavFinish(self):
logging.debug("File processed")
def progress(self, chunk):
logging.debug("Progress: %s" % chunk)
self.textboxTranscript.insertPlainText(chunk)
self.show()
def wavWorker(self, waveFile, progress_callback):
# Deepspeech will be run from this method
logging.debug("Preparing for transcription...")
inference_time = 0.0
# Run VAD on the input file
segments, sample_rate, audio_length = wavTranscriber.vad_segment_generator(waveFile, 1)
f = open(waveFile.rstrip(".wav") + ".txt", 'w')
logging.debug("Saving Transcript @: %s" % waveFile.rstrip(".wav") + ".txt")
for i, segment in enumerate(segments):
# Run deepspeech on the chunk that just completed VAD
logging.debug("Processing chunk %002d" % (i,))
audio = np.frombuffer(segment, dtype=np.int16)
output = wavTranscriber.stt(self.model[0], audio, sample_rate)
inference_time += output[1]
f.write(output[0] + " ")
progress_callback.emit(output[0] + " ")
# Summary of the files processed
f.close()
# Format pretty, extract filename from the full file path
filename, ext = os.path.splitext(os.path.basename(waveFile))
title_names = ['Filename', 'Duration(s)', 'Inference Time(s)', 'Model Load Time(s)', 'LM Load Time(s)']
logging.debug("************************************************************************************************************")
logging.debug("%-30s %-20s %-20s %-20s %s" % (title_names[0], title_names[1], title_names[2], title_names[3], title_names[4]))
logging.debug("%-30s %-20.3f %-20.3f %-20.3f %-0.3f" % (filename + ext, audio_length, inference_time, self.model[1], self.model[2]))
logging.debug("************************************************************************************************************")
print("\n%-30s %-20s %-20s %-20s %s" % (title_names[0], title_names[1], title_names[2], title_names[3], title_names[4]))
print("%-30s %-20.3f %-20.3f %-20.3f %-0.3f" % (filename + ext, audio_length, inference_time, self.model[1], self.model[2]))
return "\n*********************\nTranscription Done..."
def main(args):
app = QApplication(sys.argv)
w = App()
sys.exit(app.exec_())
if __name__ == '__main__':
main(sys.argv[1:])

View file

@ -0,0 +1,3 @@
deepspeech==0.6.0
webrtcvad
pyqt5

23
vad_transcriber/test.sh Executable file
View file

@ -0,0 +1,23 @@
#!/bin/bash
set -xe
THIS=$(dirname "$0")
pushd ${THIS}
source ../tests.sh
pip install --user $(get_python_wheel_url "$1")
pip install --user -r requirements.txt
python audioTranscript_cmd.py \
--audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \
--aggressive 0 \
--model $HOME/DeepSpeech/models/
python audioTranscript_cmd.py \
--audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \
--aggressive 0 \
--model $HOME/DeepSpeech/models/ \
--stream
popd

134
vad_transcriber/wavSplit.py Normal file
View file

@ -0,0 +1,134 @@
import collections
import contextlib
import wave
def read_wave(path):
"""Reads a .wav file.
Takes the path, and returns (PCM audio data, sample rate).
"""
with contextlib.closing(wave.open(path, 'rb')) as wf:
num_channels = wf.getnchannels()
assert num_channels == 1
sample_width = wf.getsampwidth()
assert sample_width == 2
sample_rate = wf.getframerate()
assert sample_rate in (8000, 16000, 32000)
frames = wf.getnframes()
pcm_data = wf.readframes(frames)
duration = frames / sample_rate
return pcm_data, sample_rate, duration
def write_wave(path, audio, sample_rate):
"""Writes a .wav file.
Takes path, PCM audio data, and sample rate.
"""
with contextlib.closing(wave.open(path, 'wb')) as wf:
wf.setnchannels(1)
wf.setsampwidth(2)
wf.setframerate(sample_rate)
wf.writeframes(audio)
class Frame(object):
"""Represents a "frame" of audio data."""
def __init__(self, bytes, timestamp, duration):
self.bytes = bytes
self.timestamp = timestamp
self.duration = duration
def frame_generator(frame_duration_ms, audio, sample_rate):
"""Generates audio frames from PCM audio data.
Takes the desired frame duration in milliseconds, the PCM data, and
the sample rate.
Yields Frames of the requested duration.
"""
n = int(sample_rate * (frame_duration_ms / 1000.0) * 2)
offset = 0
timestamp = 0.0
duration = (float(n) / sample_rate) / 2.0
while offset + n < len(audio):
yield Frame(audio[offset:offset + n], timestamp, duration)
timestamp += duration
offset += n
def vad_collector(sample_rate, frame_duration_ms,
padding_duration_ms, vad, frames):
"""Filters out non-voiced audio frames.
Given a webrtcvad.Vad and a source of audio frames, yields only
the voiced audio.
Uses a padded, sliding window algorithm over the audio frames.
When more than 90% of the frames in the window are voiced (as
reported by the VAD), the collector triggers and begins yielding
audio frames. Then the collector waits until 90% of the frames in
the window are unvoiced to detrigger.
The window is padded at the front and back to provide a small
amount of silence or the beginnings/endings of speech around the
voiced frames.
Arguments:
sample_rate - The audio sample rate, in Hz.
frame_duration_ms - The frame duration in milliseconds.
padding_duration_ms - The amount to pad the window, in milliseconds.
vad - An instance of webrtcvad.Vad.
frames - a source of audio frames (sequence or generator).
Returns: A generator that yields PCM audio data.
"""
num_padding_frames = int(padding_duration_ms / frame_duration_ms)
# We use a deque for our sliding window/ring buffer.
ring_buffer = collections.deque(maxlen=num_padding_frames)
# We have two states: TRIGGERED and NOTTRIGGERED. We start in the
# NOTTRIGGERED state.
triggered = False
voiced_frames = []
for frame in frames:
is_speech = vad.is_speech(frame.bytes, sample_rate)
if not triggered:
ring_buffer.append((frame, is_speech))
num_voiced = len([f for f, speech in ring_buffer if speech])
# If we're NOTTRIGGERED and more than 90% of the frames in
# the ring buffer are voiced frames, then enter the
# TRIGGERED state.
if num_voiced > 0.9 * ring_buffer.maxlen:
triggered = True
# We want to yield all the audio we see from now until
# we are NOTTRIGGERED, but we have to start with the
# audio that's already in the ring buffer.
for f, s in ring_buffer:
voiced_frames.append(f)
ring_buffer.clear()
else:
# We're in the TRIGGERED state, so collect the audio data
# and add it to the ring buffer.
voiced_frames.append(frame)
ring_buffer.append((frame, is_speech))
num_unvoiced = len([f for f, speech in ring_buffer if not speech])
# If more than 90% of the frames in the ring buffer are
# unvoiced, then enter NOTTRIGGERED and yield whatever
# audio we've collected.
if num_unvoiced > 0.9 * ring_buffer.maxlen:
triggered = False
yield b''.join([f.bytes for f in voiced_frames])
ring_buffer.clear()
voiced_frames = []
if triggered:
pass
# If we have any leftover voiced audio when we run out of input,
# yield it.
if voiced_frames:
yield b''.join([f.bytes for f in voiced_frames])

View file

@ -0,0 +1,97 @@
import glob
import webrtcvad
import logging
import wavSplit
from deepspeech import Model
from timeit import default_timer as timer
'''
Load the pre-trained model into memory
@param models: Output Graph Protocol Buffer file
@param lm: Language model file
@param trie: Trie file
@Retval
Returns a list [DeepSpeech Object, Model Load Time, LM Load Time]
'''
def load_model(models, lm, trie):
BEAM_WIDTH = 500
LM_ALPHA = 0.75
LM_BETA = 1.85
model_load_start = timer()
ds = Model(models, BEAM_WIDTH)
model_load_end = timer() - model_load_start
logging.debug("Loaded model in %0.3fs." % (model_load_end))
lm_load_start = timer()
ds.enableDecoderWithLM(lm, trie, LM_ALPHA, LM_BETA)
lm_load_end = timer() - lm_load_start
logging.debug('Loaded language model in %0.3fs.' % (lm_load_end))
return [ds, model_load_end, lm_load_end]
'''
Run Inference on input audio file
@param ds: Deepspeech object
@param audio: Input audio for running inference on
@param fs: Sample rate of the input audio file
@Retval:
Returns a list [Inference, Inference Time, Audio Length]
'''
def stt(ds, audio, fs):
inference_time = 0.0
audio_length = len(audio) * (1 / fs)
# Run Deepspeech
logging.debug('Running inference...')
inference_start = timer()
output = ds.stt(audio)
inference_end = timer() - inference_start
inference_time += inference_end
logging.debug('Inference took %0.3fs for %0.3fs audio file.' % (inference_end, audio_length))
return [output, inference_time]
'''
Resolve directory path for the models and fetch each of them.
@param dirName: Path to the directory containing pre-trained models
@Retval:
Returns a tuple containing each of the model files (pb, lm and trie)
'''
def resolve_models(dirName):
pb = glob.glob(dirName + "/*.pb")[0]
logging.debug("Found Model: %s" % pb)
lm = glob.glob(dirName + "/lm.binary")[0]
trie = glob.glob(dirName + "/trie")[0]
logging.debug("Found Language Model: %s" % lm)
logging.debug("Found Trie: %s" % trie)
return pb, lm, trie
'''
Generate VAD segments. Filters out non-voiced audio frames.
@param wavFile: Input wav file to run VAD on.
@param aggressiveness: How aggressively non-speech is filtered out (integer between 0 and 3).
@Retval:
Returns a tuple of
segments: a bytearray of multiple smaller audio frames
(the longer audio split into multiple smaller ones)
sample_rate: Sample rate of the input audio file
audio_length: Duration of the input audio file
'''
def vad_segment_generator(wavFile, aggressiveness):
logging.debug("Caught the wav file @: %s" % (wavFile))
audio, sample_rate, audio_length = wavSplit.read_wave(wavFile)
assert sample_rate == 16000, "Only 16000Hz input WAV files are supported for now!"
vad = webrtcvad.Vad(int(aggressiveness))
frames = wavSplit.frame_generator(30, audio, sample_rate)
frames = list(frames)
segments = wavSplit.vad_collector(sample_rate, 30, 300, vad, frames)
return segments, sample_rate, audio_length

View file

@ -0,0 +1,107 @@
## Transcribing longer audio clips
The command-line and GUI tools perform transcription on long wav files.
They take a wav file of any duration, use the WebRTC Voice Activity Detector (VAD)
to split it into smaller chunks, and finally save a consolidated transcript.
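For orientation, here is a minimal sketch of that pipeline using the `wavTranscriber` helper module shipped in this directory (the `./models` directory and the `sample.wav` file name are placeholders; complete the setup steps below before trying it):
```
import numpy as np
import wavTranscriber

# Resolve the model files (output_graph, lm, trie) inside a models directory
output_graph, lm, trie = wavTranscriber.resolve_models('./models')
# load_model returns [DeepSpeech object, model load time, LM load time]
model, model_load_time, lm_load_time = wavTranscriber.load_model(output_graph, lm, trie)

# Split the input wav into voiced segments with WebRTC VAD (aggressiveness 1)
segments, sample_rate, audio_length = wavTranscriber.vad_segment_generator('sample.wav', 1)

# Transcribe each voiced segment and stitch the results together
transcript = ""
for segment in segments:
    audio = np.frombuffer(segment, dtype=np.int16)
    text, inference_time = wavTranscriber.stt(model, audio, sample_rate)
    transcript += text + " "
print(transcript)
```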
### 0. Prerequisites
#### 0.1 Install required packages
Install the package that provides the `rec` utility on the machine:
Fedora:
``` sudo dnf install sox ```
Tested on: Fedora 29
Ubuntu/Debian:
``` sudo apt install sox ```
A list of distributions where the package is available can be found at: https://pkgs.org/download/sox
#### 0.2 Download DeepSpeech
Either clone the repository via git clone, or download a release from the releases page.
For the next steps we assume you have extracted the files to ~/Deepspeech.
#### 0.3 Set up your environment
Ubuntu/Debian:
```
~/Deepspeech$ sudo apt install virtualenv
~/Deepspeech$ cd examples/vad_transcriber
~/Deepspeech/examples/vad_transcriber$ virtualenv -p python3 venv
~/Deepspeech/examples/vad_transcriber$ source venv/bin/activate
(venv) ~/Deepspeech/examples/vad_transcriber$ pip3 install -r requirements.txt
```
Fedora
```
~/Deepspeech$ sudo dnf install python-virtualenv
~/Deepspeech$ cd examples/vad_transcriber
~/Deepspeech/examples/vad_transcriber$ virtualenv -p python3 venv
~/Deepspeech/examples/vad_transcriber$ source venv/bin/activate
(venv) ~/Deepspeech/examples/vad_transcriber$ pip3 install -r requirements.txt
```
Tested on: Fedora 29
### 1. Command line tool
The command line tool processes a wav file of any duration and returns a transcript,
which will be saved in the same directory as the input audio file.
The command line tool gives you control over the aggressiveness of the VAD.
Set the aggressiveness mode to an integer between 0 and 3:
0 is the least aggressive about filtering out non-speech, 3 is the most aggressive.
```
(venv) ~/Deepspeech/examples/vad_transcriber
$ python3 audioTranscript_cmd.py --aggressive 1 --audio ./audio/guido-van-rossum.wav --model ./models/0.4.1/
Filename Duration(s) Inference Time(s) Model Load Time(s) LM Load Time(s)
sample_rec.wav 13.710 20.797 5.593 17.742
```
**Note:** Only `wav` files with a 16 kHz sample rate are supported for now. You can convert your files to the appropriate format with ffmpeg, if it is available on your system:
`ffmpeg -i infile.mp3 -ar 16000 -ac 1 outfile.wav`
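If ffmpeg is not available, sox (installed above for the `rec` utility) can usually perform a similar conversion for wav input; treat this as an untested suggestion rather than part of the original instructions:
`sox infile.wav -r 16000 -c 1 -b 16 outfile.wav`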
### 2. Minimalistic GUI
The GUI tool does the same job as the CLI tool. The VAD is fixed at an aggressiveness of 1.
The output is displayed in the transcription window and also saved into the same directory
as the input audio file.
```
(venv) ~/Deepspeech/examples/vad_transcriber
$ python3 audioTranscript_gui.py
```
![Deepspeech Transcriber](../../doc/audioTranscript.png)
#### 2.1. Sporadic failures in pyqt
Some systems run into a **_Cannot mix incompatible Qt library with this library_** error.
In such a scenario, the GUI tool will not work. The following steps are known to have solved the issue in most cases:
```
(venv) ~/Deepspeech/examples/vad_transcriber$ pip3 uninstall pyqt5
(venv) ~/Deepspeech/examples/vad_transcriber$ sudo apt install python3-pyqt5 canberra-gtk-module
(venv) ~/Deepspeech/examples/vad_transcriber$ export PYTHONPATH=/usr/lib/python3/dist-packages/
(venv) ~/Deepspeech/examples/vad_transcriber$ python3 audioTranscript_gui.py
```
#### 2.2 Useful Tips
##### The GUI program immediately crashes when you press start recording
This happens when you don't load the models via the "Browse Models" button before pressing the "Start recording" button.
##### What does error XYZ mean?
You can find a list of error codes and what they mean at https://deepspeech.readthedocs.io/en/latest/Error-Codes.html