Adapt to decoder API changes and new scorer packaging

Reuben Morais 2020-01-20 16:20:22 +01:00
Parent b4e3853064
Commit 4b97ac41d0
15 changed files with 88 additions and 135 deletions

View file

@ -21,41 +21,37 @@ sudo apt-get install ffmpeg
Here is an example for a local audio file:
```bash
node ./index.js --audio <AUDIO_FILE> \
--model $HOME/models/output_graph.pbmm \
--model $HOME/models/output_graph.pbmm
```
Here is an example for a remote RTMP-Stream:
```bash
node ./index.js --audio rtmp://<IP>:1935/live/teststream \
--model $HOME/models/output_graph.pbmm \
--model $HOME/models/output_graph.pbmm
```
## Examples
Real time streaming inference with DeepSpeech's example audio ([audio-0.4.1.tar.gz](https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz)).
```bash
node ./index.js --audio $HOME/audio/2830-3980-0043.wav \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm \
--scorer $HOME/models/kenlm.scorer \
--model $HOME/models/output_graph.pbmm
```
```bash
node ./index.js --audio $HOME/audio/4507-16021-0012.wav \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm \
--scorer $HOME/models/kenlm.scorer \
--model $HOME/models/output_graph.pbmm
```
```bash
node ./index.js --audio $HOME/audio/8455-210777-0068.wav \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm \
--scorer $HOME/models/kenlm.scorer \
--model $HOME/models/output_graph.pbmm
```
Real time streaming inference in combination with a RTMP server.
```bash
node ./index.js --audio rtmp://<HOST>/<APP>/<KEY> \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm \
--scorer $HOME/models/kenlm.scorer \
--model $HOME/models/output_graph.pbmm
```
## Notes

View file

@ -11,12 +11,6 @@ const { spawn } = require('child_process');
// Beam width used in the CTC decoder when building candidate transcriptions
const BEAM_WIDTH = 500;
// The alpha hyperparameter of the CTC decoder. Language Model weight
const LM_ALPHA = 0.75;
// The beta hyperparameter of the CTC decoder. Word insertion bonus.
const LM_BETA = 1.85;
let VersionAction = function VersionAction(options) {
options = options || {};
options.nargs = 0;
@ -32,8 +26,7 @@ VersionAction.prototype.call = function(parser) {
let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
parser.addArgument(['--scorer'], {help: 'Path to the scorer file', nargs: '?'});
parser.addArgument(['--audio'], {required: true, help: 'Path to the audio source to run (ffmpeg supported formats)'});
parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exits'});
let args = parser.parseArgs();
@ -48,12 +41,12 @@ let model = new Ds.Model(args['model'], BEAM_WIDTH);
const model_load_end = process.hrtime(model_load_start);
console.error('Loaded model in %ds.', totalTime(model_load_end));
if (args['lm'] && args['trie']) {
console.error('Loading language model from files %s %s', args['lm'], args['trie']);
const lm_load_start = process.hrtime();
model.enableDecoderWithLM(args['lm'], args['trie'], LM_ALPHA, LM_BETA);
const lm_load_end = process.hrtime(lm_load_start);
console.error('Loaded language model in %ds.', totalTime(lm_load_end));
if (args['scorer']) {
console.error('Loading scorer from file %s', args['scorer']);
const scorer_load_start = process.hrtime();
model.enableExternalScorer(args['scorer']);
const scorer_load_end = process.hrtime(scorer_load_start);
console.error('Loaded scorer in %ds.', totalTime(scorer_load_end));
}
// Default is 16kHz
@ -99,7 +92,7 @@ let sctx = model.createStream();
function finishStream() {
const model_load_start = process.hrtime();
console.error('Running inference.');
console.log('Transcription: ', model.finishStream(sctx));
console.log('Transcription: ', sctx.finishStream());
const model_load_end = process.hrtime(model_load_start);
console.error('Inference took %ds for %ds audio file.', totalTime(model_load_end), audioLength.toPrecision(4));
audioLength = 0;
@ -112,7 +105,7 @@ function intermediateDecode() {
function feedAudioContent(chunk) {
audioLength += (chunk.length / 2) * ( 1 / AUDIO_SAMPLE_RATE);
model.feedAudioContent(sctx, chunk.slice(0, chunk.length / 2));
sctx.feedAudioContent(chunk.slice(0, chunk.length / 2));
}
function processVad(data) {
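
The API change above is the same across bindings: `enableExternalScorer(path)` replaces `enableDecoderWithLM(lm, trie, alpha, beta)`, and `feedAudioContent`/`finishStream` move from the model onto the stream object returned by `createStream()`. A minimal Python sketch of the new flow, for reference only (the paths and WAV file are placeholders, not part of this commit):
```python
import wave

import numpy as np
import deepspeech

# Placeholder paths; use the files shipped with the release model package.
MODEL_PATH = 'output_graph.pbmm'
SCORER_PATH = 'kenlm.scorer'
BEAM_WIDTH = 500

model = deepspeech.Model(MODEL_PATH, BEAM_WIDTH)
# Single call replaces enableDecoderWithLM(lm, trie, LM_ALPHA, LM_BETA).
model.enableExternalScorer(SCORER_PATH)

# Streaming: the methods now live on the stream object, not the model.
stream = model.createStream()
with wave.open('audio.wav', 'rb') as wav:  # assumed 16-bit mono at the model's sample rate
    frames = wav.readframes(wav.getnframes())
stream.feedAudioContent(np.frombuffer(frames, np.int16))
print('Transcription:', stream.finishStream())
```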

View file

@ -11,17 +11,14 @@ pushd ${THIS}
npm install
node ./index.js --audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \
--lm $HOME/DeepSpeech/models/lm.binary \
--trie $HOME/DeepSpeech/models/trie \
--scorer $HOME/DeepSpeech/models/kenlm.scorer \
--model $HOME/DeepSpeech/models/output_graph.pbmm
node ./index.js --audio $HOME/DeepSpeech/audio/4507-16021-0012.wav \
--lm $HOME/DeepSpeech/models/lm.binary \
--trie $HOME/DeepSpeech/models/trie \
--scorer $HOME/DeepSpeech/models/kenlm.scorer \
--model $HOME/DeepSpeech/models/output_graph.pbmm
node ./index.js --audio $HOME/DeepSpeech/audio/8455-210777-0068.wav \
--lm $HOME/DeepSpeech/models/lm.binary \
--trie $HOME/DeepSpeech/models/trie \
--scorer $HOME/DeepSpeech/models/kenlm.scorer \
--model $HOME/DeepSpeech/models/output_graph.pbmm
popd

View file

@ -29,9 +29,8 @@ Usage
.. code-block::
usage: mic_vad_streaming.py [-h] [-v VAD_AGGRESSIVENESS] [--nospinner]
[-w SAVEWAV] -m MODEL [-l LM]
[-t TRIE] [-nf N_FEATURES] [-nc N_CONTEXT]
[-la LM_ALPHA] [-lb LM_BETA]
[-w SAVEWAV] -m MODEL [-s SCORER]
[-nf N_FEATURES] [-nc N_CONTEXT]
[-bw BEAM_WIDTH]
Stream from microphone to DeepSpeech using VAD
@ -49,21 +48,13 @@ Usage
Path to the model (protocol buffer binary file, or
entire directory containing all standard-named files
for model)
-l LM, --lm LM Path to the language model binary file. Default:
lm.binary
-t TRIE, --trie TRIE Path to the language model trie file created with
native_client/generate_trie. Default: trie
-s SCORER, --scorer SCORER
Path to the external scorer file. Default: kenlm.scorer
-nf N_FEATURES, --n_features N_FEATURES
Number of MFCC features to use. Default: 26
-nc N_CONTEXT, --n_context N_CONTEXT
Size of the context window used for producing
timesteps in the input vector. Default: 9
-la LM_ALPHA, --lm_alpha LM_ALPHA
The alpha hyperparameter of the CTC decoder. Language
Model weight. Default: 0.75
-lb LM_BETA, --lm_beta LM_BETA
The beta hyperparameter of the CTC decoder. Word insertion
bonus. Default: 1.85
-bw BEAM_WIDTH, --beam_width BEAM_WIDTH
Beam width used in the CTC decoder when building
candidate transcriptions. Default: 500

View file

@ -156,16 +156,14 @@ def main(ARGS):
if os.path.isdir(ARGS.model):
model_dir = ARGS.model
ARGS.model = os.path.join(model_dir, 'output_graph.pb')
ARGS.lm = os.path.join(model_dir, ARGS.lm)
ARGS.trie = os.path.join(model_dir, ARGS.trie)
ARGS.scorer = os.path.join(model_dir, ARGS.scorer)
print('Initializing model...')
logging.info("ARGS.model: %s", ARGS.model)
model = deepspeech.Model(ARGS.model, ARGS.beam_width)
if ARGS.lm and ARGS.trie:
logging.info("ARGS.lm: %s", ARGS.lm)
logging.info("ARGS.trie: %s", ARGS.trie)
model.enableDecoderWithLM(ARGS.lm, ARGS.trie, ARGS.lm_alpha, ARGS.lm_beta)
if ARGS.scorer:
logging.info("ARGS.scorer: %s", ARGS.scorer)
model.enableExternalScorer(ARGS.scorer)
# Start audio with VAD
vad_audio = VADAudio(aggressiveness=ARGS.vad_aggressiveness,
@ -185,7 +183,7 @@ def main(ARGS):
if frame is not None:
if spinner: spinner.start()
logging.debug("streaming frame")
model.feedAudioContent(stream_context, np.frombuffer(frame, np.int16))
stream_context.feedAudioContent(np.frombuffer(frame, np.int16))
if ARGS.savewav: wav_data.extend(frame)
else:
if spinner: spinner.stop()
@ -193,15 +191,13 @@ def main(ARGS):
if ARGS.savewav:
vad_audio.write_wav(os.path.join(ARGS.savewav, datetime.now().strftime("savewav_%Y-%m-%d_%H-%M-%S_%f.wav")), wav_data)
wav_data = bytearray()
text = model.finishStream(stream_context)
text = stream_context.finishStream()
print("Recognized: %s" % text)
stream_context = model.createStream()
if __name__ == '__main__':
BEAM_WIDTH = 500
DEFAULT_SAMPLE_RATE = 16000
LM_ALPHA = 0.75
LM_BETA = 1.85
import argparse
parser = argparse.ArgumentParser(description="Stream from microphone to DeepSpeech using VAD")
@ -217,18 +213,12 @@ if __name__ == '__main__':
parser.add_argument('-m', '--model', required=True,
help="Path to the model (protocol buffer binary file, or entire directory containing all standard-named files for model)")
parser.add_argument('-l', '--lm', default='lm.binary',
help="Path to the language model binary file. Default: lm.binary")
parser.add_argument('-t', '--trie', default='trie',
help="Path to the language model trie file created with native_client/generate_trie. Default: trie")
parser.add_argument('-s', '--scorer', default='kenlm.scorer',
help="Path to the external scorer file. Default: kenlm.scorer")
parser.add_argument('-d', '--device', type=int, default=None,
help="Device input index (Int) as listed by pyaudio.PyAudio.get_device_info_by_index(). If not provided, falls back to PyAudio.get_default_device().")
parser.add_argument('-r', '--rate', type=int, default=DEFAULT_SAMPLE_RATE,
help=f"Input device sample rate. Default: {DEFAULT_SAMPLE_RATE}. Your device may require 44100.")
parser.add_argument('-la', '--lm_alpha', type=float, default=LM_ALPHA,
help=f"The alpha hyperparameter of the CTC decoder. Language Model weight. Default: {LM_ALPHA}")
parser.add_argument('-lb', '--lm_beta', type=float, default=LM_BETA,
help=f"The beta hyperparameter of the CTC decoder. Word insertion bonus. Default: {LM_BETA}")
parser.add_argument('-bw', '--beam_width', type=int, default=BEAM_WIDTH,
help=f"Beam width used in the CTC decoder when building candidate transcriptions. Default: {BEAM_WIDTH}")

View file

@ -8,13 +8,12 @@ pushd ${THIS}
source ../tests.sh
pip install --user $(get_python_wheel_url "$1")
pip install --user -r requirements.txt
pip install --user -r <(grep -v deepspeech requirements.txt)
pulseaudio &
python mic_vad_streaming.py \
--model $HOME/DeepSpeech/models/output_graph.pbmm \
--lm $HOME/DeepSpeech/models/lm.binary \
--trie $HOME/DeepSpeech/models/trie \
--scorer $HOME/DeepSpeech/models/kenlm.scorer \
--file $HOME/DeepSpeech/audio/2830-3980-0043.wav
popd

View file

@ -52,8 +52,8 @@
Margin="95,69,0,0"
HorizontalAlignment="Left"
VerticalAlignment="Top"
Command="{Binding EnableLanguageModelCommand}"
Content="Enable LM" />
Command="{Binding EnableExternalScorerCommand}"
Content="Enable external scorer" />
<Button
Width="75"
Height="25"

View file

@ -24,17 +24,16 @@ namespace DeepSpeech.WPF.ViewModels
{
#region Constants
private const int SampleRate = 16000;
private const string LMPath = "lm.binary";
private const string TriePath = "trie";
private const string ScorerPath = "kenlm.scorer";
#endregion
private readonly IDeepSpeech _sttClient;
#region Commands
/// <summary>
/// Gets or sets the command that enables the language model.
/// Gets or sets the command that enables the external scorer.
/// </summary>
public IAsyncCommand EnableLanguageModelCommand { get; private set; }
public IAsyncCommand EnableExternalScorerCommand { get; private set; }
/// <summary>
/// Gets or sets the command that runs inference using an audio file.
@ -146,15 +145,15 @@ namespace DeepSpeech.WPF.ViewModels
set => SetProperty(ref _statusMessage, value);
}
private bool _languageModelEnabled;
private bool _externalScorerEnabled;
/// <summary>
/// Gets or sets the language model status.
/// Gets or sets the external scorer status.
/// </summary>
private bool LanguageModelEnabled
private bool ExternalScorerEnabled
{
get => _languageModelEnabled;
set => SetProperty(ref _languageModelEnabled, value,
onChanged: () => ((AsyncCommand)EnableLanguageModelCommand).RaiseCanExecuteChanged());
get => _externalScorerEnabled;
set => SetProperty(ref _externalScorerEnabled, value,
onChanged: () => ((AsyncCommand)EnableExternalScorerCommand).RaiseCanExecuteChanged());
}
private bool _isRunningInference;
@ -205,8 +204,8 @@ namespace DeepSpeech.WPF.ViewModels
{
_sttClient = sttClient;
EnableLanguageModelCommand = new AsyncCommand(()=>EnableLanguageModelAsync(LMPath,TriePath),
_ => !LanguageModelEnabled);
EnableExternalScorerCommand = new AsyncCommand(()=>EnableExternalScorerAsync(ScorerPath),
_ => !ExternalScorerEnabled);
InferenceFromFileCommand = new AsyncCommand(ExecuteInferenceFromFileAsync,
_ => !IsRunningInference);
@ -322,21 +321,18 @@ namespace DeepSpeech.WPF.ViewModels
}
/// <summary>
/// Enables the language model.
/// Enables the external scorer.
/// </summary>
/// <param name="lmPath">Language model path.</param>
/// <param name="triePath">Trie path.</param>
/// <param name="scorerPath">External scorer path.</param>
/// <returns>A Task to await.</returns>
public async Task EnableLanguageModelAsync(string lmPath, string triePath)
public async Task EnableExternalScorerAsync(string scorerPath)
{
try
{
StatusMessage = "Loading language model...";
const float LM_ALPHA = 0.75f;
const float LM_BETA = 1.85f;
await Task.Run(() => _sttClient.EnableDecoderWithLM(LMPath, TriePath, LM_ALPHA, LM_BETA));
LanguageModelEnabled = true;
StatusMessage = "Language model loaded.";
StatusMessage = "Loading external scorer...";
await Task.Run(() => _sttClient.EnableExternalScorer(ScorerPath));
ExternalScorerEnabled = true;
StatusMessage = "External scorer loaded.";
}
catch (Exception ex)
{

View file

@ -11,8 +11,7 @@ Edit references to models path if necessary:
```
let modelPath = './models/output_graph.pbmm';
let lmPath = './models/lm.binary';
let triePath = './models/trie';
let scorerPath = './models/kenlm.scorer';
```
Install Sox (for .wav file loading):

View file

@ -12,12 +12,9 @@ let model = new DeepSpeech.Model(modelPath, BEAM_WIDTH);
let desiredSampleRate = model.sampleRate();
const LM_ALPHA = 0.75;
const LM_BETA = 1.85;
let lmPath = './models/lm.binary';
let triePath = './models/trie';
let scorerPath = './models/kenlm.scorer';
model.enableDecoderWithLM(lmPath, triePath, LM_ALPHA, LM_BETA);
model.enableExternalScorer(scorerPath);
let audioFile = process.argv[2] || './audio/2830-3980-0043.wav';

View file

@ -18,7 +18,7 @@ def main(args):
parser.add_argument('--audio', required=False,
help='Path to the audio file to run (WAV format)')
parser.add_argument('--model', required=True,
help='Path to directory that contains all model files (output_graph, lm and trie)')
help='Path to directory that contains all model files (output_graph and scorer)')
parser.add_argument('--stream', required=False, action='store_true',
help='To use deepspeech streaming interface')
args = parser.parse_args()
@ -34,13 +34,13 @@ def main(args):
dirName = os.path.expanduser(args.model)
# Resolve all the paths of model files
output_graph, lm, trie = wavTranscriber.resolve_models(dirName)
output_graph, scorer = wavTranscriber.resolve_models(dirName)
# Load output_graph, alpahbet, lm and trie
model_retval = wavTranscriber.load_model(output_graph, lm, trie)
# Load output_graph, alphabet and scorer
model_retval = wavTranscriber.load_model(output_graph, scorer)
if args.audio is not None:
title_names = ['Filename', 'Duration(s)', 'Inference Time(s)', 'Model Load Time(s)', 'LM Load Time(s)']
title_names = ['Filename', 'Duration(s)', 'Inference Time(s)', 'Model Load Time(s)', 'Scorer Load Time(s)']
print("\n%-30s %-20s %-20s %-20s %s" % (title_names[0], title_names[1], title_names[2], title_names[3], title_names[4]))
inference_time = 0.0
@ -81,9 +81,9 @@ def main(args):
try:
while True:
data = subproc.stdout.read(512)
model_retval[0].feedAudioContent(sctx, np.frombuffer(data, np.int16))
sctx.feedAudioContent(np.frombuffer(data, np.int16))
except KeyboardInterrupt:
print('Transcription: ', model_retval[0].finishStream(sctx))
print('Transcription: ', sctx.finishStream())
subproc.terminate()
subproc.wait()

View file

@ -109,7 +109,7 @@ class App(QMainWindow):
self.microphone = QRadioButton("Microphone")
self.fileUpload = QRadioButton("File Upload")
self.browseBox = QLineEdit(self, placeholderText="Wave File, Mono @ 16 kHz, 16bit Little-Endian")
self.modelsBox = QLineEdit(self, placeholderText="Directory path for output_graph, lm & trie")
self.modelsBox = QLineEdit(self, placeholderText="Directory path for output_graph and scorer")
self.textboxTranscript = QPlainTextEdit(self, placeholderText="Transcription")
self.browseButton = QPushButton('Browse', self)
self.browseButton.setToolTip('Select a wav file')
@ -238,9 +238,9 @@ class App(QMainWindow):
def modelResult(self, dirName):
# Fetch and Resolve all the paths of model files
output_graph, lm, trie = wavTranscriber.resolve_models(dirName)
# Load output_graph, alpahbet, lm and trie
self.model = wavTranscriber.load_model(output_graph, lm, trie)
output_graph, scorer = wavTranscriber.resolve_models(dirName)
# Load output_graph, alphabet and scorer
self.model = wavTranscriber.load_model(output_graph, scorer)
def modelFinish(self):
# self.timer.stop()
@ -316,9 +316,9 @@ class App(QMainWindow):
logging.debug("Recording from your microphone")
while (not self.openMicrophone.isChecked()):
data = context[1].stdout.read(512)
context[2].feedAudioContent(context[0], np.frombuffer(data, np.int16))
context[0].feedAudioContent(np.frombuffer(data, np.int16))
else:
transcript = context[2].finishStream(context[0])
transcript = context[0].finishStream()
context[1].terminate()
context[1].wait()
self.show()
@ -367,7 +367,7 @@ class App(QMainWindow):
# Format pretty, extract filename from the full file path
filename, ext = os.path.split(os.path.basename(waveFile))
title_names = ['Filename', 'Duration(s)', 'Inference Time(s)', 'Model Load Time(s)', 'LM Load Time(s)']
title_names = ['Filename', 'Duration(s)', 'Inference Time(s)', 'Model Load Time(s)', 'Scorer Load Time(s)']
logging.debug("************************************************************************************************************")
logging.debug("%-30s %-20s %-20s %-20s %s" % (title_names[0], title_names[1], title_names[2], title_names[3], title_names[4]))
logging.debug("%-30s %-20.3f %-20.3f %-20.3f %-0.3f" % (filename + ext, audio_length, inference_time, self.model[1], self.model[2]))

View file

@ -8,7 +8,7 @@ pushd ${THIS}
source ../tests.sh
pip install --user $(get_python_wheel_url "$1")
pip install --user -r requirements.txt
pip install --user -r <(grep -v deepspeech requirements.txt)
python audioTranscript_cmd.py \
--audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \

View file

@ -8,28 +8,25 @@ from timeit import default_timer as timer
'''
Load the pre-trained model into the memory
@param models: Output Graph Protocol Buffer file
@param lm: Language model file
@param trie: Trie file
@param scorer: Scorer file
@Retval
Returns a list [DeepSpeech Object, Model Load Time, LM Load Time]
Returns a list [DeepSpeech Object, Model Load Time, Scorer Load Time]
'''
def load_model(models, lm, trie):
def load_model(models, scorer):
BEAM_WIDTH = 500
LM_ALPHA = 0.75
LM_BETA = 1.85
model_load_start = timer()
ds = Model(models, BEAM_WIDTH)
model_load_end = timer() - model_load_start
logging.debug("Loaded model in %0.3fs." % (model_load_end))
lm_load_start = timer()
ds.enableDecoderWithLM(lm, trie, LM_ALPHA, LM_BETA)
lm_load_end = timer() - lm_load_start
logging.debug('Loaded language model in %0.3fs.' % (lm_load_end))
scorer_load_start = timer()
ds.enableExternalScorer(scorer)
scorer_load_end = timer() - scorer_load_start
logging.debug('Loaded external scorer in %0.3fs.' % (scorer_load_end))
return [ds, model_load_end, lm_load_end]
return [ds, model_load_end, scorer_load_end]
'''
Run Inference on input audio file
@ -60,18 +57,16 @@ Resolve directory path for the models and fetch each of them.
@param dirName: Path to the directory containing pre-trained models
@Retval:
Retunns a tuple containing each of the model files (pb, lm and trie)
Returns a tuple containing each of the model files (pb, scorer)
'''
def resolve_models(dirName):
pb = glob.glob(dirName + "/*.pb")[0]
logging.debug("Found Model: %s" % pb)
lm = glob.glob(dirName + "/lm.binary")[0]
trie = glob.glob(dirName + "/trie")[0]
logging.debug("Found Language Model: %s" % lm)
logging.debug("Found Trie: %s" % trie)
scorer = glob.glob(dirName + "/kenlm.scorer")[0]
logging.debug("Found scorer: %s" % scorer)
return pb, lm, trie
return pb, scorer
'''
Generate VAD segments. Filters out non-voiced audio frames.
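
With these signatures, callers resolve and load the model as the updated audioTranscript_cmd.py above does. A short usage sketch (the directory path is a placeholder; it only needs to contain the `.pb` graph and `kenlm.scorer`):
```python
import os
import wavTranscriber

# Placeholder: directory holding output_graph.pb and kenlm.scorer.
dir_name = os.path.expanduser('~/DeepSpeech/models')

output_graph, scorer = wavTranscriber.resolve_models(dir_name)
ds, model_load_time, scorer_load_time = wavTranscriber.load_model(output_graph, scorer)
print('Loaded model in %.3fs, scorer in %.3fs' % (model_load_time, scorer_load_time))
```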

View file

@ -23,7 +23,7 @@ A list of distributions where the package is available can be found at: https://
#### 0.1 Download Deepspeech
Either clone from git via git clone, or Download a version from the release page
For the next steps we assume you have extracted the files to ~/Deepspeech
For the next steps we assume you have extracted the files to `~/Deepspeech`
#### 0.2 Setup your environment
@ -64,7 +64,7 @@ Set the aggressiveness mode, to an integer between 0 and 3.
$ python3 audioTranscript_cmd.py --aggressive 1 --audio ./audio/guido-van-rossum.wav --model ./models/0.4.1/
Filename Duration(s) Inference Time(s) Model Load Time(s) LM Load Time(s)
Filename Duration(s) Inference Time(s) Model Load Time(s) Scorer Load Time(s)
sample_rec.wav 13.710 20.797 5.593 17.742
```