WIP: Adding DeepSpeech w/ TFLite

2018-11-12 15:44:49 +01:00 · 2018-11-12 15:44:49 +01:00 · 77665e35b7
--- a/app/build.gradle
+++ b/app/build.gradle
@ -9,6 +9,9 @@ android {
        versionCode 1
        versionName "1.0"
        testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
+        ndk {
+            abiFilters 'armeabi-v7a', 'arm64-v8a', 'x86_64'
+        }
    }
    buildTypes {
        release {
@ -20,6 +23,10 @@ android {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
+
+    lintOptions {
+        abortOnError false
+    }
 }

 dependencies {
@ -28,6 +35,7 @@ dependencies {
    implementation 'com.android.support:appcompat-v7:27.1.1'
    implementation 'com.android.support.constraint:constraint-layout:1.1.2'
    implementation 'com.jjoe64:graphview:4.2.2'
+    implementation 'net.lingala.zip4j:zip4j:1.3.2'
    testImplementation 'junit:junit:4.12'
    androidTestImplementation 'com.android.support.test:runner:1.0.2'
    androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2'
--- a/app/src/main/java/com/mozilla/speechapp/MainActivity.java
+++ b/app/src/main/java/com/mozilla/speechapp/MainActivity.java
@ -1,32 +1,58 @@
 package com.mozilla.speechapp;

 import android.Manifest;
+
 import android.app.Activity;
+import android.app.DownloadManager;
+
 import android.content.pm.PackageManager;
+import android.content.BroadcastReceiver;
+import android.content.Context;
+import android.content.Intent;
+import android.content.IntentFilter;
+
+import android.database.Cursor;
+
 import android.support.annotation.NonNull;
 import android.support.v4.app.ActivityCompat;
 import android.support.v7.app.AppCompatActivity;
+
+import android.net.Uri;
+
 import android.os.Bundle;
+import android.os.AsyncTask;
+
 import android.util.Log;
+
 import android.view.View;
 import android.view.WindowManager;
+
 import android.widget.Button;
 import android.widget.CompoundButton;
 import android.widget.EditText;
 import android.widget.Switch;
+import android.widget.Toast;

 import com.jjoe64.graphview.GraphView;
 import com.jjoe64.graphview.series.DataPoint;
 import com.jjoe64.graphview.series.LineGraphSeries;
+
 import com.mozilla.speechlibrary.ISpeechRecognitionListener;
 import com.mozilla.speechlibrary.MozillaSpeechService;
 import com.mozilla.speechlibrary.STTResult;
 import com.mozilla.speechmodule.R;

+import java.io.File;
+
+import net.lingala.zip4j.core.ZipFile;
+
 import static android.support.constraint.Constraints.TAG;

 public class MainActivity extends AppCompatActivity implements ISpeechRecognitionListener, CompoundButton.OnCheckedChangeListener {

+    private static long sDownloadId;
+    private static DownloadManager sDownloadManager;
+
    private MozillaSpeechService mMozillaSpeechService;
    private GraphView mGraph;
    private long mDtstart;
@ -47,6 +73,7 @@ public class MainActivity extends AppCompatActivity implements ISpeechRecognitio
        EditText txtProdutTag, txtLanguage;
        Switch switchTranscriptions = findViewById(R.id.switchTranscriptions);
        Switch switchSamples = findViewById(R.id.switchSamples);
+        Switch useDeepSpeech = findViewById(R.id.useDeepSpeech);

        if (ActivityCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
                != PackageManager.PERMISSION_GRANTED) {
@ -75,7 +102,12 @@ public class MainActivity extends AppCompatActivity implements ISpeechRecognitio
                mSeries1.resetData(new DataPoint[0]);
                mMozillaSpeechService.setLanguage(txtLanguage.getText().toString());
                mMozillaSpeechService.setProductTag(txtProdutTag.getText().toString());
-                mMozillaSpeechService.start(getApplicationContext());
+                mMozillaSpeechService.setModelPath(getExternalFilesDir("models").getAbsolutePath());
+                if (mMozillaSpeechService.ensureModelInstalled()) {
+                    mMozillaSpeechService.start(getApplicationContext());
+                } else {
+                    maybeDownloadOrExtractModel(getExternalFilesDir("models").getAbsolutePath(), mMozillaSpeechService.getLanguageDir());
+                }
            } catch (Exception e) {
                Log.d(TAG, e.getLocalizedMessage());
                e.printStackTrace();
@ -93,8 +125,10 @@ public class MainActivity extends AppCompatActivity implements ISpeechRecognitio

        switchTranscriptions.setOnCheckedChangeListener(this);
        switchSamples.setOnCheckedChangeListener(this);
+        useDeepSpeech.setOnCheckedChangeListener(this);
        switchTranscriptions.toggle();
        switchSamples.toggle();
+        useDeepSpeech.toggle();

        mGraph = findViewById(R.id.graph);
        mSeries1 = new LineGraphSeries<>(new DataPoint[0]);
@ -154,8 +188,89 @@ public class MainActivity extends AppCompatActivity implements ISpeechRecognitio
    public void onCheckedChanged(CompoundButton buttonView, boolean isChecked) {
+        // All three switches share this listener: forward the new state to the
+        // speech-service setting that belongs to whichever switch was toggled.
        if (buttonView.equals(findViewById(R.id.switchTranscriptions))) {
            mMozillaSpeechService.storeTranscriptions(isChecked);
-        } else {
+        } else if (buttonView.equals(findViewById(R.id.switchSamples))) {
            mMozillaSpeechService.storeSamples(isChecked);
+        } else if (buttonView.equals(findViewById(R.id.useDeepSpeech))) {
+            mMozillaSpeechService.useDeepSpeech(isChecked);
        }
    }
+
+    /**
+     * Background task that unpacks a downloaded model archive and, when that
+     * worked, starts recognition.
+     *
+     * <p>Arguments given to {@code execute}: {@code params[0]} is the path of the
+     * zip archive, {@code params[1]} is the directory to extract into. The task
+     * result is {@code true} only when extraction finished without an exception.
+     */
+    private class AsyncUnzip extends AsyncTask<String, Void, Boolean> {
+
+        @Override
+        protected void onPreExecute() {
+            Toast noModel = Toast.makeText(getApplicationContext(), "Extracting downloaded model", Toast.LENGTH_LONG);
+            mPlain_text_input.append("Extracting downloaded model\n");
+            noModel.show();
+        }
+
+        @Override
+        protected Boolean doInBackground(String...params) {
+            String aZipFile = params[0], aRootModelsPath = params[1];
+            boolean extracted = false;
+
+            try {
+                ZipFile zf = new ZipFile(aZipFile);
+                zf.extractAll(aRootModelsPath);
+                extracted = true;
+            } catch (Exception e) {
+                // Hand the throwable to the logger instead of its message:
+                // getLocalizedMessage() may be null.
+                Log.e(TAG, "Failed to extract " + aZipFile, e);
+            }
+
+            // The archive is removed whether or not extraction worked, so a
+            // broken download is fetched again on the next attempt.
+            if (!(new File(aZipFile)).delete()) {
+                Log.w(TAG, "Could not delete " + aZipFile);
+            }
+
+            return extracted;
+        }
+
+        @Override
+        protected void onPostExecute(Boolean result) {
+            Button buttonStart = findViewById(R.id.button_start), buttonCancel = findViewById(R.id.button_cancel);
+
+            // Starting the service without an extracted model would only fail
+            // later while loading it, so report the problem here instead.
+            if (Boolean.TRUE.equals(result)) {
+                mMozillaSpeechService.start(getApplicationContext());
+            } else {
+                mPlain_text_input.append("Model extraction failed\n");
+            }
+
+            // The buttons were disabled when the download was triggered.
+            buttonStart.setEnabled(true);
+            buttonCancel.setEnabled(true);
+        }
+
+    }
+
+    /**
+     * Downloads the model archive for a language through the system
+     * {@link DownloadManager} and unpacks it once the download has completed.
+     *
+     * <p>The start and cancel buttons are disabled while the download is
+     * pending. They are re-enabled here when the download fails, and by
+     * {@code AsyncUnzip} after a successful download has been extracted.
+     *
+     * @param aModelsPath root directory holding the per-language model folders;
+     *                    the archive is stored there as {@code <aLang>.zip}
+     * @param aLang       language directory name, used for the archive name and
+     *                    in the user-visible messages
+     */
+    public void maybeDownloadOrExtractModel(String aModelsPath, String aLang) {
+        final String zipFile   = aModelsPath + "/" + aLang + ".zip";
+        Uri modelZipURL  = Uri.parse(mMozillaSpeechService.getModelDownloadURL());
+        Uri modelZipFile = Uri.parse("file://" + zipFile);
+
+        final Button buttonStart = findViewById(R.id.button_start), buttonCancel = findViewById(R.id.button_cancel);
+        buttonStart.setEnabled(false);
+        buttonCancel.setEnabled(false);
+
+        final DownloadManager downloadManager = (DownloadManager) getSystemService(Context.DOWNLOAD_SERVICE);
+        // Filled in after enqueue(); lets the receiver recognise its own request.
+        final long[] requestId = new long[] { -1 };
+
+        BroadcastReceiver receiver = new BroadcastReceiver() {
+            @Override
+            public void onReceive(Context context, Intent intent) {
+                if (!DownloadManager.ACTION_DOWNLOAD_COMPLETE.equals(intent.getAction())) {
+                    return;
+                }
+
+                // The broadcast is sent for every finished download, not only ours.
+                long downloadId = intent.getLongExtra(DownloadManager.EXTRA_DOWNLOAD_ID, -1);
+                if (downloadId != requestId[0]) {
+                    return;
+                }
+
+                // One receiver per request: drop it as soon as the request is done,
+                // otherwise every call of the enclosing method leaks a receiver.
+                getApplicationContext().unregisterReceiver(this);
+
+                boolean succeeded = false;
+                DownloadManager.Query query = new DownloadManager.Query();
+                query.setFilterById(downloadId);
+                Cursor c = downloadManager.query(query);
+                if (c != null) {
+                    try {
+                        if (c.moveToFirst()) {
+                            int columnIndex = c.getColumnIndex(DownloadManager.COLUMN_STATUS);
+                            succeeded = DownloadManager.STATUS_SUCCESSFUL == c.getInt(columnIndex);
+                        }
+                    } finally {
+                        c.close();
+                    }
+                }
+
+                if (succeeded) {
+                    Log.d(TAG, "Download successfull");
+
+                    new AsyncUnzip().execute(zipFile, aModelsPath);
+                } else {
+                    // Nothing to extract: give the controls back to the user.
+                    Log.e(TAG, "Model download failed for " + zipFile);
+                    mPlain_text_input.append("Model download failed\n");
+                    buttonStart.setEnabled(true);
+                    buttonCancel.setEnabled(true);
+                }
+            }
+        };
+
+        Toast noModel = Toast.makeText(getApplicationContext(), "No model has been found for language '" + aLang + "'. Triggering download ...", Toast.LENGTH_LONG);
+        mPlain_text_input.append("No model has been found for language '" + aLang + "'. Triggering download ...\n");
+        noModel.show();
+
+        DownloadManager.Request request = new DownloadManager.Request(modelZipURL);
+        request.setTitle("DeepSpeech " + aLang);
+        request.setDescription("DeepSpeech Model");
+        request.setNotificationVisibility(DownloadManager.Request.VISIBILITY_VISIBLE_NOTIFY_COMPLETED);
+        request.setVisibleInDownloadsUi(false);
+        request.setDestinationUri(modelZipFile);
+
+        // Register before enqueueing so the completion broadcast cannot be missed.
+        getApplicationContext().registerReceiver(receiver, new IntentFilter(DownloadManager.ACTION_DOWNLOAD_COMPLETE));
+
+        requestId[0] = downloadManager.enqueue(request);
+
+        // Keep the static fields up to date for any other code that reads them.
+        sDownloadManager = downloadManager;
+        sDownloadId = requestId[0];
+    }
 }
--- a/app/src/main/res/layout/activity_main.xml
+++ b/app/src/main/res/layout/activity_main.xml
@ -49,12 +49,21 @@
            android:inputType="textMultiLine"
            android:singleLine="false" />

+        <Switch
+            android:id="@+id/useDeepSpeech"
+            android:layout_width="wrap_content"
+            android:layout_height="wrap_content"
+            android:layout_alignParentEnd="true"
+            android:layout_below="@+id/switchTranscriptions"
+            android:text="Use DeepSpeech" />
+
        <Switch
            android:id="@+id/switchTranscriptions"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_alignParentEnd="true"
            android:layout_centerVertical="true"
+            android:layout_below="@+id/switchSamples"
            android:text="Store Transcriptions" />

        <Switch
@ -71,7 +80,7 @@
            android:layout_height="wrap_content"
            android:layout_above="@+id/plain_text_input"
            android:layout_alignStart="@+id/graph"
-            android:layout_marginBottom="-71dp"
+            android:layout_marginBottom="-50dp"
            android:ems="10"
            android:inputType="textPersonName"
            android:text="ProductTag" />
@ -82,9 +91,10 @@
            android:layout_height="wrap_content"
            android:layout_above="@+id/plain_text_input"
            android:layout_alignStart="@+id/graph"
-            android:layout_marginBottom="-120dp"
+            android:layout_marginBottom="-80dp"
            android:ems="10"
            android:inputType="textPersonName"
-            android:text="Language" />
+            android:text="eng" />
+    
    </RelativeLayout>
-</android.support.constraint.ConstraintLayout>
+</android.support.constraint.ConstraintLayout>
--- a/mozillaspeechlibrary/build.gradle
+++ b/mozillaspeechlibrary/build.gradle
@ -1,9 +1,9 @@
 apply plugin: 'com.android.library'
 apply from: 'maven-push.gradle'

-def versionMajor = 1
+def versionMajor = 2
 def versionMinor = 0
-def versionPatch = 4
+def versionPatch = 0

 android {
    compileSdkVersion 25
@ -13,6 +13,10 @@ android {
        versionCode versionMajor * 10000 + versionMinor * 100 + versionPatch
        versionName "${versionMajor}.${versionMinor}.${versionPatch}"
        testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
+
+        ndk {
+            abiFilters 'armeabi-v7a', 'arm64-v8a', 'x86_64'
+        }
    }

    buildTypes {
@ -27,15 +31,21 @@ android {
            path 'src/main/cpp/Android.mk'
        }
    }
+
    compileOptions {
        sourceCompatibility = 1.7
        targetCompatibility = 1.7
    }
+
+    lintOptions {
+        abortOnError false
+    }
 }

 dependencies {
    implementation fileTree(include: ['*.jar'], dir: 'libs')
    implementation 'com.loopj.android:android-async-http:1.4.9'
+    implementation 'org.mozilla.deepspeech:libdeepspeech:0.5.0-alpha.1@aar'
    testImplementation 'junit:junit:4.12'
    androidTestImplementation 'com.android.support.test:runner:1.0.2'
    androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2'
--- a/mozillaspeechlibrary/src/main/java/com/mozilla/speechlibrary/LocalSpeechRecognition.java
+++ b/mozillaspeechlibrary/src/main/java/com/mozilla/speechlibrary/LocalSpeechRecognition.java
@ -0,0 +1,341 @@
+package com.mozilla.speechlibrary;
+
+import android.content.Context;
+import android.media.AudioRecord;
+import android.os.Process;
+import com.github.axet.audiolibrary.encoders.Sound;
+
+import java.util.Arrays;
+import java.util.Queue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.HashMap;
+import java.util.Map;
+
+import java.io.FileOutputStream;
+import java.io.File;
+
+import java.nio.ByteOrder;
+import java.nio.ShortBuffer;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+import android.util.Log;
+
+import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel;
+import org.mozilla.deepspeech.libdeepspeech.DeepSpeechStreamingState;
+
+/**
+ * Runs a DeepSpeech streaming session on its own thread.
+ *
+ * <p>Audio frames are handed over with {@link #appendAudio(short[])} and queued;
+ * {@link #run()} drains the queue into the model. After {@link #endOfStream()}
+ * the remaining frames are consumed, the stream is finished and the decoded text
+ * is published to the service listeners as an {@code STT_RESULT}.
+ * {@link #closeModel()} aborts the session without publishing anything.
+ *
+ * <p>The native model is created in the constructor and released either at the
+ * end of {@link #run()} or by {@link #closeModel()}, whichever happens first.
+ */
+class LocalDSInference implements Runnable {
+
+    DeepSpeechModel mModel;
+    DeepSpeechStreamingState mStreamingState;
+    MozillaSpeechService mService;
+
+    Queue<short[]> mBuffers = new ConcurrentLinkedQueue<short[]>();
+
+    // Written by the thread that records audio, read by the inference thread.
+    volatile boolean stopStream;
+
+    // Set by closeModel(): run() must stop and must not publish a result.
+    private volatile boolean mClosed;
+
+    // Guards mModel and mStreamingState. closeModel() is called from outside
+    // the inference thread while run() may be in the middle of a model call.
+    private final Object mModelLock = new Object();
+
+    // How long run() sleeps when the queue is empty, instead of spinning.
+    private static final long POLL_IDLE_MS = 5;
+
+    final int N_CEP = 26;
+    final int N_CONTEXT = 9;
+    final int BEAM_WIDTH = 250;
+
+    final float LM_WEIGHT = 0.75f;
+    final float VALID_WORD_COUNT_WEIGHT = 1.85f;
+
+    static final String _tag = "LocalDSInference";
+
+    static boolean keepClips  = false;
+    static boolean useDecoder = false;
+    static int clipNumber = 0;
+    FileChannel clipDebug;
+
+    String modelRoot;
+    String tfliteModel;
+    String alphabet;
+    String LM;
+    String trie;
+
+    /**
+     * Loads the model of the service's current language and opens a stream.
+     *
+     * <p>Two marker files in the model directory switch optional behaviour on:
+     * {@code .keepClips} dumps the received audio to {@code clip_<n>.wav} in
+     * that directory, {@code .useDecoder} enables the language-model decoder.
+     *
+     * @param aService    service providing the model path and language, and
+     *                    receiving the state notifications
+     * @param aFrameSize  frame size; twice this value is passed to
+     *                    {@code setupStream}
+     * @param aSampleRate sample rate passed to {@code setupStream}
+     */
+    protected LocalDSInference(MozillaSpeechService aService, int aFrameSize, int aSampleRate) {
+        Log.e(_tag, "new LocalDSInference()");
+
+        modelRoot = aService.getModelPath() + "/" + aService.getLanguageDir();
+
+        Log.e(_tag, "Loading model from " + modelRoot);
+
+        this.tfliteModel = this.modelRoot + "/" + LocalSpeechRecognition.kTfLiteModel;
+        this.alphabet    = this.modelRoot + "/" + LocalSpeechRecognition.kAlphabet;
+        this.LM          = this.modelRoot + "/" + LocalSpeechRecognition.kLM;
+        this.trie        = this.modelRoot + "/" + LocalSpeechRecognition.kTrie;
+
+        clipNumber += 1;
+
+        keepClips  = (new File(this.modelRoot + "/.keepClips")).exists();
+        useDecoder = (new File(this.modelRoot + "/.useDecoder")).exists();
+
+        Log.e(_tag, "keepClips=" + keepClips);
+        Log.e(_tag, "useDecoder=" + useDecoder);
+
+        this.mService = aService;
+
+        Log.e(_tag, "new DeepSpeechModel(\"" + this.tfliteModel + "\")");
+        this.mModel = new DeepSpeechModel(this.tfliteModel, N_CEP, N_CONTEXT, this.alphabet, BEAM_WIDTH);
+
+        if (useDecoder) {
+            this.mModel.enableDecoderWihLM(this.alphabet, this.LM, this.trie, LM_WEIGHT, VALID_WORD_COUNT_WEIGHT);
+        }
+
+        if (keepClips) {
+            try {
+                this.clipDebug = new FileOutputStream(this.modelRoot + "/clip_" + clipNumber + ".wav").getChannel();
+            } catch (Exception ex) {
+                // The dump is a debugging aid only; recognition continues without it.
+                Log.e(_tag, "Unable to open debug clip file", ex);
+            }
+        }
+
+        this.mStreamingState = this.mModel.setupStream(aFrameSize * 2, aSampleRate);
+        this.stopStream      = false;
+    }
+
+    /**
+     * Aborts the session: stops {@link #run()}, finishes any open stream and
+     * destroys the model. Safe to call more than once and after run() ended.
+     */
+    public void closeModel() {
+        Log.e(_tag, "closeModel()");
+
+        // Raise the flags first so run() leaves its loop and skips the result.
+        this.mClosed    = true;
+        this.stopStream = true;
+        closeClip();
+
+        synchronized (this.mModelLock) {
+            releaseModelLocked();
+        }
+    }
+
+    /** Finishes an open stream and destroys the model. Caller holds mModelLock. */
+    private void releaseModelLocked() {
+        if (this.mModel != null) {
+            if (this.mStreamingState != null) {
+                // The decoded text is of no interest when tearing down.
+                this.mModel.finishStream(this.mStreamingState);
+            }
+            this.mModel.destroyModel();
+        }
+
+        this.mStreamingState = null;
+        this.mModel          = null;
+    }
+
+    /** Closes the debug clip file, if one was opened. */
+    private void closeClip() {
+        if (this.clipDebug == null) {
+            return;
+        }
+        try {
+            this.clipDebug.close();
+        } catch (Exception ex) {
+            Log.e(_tag, "Unable to close debug clip file", ex);
+        }
+    }
+
+    /**
+     * Queues one frame of audio for inference. Frames arriving after
+     * {@link #endOfStream()} or {@link #closeModel()} are dropped.
+     *
+     * @param aBuffer samples of the frame; the array is queued as is, not copied
+     */
+    public void appendAudio(short[] aBuffer) {
+        // Called once per recorded frame, hence verbose rather than error level.
+        Log.v(_tag, "appendAudio()");
+        if (this.stopStream) {
+            return;
+        }
+
+        this.mBuffers.add(aBuffer);
+
+        if (keepClips && this.clipDebug != null) {
+            // DEBUG: append the frame to the clip file as little-endian samples.
+            ByteBuffer myByteBuffer = ByteBuffer.allocate(aBuffer.length * 2);
+            myByteBuffer.order(ByteOrder.LITTLE_ENDIAN);
+
+            ShortBuffer myShortBuffer = myByteBuffer.asShortBuffer();
+            myShortBuffer.put(aBuffer);
+
+            try {
+                this.clipDebug.write(myByteBuffer);
+            } catch (Exception ex) {
+                Log.e(_tag, "Unable to write debug clip", ex);
+            }
+        }
+    }
+
+    /** Signals that no more audio will follow; run() then decodes what is queued. */
+    public void endOfStream() {
+        Log.e(_tag, "endOfStream()");
+        this.stopStream = true;
+        closeClip();
+    }
+
+    /**
+     * Feeds queued frames to the model until the stream is ended and the queue
+     * is empty, then publishes {@code DECODING} followed by {@code STT_RESULT}.
+     * Returns without publishing when the session was closed in the meantime.
+     */
+    public void run() {
+        Log.e(_tag, "run()");
+
+        while (!this.mClosed && (!this.stopStream || !this.mBuffers.isEmpty())) {
+            short[] aBuffer = this.mBuffers.poll();
+
+            if (aBuffer == null) {
+                // Nothing queued yet: wait a little instead of burning a core.
+                try {
+                    Thread.sleep(POLL_IDLE_MS);
+                } catch (InterruptedException ex) {
+                    Thread.currentThread().interrupt();
+                    break;
+                }
+                continue;
+            }
+
+            synchronized (this.mModelLock) {
+                if (this.mModel == null || this.mStreamingState == null) {
+                    break;
+                }
+                this.mModel.feedAudioContent(this.mStreamingState, aBuffer, aBuffer.length);
+            }
+        }
+
+        if (this.mClosed) {
+            Log.e(_tag, "run() stopped by closeModel()");
+            return;
+        }
+
+        Log.e(_tag, "finishStream()");
+        mService.notifyListeners(MozillaSpeechService.SpeechState.DECODING, null);
+
+        String finalDecoded;
+        synchronized (this.mModelLock) {
+            if (this.mModel == null || this.mStreamingState == null) {
+                return;
+            }
+            finalDecoded = this.mModel.finishStream(this.mStreamingState);
+            this.mStreamingState = null;
+            // This instance owns the model it created in the constructor and
+            // will not use it again, so release it here rather than leak it.
+            releaseModelLocked();
+        }
+
+        if (this.mClosed) {
+            // Closed while decoding: the caller is no longer waiting for a result.
+            return;
+        }
+
+        if (finalDecoded == null) {
+            finalDecoded = "";
+        }
+        Log.e(_tag, "finalDecoded(" + finalDecoded.length() + ")=" + finalDecoded);
+
+        STTResult sttResult = new STTResult(finalDecoded, (float)(1.0));
+        mService.notifyListeners(MozillaSpeechService.SpeechState.STT_RESULT, sttResult);
+    }
+}
+
+/**
+ * Records microphone audio, runs voice-activity detection on it and forwards
+ * every frame to a {@link LocalDSInference} running on a second thread.
+ *
+ * <p>Recording ends when enough voice followed by enough silence was seen, when
+ * the time limit is reached, when reading from the recorder fails, or on
+ * {@link #cancel()}. Unless cancelled, the inference stream is always ended so
+ * that the inference thread terminates.
+ */
+class LocalSpeechRecognition implements Runnable {
+
+    Vad mVad;
+    // Both flags are set by cancel() from another thread and polled by run().
+    volatile boolean done;
+    volatile boolean cancelled;
+
+    // Thresholds compared against elapsed milliseconds (voice / silence) and
+    // elapsed whole seconds (upper limit) in run().
+    int mMinimumVoice = 150;
+    int mMaximumSilence = 500;
+    int mUpperLimit = 10;
+
+    static final int FRAME_SIZE = 80;
+
+    // Gain applied to frames in which the VAD reported voice.
+    private static final int VOICE_GAIN = 5;
+
+    int mSampleRate;
+    int mChannels;
+    MozillaSpeechService mService;
+
+    static final String _tag = "LocalSpeechRecognition";
+
+    LocalDSInference mInferer;
+    Thread mInferenceThread;
+
+    public static final String kTfLiteModel = "output_graph.tflite";
+    public static final String kAlphabet    = "alphabet.txt";
+    public static final String kLM          = "lm.binary";
+    public static final String kTrie        = "trie";
+
+    // Maps language tags to the three-letter model directory names.
+    private static final Map<String,String> mLanguages = new HashMap<String, String>();
+    static {
+        mLanguages.put("en-US", "eng");
+        mLanguages.put("fr-FR", "fra");
+    }
+
+    private static final String kBaseModelURL = "https://github.com/lissyx/DeepSpeech/releases/download/android-test/";
+
+    /**
+     * Creates the recognizer and immediately starts the inference thread.
+     *
+     * @param aSampleRate sample rate used for recording and for the model stream
+     * @param aChannels   channel count passed to the recorder
+     * @param aVad        voice-activity detector fed with every frame
+     * @param aService    service that receives the state notifications
+     */
+    protected LocalSpeechRecognition(int aSampleRate, int aChannels, Vad aVad,
+                                MozillaSpeechService aService) {
+        Log.e(_tag, "new LocalSpeechRecognition()");
+        this.mVad = aVad;
+        this.mSampleRate = aSampleRate;
+        this.mChannels = aChannels;
+        this.mService = aService;
+
+        this.mInferer = new LocalDSInference(this.mService, FRAME_SIZE, this.mSampleRate);
+        this.mInferenceThread = new Thread(this.mInferer);
+        this.mInferenceThread.start();
+    }
+
+    /** Recording loop; see the class comment for the conditions that end it. */
+    public void run() {
+        try {
+            boolean raisenovoice = false;
+            AudioRecord recorder = null;
+
+            try {
+                boolean touchedvoice = false;
+                boolean touchedsilence = false;
+                long voiceMillis = 0;
+                long silenceMillis = 0;
+                long startedAt = System.currentTimeMillis();
+                long previousFrameAt = startedAt;
+
+                Process.setThreadPriority(Process.THREAD_PRIORITY_URGENT_AUDIO);
+                recorder = Sound.getAudioRecord(mChannels, mSampleRate);
+                recorder.startRecording();
+                mService.notifyListeners(MozillaSpeechService.SpeechState.START_LISTEN, null);
+
+                while (!this.done && !this.cancelled) {
+                    // A fresh array per frame: the inferer queues it without copying.
+                    short[] mBuftemp = new short[FRAME_SIZE * mChannels * 2];
+                    int nshorts = recorder.read(mBuftemp, 0, mBuftemp.length);
+
+                    // Zero or a negative count means nothing usable was read;
+                    // stop before handing that count to the VAD and the FFT.
+                    if (nshorts <= 0) {
+                        break;
+                    }
+
+                    int vad = mVad.feed(mBuftemp, nshorts);
+                    double fftsum = mean(Sound.fft(mBuftemp, 0, nshorts));
+                    mService.notifyListeners(MozillaSpeechService.SpeechState.MIC_ACTIVITY, fftsum);
+
+                    long now = System.currentTimeMillis();
+
+                    if (vad == 0) {
+                        if (touchedvoice) {
+                            silenceMillis += now - previousFrameAt;
+                            if (silenceMillis > mMaximumSilence) touchedsilence = true;
+                        }
+                    } else { // vad != 0 => Active voice
+                        voiceMillis += now - previousFrameAt;
+                        if (voiceMillis > mMinimumVoice) touchedvoice = true;
+
+                        amplify(mBuftemp, nshorts);
+                    }
+                    previousFrameAt = now;
+
+                    // Forward only what was actually read.
+                    this.mInferer.appendAudio(
+                            nshorts == mBuftemp.length ? mBuftemp : Arrays.copyOf(mBuftemp, nshorts));
+
+                    if (touchedvoice && touchedsilence) {
+                        this.done = true;
+                        this.mInferer.endOfStream();
+                    }
+
+                    if ((now - startedAt)/1000 > mUpperLimit) {
+                        this.done = true;
+                        if (touchedvoice) {
+                            this.mInferer.endOfStream();
+                        } else {
+                            raisenovoice = true;
+                        }
+                    }
+                }
+            } finally {
+                try {
+                    mVad.stop();
+                    if (recorder != null) {
+                        try {
+                            recorder.stop();
+                        } finally {
+                            recorder.release();
+                        }
+                    }
+                } finally {
+                    // Whatever ended the recording, the inference thread must be
+                    // told, or it waits for more audio forever. A cancelled
+                    // session is torn down by cancel() itself.
+                    if (!this.cancelled) {
+                        this.mInferer.endOfStream();
+                    }
+                }
+            }
+
+            if (raisenovoice) mService.notifyListeners(MozillaSpeechService.SpeechState.NO_VOICE, null);
+
+            if (cancelled) {
+                cancelled = false;
+                mService.notifyListeners(MozillaSpeechService.SpeechState.CANCELED, null);
+            }
+
+        } catch (Exception exc) {
+            String error = String.format("General audio error %s", exc.getMessage());
+            mService.notifyListeners(MozillaSpeechService.SpeechState.ERROR, error);
+            exc.printStackTrace();
+        }
+    }
+
+    /** Arithmetic mean of the values; NaN for an empty array. */
+    private static double mean(double[] values) {
+        double sum = 0;
+        for (double value : values) {
+            sum += value;
+        }
+        return sum / values.length;
+    }
+
+    /**
+     * Multiplies the first {@code count} samples by {@link #VOICE_GAIN},
+     * clipping at the 16-bit range instead of letting loud samples wrap around.
+     */
+    private static void amplify(short[] samples, int count) {
+        for (int i = 0; i < count; ++i) {
+            int boosted = samples[i] * VOICE_GAIN;
+            samples[i] = (short) Math.max(Short.MIN_VALUE, Math.min(Short.MAX_VALUE, boosted));
+        }
+    }
+
+    /** Stops the recording loop and closes the inference model. */
+    public void cancel() {
+        Log.e(_tag, "cancel()");
+        this.cancelled = true;
+        this.done      = true;
+
+        if (this.mInferer != null) {
+            this.mInferer.closeModel();
+        }
+    }
+
+    /**
+     * Returns the model directory name for a language.
+     *
+     * @param aLanguage a three-character directory name, or a tag such as
+     *                  {@code en-US}
+     * @return the mapped directory name for known tags; otherwise the input
+     *         unchanged (three-character inputs are never mapped)
+     */
+    public static String getLanguageDir(String aLanguage) {
+        if (aLanguage.length() != 3 && mLanguages.containsKey(aLanguage)) {
+            return mLanguages.get(aLanguage);
+        }
+
+        return aLanguage;
+    }
+
+    /**
+     * Checks that all four model files exist in the given directory.
+     *
+     * @param aModelPath directory of one language's model
+     * @return {@code true} when the graph, alphabet, LM and trie files exist
+     */
+    public static boolean ensureModelInstalled(String aModelPath) {
+        Log.e(_tag, "ensureModelInstalled(" + aModelPath + ")");
+        return (new File(aModelPath + "/" + kTfLiteModel)).exists()
+            && (new File(aModelPath + "/" + kAlphabet)).exists()
+            && (new File(aModelPath + "/" + kLM)).exists()
+            && (new File(aModelPath + "/" + kTrie)).exists();
+    }
+
+    /** Returns the download URL of the model archive for a language directory name. */
+    public static String getModelDownloadURL(String aLang) {
+        return kBaseModelURL + aLang + ".zip";
+    }
+}
--- a/mozillaspeechlibrary/src/main/java/com/mozilla/speechlibrary/MozillaSpeechService.java
+++ b/mozillaspeechlibrary/src/main/java/com/mozilla/speechlibrary/MozillaSpeechService.java
@ -15,6 +15,8 @@ public class MozillaSpeechService {
    private Context mContext;
    private boolean isIdle = true;
    NetworkSettings mNetworkSettings;
+    private boolean useDeepSpeech = false;
+    private String mModelPath;

    public enum SpeechState
    {
@ -23,7 +25,8 @@ public class MozillaSpeechService {
    }

    private static final MozillaSpeechService ourInstance = new MozillaSpeechService();
-    private SpeechRecognition mSpeechRecognition;
+    private NetworkSpeechRecognition mNetworkSpeechRecognition;
+    private LocalSpeechRecognition mLocalSpeechRecognition;
    private SpeechState mState;
    private Vad mVad;

@ -47,15 +50,23 @@ public class MozillaSpeechService {
                if (retVal < 0) {
                    notifyListeners(SpeechState.ERROR, "Error Initializing VAD " + String.valueOf(retVal));
                } else {
-                    this.mSpeechRecognition = new SpeechRecognition(SAMPLERATE, CHANNELS, mVad, aContext,
-                            this, mNetworkSettings);
-                    Thread audio_thread = new Thread(this.mSpeechRecognition);
+                    Thread audio_thread;
+
+                    if (this.useDeepSpeech) {
+                        this.mLocalSpeechRecognition = new LocalSpeechRecognition(SAMPLERATE, CHANNELS, mVad, this);
+                        audio_thread = new Thread(this.mLocalSpeechRecognition);
+                    } else {
+                        this.mNetworkSpeechRecognition = new NetworkSpeechRecognition(SAMPLERATE, CHANNELS, mVad, aContext, this, mNetworkSettings);
+                        audio_thread = new Thread(this.mNetworkSpeechRecognition);
+                    }
+
                    audio_thread.start();
                    isIdle = false;
                }
            }
        } catch (Exception exc) {
-            notifyListeners(SpeechState.ERROR, "General error loading the module.");
+            Log.e("MozillaSpeechService", "General error loading the module: " + exc);
+            notifyListeners(SpeechState.ERROR, "General error loading the module: " + exc);
        }
    }

@ -78,7 +89,10 @@ public class MozillaSpeechService {
    }

    public void cancel() {
-        this.mSpeechRecognition.cancel();
+        // Which recognizer is running depends on the useDeepSpeech setting at
+        // the time start() was called, and that setting may have been toggled
+        // since. Cancel whichever recognizers exist so that cancel() works in
+        // network mode as well as in local mode.
+        if (this.mNetworkSpeechRecognition != null) {
+            this.mNetworkSpeechRecognition.cancel();
+        }
+        if (this.mLocalSpeechRecognition != null) {
+            this.mLocalSpeechRecognition.cancel();
+        }
    }

    public void removeListener(ISpeechRecognitionListener aListener) {
@ -99,6 +113,31 @@ public class MozillaSpeechService {
        this.mNetworkSettings.mLanguage = language;
    }

+    /** Returns the model directory name that corresponds to the configured language. */
+    public String getLanguageDir() {
+        final String language = this.mNetworkSettings.mLanguage;
+        return LocalSpeechRecognition.getLanguageDir(language);
+    }
+
+    /**
+     * Selects the recognizer created by the next call to start().
+     *
+     * @param yesOrNo {@code true} for the local DeepSpeech recognizer,
+     *                {@code false} for the network recognizer
+     */
+    public void useDeepSpeech(boolean yesOrNo) {
+        useDeepSpeech = yesOrNo;
+    }
+
+    /** Returns the models root path as last given to setModelPath(). */
+    public String getModelPath() {
+        return mModelPath;
+    }
+
+    /**
+     * Sets the root path of the models. The language directory is not part of
+     * this path; it is appended where the path is used.
+     */
+    public void setModelPath(String aModelPath) {
+        mModelPath = aModelPath;
+    }
+
+    /** Returns whether the model files for the configured language are present under the model path. */
+    public boolean ensureModelInstalled() {
+        final String languageModelDir = this.getModelPath() + "/" + this.getLanguageDir();
+        return LocalSpeechRecognition.ensureModelInstalled(languageModelDir);
+    }
+
+    /** Returns the download URL of the model archive for the configured language. */
+    public String getModelDownloadURL() {
+        final String languageDir = this.getLanguageDir();
+        return LocalSpeechRecognition.getModelDownloadURL(languageDir);
+    }
+
    public void setProductTag(String tag) {
        this.mNetworkSettings.mProductTag = tag;
    }
--- a/mozillaspeechlibrary/src/main/java/com/mozilla/speechlibrary/NetworkSpeechRecognition.java
+++ b/mozillaspeechlibrary/src/main/java/com/mozilla/speechlibrary/NetworkSpeechRecognition.java
@ -11,7 +11,7 @@ import com.github.axet.audiolibrary.encoders.Sound;
 import java.io.ByteArrayOutputStream;
 import java.util.Arrays;

-class SpeechRecognition implements Runnable {
+class NetworkSpeechRecognition implements Runnable {

    Vad mVad;
    short[] mBuftemp;
@ -29,7 +29,7 @@ class SpeechRecognition implements Runnable {
    Networking network;
    NetworkSettings mNetworkSettings;

-    protected SpeechRecognition(int aSampleRate, int aChannels, Vad aVad, Context aContext,
+    protected NetworkSpeechRecognition(int aSampleRate, int aChannels, Vad aVad, Context aContext,
                                MozillaSpeechService aService, NetworkSettings mNetworkSettings) {
        this.mVad = aVad;
        this.mContext = aContext;