From 160b5700e6849fac4908614bc98b3c4e06fbfdc2 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Tue, 25 Aug 2020 16:49:29 +0200
Subject: [PATCH] Revert "Merge pull request #73 from lissyx/master-stt-rename"

This reverts commit adde02be1676d3ec4a2c18008b4871489f3bb42a, reversing
changes made to 93914db805408b4a130e4489cb5d4ff88cd95159.
---
 README.rst | 20 ++++++--------
 android_mic_streaming/README.md | 6 ++--
 android_mic_streaming/app/build.gradle | 4 +--
 .../app/src/main/AndroidManifest.xml | 2 +-
 .../MainActivity.kt | 12 ++++----
 .../app/src/main/res/values/strings.xml | 2 +-
 android_mic_streaming/settings.gradle | 2 +-
 batch_processing/Readme.md | 4 +--
 batch_processing/driver.py | 2 +-
 batch_processing/requirements.txt | 2 +-
 batch_processing/test.ps1 | 2 +-
 electron/Readme.md | 10 +++----
 electron/package-lock.json | 26 +++++++++---------
 electron/package.json | 14 +++++-----
 electron/public/create-window.js | 2 +-
 electron/public/electron.js | 4 +--
 electron/public/index.html | 2 +-
 electron/public/recognize-wav.js | 10 +++----
 electron/src/App.js | 2 +-
 ffmpeg_vad_streaming/README.MD | 4 +--
 ffmpeg_vad_streaming/index.js | 8 +++---
 ffmpeg_vad_streaming/package.json | 2 +-
 mic_vad_streaming/README.rst | 5 ++--
 mic_vad_streaming/mic_vad_streaming.py | 8 +++---
 mic_vad_streaming/requirements.txt | 2 +-
 mic_vad_streaming/test.sh | 2 +-
 .../App.config | 0
 .../App.xaml | 4 +--
 .../App.xaml.cs | 18 ++++++------
 .../DeepSpeech.WPF.csproj} | 10 +++---
 .../DeepSpeech.WPF.sln} | 4 +--
 .../MainWindow.xaml | 4 +--
 .../MainWindow.xaml.cs | 4 +--
 .../Properties/AssemblyInfo.cs | 4 +--
 .../Properties/Resources.Designer.cs | 4 +--
 .../Properties/Resources.resx | 0
 .../Properties/Settings.Designer.cs | 2 +-
 .../Properties/Settings.settings | 0
 .../ViewModels/BindableBase.cs | 2 +-
 .../ViewModels/MainWindowViewModel.cs | 18 ++++++------
 .../packages.config | 0
 nim_mic_vad_streaming/README.md | 14 +++++-----
 .../linux_nim_vad_streaming/README.md | 6 ++--
 .../linux_nim_vad_streaming/vad_stream.nim | 10 +++----
 .../win_nim_vad_streaming/README.md | 6 ++--
 .../win_nim_vad_streaming/vad_stream.nim | 8 +++---
 nodejs_mic_vad_streaming/Readme.md | 8 +++---
 nodejs_mic_vad_streaming/package.json | 2 +-
 nodejs_mic_vad_streaming/start.js | 6 ++--
 nodejs_wav/Readme.md | 2 +-
 nodejs_wav/index.js | 4 +--
 nodejs_wav/package.json | 4 +--
 tests.sh | 2 +-
 ...zillaVoiceSttUWP.sln => DeepSpeechUWP.sln} | 2 +-
 .../.gitignore | 0
 .../App.xaml | 4 +--
 .../App.xaml.cs | 2 +-
 .../Assets/LockScreenLogo.scale-200.png | Bin
 .../Assets/SplashScreen.scale-200.png | Bin
 .../Assets/Square150x150Logo.scale-200.png | Bin
 .../Assets/Square44x44Logo.scale-200.png | Bin
 ...x44Logo.targetsize-24_altform-unplated.png | Bin
 .../Assets/StoreLogo.png | Bin
 .../Assets/Wide310x150Logo.scale-200.png | Bin
 .../DeepSpeechUWP.csproj} | 14 +++++-----
 .../MainPage.xaml | 6 ++--
 .../MainPage.xaml.cs | 16 +++++------
 .../Package.appxmanifest | 8 +++---
 .../Properties/AssemblyInfo.cs | 4 +--
 .../Properties/Default.rd.xml | 0
 .../models/.gitkeep | 0
 vad_transcriber/audioTranscript_cmd.py | 4 +--
 vad_transcriber/audioTranscript_gui.py | 14 +++++-----
 vad_transcriber/requirements.txt | 2 +-
 vad_transcriber/test.sh | 2 +-
 vad_transcriber/wavTranscriber.py | 16 +++++------
 web_microphone_websocket/Readme.md | 4 +--
 web_microphone_websocket/package.json | 2 +-
 web_microphone_websocket/public/index.html | 2 +-
 web_microphone_websocket/server.js | 8 +++---
 80 files changed, 207 insertions(+), 208 deletions(-)
 rename android_mic_streaming/app/src/main/java/org/{mozilla/voice/sttdemo => deepspeechdemo}/MainActivity.kt (92%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/App.config (100%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/App.xaml (71%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/App.xaml.cs (57%)
 rename net_framework/{MozillaVoiceSttWPF/MozillaVoiceStt.WPF.csproj => DeepSpeechWPF/DeepSpeech.WPF.csproj} (95%)
 rename net_framework/{MozillaVoiceSttWPF/MozillaVoiceStt.WPF.sln => DeepSpeechWPF/DeepSpeech.WPF.sln} (77%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/MainWindow.xaml (97%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/MainWindow.xaml.cs (85%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/Properties/AssemblyInfo.cs (95%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/Properties/Resources.Designer.cs (94%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/Properties/Resources.resx (100%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/Properties/Settings.Designer.cs (96%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/Properties/Settings.settings (100%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/ViewModels/BindableBase.cs (98%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/ViewModels/MainWindowViewModel.cs (97%)
 rename net_framework/{MozillaVoiceSttWPF => DeepSpeechWPF}/packages.config (100%)
 rename uwp/{MozillaVoiceSttUWP.sln => DeepSpeechUWP.sln} (94%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/.gitignore (100%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/App.xaml (66%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/App.xaml.cs (99%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/Assets/LockScreenLogo.scale-200.png (100%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/Assets/SplashScreen.scale-200.png (100%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/Assets/Square150x150Logo.scale-200.png (100%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/Assets/Square44x44Logo.scale-200.png (100%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/Assets/Square44x44Logo.targetsize-24_altform-unplated.png (100%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/Assets/StoreLogo.png (100%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/Assets/Wide310x150Logo.scale-200.png (100%)
 rename uwp/{MozillaVoiceSttUWP/MozillaVoiceSttUWP.csproj => DeepSpeechUWP/DeepSpeechUWP.csproj} (95%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/MainPage.xaml (94%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/MainPage.xaml.cs (96%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/Package.appxmanifest (88%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/Properties/AssemblyInfo.cs (87%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/Properties/Default.rd.xml (100%)
 rename uwp/{MozillaVoiceSttUWP => DeepSpeechUWP}/models/.gitkeep (100%)

diff --git a/README.rst b/README.rst
index b523f61..ae8bc37 100644
--- a/README.rst
+++ b/README.rst
@@ -1,19 +1,17 @@
-Mozilla Voice STT master Examples
-=================================
+DeepSpeech master Examples
+==========================
 
-These are various user-contributed examples on how to use or integrate Mozilla Voice STT using our packages.
+These are various examples on how to use or integrate DeepSpeech using our packages.
 
-It is a good way to just try out Mozilla Voice STT before learning how it works in detail, as well as a source of inspiration for ways you can integrate it into your application or solve common tasks like voice activity detection (VAD) or microphone streaming.
+It is a good way to just try out DeepSpeech before learning how it works in detail, as well as a source of inspiration for ways you can integrate it into your application or solve common tasks like voice activity detection (VAD) or microphone streaming.
 
-Please understand that those examples are provided as-is, with no guarantee it will work in every configuration.
+Contributions are welcome!
 
-Contributions like fixes to existing examples or new ones are welcome!
+**Note:** These examples target DeepSpeech **master branch** only. If you're using a different release, you need to go to the corresponding branch for the release:
 
-**Note:** These examples target Mozilla Voice STT **master branch** only. If you're using a different release, you need to go to the corresponding branch for the release:
-
-* `v0.7.x `_
-* `v0.6.x `_
-* `master branch `_
+* `v0.7.x `_
+* `v0.6.x `_
+* `master branch `_
 
 **List of examples**
 
diff --git a/android_mic_streaming/README.md b/android_mic_streaming/README.md
index e157cca..a414761 100644
--- a/android_mic_streaming/README.md
+++ b/android_mic_streaming/README.md
@@ -1,6 +1,6 @@
 # Android Microphone Streaming
 
-Android demo application that streams audio from the microphone to mozilla voice stt and transcribes it.
+Android demo application that streams audio from the microphone to deepspeech and transcribes it.
 
 ## Prerequisites
 
@@ -16,7 +16,7 @@ Move the model files `deepspeech-0.8.0-models.pbmm`, `deepspeech-0.8.0-models.sc
 Mind that the data directory will only be present after installing and launching the app once.
 
 ```
-adb push deepspeech-0.8.0-models.tflite deepspeech-0.8.0-models.scorer /storage/emulated/0/Android/data/org.mozilla.voice.sttdemo/files/
+adb push deepspeech-0.8.0-models.tflite deepspeech-0.8.0-models.scorer /storage/emulated/0/Android/data/org.deepspeechdemo/files/
 ```
 
 You can also copy the files from your file browser to the device.
 
@@ -49,4 +49,4 @@ Start recording by pressing the button and the app will transcribe the spoken te
 
 Based on your use case or the language you are using you might change the values of `BEAM_WIDTH`, `LM_ALPHA` and `LM_BETA` to improve the speech recogintion.
 
-You can also alter the `NUM_BUFFER_ELEMENTS` to change the size of the audio data buffer that is fed into the model.
+You can also alter the `NUM_BUFFER_ELEMENTS` to change the size of the audio data buffer that is fed into the model.
\ No newline at end of file
diff --git a/android_mic_streaming/app/build.gradle b/android_mic_streaming/app/build.gradle
index e2cecc8..cb55519 100644
--- a/android_mic_streaming/app/build.gradle
+++ b/android_mic_streaming/app/build.gradle
@@ -8,7 +8,7 @@ android {
     compileSdkVersion 29
     buildToolsVersion "29.0.2"
     defaultConfig {
-        applicationId "org.mozilla.voice.sttdemo"
+        applicationId "org.deepspeechdemo"
         minSdkVersion 22
         targetSdkVersion 29
         versionCode 1
@@ -34,7 +34,7 @@ dependencies {
     implementation 'androidx.core:core-ktx:1.0.2'
     implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
 
-    implementation 'org.mozilla.voice:stt:0.9.0-alpha.5'
+    implementation 'org.mozilla.deepspeech:libdeepspeech:0.8.0'
 
     testImplementation 'junit:junit:4.12'
     androidTestImplementation 'androidx.test.ext:junit:1.1.0'
diff --git a/android_mic_streaming/app/src/main/AndroidManifest.xml b/android_mic_streaming/app/src/main/AndroidManifest.xml
index 3e25733..5cabe09 100644
--- a/android_mic_streaming/app/src/main/AndroidManifest.xml
+++ b/android_mic_streaming/app/src/main/AndroidManifest.xml
@@ -1,6 +1,6 @@
+    package="org.deepspeechdemo">
diff --git a/android_mic_streaming/app/src/main/java/org/mozilla/voice/sttdemo/MainActivity.kt b/android_mic_streaming/app/src/main/java/org/deepspeechdemo/MainActivity.kt
similarity index 92%
rename from android_mic_streaming/app/src/main/java/org/mozilla/voice/sttdemo/MainActivity.kt
rename to android_mic_streaming/app/src/main/java/org/deepspeechdemo/MainActivity.kt
index 6c639e4..624afe9 100644
--- a/android_mic_streaming/app/src/main/java/org/mozilla/voice/sttdemo/MainActivity.kt
+++ b/android_mic_streaming/app/src/main/java/org/deepspeechdemo/MainActivity.kt
@@ -1,4 +1,4 @@
-package org.mozilla.voice.sttdemo
+package org.deepspeechdemo
 
 import android.Manifest
 import android.content.pm.PackageManager
@@ -11,14 +11,14 @@
 import android.view.View
 import androidx.appcompat.app.AppCompatActivity
 import androidx.core.app.ActivityCompat
 import kotlinx.android.synthetic.main.activity_main.*
-import org.mozilla.voice.stt.MozillaVoiceSttModel.MozillaVoiceSttModel
-import org.mozilla.voice.stt.MozillaVoiceSttModel.MozillaVoiceSttStreamingState
+import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel
+import org.mozilla.deepspeech.libdeepspeech.DeepSpeechStreamingState
 import java.io.File
 
 class MainActivity : AppCompatActivity() {
-    private var model: MozillaVoiceSttModel? = null
-    private var streamContext: MozillaVoiceSttStreamingState? = null
+    private var model: DeepSpeechModel? = null
+    private var streamContext: DeepSpeechStreamingState? = null
 
     // Change the following parameters regarding
     // what works best for your use case or your language.
@@ -77,7 +77,7 @@ class MainActivity : AppCompatActivity() {
             }
         }
 
-        model = mozillaVoiceSttModel(tfliteModelPath)
+        model = DeepSpeechModel(tfliteModelPath)
         model?.setBeamWidth(BEAM_WIDTH)
         model?.enableExternalScorer(scorerPath)
         model?.setScorerAlphaBeta(LM_ALPHA, LM_BETA)
diff --git a/android_mic_streaming/app/src/main/res/values/strings.xml b/android_mic_streaming/app/src/main/res/values/strings.xml
index 0a405a8..9d33071 100644
--- a/android_mic_streaming/app/src/main/res/values/strings.xml
+++ b/android_mic_streaming/app/src/main/res/values/strings.xml
@@ -1,3 +1,3 @@
-    Mozilla Voice STT Demo
+    DeepSpeech Demo
diff --git a/android_mic_streaming/settings.gradle b/android_mic_streaming/settings.gradle
index 76e1ee8..01ad4d8 100644
--- a/android_mic_streaming/settings.gradle
+++ b/android_mic_streaming/settings.gradle
@@ -1,2 +1,2 @@
 include ':app'
-rootProject.name='MozillaVoiceSttDemo'
+rootProject.name='DeepSpeechDemo'
diff --git a/batch_processing/Readme.md b/batch_processing/Readme.md
index 87f642d..9a9d938 100644
--- a/batch_processing/Readme.md
+++ b/batch_processing/Readme.md
@@ -134,7 +134,7 @@ Running via the GPU takes half the time of using the CPU and has good results.
 
 It will then run the individual commands like :
 
-`mozilla_voice_stt --model C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.8.0-models.pbmm --scorer C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.8.0-models.scorer --audio 'C:\Users\jmike\Downloads\podcast\45374977-48000-2-24d9a365625bb.mp3.wav' --json`
+`deepspeech --model C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.8.0-models.pbmm --scorer C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.8.0-models.scorer --audio 'C:\Users\jmike\Downloads\podcast\45374977-48000-2-24d9a365625bb.mp3.wav' --json`
 
 Websites referenced:
 
@@ -154,4 +154,4 @@ https://hacks.mozilla.org/2019/12/deepspeech-0-6-mozillas-speech-to-text-engine/
 https://palletsprojects.com/p/click/
 https://www.howtoforge.com/tutorial/ffmpeg-audio-conversion/
 https://www.joe0.com/2019/10/19/how-resolve-tensorflow-2-0-error-could-not-load-dynamic-library-cudart64_100-dll-dlerror-cudart64_100-dll-not-found/
-https://www.programcreek.com/python/example/88033/click.Path
+https://www.programcreek.com/python/example/88033/click.Path
\ No newline at end of file
diff --git a/batch_processing/driver.py b/batch_processing/driver.py
index 2d3db7c..04f19ad 100644
--- a/batch_processing/driver.py
+++ b/batch_processing/driver.py
@@ -61,7 +61,7 @@ def main(dirname, ext, model, scorer):
 
     command = " ".join(
         [
-            "mozilla_voice_stt",
+            "deepspeech",
             "--model",
             model,
            "--scorer",
diff --git a/batch_processing/requirements.txt b/batch_processing/requirements.txt
index f0d0960..e5bd87d 100644
--- a/batch_processing/requirements.txt
+++ b/batch_processing/requirements.txt
@@ -10,7 +10,7 @@ cachetools==4.1.0
 certifi==2020.4.5.2
 chardet==3.0.4
 click==7.1.2
-mozilla_voice_stt==0.9.0a5
+deepspeech==0.8.0
 delegator.py @ git+https://github.com/amitt001/delegator.py.git@194aa92543fbdbfbae0bcc24ca217819a7805da2
 flask==1.1.2
 gast==0.2.2
diff --git a/batch_processing/test.ps1 b/batch_processing/test.ps1
index b1c5791..1894afa 100644
--- a/batch_processing/test.ps1
+++ b/batch_processing/test.ps1
@@ -1 +1 @@
-mozilla_voice_stt --model C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.7.3-models.pbmm --scorer C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.7.3-models.scorer --audio C:\Users\jmike\Documents\Audacity\clip.wav --json
+deepspeech --model C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.7.3-models.pbmm --scorer C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.7.3-models.scorer --audio C:\Users\jmike\Documents\Audacity\clip.wav --json
\ No newline at end of file
diff --git a/electron/Readme.md b/electron/Readme.md
index 61292ac..92baff4 100644
--- a/electron/Readme.md
+++ b/electron/Readme.md
@@ -1,6 +1,6 @@
-# Mozilla Voice STT Electron example
+# DeepSpeech Electron example
 
-This is an example of Mozilla Voice STT running in an Electron app with a ReactJS front-end and processing .wav files.
+This is an example of DeepSpeech running in an Electron app with a ReactJS front-end and processing .wav files.
 
 ## Install
 
@@ -66,6 +66,6 @@ Test the (dmg/appimage/exe) package file that has been generated in `/dist`.
 
 The model files download to the following directories and must be deleted manually
 
-- MacOSX: `~/Library/Application\ Support/mozilla_voice_stt-electron`
-- Linux: `~/.config/mozilla_voice_stt-electron`
-- Windows: `~/AppData/Roaming/mozilla_voice_stt-electron`
+- MacOSX: `~/Library/Application\ Support/deepspeech-electron`
+- Linux: `~/.config/deepspeech-electron`
+- Windows: `~/AppData/Roaming/deepspeech-electron`
diff --git a/electron/package-lock.json b/electron/package-lock.json
index 3f5d0f8..a7b89cb 100644
--- a/electron/package-lock.json
+++ b/electron/package-lock.json
@@ -1,5 +1,5 @@
 {
-  "name": "mozilla_voice_stt-electron",
+  "name": "deepspeech-electron",
   "version": "1.0.0",
   "lockfileVersion": 1,
   "requires": true,
@@ -1384,18 +1384,6 @@
         "@types/yargs": "^13.0.0"
       }
     },
-    "@mozilla-voice/stt": {
-      "version": "0.9.0-alpha.5",
-      "resolved": "https://registry.npmjs.org/@mozilla-voice/stt/-/stt-0.9.0-alpha.5.tgz",
-      "integrity": "sha512-lyZmMnLKdmBzWonDazIvkbnyAlIXd5NDUaINf5wfOdsw4Rliv/hy/FjYdYN9Tccq4Zvcd+dbqgXGLeZuECGmIg==",
-      "requires": {
-        "argparse": "1.0.x",
-        "memory-stream": "1.0.x",
-        "node-pre-gyp": "0.15.x",
-        "node-wav": "0.0.2",
-        "sox-stream": "2.0.x"
-      }
-    },
     "@mrmlnc/readdir-enhanced": {
       "version": "2.2.1",
       "resolved": "https://registry.npmjs.org/@mrmlnc/readdir-enhanced/-/readdir-enhanced-2.2.1.tgz",
@@ -4802,6 +4790,18 @@
       "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.3.tgz",
       "integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ="
     },
+    "deepspeech": {
+      "version": "0.8.0",
+      "resolved": "https://registry.npmjs.org/deepspeech/-/deepspeech-0.8.0.tgz",
+      "integrity": "sha512-jqU+NbXVZnS+okMgoiOhJz22RaHSmvIjmHaRu7IZ0xBDQbcqNGff4GXk4a5etfSXm3bXddRtBlfFr5KyQExjbw==",
+      "requires": {
+        "argparse": "1.0.x",
+        "memory-stream": "1.0.x",
+        "node-pre-gyp": "0.15.x",
+        "node-wav": "0.0.2",
+        "sox-stream": "2.0.x"
+      }
+    },
     "default-gateway": {
       "version": "4.2.0",
       "resolved": "https://registry.npmjs.org/default-gateway/-/default-gateway-4.2.0.tgz",
diff --git a/electron/package.json b/electron/package.json
index c7cddf8..a634a7d 100644
--- a/electron/package.json
+++ b/electron/package.json
@@ -1,6 +1,6 @@
 {
-  "name": "mozilla_voice_stt-electron",
-  "productName": "mozilla_voice_stt-electron",
+  "name": "deepspeech-electron",
+  "productName": "deepspeech-electron",
   "version": "1.0.0",
   "description": "My Electron application description",
   "main": "public/electron.js",
@@ -20,15 +20,15 @@
   "postinstall": "electron-builder install-app-deps",
   "homepage": "./",
   "build": {
-    "appId": "mozilla_voice_stt-electron",
-    "productName": "mozilla_voice_stt-electron",
+    "appId": "deepspeech-electron",
+    "productName": "deepspeech-electron",
     "files": [
       "build/**/*",
       "node_modules/**/*",
       "package.json"
     ],
"buildDependenciesFromSource": true, - "artifactName": "mozilla_voice_stt-electron-${version}-${os}-${arch}.${ext}", + "artifactName": "deepspeech-electron-${version}-${os}-${arch}.${ext}", "dmg": { "title": "${productName}" }, @@ -52,7 +52,7 @@ }, "win": { "target": "nsis", - "artifactName": "mozilla_voice_stt-electron-${version}-${os}-${arch}.${ext}" + "artifactName": "deepspeech-electron-${version}-${os}-${arch}.${ext}" }, "linux": { "target": [ @@ -66,7 +66,7 @@ "keywords": [], "license": "MIT", "dependencies": { - "@mozilla-voice/stt": "^0.9.0-alpha.5", + "deepspeech": "^0.8.0", "electron-is-dev": "^1.1.0", "lodash": "^4.17.15", "node-abi": "^2.18.0", diff --git a/electron/public/create-window.js b/electron/public/create-window.js index a53ad55..c301ced 100644 --- a/electron/public/create-window.js +++ b/electron/public/create-window.js @@ -35,7 +35,7 @@ function createWindow(model) { app.quit() }); - // message from front-end App.js, request that this file be processed by Mozilla Voice STT + // message from front-end App.js, request that this file be processed by DeepSpeech ipcMain.handle('recognize-wav', async function (event, file) { const filePath = path.resolve(__dirname, 'audio', file); const results = await recognizeWav(filePath, model); diff --git a/electron/public/electron.js b/electron/public/electron.js index bc4b5db..fafa766 100644 --- a/electron/public/electron.js +++ b/electron/public/electron.js @@ -8,12 +8,12 @@ const {getModel} = require('./recognize-wav'); let appDataPath; if (fs.existsSync(path.resolve(__dirname, '../models/deepspeech-0.8.0-models.pbmm'))) { - // if the model was found at the root, use that directory + // if the deepspeech model was found at the root, use that directory appDataPath = path.resolve(__dirname, '../models'); } else { // otherwise use the electron "appData" path - appDataPath = path.resolve(electron.app.getPath('appData'), 'mozilla_voice_stt-electron'); + appDataPath = path.resolve(electron.app.getPath('appData'), 'deepspeech-electron'); } app.on('ready', function () { diff --git a/electron/public/index.html b/electron/public/index.html index 177953b..9b15225 100644 --- a/electron/public/index.html +++ b/electron/public/index.html @@ -25,7 +25,7 @@ Learn how to configure a non-root public URL by running `npm run build`. 
     -->
-    Mozilla Voice STT Electron Example
+    DeepSpeech Electron Example
diff --git a/electron/public/recognize-wav.js b/electron/public/recognize-wav.js
index 72be2d0..14b166a 100644
--- a/electron/public/recognize-wav.js
+++ b/electron/public/recognize-wav.js
@@ -1,10 +1,10 @@
-const mozillaVoiceStt = require('@mozilla-voice/stt');
+const DeepSpeech = require('deepspeech');
 const fs = require('fs');
 const path = require('path');
 const wav = require('wav');
 const download = require('./download');
 
-// return the model or download it if it is not found
+// return the deepspeech model or download it if it is not found
 function getModel(appDataPath, callback) {
   let modelPath = path.resolve(appDataPath, 'deepspeech-0.8.0-models.pbmm');
   let scorerPath = path.resolve(appDataPath, 'deepspeech-0.8.0-models.scorer');
@@ -23,14 +23,14 @@ function getModel(appDataPath, callback) {
   }
 }
 
-// create the model
+// create the deepspeech model
 function createModel(modelPath, scorerPath) {
-  const model = new mozillaVoiceStt.Model(modelPath);
+  const model = new DeepSpeech.Model(modelPath);
   model.enableExternalScorer(scorerPath);
   return model;
 }
 
-// create a stream to process a .wav file
+// create a deepspeech stream to process a .wav file
 function recognizeWav(path, model) {
   return new Promise(function(resolve, reject) {
     try {
diff --git a/electron/src/App.js b/electron/src/App.js
index a9c55a1..de6c64d 100644
--- a/electron/src/App.js
+++ b/electron/src/App.js
@@ -21,7 +21,7 @@ class App extends Component {
       files
     }, () => {
       files.forEach(file => {
-        // request that each file be processed by mozilla voice stt
+        // request that each file be processed by deepspeech
         console.log('recognize', file);
         window.ipcRenderer.invoke('recognize-wav', file).then(result => {
           // add the recognition results to this.state.results
diff --git a/ffmpeg_vad_streaming/README.MD b/ffmpeg_vad_streaming/README.MD
index 2bcdc49..e065d48 100644
--- a/ffmpeg_vad_streaming/README.MD
+++ b/ffmpeg_vad_streaming/README.MD
@@ -1,6 +1,6 @@
 # FFmpeg VAD Streaming
 
-Streaming inference from arbitrary source (FFmpeg input) to Mozilla Voice STT, using VAD (voice activity detection). A fairly simple example demonstrating the Mozilla Voice STT streaming API in Node.js.
+Streaming inference from arbitrary source (FFmpeg input) to DeepSpeech, using VAD (voice activity detection). A fairly simple example demonstrating the DeepSpeech streaming API in Node.js.
 
 This example was successfully tested with a mobile phone streaming a live feed to a RTMP server (nginx-rtmp), which then could be used by this script for near real time speech recognition.
 
@@ -31,7 +31,7 @@ node ./index.js --audio rtmp://:1935/live/teststream \
 ```
 
 ## Examples
-Real time streaming inference with Mozilla Voice STT's example audio ([audio-0.4.1.tar.gz](https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz)).
+Real time streaming inference with DeepSpeech's example audio ([audio-0.4.1.tar.gz](https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz)).
 ```bash
 node ./index.js --audio $HOME/audio/2830-3980-0043.wav \
   --scorer $HOME/models/kenlm.scorer \
diff --git a/ffmpeg_vad_streaming/index.js b/ffmpeg_vad_streaming/index.js
index 58227d6..d6e5a4f 100644
--- a/ffmpeg_vad_streaming/index.js
+++ b/ffmpeg_vad_streaming/index.js
@@ -1,7 +1,7 @@
 #!/usr/bin/env node
 
 const VAD = require("node-vad");
-const mVS = require('@mozilla-voice/stt');
+const Ds = require('deepspeech');
 const argparse = require('argparse');
 const util = require('util');
 const { spawn } = require('child_process');
@@ -15,11 +15,11 @@ let VersionAction = function VersionAction(options) {
 util.inherits(VersionAction, argparse.Action);
 
 VersionAction.prototype.call = function(parser) {
-  mVS.printVersions();
+  Ds.printVersions();
   process.exit(0);
 };
 
-let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running Mozilla Voice STT inference.'});
+let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
 parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
 parser.addArgument(['--scorer'], {help: 'Path to the scorer file', nargs: '?'});
 parser.addArgument(['--audio'], {required: true, help: 'Path to the audio source to run (ffmpeg supported formats)'});
@@ -32,7 +32,7 @@ function totalTime(hrtimeValue) {
 
 console.error('Loading model from file %s', args['model']);
 const model_load_start = process.hrtime();
-let model = new mVS.Model(args['model']);
+let model = new Ds.Model(args['model']);
 const model_load_end = process.hrtime(model_load_start);
 console.error('Loaded model in %ds.', totalTime(model_load_end));
 
diff --git a/ffmpeg_vad_streaming/package.json b/ffmpeg_vad_streaming/package.json
index af1fefa..0718f23 100644
--- a/ffmpeg_vad_streaming/package.json
+++ b/ffmpeg_vad_streaming/package.json
@@ -8,7 +8,7 @@
   },
   "dependencies": {
     "argparse": "^1.0.10",
-    "@mozilla-voice/stt": "0.9.0-alpha.5",
+    "deepspeech": "0.8.0",
     "node-vad": "^1.1.1",
     "util": "^0.11.1"
   },
diff --git a/mic_vad_streaming/README.rst b/mic_vad_streaming/README.rst
index 57d8144..b2da986 100644
--- a/mic_vad_streaming/README.rst
+++ b/mic_vad_streaming/README.rst
@@ -1,7 +1,8 @@
+
 Microphone VAD Streaming
 ========================
 
-Stream from microphone to Mozilla Voice STT, using VAD (voice activity detection). A fairly simple example demonstrating the Mozilla Voice STT streaming API in Python. Also useful for quick, real-time testing of models and decoding parameters.
+Stream from microphone to DeepSpeech, using VAD (voice activity detection). A fairly simple example demonstrating the DeepSpeech streaming API in Python. Also useful for quick, real-time testing of models and decoding parameters.
 
 Installation
 ------------
 
@@ -31,7 +32,7 @@ Usage
         [-w SAVEWAV] [-f FILE] -m MODEL [-s SCORER]
         [-d DEVICE] [-r RATE]
 
-    Stream from microphone to Mozilla Voice STT using VAD
+    Stream from microphone to DeepSpeech using VAD
 
     optional arguments:
       -h, --help            show this help message and exit
diff --git a/mic_vad_streaming/mic_vad_streaming.py b/mic_vad_streaming/mic_vad_streaming.py
index 0fa5e56..074e5b2 100755
--- a/mic_vad_streaming/mic_vad_streaming.py
+++ b/mic_vad_streaming/mic_vad_streaming.py
@@ -60,7 +60,7 @@ class Audio(object):
         """
         Microphone may not support our native processing sampling rate, so
         resample from input_rate to RATE_PROCESS here for webrtcvad and
-        mozilla_voice_stt
+        deepspeech
 
         Args:
             data (binary): Input audio stream
@@ -152,7 +152,7 @@ class VADAudio(Audio):
                     ring_buffer.clear()
 
 def main(ARGS):
-    # Load model
+    # Load DeepSpeech model
    if os.path.isdir(ARGS.model):
        model_dir = ARGS.model
        ARGS.model = os.path.join(model_dir, 'output_graph.pb')
@@ -173,7 +173,7 @@ def main(ARGS):
     print("Listening (ctrl-C to exit)...")
     frames = vad_audio.vad_collector()
 
-    # Stream from microphone to Mozilla Voice STT using VAD
+    # Stream from microphone to DeepSpeech using VAD
     spinner = None
     if not ARGS.nospinner:
         spinner = Halo(spinner='line')
@@ -199,7 +199,7 @@ if __name__ == '__main__':
     DEFAULT_SAMPLE_RATE = 16000
 
     import argparse
-    parser = argparse.ArgumentParser(description="Stream from microphone to Mozilla Voice STT using VAD")
+    parser = argparse.ArgumentParser(description="Stream from microphone to DeepSpeech using VAD")
 
     parser.add_argument('-v', '--vad_aggressiveness', type=int, default=3,
                         help="Set aggressiveness of VAD: an integer between 0 and 3, 0 being the least aggressive about filtering out non-speech, 3 the most aggressive. Default: 3")
diff --git a/mic_vad_streaming/requirements.txt b/mic_vad_streaming/requirements.txt
index b048a1d..ce0b4cf 100644
--- a/mic_vad_streaming/requirements.txt
+++ b/mic_vad_streaming/requirements.txt
@@ -1,4 +1,4 @@
-mozilla_voice_stt~=0.9.0a5
+deepspeech~=0.8.0
 pyaudio~=0.2.11
 webrtcvad~=2.0.10
 halo~=0.0.18
diff --git a/mic_vad_streaming/test.sh b/mic_vad_streaming/test.sh
index 9ff97b1..e7129cf 100755
--- a/mic_vad_streaming/test.sh
+++ b/mic_vad_streaming/test.sh
@@ -8,7 +8,7 @@ pushd ${THIS}
     source ../tests.sh
 
     pip install --user $(get_python_wheel_url "$1")
-    pip install --user -r <(grep -v mozilla_voice_stt requirements.txt)
+    pip install --user -r <(grep -v deepspeech requirements.txt)
 
     pulseaudio &
diff --git a/net_framework/MozillaVoiceSttWPF/App.config b/net_framework/DeepSpeechWPF/App.config
similarity index 100%
rename from net_framework/MozillaVoiceSttWPF/App.config
rename to net_framework/DeepSpeechWPF/App.config
diff --git a/net_framework/MozillaVoiceSttWPF/App.xaml b/net_framework/DeepSpeechWPF/App.xaml
similarity index 71%
rename from net_framework/MozillaVoiceSttWPF/App.xaml
rename to net_framework/DeepSpeechWPF/App.xaml
index ca6a0f1..16ebb0d 100644
--- a/net_framework/MozillaVoiceSttWPF/App.xaml
+++ b/net_framework/DeepSpeechWPF/App.xaml
@@ -1,8 +1,8 @@
diff --git a/net_framework/MozillaVoiceSttWPF/App.xaml.cs b/net_framework/DeepSpeechWPF/App.xaml.cs
similarity index 57%
rename from net_framework/MozillaVoiceSttWPF/App.xaml.cs
rename to net_framework/DeepSpeechWPF/App.xaml.cs
index a01f1b1..d4b87d6 100644
--- a/net_framework/MozillaVoiceSttWPF/App.xaml.cs
+++ b/net_framework/DeepSpeechWPF/App.xaml.cs
@@ -1,10 +1,10 @@
 using CommonServiceLocator;
-using MozillaVoiceStt.WPF.ViewModels;
-using MozillaVoiceSttClient.Interfaces;
+using DeepSpeech.WPF.ViewModels;
+using DeepSpeechClient.Interfaces;
 using GalaSoft.MvvmLight.Ioc;
 using System.Windows;
 
-namespace MozillaVoiceSttWPF
+namespace DeepSpeechWPF
 {
     ///
     /// Interaction logic for App.xaml
@@ -18,11 +18,11 @@ namespace MozillaVoiceSttWPF
             try
             {
-                //Register instance of Mozilla Voice STT
-                MozillaVoiceSttClient.MozillaVoiceStt mozillaVoiceSttClient =
-                    new MozillaVoiceSttClient.MozillaVoiceSttModel("deepspeech-0.8.0-models.pbmm");
+                //Register instance of DeepSpeech
+                DeepSpeechClient.DeepSpeech deepSpeechClient =
+                    new DeepSpeechClient.DeepSpeech("deepspeech-0.8.0-models.pbmm");
 
-                SimpleIoc.Default.Register(() => mozillaVoiceSttClient);
+                SimpleIoc.Default.Register(() => deepSpeechClient);
                 SimpleIoc.Default.Register();
             }
             catch (System.Exception ex)
@@ -35,8 +35,8 @@ namespace MozillaVoiceSttWPF
         protected override void OnExit(ExitEventArgs e)
         {
             base.OnExit(e);
-            //Dispose instance of MozillaVoiceStt
-            ServiceLocator.Current.GetInstance()?.Dispose();
+            //Dispose instance of DeepSpeech
+            ServiceLocator.Current.GetInstance()?.Dispose();
         }
     }
 }
diff --git a/net_framework/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.csproj b/net_framework/DeepSpeechWPF/DeepSpeech.WPF.csproj
similarity index 95%
rename from net_framework/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.csproj
rename to net_framework/DeepSpeechWPF/DeepSpeech.WPF.csproj
index 3e8d2d3..2a14c95 100644
--- a/net_framework/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.csproj
+++ b/net_framework/DeepSpeechWPF/DeepSpeech.WPF.csproj
@@ -6,8 +6,8 @@
     AnyCPU
     {54BFD766-4305-4F4C-BA59-AF45505DF3C1}
     WinExe
-    MozillaVoiceStt.WPF
-    MozillaVoiceStt.WPF
+    DeepSpeech.WPF
+    DeepSpeech.WPF
     v4.6.2
     512
     {60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}
@@ -131,10 +131,10 @@
-
+
       {56de4091-bbbe-47e4-852d-7268b33b971f}
-      MozillaVoiceSttClient
+      DeepSpeechClient
-
+
\ No newline at end of file
diff --git a/net_framework/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.sln b/net_framework/DeepSpeechWPF/DeepSpeech.WPF.sln
similarity index 77%
rename from net_framework/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.sln
rename to net_framework/DeepSpeechWPF/DeepSpeech.WPF.sln
index cc7aa8e..bad57f3 100644
--- a/net_framework/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.sln
+++ b/net_framework/DeepSpeechWPF/DeepSpeech.WPF.sln
@@ -3,9 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
 # Visual Studio 15
 VisualStudioVersion = 15.0.28307.421
 MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceStt.WPF", "MozillaVoiceStt.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeech.WPF", "DeepSpeech.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceSttClient", "..\..\..\ds\native_client\dotnet\MozillaVoiceSttClient\MozillaVoiceSttClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechClient", "..\..\..\ds\native_client\dotnet\DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
diff --git a/net_framework/MozillaVoiceSttWPF/MainWindow.xaml b/net_framework/DeepSpeechWPF/MainWindow.xaml
similarity index 97%
rename from net_framework/MozillaVoiceSttWPF/MainWindow.xaml
rename to net_framework/DeepSpeechWPF/MainWindow.xaml
index 7b16b1f..4fbe5e7 100644
--- a/net_framework/MozillaVoiceSttWPF/MainWindow.xaml
+++ b/net_framework/DeepSpeechWPF/MainWindow.xaml
@@ -1,10 +1,10 @@
 /// Interaction logic for MainWindow.xaml
diff --git a/net_framework/MozillaVoiceSttWPF/Properties/AssemblyInfo.cs b/net_framework/DeepSpeechWPF/Properties/AssemblyInfo.cs
similarity index 95%
rename from net_framework/MozillaVoiceSttWPF/Properties/AssemblyInfo.cs
rename to net_framework/DeepSpeechWPF/Properties/AssemblyInfo.cs
index 034ac3d..f9ae7d7 100644
--- a/net_framework/MozillaVoiceSttWPF/Properties/AssemblyInfo.cs
+++ b/net_framework/DeepSpeechWPF/Properties/AssemblyInfo.cs
@@ -7,11 +7,11 @@ using System.Windows;
 // General Information about an assembly is controlled through the following
 // set of attributes. Change these attribute values to modify the information
 // associated with an assembly.
-[assembly: AssemblyTitle("MozillaVoiceStt.WPF")]
+[assembly: AssemblyTitle("DeepSpeech.WPF")]
 [assembly: AssemblyDescription("")]
 [assembly: AssemblyConfiguration("")]
 [assembly: AssemblyCompany("")]
-[assembly: AssemblyProduct("MozillaVoiceStt.WPF.SingleFiles")]
+[assembly: AssemblyProduct("DeepSpeech.WPF.SingleFiles")]
 [assembly: AssemblyCopyright("Copyright © 2018")]
 [assembly: AssemblyTrademark("")]
 [assembly: AssemblyCulture("")]
diff --git a/net_framework/MozillaVoiceSttWPF/Properties/Resources.Designer.cs b/net_framework/DeepSpeechWPF/Properties/Resources.Designer.cs
similarity index 94%
rename from net_framework/MozillaVoiceSttWPF/Properties/Resources.Designer.cs
rename to net_framework/DeepSpeechWPF/Properties/Resources.Designer.cs
index b470f9a..2da2b4b 100644
--- a/net_framework/MozillaVoiceSttWPF/Properties/Resources.Designer.cs
+++ b/net_framework/DeepSpeechWPF/Properties/Resources.Designer.cs
@@ -8,7 +8,7 @@
 //
 //------------------------------------------------------------------------------
 
-namespace MozillaVoiceStt.WPF.Properties {
+namespace DeepSpeech.WPF.Properties {
     using System;
 
@@ -39,7 +39,7 @@ namespace MozillaVoiceStt.WPF.Properties {
         internal static global::System.Resources.ResourceManager ResourceManager {
             get {
                 if (object.ReferenceEquals(resourceMan, null)) {
-                    global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("MozillaVoiceStt.WPF.Properties.Resources", typeof(Resources).Assembly);
+                    global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("DeepSpeech.WPF.Properties.Resources", typeof(Resources).Assembly);
                     resourceMan = temp;
                 }
                 return resourceMan;
diff --git a/net_framework/MozillaVoiceSttWPF/Properties/Resources.resx b/net_framework/DeepSpeechWPF/Properties/Resources.resx
similarity index 100%
rename from net_framework/MozillaVoiceSttWPF/Properties/Resources.resx
rename to net_framework/DeepSpeechWPF/Properties/Resources.resx
diff --git a/net_framework/MozillaVoiceSttWPF/Properties/Settings.Designer.cs b/net_framework/DeepSpeechWPF/Properties/Settings.Designer.cs
similarity index 96%
rename from net_framework/MozillaVoiceSttWPF/Properties/Settings.Designer.cs
rename to net_framework/DeepSpeechWPF/Properties/Settings.Designer.cs
index a721869..0f464bc 100644
--- a/net_framework/MozillaVoiceSttWPF/Properties/Settings.Designer.cs
+++ b/net_framework/DeepSpeechWPF/Properties/Settings.Designer.cs
@@ -8,7 +8,7 @@
 //
 //------------------------------------------------------------------------------
 
-namespace MozillaVoiceStt.WPF.Properties {
+namespace DeepSpeech.WPF.Properties {
 
     [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
diff --git a/net_framework/MozillaVoiceSttWPF/Properties/Settings.settings b/net_framework/DeepSpeechWPF/Properties/Settings.settings
similarity index 100%
rename from net_framework/MozillaVoiceSttWPF/Properties/Settings.settings
rename to net_framework/DeepSpeechWPF/Properties/Settings.settings
diff --git a/net_framework/MozillaVoiceSttWPF/ViewModels/BindableBase.cs b/net_framework/DeepSpeechWPF/ViewModels/BindableBase.cs
similarity index 98%
rename from net_framework/MozillaVoiceSttWPF/ViewModels/BindableBase.cs
rename to net_framework/DeepSpeechWPF/ViewModels/BindableBase.cs
index 92fd2f5..909327e 100644
--- a/net_framework/MozillaVoiceSttWPF/ViewModels/BindableBase.cs
+++ b/net_framework/DeepSpeechWPF/ViewModels/BindableBase.cs
@@ -3,7 +3,7 @@ using System.Collections.Generic;
 using System.ComponentModel;
 using System.Runtime.CompilerServices;
 
-namespace MozillaVoiceStt.WPF.ViewModels
+namespace DeepSpeech.WPF.ViewModels
 {
     ///
     /// Implementation of to simplify models.
diff --git a/net_framework/MozillaVoiceSttWPF/ViewModels/MainWindowViewModel.cs b/net_framework/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs
similarity index 97%
rename from net_framework/MozillaVoiceSttWPF/ViewModels/MainWindowViewModel.cs
rename to net_framework/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs
index 913b941..230fd42 100644
--- a/net_framework/MozillaVoiceSttWPF/ViewModels/MainWindowViewModel.cs
+++ b/net_framework/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs
@@ -3,8 +3,8 @@ using CSCore;
 using CSCore.CoreAudioAPI;
 using CSCore.SoundIn;
 using CSCore.Streams;
-using MozillaVoiceSttClient.Interfaces;
-using MozillaVoiceSttClient.Models;
+using DeepSpeechClient.Interfaces;
+using DeepSpeechClient.Models;
 using GalaSoft.MvvmLight.CommandWpf;
 using Microsoft.Win32;
 using System;
@@ -15,7 +15,7 @@ using System.IO;
 using System.Threading;
 using System.Threading.Tasks;
 
-namespace MozillaVoiceStt.WPF.ViewModels
+namespace DeepSpeech.WPF.ViewModels
 {
     ///
     /// View model of the MainWindow View.
@@ -27,7 +27,7 @@
         private const string ScorerPath = "kenlm.scorer";
         #endregion
 
-        private readonly IMozillaVoiceSttClient _sttClient;
+        private readonly IDeepSpeech _sttClient;
 
         #region Commands
         ///
@@ -62,7 +62,7 @@
         ///
         /// Stream used to feed data into the acoustic model.
         ///
-        private MozillaVoiceSttStream _sttStream;
+        private DeepSpeechStream _sttStream;
 
         ///
         /// Records the audio of the selected device.
@@ -75,7 +75,7 @@ namespace MozillaVoiceStt.WPF.ViewModels
         private SoundInSource _soundInSource;
 
         ///
-        /// Target wave source.(16KHz Mono 16bit for MozillaVoiceStt)
+        /// Target wave source.(16KHz Mono 16bit for DeepSpeech)
         ///
         private IWaveSource _convertedSource;
 
@@ -200,7 +200,7 @@
         #endregion
 
         #region Ctors
-        public MainWindowViewModel(IMozillaVoiceStt sttClient)
+        public MainWindowViewModel(IDeepSpeech sttClient)
         {
             _sttClient = sttClient;
 
@@ -290,7 +290,7 @@
                 //read data from the converedSource
                 //important: don't use the e.Data here
                 //the e.Data contains the raw data provided by the
-                //soundInSource which won't have the model required audio format
+                //soundInSource which won't have the deepspeech required audio format
                 byte[] buffer = new byte[_convertedSource.WaveFormat.BytesPerSecond / 2];
 
                 int read;
@@ -422,4 +422,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/net_framework/MozillaVoiceSttWPF/packages.config b/net_framework/DeepSpeechWPF/packages.config
similarity index 100%
rename from net_framework/MozillaVoiceSttWPF/packages.config
rename to net_framework/DeepSpeechWPF/packages.config
diff --git a/nim_mic_vad_streaming/README.md b/nim_mic_vad_streaming/README.md
index f59a484..dcab7c0 100644
--- a/nim_mic_vad_streaming/README.md
+++ b/nim_mic_vad_streaming/README.md
@@ -8,29 +8,29 @@ Only difference for both OS is the library used for gathering audio data from
 Interface to both the libs is provided through NIM code.
 
 ## PREREQUISITIES :
-* ```libmozilla_voice_stt.so```
+* ```libdeepspeech.so```
 Go to the [releases](https://github.com/mozilla/DeepSpeech/releases/tag/v0.8.0) page and download the native client package based on your OS and CPU architecture.
-Extract the ``libmozilla_voice_stt.so`` and put into the subdirectory depending on OS of native Client used.
+Extract the ``libdeepspeech.so`` and put into the subdirectory depending on OS of native Client used.
 
 #### On WINDOWS:
 * Download the ```native.client.amd64.win.tar.xz ``` package . [ same is true for ``xx.xx.amd64.cuda.win.xx`` if CUDA installed or ``xx.xx.amd64.tflite.win.xx``]
-* Extract and place the ```libmozilla_voice_stt.so``` in ```win_nim_vad_streaming``` subdirectory
+* Extract and place the ```libdeepspeech.so``` in ```win_nim_vad_streaming``` subdirectory
 * Now see ``README.md`` in ```win_nim_vad_streaming``` subdirectory.
 
 #### On LINUX:
 * Download the ```native_client.amd64.linux.cpu ``` package .[ same is true for ``xx.xx.amd64.cuda.linux.xx`` is CUDA installed or ``xx.xx.amd64.tflite.linux.xx``]
-* Extract and place the ```libmozilla_voice_stt.so``` in ```linux_nim_vad_streaming``` subdirectory
+* Extract and place the ```libdeepspeech.so``` in ```linux_nim_vad_streaming``` subdirectory
 * Now see ``README.md`` in ```linux_nim_vad_streaming``` subdirectory.
 
-_Note: One can put ``libmozilla_voice_stt.so`` in the system's PATH rather than copying it to one of subdirectories for easy usage._
+_Note: One can put ``libdeepspeech.so`` in the system's PATH rather than copying it to one of subdirectories for easy usage._
 
 ## NOTE:
-Used NIM code only depends on the shared library(``libmozilla_voice_stt.so``) used.
-Given one has downloaded the native client package and extracted the ``libmozilla_voice_stt.so`` shared library and copied it to one of the subdirectories or in system's PATH ,Code can be modified to add more functionalities in pure NIM and modified code would compile on any platform as long as that platform is supported by NIM.
+Used NIM code only depends on the shared library(``libdeepspeech.so``) used.
+Given one has downloaded the native client package and extracted the ``libdeepspeech.so`` shared library and copied it to one of the subdirectories or in system's PATH ,Code can be modified to add more functionalities in pure NIM and modified code would compile on any platform as long as that platform is supported by NIM.
diff --git a/nim_mic_vad_streaming/linux_nim_vad_streaming/README.md b/nim_mic_vad_streaming/linux_nim_vad_streaming/README.md
index 4f54dcb..4525e4b 100644
--- a/nim_mic_vad_streaming/linux_nim_vad_streaming/README.md
+++ b/nim_mic_vad_streaming/linux_nim_vad_streaming/README.md
@@ -1,15 +1,15 @@
 # MICROPHONE VAD STREAMING
-Minimalistic example to demonstrate the Mozilla Voice STT streaming API in NIM.Raw audio is streamed from microphone to the Mozilla Voice STT based on VAD (voice Activity Detection).
+Minimalistic example to demonstrate the DeepSpeech streaming API in NIM.Raw audio is streamed from microphone to the DeepSpeech based on VAD (voice Activity Detection).
 
 ## Prerequisites:
-0) Please read ``PREREQUISITES`` in [README](../README.md) for getting the required ``libmozilla_voice_stt.so`` shared library.
+0) Please read ``PREREQUISITES`` in [README](../README.md) for getting the required ``libdeepspeech.so`` shared library.
 
 1) This example depends on the ``libasound.so``(which is distributed along with all major linux distros and present in linker's default path)
 
 _Note: You may need to install ``libasound.so`` if not found_
 ```
 sudo apt-get install libasound2
 ```
 
-2) Download the pre-trained Mozilla Voice STT english model (1089MB) and Scorer Package(~900MB):
+2) Download the pre-trained DeepSpeech english model (1089MB) and Scorer Package(~900MB):
 
 ```
 wget https://github.com/mozilla/DeepSpeech/releases/download/v0.8.0/deepspeech-0.8.0-models.pbmm
diff --git a/nim_mic_vad_streaming/linux_nim_vad_streaming/vad_stream.nim b/nim_mic_vad_streaming/linux_nim_vad_streaming/vad_stream.nim
index 49823f7..0b4d912 100644
--- a/nim_mic_vad_streaming/linux_nim_vad_streaming/vad_stream.nim
+++ b/nim_mic_vad_streaming/linux_nim_vad_streaming/vad_stream.nim
@@ -1,6 +1,6 @@
 import os ,deques,math,strutils,parseopt,tables,strformat
 import alsa,webrtcvad,wav
-import mozilla_voice_stt
+import deepspeech
 
 var
   args = initTable[string, string]()
@@ -28,8 +28,8 @@ let
   hw_params: snd_pcm_hw_params_ref = nil
   device_name = "plughw:0,0" #PCM hardware alsa Device.
   size = (int((frameDuration*int(rate))/1000))
-  modelPtr: ModelState = nil #mozilla Voice Stt model
-  deepStreamPtr: StreamingState = nil #mozilla Voice Stt model stream
+  modelPtr: ModelState = nil #deepSpeech model
+  deepStreamPtr: StreamingState = nil #deepSpeech model stream
   modelPath = args["model"]
 
 var
@@ -40,7 +40,7 @@ var
   framesLen: clong
   vad:vadObj #VAD Object declaration
   codeV: cint #to hold the error codes for VAD.
-  codeD: cint #to hold the error codes for mozilla Voice Stt
+  codeD: cint #to hold the error codes for deepSpeech
   #to get the data from the channel.
   frame : seq[int16]
   buff = initDeque[tuple[data: seq[int16],flag:int32]](nextPowerOfTwo(windowSize))
@@ -187,4 +187,4 @@ while true:
 
 #joinThread(thread)
-#echo("Thread finished..")
+#echo("Thread finished..")
\ No newline at end of file
diff --git a/nim_mic_vad_streaming/win_nim_vad_streaming/README.md b/nim_mic_vad_streaming/win_nim_vad_streaming/README.md
index 36509fb..9d9343b 100644
--- a/nim_mic_vad_streaming/win_nim_vad_streaming/README.md
+++ b/nim_mic_vad_streaming/win_nim_vad_streaming/README.md
@@ -1,11 +1,11 @@
 # MICROPHONE VAD STREAMING
-Minimalistic example to demonstrate the Mozilla Voice STT streaming API in NIM.Raw audio is streamed from microphone to the Mozilla Voice STT based on VAD (voice Activity Detection).
+Minimalistic example to demonstrate the DeepSpeech streaming API in NIM.Raw audio is streamed from microphone to the DeepSpeech based on VAD (voice Activity Detection).
 
 ## Prerequisites:
-0) Please read ``PREREQUISITES`` in [README](../README.md) for getting the required ``libmozilla_voice_stt.so`` shared library.
+0) Please read ``PREREQUISITES`` in [README](../README.md) for getting the required ``libdeepspeech.so`` shared library.
 
 1) This example depends on the ``libportaudio.dll``(precompiled portaudio library).Make sure you have this library in PATH.If you don't have one or are unable to build one ,you can get one from [here](https://gitlab.com/eagledot/nim-portaudio/lib).
 
-2) Download the pre-trained Mozilla Voice STT english model (1089MB):
+2) Download the pre-trained DeepSpeech english model (1089MB):
 
 ```
 wget https://github.com/mozilla/DeepSpeech/releases/download/v0.8.0/deepspeech-0.8.0-models.pbmm
diff --git a/nim_mic_vad_streaming/win_nim_vad_streaming/vad_stream.nim b/nim_mic_vad_streaming/win_nim_vad_streaming/vad_stream.nim
index 9e309fc..8e282b9 100644
--- a/nim_mic_vad_streaming/win_nim_vad_streaming/vad_stream.nim
+++ b/nim_mic_vad_streaming/win_nim_vad_streaming/vad_stream.nim
@@ -1,6 +1,6 @@
 import os ,deques,math,strutils,parseopt,tables
 import strformat
-import webrtcvad,portaudio,mozilla_voice_stt,wav
+import webrtcvad,portaudio,deepspeech,wav
 
 proc sum[T](temp: Deque[T]): int =
 
@@ -47,8 +47,8 @@ let
   f1 = open("FIFO_rgb",fmWrite)
   f2 = open("FIFO_rgb",fmREAD)
   stream: pointer = nil #portaudio Stream pointer holder.
-  modelPtr: ModelState = nil #mozilla Voice Stt model
-  deepStreamPtr: StreamingState = nil #mozilla Voice Stt model stream
+  modelPtr: ModelState = nil #deepSpeech model
+  deepStreamPtr: StreamingState = nil #deepSpeech model stream
   modelPath = args["model"]
 
 if "scorer" in args:
   scorerPath = args["scorer"]
@@ -68,7 +68,7 @@ when isMainModule:
   codeV = setMode(vad,3'i32)
   assert codeV == 0'i32
 
-  #Mozilla Voice STT model initialization.
+  #DeepSpeech model initialization.
   codeD = createModel(modelPath,unsafeaddr(modelPtr))
   if codeD == 0'i32:
     echo("Model Created Successfully")
diff --git a/nodejs_mic_vad_streaming/Readme.md b/nodejs_mic_vad_streaming/Readme.md
index a02f30d..97ee655 100644
--- a/nodejs_mic_vad_streaming/Readme.md
+++ b/nodejs_mic_vad_streaming/Readme.md
@@ -1,14 +1,14 @@
 # NodeJS Microphone VAD Streaming
 
 This is a NodeJS example of recording from the microphone and streaming to
-Mozilla Voice STT with voice activity detection.
+DeepSpeech with voice activity detection.
 
 ### Prerequisites:
 
 1) The example utilized the [mic](https://github.com/ashishbajaj99/mic) NPM module which requires either [sox](http://sox.sourceforge.net/) (Windows/Mac) or [arecord](http://alsa-project.org/) (Linux).
-2) Download the pre-trained Mozilla Voice STT english model (1089MB):
+2) Download the pre-trained DeepSpeech english model (1089MB):
 
 ```
 wget https://github.com/mozilla/DeepSpeech/releases/download/v0.8.0/deepspeech-0.8.0-models.pbmm
@@ -35,10 +35,10 @@ npm install
 node start.js
 ```
 
-#### Specify alternate Mozilla Voice STT model path:
+#### Specify alternate DeepSpeech model path:
 
 Use the `DEEPSPEECH_MODEL` environment variable to change models.
 
 ```
 DEEPSPEECH_MODEL=~/dev/jaxcore/deepspeech-0.8.0-models/ node start.js
-```
+```
\ No newline at end of file
diff --git a/nodejs_mic_vad_streaming/package.json b/nodejs_mic_vad_streaming/package.json
index 77f91c1..4c33244 100644
--- a/nodejs_mic_vad_streaming/package.json
+++ b/nodejs_mic_vad_streaming/package.json
@@ -3,7 +3,7 @@
   "version": "0.1.0",
   "private": true,
   "dependencies": {
-    "@mozilla-voice/stt": "^0.9.0-alpha.5",
+    "deepspeech": "^0.8.0",
     "mic": "^2.1.2",
     "node-vad": "^1.1.4",
     "speaker": "^0.5.1",
diff --git a/nodejs_mic_vad_streaming/start.js b/nodejs_mic_vad_streaming/start.js
index a3386c9..febcad2 100644
--- a/nodejs_mic_vad_streaming/start.js
+++ b/nodejs_mic_vad_streaming/start.js
@@ -1,11 +1,11 @@
-const mozillaVoiceStt = require('@mozilla-voice/stt');
+const DeepSpeech = require('deepspeech');
 const VAD = require('node-vad');
 const mic = require('mic');
 const fs = require('fs');
 const wav = require('wav');
 const Speaker = require('speaker');
 
-let DEEPSPEECH_MODEL; // path to model directory
+let DEEPSPEECH_MODEL; // path to deepspeech model directory
 if (process.env.DEEPSPEECH_MODEL) {
 	DEEPSPEECH_MODEL = process.env.DEEPSPEECH_MODEL;
 }
@@ -24,7 +24,7 @@ const vad = new VAD(VAD_MODE);
 function createModel(modelDir) {
 	let modelPath = modelDir + '.pbmm';
 	let scorerPath = modelDir + '.scorer';
-	let model = new mozillaVoiceStt.Model(modelPath);
+	let model = new DeepSpeech.Model(modelPath);
 	model.enableExternalScorer(scorerPath);
 	return model;
 }
diff --git a/nodejs_wav/Readme.md b/nodejs_wav/Readme.md
index fe2b583..5a90646 100644
--- a/nodejs_wav/Readme.md
+++ b/nodejs_wav/Readme.md
@@ -1,4 +1,4 @@
-# NodeJS voice recognition example using Mozilla Mozilla Voice STT
+# NodeJS voice recognition example using Mozilla DeepSpeech
 
 Download the pre-trained model (1.8GB):
 
diff --git a/nodejs_wav/index.js b/nodejs_wav/index.js
index 0f316a8..4e7c10e 100644
--- a/nodejs_wav/index.js
+++ b/nodejs_wav/index.js
@@ -1,4 +1,4 @@
-const mozillaVoiceStt = require('@mozilla-voice/stt');
+const DeepSpeech = require('deepspeech');
 const Fs = require('fs');
 const Sox = require('sox-stream');
 const MemoryStream = require('memory-stream');
@@ -7,7 +7,7 @@ const Wav = require('node-wav');
 
 let modelPath = './models/deepspeech-0.8.0-models.pbmm';
 
-let model = new mozillaVoiceStt.Model(modelPath);
+let model = new DeepSpeech.Model(modelPath);
 
 let desiredSampleRate = model.sampleRate();
 
diff --git a/nodejs_wav/package.json b/nodejs_wav/package.json
index 889afd8..896fa01 100644
--- a/nodejs_wav/package.json
+++ b/nodejs_wav/package.json
@@ -1,5 +1,5 @@
 {
-  "name": "mozilla_voice_stt-nodejs_wav",
+  "name": "deepspeech-nodejs_wav",
   "version": "1.0.0",
   "description": "Simple audio processing",
   "main": "index.js",
@@ -8,7 +8,7 @@
   },
   "dependencies": {
     "argparse": "^1.0.10",
-    "@mozilla-voice/stt": "0.9.0-alpha.5",
+    "deepspeech": "0.8.0",
     "node-wav": "0.0.2",
     "sox-stream": "^2.0.3",
     "util": "^0.11.1"
diff --git a/tests.sh b/tests.sh
index 81edd12..8764cb3 100755
--- a/tests.sh
+++ b/tests.sh
@@ -19,5 +19,5 @@ get_python_wheel_url()
 
 get_npm_package_url()
 {
- echo "https://community-tc.services.mozilla.com/api/queue/v1/task/${DEP_TASK_ID}/artifacts/public/mozilla-voice-stt-${DS_VERSION}.tgz" + echo "https://community-tc.services.mozilla.com/api/queue/v1/task/${DEP_TASK_ID}/artifacts/public/mozilla_voice_stt-${DS_VERSION}.tgz" } diff --git a/uwp/MozillaVoiceSttUWP.sln b/uwp/DeepSpeechUWP.sln similarity index 94% rename from uwp/MozillaVoiceSttUWP.sln rename to uwp/DeepSpeechUWP.sln index 8e2ffa3..94b23cb 100644 --- a/uwp/MozillaVoiceSttUWP.sln +++ b/uwp/DeepSpeechUWP.sln @@ -3,7 +3,7 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 16 VisualStudioVersion = 16.0.29519.87 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceSttUWP", "MozillaVoiceSttUWP\MozillaVoiceSttUWP.csproj", "{49AAC24D-6A76-4910-913A-94D2D67B6226}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechUWP", "DeepSpeechUWP\DeepSpeechUWP.csproj", "{49AAC24D-6A76-4910-913A-94D2D67B6226}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/uwp/MozillaVoiceSttUWP/.gitignore b/uwp/DeepSpeechUWP/.gitignore similarity index 100% rename from uwp/MozillaVoiceSttUWP/.gitignore rename to uwp/DeepSpeechUWP/.gitignore diff --git a/uwp/MozillaVoiceSttUWP/App.xaml b/uwp/DeepSpeechUWP/App.xaml similarity index 66% rename from uwp/MozillaVoiceSttUWP/App.xaml rename to uwp/DeepSpeechUWP/App.xaml index 457a464..dc8aec5 100644 --- a/uwp/MozillaVoiceSttUWP/App.xaml +++ b/uwp/DeepSpeechUWP/App.xaml @@ -1,7 +1,7 @@  + xmlns:local="using:DeepSpeechUWP"> diff --git a/uwp/MozillaVoiceSttUWP/App.xaml.cs b/uwp/DeepSpeechUWP/App.xaml.cs similarity index 99% rename from uwp/MozillaVoiceSttUWP/App.xaml.cs rename to uwp/DeepSpeechUWP/App.xaml.cs index 193c244..41fa6e6 100644 --- a/uwp/MozillaVoiceSttUWP/App.xaml.cs +++ b/uwp/DeepSpeechUWP/App.xaml.cs @@ -16,7 +16,7 @@ using Windows.UI.Xaml.Input; using Windows.UI.Xaml.Media; using Windows.UI.Xaml.Navigation; -namespace MozillaVoiceSttUWP +namespace DeepSpeechUWP { /// /// Provides application-specific behavior to supplement the default Application class. 
diff --git a/uwp/MozillaVoiceSttUWP/Assets/LockScreenLogo.scale-200.png b/uwp/DeepSpeechUWP/Assets/LockScreenLogo.scale-200.png similarity index 100% rename from uwp/MozillaVoiceSttUWP/Assets/LockScreenLogo.scale-200.png rename to uwp/DeepSpeechUWP/Assets/LockScreenLogo.scale-200.png diff --git a/uwp/MozillaVoiceSttUWP/Assets/SplashScreen.scale-200.png b/uwp/DeepSpeechUWP/Assets/SplashScreen.scale-200.png similarity index 100% rename from uwp/MozillaVoiceSttUWP/Assets/SplashScreen.scale-200.png rename to uwp/DeepSpeechUWP/Assets/SplashScreen.scale-200.png diff --git a/uwp/MozillaVoiceSttUWP/Assets/Square150x150Logo.scale-200.png b/uwp/DeepSpeechUWP/Assets/Square150x150Logo.scale-200.png similarity index 100% rename from uwp/MozillaVoiceSttUWP/Assets/Square150x150Logo.scale-200.png rename to uwp/DeepSpeechUWP/Assets/Square150x150Logo.scale-200.png diff --git a/uwp/MozillaVoiceSttUWP/Assets/Square44x44Logo.scale-200.png b/uwp/DeepSpeechUWP/Assets/Square44x44Logo.scale-200.png similarity index 100% rename from uwp/MozillaVoiceSttUWP/Assets/Square44x44Logo.scale-200.png rename to uwp/DeepSpeechUWP/Assets/Square44x44Logo.scale-200.png diff --git a/uwp/MozillaVoiceSttUWP/Assets/Square44x44Logo.targetsize-24_altform-unplated.png b/uwp/DeepSpeechUWP/Assets/Square44x44Logo.targetsize-24_altform-unplated.png similarity index 100% rename from uwp/MozillaVoiceSttUWP/Assets/Square44x44Logo.targetsize-24_altform-unplated.png rename to uwp/DeepSpeechUWP/Assets/Square44x44Logo.targetsize-24_altform-unplated.png diff --git a/uwp/MozillaVoiceSttUWP/Assets/StoreLogo.png b/uwp/DeepSpeechUWP/Assets/StoreLogo.png similarity index 100% rename from uwp/MozillaVoiceSttUWP/Assets/StoreLogo.png rename to uwp/DeepSpeechUWP/Assets/StoreLogo.png diff --git a/uwp/MozillaVoiceSttUWP/Assets/Wide310x150Logo.scale-200.png b/uwp/DeepSpeechUWP/Assets/Wide310x150Logo.scale-200.png similarity index 100% rename from uwp/MozillaVoiceSttUWP/Assets/Wide310x150Logo.scale-200.png rename to uwp/DeepSpeechUWP/Assets/Wide310x150Logo.scale-200.png diff --git a/uwp/MozillaVoiceSttUWP/MozillaVoiceSttUWP.csproj b/uwp/DeepSpeechUWP/DeepSpeechUWP.csproj similarity index 95% rename from uwp/MozillaVoiceSttUWP/MozillaVoiceSttUWP.csproj rename to uwp/DeepSpeechUWP/DeepSpeechUWP.csproj index 57310b7..043f0f0 100644 --- a/uwp/MozillaVoiceSttUWP/MozillaVoiceSttUWP.csproj +++ b/uwp/DeepSpeechUWP/DeepSpeechUWP.csproj @@ -7,8 +7,8 @@ {49AAC24D-6A76-4910-913A-94D2D67B6226} AppContainerExe Properties - MozillaVoiceSttUWP - MozillaVoiceSttUWP + DeepSpeechUWP + DeepSpeechUWP en-US UAP 10.0.18362.0 @@ -158,14 +158,14 @@ - - 0.9.0-alpha.5 + + 0.8.0 6.2.9 - - ..\packages\MozillaVoiceSttClient.0.9.0-alpha.5\lib\net46\MozillaVoiceSttClient.dll + + ..\packages\DeepSpeech.0.8.0\lib\net46\DeepSpeechClient.dll @@ -187,4 +187,4 @@ --> - + \ No newline at end of file diff --git a/uwp/MozillaVoiceSttUWP/MainPage.xaml b/uwp/DeepSpeechUWP/MainPage.xaml similarity index 94% rename from uwp/MozillaVoiceSttUWP/MainPage.xaml rename to uwp/DeepSpeechUWP/MainPage.xaml index 7e57f2a..f34ab26 100644 --- a/uwp/MozillaVoiceSttUWP/MainPage.xaml +++ b/uwp/DeepSpeechUWP/MainPage.xaml @@ -1,8 +1,8 @@  - MozillaVoiceStt UWP Demo + DeepSpeech UWP Demo powered by Audio Graph API Select an audio for transcription: diff --git a/uwp/MozillaVoiceSttUWP/MainPage.xaml.cs b/uwp/DeepSpeechUWP/MainPage.xaml.cs similarity index 96% rename from uwp/MozillaVoiceSttUWP/MainPage.xaml.cs rename to uwp/DeepSpeechUWP/MainPage.xaml.cs index e1db379..f8659ed 100644 --- 
+++ b/uwp/DeepSpeechUWP/MainPage.xaml.cs
@@ -1,5 +1,5 @@
-using MozillaVoiceSttClient.Interfaces;
-using MozillaVoiceSttClient.Models;
+using DeepSpeechClient.Interfaces;
+using DeepSpeechClient.Models;
 using System;
 using System.Collections.Concurrent;
 using System.Diagnostics;
@@ -17,7 +17,7 @@ using Windows.Storage;
 using Windows.UI.Xaml;
 using Windows.UI.Xaml.Controls;
 
-namespace MozillaVoiceSttUWP
+namespace DeepSpeechUWP
 {
     [ComImport]
     [Guid("5B0D3235-4DBA-4D44-865E-8F1D0E4FD04D")]
@@ -34,8 +34,8 @@ namespace MozillaVoiceSttUWP
         private StorageFile audioFile;
         private DeviceInformation selectedInputDevice;
         private DeviceInformationCollection inputDevices;
-        private IMozillaVoiceSttClient client;
-        private MozillaVoiceSttStream stream;
+        private IDeepSpeech client;
+        private DeepSpeechStream stream;
         private MediaEncodingProfile encoding;
         private AudioGraph graph;
@@ -44,7 +44,7 @@ namespace MozillaVoiceSttUWP
         public MainPage()
         {
             this.InitializeComponent();
-            InitMozillaVoiceStt();
+            InitDeepSpeech();
             ListAudioInputDevices();
             InitAudioGraph();
         }
@@ -61,14 +61,14 @@ namespace MozillaVoiceSttUWP
             }
         }
 
-        private void InitMozillaVoiceStt()
+        private void InitDeepSpeech()
        {
             string projectFolder = Directory.GetCurrentDirectory();
             string modelsFolder = Path.Combine(projectFolder, "models");
             string acousticModelPath = Path.Combine(modelsFolder, "deepspeech-0.8.0-models.pbmm");
             string scorerPath = Path.Combine(modelsFolder, "deepspeech-0.8.0-models.scorer");
 
-            client = new MozillaVoiceSttClient.MozillaVoiceSttModel(acousticModelPath);
+            client = new DeepSpeechClient.DeepSpeech(acousticModelPath);
             client.EnableExternalScorer(scorerPath);
         }
diff --git a/uwp/MozillaVoiceSttUWP/Package.appxmanifest b/uwp/DeepSpeechUWP/Package.appxmanifest
similarity index 88%
rename from uwp/MozillaVoiceSttUWP/Package.appxmanifest
rename to uwp/DeepSpeechUWP/Package.appxmanifest
index 0b0e07b..4b2b3f9 100644
--- a/uwp/MozillaVoiceSttUWP/Package.appxmanifest
+++ b/uwp/DeepSpeechUWP/Package.appxmanifest
@@ -14,7 +14,7 @@
-    <DisplayName>MozillaVoiceSttUWP</DisplayName>
+    <DisplayName>DeepSpeechUWP</DisplayName>
     <PublisherDisplayName>erikz</PublisherDisplayName>
     <Logo>Assets\StoreLogo.png</Logo>
@@ -30,12 +30,12 @@
-        EntryPoint="MozillaVoiceSttUWP.App">
+        EntryPoint="DeepSpeechUWP.App">
diff --git a/uwp/MozillaVoiceSttUWP/Properties/AssemblyInfo.cs b/uwp/DeepSpeechUWP/Properties/AssemblyInfo.cs
similarity index 87%
rename from uwp/MozillaVoiceSttUWP/Properties/AssemblyInfo.cs
rename to uwp/DeepSpeechUWP/Properties/AssemblyInfo.cs
index 91bec39..0932b77 100644
--- a/uwp/MozillaVoiceSttUWP/Properties/AssemblyInfo.cs
+++ b/uwp/DeepSpeechUWP/Properties/AssemblyInfo.cs
@@ -5,11 +5,11 @@ using System.Runtime.InteropServices;
 
 // General Information about an assembly is controlled through the following
 // set of attributes. Change these attribute values to modify the information
 // associated with an assembly.
-[assembly: AssemblyTitle("MozillaVoiceSttUWP")]
+[assembly: AssemblyTitle("DeepSpeechUWP")]
 [assembly: AssemblyDescription("")]
 [assembly: AssemblyConfiguration("")]
 [assembly: AssemblyCompany("")]
-[assembly: AssemblyProduct("MozillaVoiceSttUWP")]
+[assembly: AssemblyProduct("DeepSpeechUWP")]
 [assembly: AssemblyCopyright("Copyright © 2020")]
 [assembly: AssemblyTrademark("")]
 [assembly: AssemblyCulture("")]
diff --git a/uwp/MozillaVoiceSttUWP/Properties/Default.rd.xml b/uwp/DeepSpeechUWP/Properties/Default.rd.xml
similarity index 100%
rename from uwp/MozillaVoiceSttUWP/Properties/Default.rd.xml
rename to uwp/DeepSpeechUWP/Properties/Default.rd.xml
diff --git a/uwp/MozillaVoiceSttUWP/models/.gitkeep b/uwp/DeepSpeechUWP/models/.gitkeep
similarity index 100%
rename from uwp/MozillaVoiceSttUWP/models/.gitkeep
rename to uwp/DeepSpeechUWP/models/.gitkeep
diff --git a/vad_transcriber/audioTranscript_cmd.py b/vad_transcriber/audioTranscript_cmd.py
index fd4fdb6..d1190c7 100644
--- a/vad_transcriber/audioTranscript_cmd.py
+++ b/vad_transcriber/audioTranscript_cmd.py
@@ -20,7 +20,7 @@ def main(args):
     parser.add_argument('--model', required=True,
                         help='Path to directory that contains all model files (output_graph and scorer)')
     parser.add_argument('--stream', required=False, action='store_true',
-                        help='To use mozilla voice stt streaming interface')
+                        help='To use deepspeech streaming interface')
     args = parser.parse_args()
     if args.stream is True:
         print("Opening mic for streaming")
@@ -52,7 +52,7 @@ def main(args):
         logging.debug("Saving Transcript @: %s" % waveFile.rstrip(".wav") + ".txt")
 
         for i, segment in enumerate(segments):
-            # Run mozilla voice stt on the chunk that just completed VAD
+            # Run deepspeech on the chunk that just completed VAD
             logging.debug("Processing chunk %002d" % (i,))
             audio = np.frombuffer(segment, dtype=np.int16)
             output = wavTranscriber.stt(model_retval[0], audio, sample_rate)
diff --git a/vad_transcriber/audioTranscript_gui.py b/vad_transcriber/audioTranscript_gui.py
index 6fe9d42..176b4a3 100644
--- a/vad_transcriber/audioTranscript_gui.py
+++ b/vad_transcriber/audioTranscript_gui.py
@@ -93,7 +93,7 @@ class App(QMainWindow):
     def __init__(self):
         super().__init__()
-        self.title = 'Mozilla Voice STT Transcriber'
+        self.title = 'Deepspeech Transcriber'
         self.left = 10
         self.top = 10
         self.width = 480
@@ -114,7 +114,7 @@ class App(QMainWindow):
         self.browseButton = QPushButton('Browse', self)
         self.browseButton.setToolTip('Select a wav file')
         self.modelsButton = QPushButton('Browse', self)
-        self.modelsButton.setToolTip('Select models folder')
+        self.modelsButton.setToolTip('Select deepspeech models folder')
         self.transcribeWav = QPushButton('Transcribe Wav', self)
         self.transcribeWav.setToolTip('Start Wav Transcription')
         self.openMicrophone = QPushButton('Start Speaking', self)
@@ -205,7 +205,7 @@ class App(QMainWindow):
     @pyqtSlot()
     def models_on_click(self):
         logging.debug('Models Browse Button clicked')
-        self.dirName = QFileDialog.getExistingDirectory(self, "Select models directory")
+        self.dirName = QFileDialog.getExistingDirectory(self, "Select deepspeech models directory")
         if self.dirName:
             self.modelsBox.setText(self.dirName)
             logging.debug(self.dirName)
@@ -309,10 +309,10 @@ class App(QMainWindow):
     @param Context: Is a tuple containing three objects
                     1. Speech samples, sctx
                     2. subprocess handle
-                    3. Mozilla Voice STT model object
+                    3. Deepspeech model object
     '''
     def micWorker(self, context, progress_callback):
-        # Mozilla Voice STT Streaming will be run from this method
+        # Deepspeech Streaming will be run from this method
         logging.debug("Recording from your microphone")
         while (not self.openMicrophone.isChecked()):
             data = context[1].stdout.read(512)
@@ -343,7 +343,7 @@ class App(QMainWindow):
         self.show()
 
     def wavWorker(self, waveFile, progress_callback):
-        # Mozilla Voice STT will be run from this method
+        # Deepspeech will be run from this method
         logging.debug("Preparing for transcription...")
         inference_time = 0.0
@@ -353,7 +353,7 @@ class App(QMainWindow):
         logging.debug("Saving Transcript @: %s" % waveFile.rstrip(".wav") + ".txt")
 
         for i, segment in enumerate(segments):
-            # Run mozilla voice stt on the chunk that just completed VAD
+            # Run deepspeech on the chunk that just completed VAD
             logging.debug("Processing chunk %002d" % (i,))
             audio = np.frombuffer(segment, dtype=np.int16)
             output = wavTranscriber.stt(self.model[0], audio, sample_rate)
diff --git a/vad_transcriber/requirements.txt b/vad_transcriber/requirements.txt
index de6fd22..de47ac2 100644
--- a/vad_transcriber/requirements.txt
+++ b/vad_transcriber/requirements.txt
@@ -1,3 +1,3 @@
-mozilla_voice_stt==0.9.0a5
+deepspeech==0.8.0
 webrtcvad
 pyqt5
diff --git a/vad_transcriber/test.sh b/vad_transcriber/test.sh
index 59d08bf..2815222 100755
--- a/vad_transcriber/test.sh
+++ b/vad_transcriber/test.sh
@@ -8,7 +8,7 @@ pushd ${THIS}
 source ../tests.sh
 
 pip install --user $(get_python_wheel_url "$1")
-pip install --user -r <(grep -v mozilla_voice_stt requirements.txt)
+pip install --user -r <(grep -v deepspeech requirements.txt)
 
 python audioTranscript_cmd.py \
   --audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \
diff --git a/vad_transcriber/wavTranscriber.py b/vad_transcriber/wavTranscriber.py
index d724075..fce4d5b 100644
--- a/vad_transcriber/wavTranscriber.py
+++ b/vad_transcriber/wavTranscriber.py
@@ -11,24 +11,24 @@ Load the pre-trained model into the memory
 @param scorer: Scorer file
 
 @Retval
-Returns a list [MozillaVoiceStt Object, Model Load Time, Scorer Load Time]
+Returns a list [DeepSpeech Object, Model Load Time, Scorer Load Time]
 '''
 def load_model(models, scorer):
     model_load_start = timer()
-    mvs = mozilla_voice_stt.Model(models)
+    ds = mozilla_voice_stt.Model(models)
     model_load_end = timer() - model_load_start
     logging.debug("Loaded model in %0.3fs." % (model_load_end))
 
     scorer_load_start = timer()
-    mvs.enableExternalScorer(scorer)
+    ds.enableExternalScorer(scorer)
     scorer_load_end = timer() - scorer_load_start
     logging.debug('Loaded external scorer in %0.3fs.' % (scorer_load_end))
 
-    return [mvs, model_load_end, scorer_load_end]
+    return [ds, model_load_end, scorer_load_end]
 
 '''
 Run Inference on input audio file
-@param mvs: mozilla voice stt object
+@param ds: Deepspeech object
 @param audio: Input audio for running inference on
 @param fs: Sample rate of the input audio file
@@ -36,14 +36,14 @@ Run Inference on input audio file
 Returns a list [Inference, Inference Time, Audio Length]
 '''
-def stt(mvs, audio, fs):
+def stt(ds, audio, fs):
     inference_time = 0.0
     audio_length = len(audio) * (1 / fs)
 
-    # Run mozilla voice stt
+    # Run Deepspeech
     logging.debug('Running inference...')
     inference_start = timer()
-    output = mvs.stt(audio)
+    output = ds.stt(audio)
     inference_end = timer() - inference_start
     inference_time += inference_end
     logging.debug('Inference took %0.3fs for %0.3fs audio file.'
                   % (inference_end, audio_length))
diff --git a/web_microphone_websocket/Readme.md b/web_microphone_websocket/Readme.md
index 011f281..52a538a 100644
--- a/web_microphone_websocket/Readme.md
+++ b/web_microphone_websocket/Readme.md
@@ -1,7 +1,7 @@
 # Web Microphone Websocket
 
 This is an example of a ReactJS web application streaming microphone audio from the browser
-to a NodeJS server and transmitting the Mozilla Voice STT results back to the browser.
+to a NodeJS server and transmitting the DeepSpeech results back to the browser.
 
 #### Download the pre-trained model (1.8GB):
@@ -26,4 +26,4 @@
 yarn start
 ```
 node server.js
-```
+```
\ No newline at end of file
diff --git a/web_microphone_websocket/package.json b/web_microphone_websocket/package.json
index 8ada91c..60fbac2 100644
--- a/web_microphone_websocket/package.json
+++ b/web_microphone_websocket/package.json
@@ -8,7 +8,7 @@
     "@testing-library/user-event": "^7.1.2",
     "chai": "^4.2.0",
     "chai-http": "^4.3.0",
-    "@mozilla-voice/stt": "^0.9.0-alpha.5",
+    "deepspeech": "^0.8.0",
     "defaults": "^1.0.3",
     "mocha": "^6.1.4",
     "node-vad": "^1.1.4",
diff --git a/web_microphone_websocket/public/index.html b/web_microphone_websocket/public/index.html
index d4c2f56..ea9cce3 100644
--- a/web_microphone_websocket/public/index.html
+++ b/web_microphone_websocket/public/index.html
@@ -24,7 +24,7 @@
       work correctly both with client-side routing and a non-root public URL.
       Learn how to configure a non-root public URL by running `npm run build`.
     -->
-    <title>Mozilla Voice STT - Web Microphone Websocket Example</title>
+    <title>DeepSpeech - Web Microphone Websocket Example</title>
diff --git a/web_microphone_websocket/server.js b/web_microphone_websocket/server.js
index 732bdb9..4f295fb 100644
--- a/web_microphone_websocket/server.js
+++ b/web_microphone_websocket/server.js
@@ -1,9 +1,9 @@
 const http = require('http');
 const socketIO = require('socket.io');
-const mozillaVoiceStt = require('@mozilla-voice/stt');
+const DeepSpeech = require('deepspeech');
 const VAD = require('node-vad');
 
-let DEEPSPEECH_MODEL = __dirname + '/deepspeech-0.8.0-models'; // path to english model directory
+let DEEPSPEECH_MODEL = __dirname + '/deepspeech-0.8.0-models'; // path to deepspeech english model directory
 
 let SILENCE_THRESHOLD = 200; // how many milliseconds of inactivity before processing the audio
@@ -18,7 +18,7 @@ const vad = new VAD(VAD_MODE);
 function createModel(modelDir) {
     let modelPath = modelDir + '.pbmm';
     let scorerPath = modelDir + '.scorer';
-    let model = new mozillaVoiceStt.Model(modelPath);
+    let model = new DeepSpeech.Model(modelPath);
     model.enableExternalScorer(scorerPath);
     return model;
 }
@@ -222,4 +222,4 @@ app.listen(SERVER_PORT, 'localhost', () => {
     console.log('Socket server listening on:', SERVER_PORT);
 });
 
-module.exports = app;
+module.exports = app;
\ No newline at end of file