Revert "Merge pull request #73 from lissyx/master-stt-rename"

This reverts commit adde02be16, reversing
changes made to 93914db805.
Reuben Morais 2020-08-25 16:49:29 +02:00
Parent 394c134b0c
Commit 160b5700e6
80 changed files: 207 additions and 208 deletions

View file

@ -1,19 +1,17 @@
Mozilla Voice STT master Examples
=================================
DeepSpeech master Examples
==========================
These are various user-contributed examples on how to use or integrate Mozilla Voice STT using our packages.
These are various examples on how to use or integrate DeepSpeech using our packages.
It is a good way to just try out Mozilla Voice STT before learning how it works in detail, as well as a source of inspiration for ways you can integrate it into your application or solve common tasks like voice activity detection (VAD) or microphone streaming.
It is a good way to just try out DeepSpeech before learning how it works in detail, as well as a source of inspiration for ways you can integrate it into your application or solve common tasks like voice activity detection (VAD) or microphone streaming.
Please understand that these examples are provided as-is, with no guarantee that they will work in every configuration.
Contributions are welcome!
Contributions like fixes to existing examples or new ones are welcome!
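
For orientation, nearly every example listed below reduces to the same few API calls. A minimal sketch with the deepspeech 0.8 Python package (all file paths are placeholders, not files from this repository):

```python
# Minimal batch-inference sketch; assumes a 16 kHz mono 16-bit PCM WAV.
import wave

import numpy as np
import deepspeech

model = deepspeech.Model('deepspeech-0.8.0-models.pbmm')      # placeholder path
model.enableExternalScorer('deepspeech-0.8.0-models.scorer')  # placeholder path

with wave.open('audio.wav', 'rb') as wav:                     # placeholder path
    audio = np.frombuffer(wav.readframes(wav.getnframes()), dtype=np.int16)

print(model.stt(audio))
```
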
**Note:** These examples target DeepSpeech **master branch** only. If you're using a different release, you need to go to the corresponding branch for the release:
**Note:** These examples target Mozilla Voice STT **master branch** only. If you're using a different release, you need to go to the corresponding branch for the release:
* `v0.7.x <https://github.com/mozilla/STT-examples/tree/r0.7>`_
* `v0.6.x <https://github.com/mozilla/STT-examples/tree/r0.6>`_
* `master branch <https://github.com/mozilla/STT-examples/tree/master>`_
* `v0.7.x <https://github.com/mozilla/DeepSpeech-examples/tree/r0.7>`_
* `v0.6.x <https://github.com/mozilla/DeepSpeech-examples/tree/r0.6>`_
* `master branch <https://github.com/mozilla/DeepSpeech-examples/tree/master>`_
**List of examples**

View file

@ -1,6 +1,6 @@
# Android Microphone Streaming
Android demo application that streams audio from the microphone to mozilla voice stt and transcribes it.
Android demo application that streams audio from the microphone to deepspeech and transcribes it.
## Prerequisites
@ -16,7 +16,7 @@ Move the model files `deepspeech-0.8.0-models.pbmm`, `deepspeech-0.8.0-models.sc
Mind that the data directory will only be present after installing and launching the app once.
```
adb push deepspeech-0.8.0-models.tflite deepspeech-0.8.0-models.scorer /storage/emulated/0/Android/data/org.mozilla.voice.sttdemo/files/
adb push deepspeech-0.8.0-models.tflite deepspeech-0.8.0-models.scorer /storage/emulated/0/Android/data/org.deepspeechdemo/files/
```
You can also copy the files from your file browser to the device.
@ -49,4 +49,4 @@ Start recording by pressing the button and the app will transcribe the spoken te
Based on your use case or the language you are using, you might change the values of `BEAM_WIDTH`, `LM_ALPHA` and `LM_BETA` to improve the speech recognition.
You can also alter the `NUM_BUFFER_ELEMENTS` to change the size of the audio data buffer that is fed into the model.
You can also alter the `NUM_BUFFER_ELEMENTS` to change the size of the audio data buffer that is fed into the model.
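
These decoder knobs exist in every binding, not just the Android one. As a hedged illustration, the equivalent calls in the Python package (the numeric values are arbitrary examples, not recommendations):

```python
import deepspeech

# Same knobs as BEAM_WIDTH, LM_ALPHA and LM_BETA in the Android demo.
model = deepspeech.Model('deepspeech-0.8.0-models.pbmm')  # placeholder path
model.setBeamWidth(500)                # wider beam: slower but can be more accurate
model.enableExternalScorer('deepspeech-0.8.0-models.scorer')
model.setScorerAlphaBeta(0.93, 1.18)   # language-model weight, word-insertion bonus
```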

View file

@ -8,7 +8,7 @@ android {
compileSdkVersion 29
buildToolsVersion "29.0.2"
defaultConfig {
applicationId "org.mozilla.voice.sttdemo"
applicationId "org.deepspeechdemo"
minSdkVersion 22
targetSdkVersion 29
versionCode 1
@ -34,7 +34,7 @@ dependencies {
implementation 'androidx.core:core-ktx:1.0.2'
implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
implementation 'org.mozilla.voice:stt:0.9.0-alpha.5'
implementation 'org.mozilla.deepspeech:libdeepspeech:0.8.0'
testImplementation 'junit:junit:4.12'
androidTestImplementation 'androidx.test.ext:junit:1.1.0'

View file

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="org.mozilla.voice.sttdemo">
package="org.deepspeechdemo">
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
<uses-permission android:name="android.permission.RECORD_AUDIO" />

View file

@ -1,4 +1,4 @@
package org.mozilla.voice.sttdemo
package org.deepspeechdemo
import android.Manifest
import android.content.pm.PackageManager
@ -11,14 +11,14 @@ import android.view.View
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import kotlinx.android.synthetic.main.activity_main.*
import org.mozilla.voice.stt.MozillaVoiceSttModel.MozillaVoiceSttModel
import org.mozilla.voice.stt.MozillaVoiceSttModel.MozillaVoiceSttStreamingState
import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel
import org.mozilla.deepspeech.libdeepspeech.DeepSpeechStreamingState
import java.io.File
class MainActivity : AppCompatActivity() {
private var model: MozillaVoiceSttModel? = null
private var streamContext: MozillaVoiceSttStreamingState? = null
private var model: DeepSpeechModel? = null
private var streamContext: DeepSpeechStreamingState? = null
// Change the following parameters regarding
// what works best for your use case or your language.
@ -77,7 +77,7 @@ class MainActivity : AppCompatActivity() {
}
}
model = mozillaVoiceSttModel(tfliteModelPath)
model = DeepSpeechModel(tfliteModelPath)
model?.setBeamWidth(BEAM_WIDTH)
model?.enableExternalScorer(scorerPath)
model?.setScorerAlphaBeta(LM_ALPHA, LM_BETA)

View file

@ -1,3 +1,3 @@
<resources>
<string name="app_name">Mozilla Voice STT Demo</string>
<string name="app_name">DeepSpeech Demo</string>
</resources>

View file

@ -1,2 +1,2 @@
include ':app'
rootProject.name='MozillaVoiceSttDemo'
rootProject.name='DeepSpeechDemo'

View file

@ -134,7 +134,7 @@ Running via the GPU takes half the time of using the CPU and has good results.
It will then run the individual commands like :
`mozilla_voice_stt --model C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.8.0-models.pbmm --scorer C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.8.0-models.scorer --audio 'C:\Users\jmike\Downloads\podcast\45374977-48000-2-24d9a365625bb.mp3.wav' --json`
`deepspeech --model C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.8.0-models.pbmm --scorer C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.8.0-models.scorer --audio 'C:\Users\jmike\Downloads\podcast\45374977-48000-2-24d9a365625bb.mp3.wav' --json`
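
Spawning one CLI process per file works, but the same loop can run in-process. A hedged Python sketch (directory and model paths are placeholders):

```python
# Sketch: transcribe every .wav in a folder without shelling out to the
# `deepspeech` CLI for each file. Assumes 16 kHz mono 16-bit WAVs.
import glob
import wave

import numpy as np
import deepspeech

model = deepspeech.Model('deepspeech-0.8.0-models.pbmm')
model.enableExternalScorer('deepspeech-0.8.0-models.scorer')

for path in glob.glob('podcast/*.wav'):
    with wave.open(path, 'rb') as wav:
        audio = np.frombuffer(wav.readframes(wav.getnframes()), dtype=np.int16)
    print(path, '->', model.stt(audio))
```
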
Websites referenced:
@ -154,4 +154,4 @@ https://hacks.mozilla.org/2019/12/deepspeech-0-6-mozillas-speech-to-text-engine/
https://palletsprojects.com/p/click/
https://www.howtoforge.com/tutorial/ffmpeg-audio-conversion/
https://www.joe0.com/2019/10/19/how-resolve-tensorflow-2-0-error-could-not-load-dynamic-library-cudart64_100-dll-dlerror-cudart64_100-dll-not-found/
https://www.programcreek.com/python/example/88033/click.Path
https://www.programcreek.com/python/example/88033/click.Path

View file

@ -61,7 +61,7 @@ def main(dirname, ext, model, scorer):
command = " ".join(
[
"mozilla_voice_stt",
"deepspeech",
"--model",
model,
"--scorer",

View file

@ -10,7 +10,7 @@ cachetools==4.1.0
certifi==2020.4.5.2
chardet==3.0.4
click==7.1.2
mozilla_voice_stt==0.9.0a5
deepspeech==0.8.0
delegator.py @ git+https://github.com/amitt001/delegator.py.git@194aa92543fbdbfbae0bcc24ca217819a7805da2
flask==1.1.2
gast==0.2.2

View file

@ -1 +1 @@
mozilla_voice_stt --model C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.7.3-models.pbmm --scorer C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.7.3-models.scorer --audio C:\Users\jmike\Documents\Audacity\clip.wav --json
deepspeech --model C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.7.3-models.pbmm --scorer C:\Users\jmike\Documents\GitHub\DeepSpeech\deepspeech-0.7.3-models.scorer --audio C:\Users\jmike\Documents\Audacity\clip.wav --json

View file

@ -1,6 +1,6 @@
# Mozilla Voice STT Electron example
# DeepSpeech Electron example
This is an example of Mozilla Voice STT running in an Electron app with a ReactJS front-end and processing .wav files.
This is an example of DeepSpeech running in an Electron app with a ReactJS front-end and processing .wav files.
## Install
@ -66,6 +66,6 @@ Test the (dmg/appimage/exe) package file that has been generated in `/dist`.
The model files download to the following directories and must be deleted manually
- MacOSX: `~/Library/Application\ Support/mozilla_voice_stt-electron`
- Linux: `~/.config/mozilla_voice_stt-electron`
- Windows: `~/AppData/Roaming/mozilla_voice_stt-electron`
- MacOSX: `~/Library/Application\ Support/deepspeech-electron`
- Linux: `~/.config/deepspeech-electron`
- Windows: `~/AppData/Roaming/deepspeech-electron`

26
electron/package-lock.json (generated)
View file

@ -1,5 +1,5 @@
{
"name": "mozilla_voice_stt-electron",
"name": "deepspeech-electron",
"version": "1.0.0",
"lockfileVersion": 1,
"requires": true,
@ -1384,18 +1384,6 @@
"@types/yargs": "^13.0.0"
}
},
"@mozilla-voice/stt": {
"version": "0.9.0-alpha.5",
"resolved": "https://registry.npmjs.org/@mozilla-voice/stt/-/stt-0.9.0-alpha.5.tgz",
"integrity": "sha512-lyZmMnLKdmBzWonDazIvkbnyAlIXd5NDUaINf5wfOdsw4Rliv/hy/FjYdYN9Tccq4Zvcd+dbqgXGLeZuECGmIg==",
"requires": {
"argparse": "1.0.x",
"memory-stream": "1.0.x",
"node-pre-gyp": "0.15.x",
"node-wav": "0.0.2",
"sox-stream": "2.0.x"
}
},
"@mrmlnc/readdir-enhanced": {
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/@mrmlnc/readdir-enhanced/-/readdir-enhanced-2.2.1.tgz",
@ -4802,6 +4790,18 @@
"resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.3.tgz",
"integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ="
},
"deepspeech": {
"version": "0.8.0",
"resolved": "https://registry.npmjs.org/deepspeech/-/deepspeech-0.8.0.tgz",
"integrity": "sha512-jqU+NbXVZnS+okMgoiOhJz22RaHSmvIjmHaRu7IZ0xBDQbcqNGff4GXk4a5etfSXm3bXddRtBlfFr5KyQExjbw==",
"requires": {
"argparse": "1.0.x",
"memory-stream": "1.0.x",
"node-pre-gyp": "0.15.x",
"node-wav": "0.0.2",
"sox-stream": "2.0.x"
}
},
"default-gateway": {
"version": "4.2.0",
"resolved": "https://registry.npmjs.org/default-gateway/-/default-gateway-4.2.0.tgz",

View file

@ -1,6 +1,6 @@
{
"name": "mozilla_voice_stt-electron",
"productName": "mozilla_voice_stt-electron",
"name": "deepspeech-electron",
"productName": "deepspeech-electron",
"version": "1.0.0",
"description": "My Electron application description",
"main": "public/electron.js",
@ -20,15 +20,15 @@
"postinstall": "electron-builder install-app-deps",
"homepage": "./",
"build": {
"appId": "mozilla_voice_stt-electron",
"productName": "mozilla_voice_stt-electron",
"appId": "deepspeech-electron",
"productName": "deepspeech-electron",
"files": [
"build/**/*",
"node_modules/**/*",
"package.json"
],
"buildDependenciesFromSource": true,
"artifactName": "mozilla_voice_stt-electron-${version}-${os}-${arch}.${ext}",
"artifactName": "deepspeech-electron-${version}-${os}-${arch}.${ext}",
"dmg": {
"title": "${productName}"
},
@ -52,7 +52,7 @@
},
"win": {
"target": "nsis",
"artifactName": "mozilla_voice_stt-electron-${version}-${os}-${arch}.${ext}"
"artifactName": "deepspeech-electron-${version}-${os}-${arch}.${ext}"
},
"linux": {
"target": [
@ -66,7 +66,7 @@
"keywords": [],
"license": "MIT",
"dependencies": {
"@mozilla-voice/stt": "^0.9.0-alpha.5",
"deepspeech": "^0.8.0",
"electron-is-dev": "^1.1.0",
"lodash": "^4.17.15",
"node-abi": "^2.18.0",

View file

@ -35,7 +35,7 @@ function createWindow(model) {
app.quit()
});
// message from front-end App.js, request that this file be processed by Mozilla Voice STT
// message from front-end App.js, request that this file be processed by DeepSpeech
ipcMain.handle('recognize-wav', async function (event, file) {
const filePath = path.resolve(__dirname, 'audio', file);
const results = await recognizeWav(filePath, model);

View file

@ -8,12 +8,12 @@ const {getModel} = require('./recognize-wav');
let appDataPath;
if (fs.existsSync(path.resolve(__dirname, '../models/deepspeech-0.8.0-models.pbmm'))) {
// if the model was found at the root, use that directory
// if the deepspeech model was found at the root, use that directory
appDataPath = path.resolve(__dirname, '../models');
}
else {
// otherwise use the electron "appData" path
appDataPath = path.resolve(electron.app.getPath('appData'), 'mozilla_voice_stt-electron');
appDataPath = path.resolve(electron.app.getPath('appData'), 'deepspeech-electron');
}
app.on('ready', function () {

View file

@ -25,7 +25,7 @@
Learn how to configure a non-root public URL by running `npm run build`.
-->
<link rel="stylesheet" href="fonts/stylesheet.css" type="text/css" charset="utf-8" />
<title>Mozilla Voice STT Electron Example</title>
<title>DeepSpeech Electron Example</title>
</head>
<body>
<noscript>You need to enable JavaScript to run this app.</noscript>

View file

@ -1,10 +1,10 @@
const mozillaVoiceStt = require('@mozilla-voice/stt');
const DeepSpeech = require('deepspeech');
const fs = require('fs');
const path = require('path');
const wav = require('wav');
const download = require('./download');
// return the model or download it if it is not found
// return the deepspeech model or download it if it is not found
function getModel(appDataPath, callback) {
let modelPath = path.resolve(appDataPath, 'deepspeech-0.8.0-models.pbmm');
let scorerPath = path.resolve(appDataPath, 'deepspeech-0.8.0-models.scorer');
@ -23,14 +23,14 @@ function getModel(appDataPath, callback) {
}
}
// create the model
// create the deepspeech model
function createModel(modelPath, scorerPath) {
const model = new mozillaVoiceStt.Model(modelPath);
const model = new DeepSpeech.Model(modelPath);
model.enableExternalScorer(scorerPath);
return model;
}
// create a stream to process a .wav file
// create a deepspeech stream to process a .wav file
function recognizeWav(path, model) {
return new Promise(function(resolve, reject) {
try {

View file

@ -21,7 +21,7 @@ class App extends Component {
files
}, () => {
files.forEach(file => {
// request that each file be processed by mozilla voice stt
// request that each file be processed by deepspeech
console.log('recognize', file);
window.ipcRenderer.invoke('recognize-wav', file).then(result => {
// add the recognition results to this.state.results

View file

@ -1,6 +1,6 @@
# FFmpeg VAD Streaming
Streaming inference from arbitrary source (FFmpeg input) to Mozilla Voice STT, using VAD (voice activity detection). A fairly simple example demonstrating the Mozilla Voice STT streaming API in Node.js.
Streaming inference from arbitrary source (FFmpeg input) to DeepSpeech, using VAD (voice activity detection). A fairly simple example demonstrating the DeepSpeech streaming API in Node.js.
This example was successfully tested with a mobile phone streaming a live feed to a RTMP server (nginx-rtmp), which then could be used by this script for near real time speech recognition.
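
The Node script here drives the streaming API; the same flow, sketched in Python under the assumption that ffmpeg is on PATH (the RTMP URL and model path are placeholders):

```python
# Sketch: pipe 16 kHz mono 16-bit PCM out of ffmpeg and feed it into a
# DeepSpeech stream.
import subprocess

import numpy as np
import deepspeech

model = deepspeech.Model('deepspeech-0.8.0-models.pbmm')
stream = model.createStream()

ffmpeg = subprocess.Popen(
    ['ffmpeg', '-i', 'rtmp://<IP>:1935/live/teststream',
     '-f', 's16le', '-ar', '16000', '-ac', '1', 'pipe:1'],
    stdout=subprocess.PIPE)

leftover = b''
while True:
    data = ffmpeg.stdout.read(4096)
    if not data:
        break
    buf = leftover + data
    usable = len(buf) // 2 * 2          # keep whole 16-bit samples only
    stream.feedAudioContent(np.frombuffer(buf[:usable], dtype=np.int16))
    leftover = buf[usable:]

print(stream.finishStream())
```
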
@ -31,7 +31,7 @@ node ./index.js --audio rtmp://<IP>:1935/live/teststream \
```
## Examples
Real time streaming inference with Mozilla Voice STT's example audio ([audio-0.4.1.tar.gz](https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz)).
Real time streaming inference with DeepSpeech's example audio ([audio-0.4.1.tar.gz](https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz)).
```bash
node ./index.js --audio $HOME/audio/2830-3980-0043.wav \
--scorer $HOME/models/kenlm.scorer \

View file

@ -1,7 +1,7 @@
#!/usr/bin/env node
const VAD = require("node-vad");
const mVS = require('@mozilla-voice/stt');
const Ds = require('deepspeech');
const argparse = require('argparse');
const util = require('util');
const { spawn } = require('child_process');
@ -15,11 +15,11 @@ let VersionAction = function VersionAction(options) {
util.inherits(VersionAction, argparse.Action);
VersionAction.prototype.call = function(parser) {
mVS.printVersions();
Ds.printVersions();
process.exit(0);
};
let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running Mozilla Voice STT inference.'});
let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
parser.addArgument(['--scorer'], {help: 'Path to the scorer file', nargs: '?'});
parser.addArgument(['--audio'], {required: true, help: 'Path to the audio source to run (ffmpeg supported formats)'});
@ -32,7 +32,7 @@ function totalTime(hrtimeValue) {
console.error('Loading model from file %s', args['model']);
const model_load_start = process.hrtime();
let model = new mVS.Model(args['model']);
let model = new Ds.Model(args['model']);
const model_load_end = process.hrtime(model_load_start);
console.error('Loaded model in %ds.', totalTime(model_load_end));

View file

@ -8,7 +8,7 @@
},
"dependencies": {
"argparse": "^1.0.10",
"@mozilla-voice/stt": "0.9.0-alpha.5",
"deepspeech": "0.8.0",
"node-vad": "^1.1.1",
"util": "^0.11.1"
},

View file

@ -1,7 +1,8 @@
Microphone VAD Streaming
========================
Stream from microphone to Mozilla Voice STT, using VAD (voice activity detection). A fairly simple example demonstrating the Mozilla Voice STT streaming API in Python. Also useful for quick, real-time testing of models and decoding parameters.
Stream from microphone to DeepSpeech, using VAD (voice activity detection). A fairly simple example demonstrating the DeepSpeech streaming API in Python. Also useful for quick, real-time testing of models and decoding parameters.
Installation
------------
@ -31,7 +32,7 @@ Usage
[-w SAVEWAV] [-f FILE] -m MODEL [-s SCORER]
[-d DEVICE] [-r RATE]
Stream from microphone to Mozilla Voice STT using VAD
Stream from microphone to DeepSpeech using VAD
optional arguments:
-h, --help show this help message and exit
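
Under the hood the script drives the streaming API. A hedged sketch of that call pattern, with a chunked WAV file standing in for the VAD-gated microphone frames (paths are placeholders):

```python
import wave

import numpy as np
import deepspeech

model = deepspeech.Model('deepspeech-0.8.0-models.pbmm')
stream = model.createStream()

# Stand-in for VAD-gated microphone frames: chunk a 16 kHz mono WAV into
# 20 ms pieces and end with None, which plays the role of "VAD detected
# end of utterance".
with wave.open('audio.wav', 'rb') as wav:
    pcm = wav.readframes(wav.getnframes())
frames = [pcm[i:i + 640] for i in range(0, len(pcm), 640)] + [None]

for frame in frames:
    if frame is not None:
        stream.feedAudioContent(np.frombuffer(frame, dtype=np.int16))
        print(stream.intermediateDecode())   # live partial transcript
    else:
        print('final:', stream.finishStream())
        stream = model.createStream()        # ready for the next utterance
```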

View file

@ -60,7 +60,7 @@ class Audio(object):
"""
Microphone may not support our native processing sampling rate, so
resample from input_rate to RATE_PROCESS here for webrtcvad and
mozilla_voice_stt
deepspeech
Args:
data (binary): Input audio stream
@ -152,7 +152,7 @@ class VADAudio(Audio):
ring_buffer.clear()
def main(ARGS):
# Load model
# Load DeepSpeech model
if os.path.isdir(ARGS.model):
model_dir = ARGS.model
ARGS.model = os.path.join(model_dir, 'output_graph.pb')
@ -173,7 +173,7 @@ def main(ARGS):
print("Listening (ctrl-C to exit)...")
frames = vad_audio.vad_collector()
# Stream from microphone to Mozilla Voice STT using VAD
# Stream from microphone to DeepSpeech using VAD
spinner = None
if not ARGS.nospinner:
spinner = Halo(spinner='line')
@ -199,7 +199,7 @@ if __name__ == '__main__':
DEFAULT_SAMPLE_RATE = 16000
import argparse
parser = argparse.ArgumentParser(description="Stream from microphone to Mozilla Voice STT using VAD")
parser = argparse.ArgumentParser(description="Stream from microphone to DeepSpeech using VAD")
parser.add_argument('-v', '--vad_aggressiveness', type=int, default=3,
help="Set aggressiveness of VAD: an integer between 0 and 3, 0 being the least aggressive about filtering out non-speech, 3 the most aggressive. Default: 3")

View file

@ -1,4 +1,4 @@
mozilla_voice_stt~=0.9.0a5
deepspeech~=0.8.0
pyaudio~=0.2.11
webrtcvad~=2.0.10
halo~=0.0.18

View file

@ -8,7 +8,7 @@ pushd ${THIS}
source ../tests.sh
pip install --user $(get_python_wheel_url "$1")
pip install --user -r <(grep -v mozilla_voice_stt requirements.txt)
pip install --user -r <(grep -v deepspeech requirements.txt)
pulseaudio &

View file

@ -1,8 +1,8 @@
<Application
x:Class="MozillaVoiceSttWPF.App"
x:Class="DeepSpeechWPF.App"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:local="clr-namespace:MozillaVoiceSttWPF"
xmlns:local="clr-namespace:DeepSpeechWPF"
StartupUri="MainWindow.xaml">
<Application.Resources />
</Application>

View file

@ -1,10 +1,10 @@
using CommonServiceLocator;
using MozillaVoiceStt.WPF.ViewModels;
using MozillaVoiceSttClient.Interfaces;
using DeepSpeech.WPF.ViewModels;
using DeepSpeechClient.Interfaces;
using GalaSoft.MvvmLight.Ioc;
using System.Windows;
namespace MozillaVoiceSttWPF
namespace DeepSpeechWPF
{
/// <summary>
/// Interaction logic for App.xaml
@ -18,11 +18,11 @@ namespace MozillaVoiceSttWPF
try
{
//Register instance of Mozilla Voice STT
MozillaVoiceSttClient.MozillaVoiceStt mozillaVoiceSttClient =
new MozillaVoiceSttClient.MozillaVoiceSttModel("deepspeech-0.8.0-models.pbmm");
//Register instance of DeepSpeech
DeepSpeechClient.DeepSpeech deepSpeechClient =
new DeepSpeechClient.DeepSpeech("deepspeech-0.8.0-models.pbmm");
SimpleIoc.Default.Register<IMozillaVoiceStt>(() => mozillaVoiceSttClient);
SimpleIoc.Default.Register<IDeepSpeech>(() => deepSpeechClient);
SimpleIoc.Default.Register<MainWindowViewModel>();
}
catch (System.Exception ex)
@ -35,8 +35,8 @@ namespace MozillaVoiceSttWPF
protected override void OnExit(ExitEventArgs e)
{
base.OnExit(e);
//Dispose instance of MozillaVoiceStt
ServiceLocator.Current.GetInstance<IMozillaVoiceStt>()?.Dispose();
//Dispose instance of DeepSpeech
ServiceLocator.Current.GetInstance<IDeepSpeech>()?.Dispose();
}
}
}

View file

@ -6,8 +6,8 @@
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{54BFD766-4305-4F4C-BA59-AF45505DF3C1}</ProjectGuid>
<OutputType>WinExe</OutputType>
<RootNamespace>MozillaVoiceStt.WPF</RootNamespace>
<AssemblyName>MozillaVoiceStt.WPF</AssemblyName>
<RootNamespace>DeepSpeech.WPF</RootNamespace>
<AssemblyName>DeepSpeech.WPF</AssemblyName>
<TargetFrameworkVersion>v4.6.2</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<ProjectTypeGuids>{60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
@ -131,10 +131,10 @@
<None Include="App.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\ds\native_client\dotnet\MozillaVoiceSttClient\MozillaVoiceSttClient.csproj">
<ProjectReference Include="..\..\..\ds\native_client\dotnet\DeepSpeechClient\DeepSpeechClient.csproj">
<Project>{56de4091-bbbe-47e4-852d-7268b33b971f}</Project>
<Name>MozillaVoiceSttClient</Name>
<Name>DeepSpeechClient</Name>
</ProjectReference>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>
</Project>

View file

@ -3,9 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.28307.421
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceStt.WPF", "MozillaVoiceStt.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeech.WPF", "DeepSpeech.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceSttClient", "..\..\..\ds\native_client\dotnet\MozillaVoiceSttClient\MozillaVoiceSttClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechClient", "..\..\..\ds\native_client\dotnet\DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution

View file

@ -1,10 +1,10 @@
<Window
x:Class="MozillaVoiceSttWPF.MainWindow"
x:Class="DeepSpeechWPF.MainWindow"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
Title="Mozilla Voice STT client"
Title="Deepspeech client"
Width="800"
Height="600"
Loaded="Window_Loaded"

View file

@ -1,8 +1,8 @@
using CommonServiceLocator;
using MozillaVoiceStt.WPF.ViewModels;
using DeepSpeech.WPF.ViewModels;
using System.Windows;
namespace MozillaVoiceSttWPF
namespace DeepSpeechWPF
{
/// <summary>
/// Interaction logic for MainWindow.xaml

View file

@ -7,11 +7,11 @@ using System.Windows;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("MozillaVoiceStt.WPF")]
[assembly: AssemblyTitle("DeepSpeech.WPF")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("MozillaVoiceStt.WPF.SingleFiles")]
[assembly: AssemblyProduct("DeepSpeech.WPF.SingleFiles")]
[assembly: AssemblyCopyright("Copyright © 2018")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

View file

@ -8,7 +8,7 @@
// </auto-generated>
//------------------------------------------------------------------------------
namespace MozillaVoiceStt.WPF.Properties {
namespace DeepSpeech.WPF.Properties {
using System;
@ -39,7 +39,7 @@ namespace MozillaVoiceStt.WPF.Properties {
internal static global::System.Resources.ResourceManager ResourceManager {
get {
if (object.ReferenceEquals(resourceMan, null)) {
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("MozillaVoiceStt.WPF.Properties.Resources", typeof(Resources).Assembly);
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("DeepSpeech.WPF.Properties.Resources", typeof(Resources).Assembly);
resourceMan = temp;
}
return resourceMan;

View file

@ -8,7 +8,7 @@
// </auto-generated>
//------------------------------------------------------------------------------
namespace MozillaVoiceStt.WPF.Properties {
namespace DeepSpeech.WPF.Properties {
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]

View file

@ -3,7 +3,7 @@ using System.Collections.Generic;
using System.ComponentModel;
using System.Runtime.CompilerServices;
namespace MozillaVoiceStt.WPF.ViewModels
namespace DeepSpeech.WPF.ViewModels
{
/// <summary>
/// Implementation of <see cref="INotifyPropertyChanged"/> to simplify models.

View file

@ -3,8 +3,8 @@ using CSCore;
using CSCore.CoreAudioAPI;
using CSCore.SoundIn;
using CSCore.Streams;
using MozillaVoiceSttClient.Interfaces;
using MozillaVoiceSttClient.Models;
using DeepSpeechClient.Interfaces;
using DeepSpeechClient.Models;
using GalaSoft.MvvmLight.CommandWpf;
using Microsoft.Win32;
using System;
@ -15,7 +15,7 @@ using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace MozillaVoiceStt.WPF.ViewModels
namespace DeepSpeech.WPF.ViewModels
{
/// <summary>
/// View model of the MainWindow View.
@ -27,7 +27,7 @@ namespace MozillaVoiceStt.WPF.ViewModels
private const string ScorerPath = "kenlm.scorer";
#endregion
private readonly IMozillaVoiceSttClient _sttClient;
private readonly IDeepSpeech _sttClient;
#region Commands
/// <summary>
@ -62,7 +62,7 @@ namespace MozillaVoiceStt.WPF.ViewModels
/// <summary>
/// Stream used to feed data into the acoustic model.
/// </summary>
private MozillaVoiceSttStream _sttStream;
private DeepSpeechStream _sttStream;
/// <summary>
/// Records the audio of the selected device.
@ -75,7 +75,7 @@ namespace MozillaVoiceStt.WPF.ViewModels
private SoundInSource _soundInSource;
/// <summary>
/// Target wave source.(16KHz Mono 16bit for MozillaVoiceStt)
/// Target wave source.(16KHz Mono 16bit for DeepSpeech)
/// </summary>
private IWaveSource _convertedSource;
@ -200,7 +200,7 @@ namespace MozillaVoiceStt.WPF.ViewModels
#endregion
#region Ctors
public MainWindowViewModel(IMozillaVoiceStt sttClient)
public MainWindowViewModel(IDeepSpeech sttClient)
{
_sttClient = sttClient;
@ -290,7 +290,7 @@ namespace MozillaVoiceStt.WPF.ViewModels
//read data from the convertedSource
//important: don't use the e.Data here
//the e.Data contains the raw data provided by the
//soundInSource which won't have the model required audio format
//soundInSource which won't have the deepspeech required audio format
byte[] buffer = new byte[_convertedSource.WaveFormat.BytesPerSecond / 2];
int read;
@ -422,4 +422,4 @@ namespace MozillaVoiceStt.WPF.ViewModels
}
}
}
}
}

View file

@ -8,29 +8,29 @@ Only difference for both OS is the library used for gathering audio data from
Interface to both the libs is provided through NIM code.
## PREREQUISITES:
* ```libmozilla_voice_stt.so```
* ```libdeepspeech.so```
Go to the [releases](https://github.com/mozilla/DeepSpeech/releases/tag/v0.8.0) page and download the native client package based on your OS and CPU architecture.
Extract ``libmozilla_voice_stt.so`` and put it into the subdirectory matching the OS of the native client used.
Extract ``libdeepspeech.so`` and put it into the subdirectory matching the OS of the native client used.
#### On WINDOWS:
* Download the ```native.client.amd64.win.tar.xz``` package. [The same applies to ``xx.xx.amd64.cuda.win.xx`` if CUDA is installed, or ``xx.xx.amd64.tflite.win.xx``.]
* Extract and place ```libmozilla_voice_stt.so``` in the ```win_nim_vad_streaming``` subdirectory
* Extract and place ```libdeepspeech.so``` in the ```win_nim_vad_streaming``` subdirectory
* Now see ``README.md`` in the ```win_nim_vad_streaming``` subdirectory.
#### On LINUX:
* Download the ```native_client.amd64.linux.cpu``` package. [The same applies to ``xx.xx.amd64.cuda.linux.xx`` if CUDA is installed, or ``xx.xx.amd64.tflite.linux.xx``.]
* Extract and place ```libmozilla_voice_stt.so``` in the ```linux_nim_vad_streaming``` subdirectory
* Extract and place ```libdeepspeech.so``` in the ```linux_nim_vad_streaming``` subdirectory
* Now see ``README.md`` in the ```linux_nim_vad_streaming``` subdirectory.
_Note: You can put ``libmozilla_voice_stt.so`` in the system's PATH rather than copying it into one of the subdirectories, for easier usage._
_Note: You can put ``libdeepspeech.so`` in the system's PATH rather than copying it into one of the subdirectories, for easier usage._
## NOTE:
The NIM code only depends on the shared library (``libmozilla_voice_stt.so``) used.
Once you have downloaded the native client package, extracted the ``libmozilla_voice_stt.so`` shared library, and copied it to one of the subdirectories or into the system's PATH, the code can be modified to add more functionality in pure NIM; the modified code will compile on any platform supported by NIM.
The NIM code only depends on the shared library (``libdeepspeech.so``) used.
Once you have downloaded the native client package, extracted the ``libdeepspeech.so`` shared library, and copied it to one of the subdirectories or into the system's PATH, the code can be modified to add more functionality in pure NIM; the modified code will compile on any platform supported by NIM.

View file

@ -1,15 +1,15 @@
# MICROPHONE VAD STREAMING
Minimalistic example to demonstrate the Mozilla Voice STT streaming API in NIM. Raw audio is streamed from the microphone to Mozilla Voice STT based on VAD (Voice Activity Detection).
Minimalistic example to demonstrate the DeepSpeech streaming API in NIM. Raw audio is streamed from the microphone to DeepSpeech based on VAD (Voice Activity Detection).
## Prerequisites:
0) Please read ``PREREQUISITES`` in the [README](../README.md) to get the required ``libmozilla_voice_stt.so`` shared library.
0) Please read ``PREREQUISITES`` in the [README](../README.md) to get the required ``libdeepspeech.so`` shared library.
1) This example depends on ``libasound.so`` (which is distributed with all major Linux distros and present in the linker's default path)
_Note: You may need to install ``libasound.so`` if not found_
```
sudo apt-get install libasound2
```
2) Download the pre-trained Mozilla Voice STT English model (1089 MB) and scorer package (~900 MB):
2) Download the pre-trained DeepSpeech English model (1089 MB) and scorer package (~900 MB):
```
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.8.0/deepspeech-0.8.0-models.pbmm

View file

@ -1,6 +1,6 @@
import os ,deques,math,strutils,parseopt,tables,strformat
import alsa,webrtcvad,wav
import mozilla_voice_stt
import deepspeech
var
args = initTable[string, string]()
@ -28,8 +28,8 @@ let
hw_params: snd_pcm_hw_params_ref = nil
device_name = "plughw:0,0" #PCM hardware alsa Device.
size = (int((frameDuration*int(rate))/1000))
modelPtr: ModelState = nil #mozilla Voice Stt model
deepStreamPtr: StreamingState = nil #mozilla Voice Stt model stream
modelPtr: ModelState = nil #deepSpeech model
deepStreamPtr: StreamingState = nil #deepSpeech model stream
modelPath = args["model"]
var
@ -40,7 +40,7 @@ var
framesLen: clong
vad:vadObj #VAD Object declaration
codeV: cint #to hold the error codes for VAD.
codeD: cint #to hold the error codes for mozilla Voice Stt
codeD: cint #to hold the error codes for deepSpeech
#to get the data from the channel.
frame : seq[int16]
buff = initDeque[tuple[data: seq[int16],flag:int32]](nextPowerOfTwo(windowSize))
@ -187,4 +187,4 @@ while true:
#joinThread(thread)
#echo("Thread finished..")
#echo("Thread finished..")

View file

@ -1,11 +1,11 @@
# MICROPHONE VAD STREAMING
Minimalistic example to demonstrate the Mozilla Voice STT streaming API in NIM. Raw audio is streamed from the microphone to Mozilla Voice STT based on VAD (Voice Activity Detection).
Minimalistic example to demonstrate the DeepSpeech streaming API in NIM. Raw audio is streamed from the microphone to DeepSpeech based on VAD (Voice Activity Detection).
## Prerequisites:
0) Please read ``PREREQUISITES`` in the [README](../README.md) to get the required ``libmozilla_voice_stt.so`` shared library.
0) Please read ``PREREQUISITES`` in the [README](../README.md) to get the required ``libdeepspeech.so`` shared library.
1) This example depends on ``libportaudio.dll`` (a precompiled portaudio library). Make sure you have this library in PATH. If you don't have one or are unable to build one, you can get one from [here](https://gitlab.com/eagledot/nim-portaudio/lib).
2) Download the pre-trained Mozilla Voice STT English model (1089 MB):
2) Download the pre-trained DeepSpeech English model (1089 MB):
```
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.8.0/deepspeech-0.8.0-models.pbmm

View file

@ -1,6 +1,6 @@
import os ,deques,math,strutils,parseopt,tables
import strformat
import webrtcvad,portaudio,mozilla_voice_stt,wav
import webrtcvad,portaudio,deepspeech,wav
proc sum[T](temp: Deque[T]): int =
@ -47,8 +47,8 @@ let
f1 = open("FIFO_rgb",fmWrite)
f2 = open("FIFO_rgb",fmREAD)
stream: pointer = nil #portaudio Stream pointer holder.
modelPtr: ModelState = nil #mozilla Voice Stt model
deepStreamPtr: StreamingState = nil #mozilla Voice Stt model stream
modelPtr: ModelState = nil #deepSpeech model
deepStreamPtr: StreamingState = nil #deepSpeech model stream
modelPath = args["model"]
if "scorer" in args:
scorerPath = args["scorer"]
@ -68,7 +68,7 @@ when isMainModule:
codeV = setMode(vad,3'i32)
assert codeV == 0'i32
#Mozilla Voice STT model initialization.
#DeepSpeech model initialization.
codeD = createModel(modelPath,unsafeaddr(modelPtr))
if codeD == 0'i32:
echo("Model Created Successfully")

View file

@ -1,14 +1,14 @@
# NodeJS Microphone VAD Streaming
This is a NodeJS example of recording from the microphone and streaming to
Mozilla Voice STT with voice activity detection.
DeepSpeech with voice activity detection.
### Prerequisites:
1) The example utilizes the [mic](https://github.com/ashishbajaj99/mic) NPM module, which requires
either [sox](http://sox.sourceforge.net/) (Windows/Mac) or [arecord](http://alsa-project.org/) (Linux).
2) Download the pre-trained Mozilla Voice STT English model (1089 MB):
2) Download the pre-trained DeepSpeech English model (1089 MB):
```
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.8.0/deepspeech-0.8.0-models.pbmm
@ -35,10 +35,10 @@ npm install
node start.js
```
#### Specify alternate Mozilla Voice STT model path:
#### Specify alternate DeepSpeech model path:
Use the `DEEPSPEECH_MODEL` environment variable to change models.
```
DEEPSPEECH_MODEL=~/dev/jaxcore/deepspeech-0.8.0-models/ node start.js
```
```

View file

@ -3,7 +3,7 @@
"version": "0.1.0",
"private": true,
"dependencies": {
"@mozilla-voice/stt": "^0.9.0-alpha.5",
"deepspeech": "^0.8.0",
"mic": "^2.1.2",
"node-vad": "^1.1.4",
"speaker": "^0.5.1",

View file

@ -1,11 +1,11 @@
const mozillaVoiceStt = require('@mozilla-voice/stt');
const DeepSpeech = require('deepspeech');
const VAD = require('node-vad');
const mic = require('mic');
const fs = require('fs');
const wav = require('wav');
const Speaker = require('speaker');
let DEEPSPEECH_MODEL; // path to model directory
let DEEPSPEECH_MODEL; // path to deepspeech model directory
if (process.env.DEEPSPEECH_MODEL) {
DEEPSPEECH_MODEL = process.env.DEEPSPEECH_MODEL;
}
@ -24,7 +24,7 @@ const vad = new VAD(VAD_MODE);
function createModel(modelDir) {
let modelPath = modelDir + '.pbmm';
let scorerPath = modelDir + '.scorer';
let model = new mozillaVoiceStt.Model(modelPath);
let model = new DeepSpeech.Model(modelPath);
model.enableExternalScorer(scorerPath);
return model;
}

View file

@ -1,4 +1,4 @@
# NodeJS voice recognition example using Mozilla Voice STT
# NodeJS voice recognition example using Mozilla DeepSpeech
Download the pre-trained model (1.8GB):

View file

@ -1,4 +1,4 @@
const mozillaVoiceStt = require('@mozilla-voice/stt');
const DeepSpeech = require('deepspeech');
const Fs = require('fs');
const Sox = require('sox-stream');
const MemoryStream = require('memory-stream');
@ -7,7 +7,7 @@ const Wav = require('node-wav');
let modelPath = './models/deepspeech-0.8.0-models.pbmm';
let model = new mozillaVoiceStt.Model(modelPath);
let model = new DeepSpeech.Model(modelPath);
let desiredSampleRate = model.sampleRate();

View file

@ -1,5 +1,5 @@
{
"name": "mozilla_voice_stt-nodejs_wav",
"name": "deepspeech-nodejs_wav",
"version": "1.0.0",
"description": "Simple audio processing",
"main": "index.js",
@ -8,7 +8,7 @@
},
"dependencies": {
"argparse": "^1.0.10",
"@mozilla-voice/stt": "0.9.0-alpha.5",
"deepspeech": "0.8.0",
"node-wav": "0.0.2",
"sox-stream": "^2.0.3",
"util": "^0.11.1"

View file

@ -19,5 +19,5 @@ get_python_wheel_url()
get_npm_package_url()
{
echo "https://community-tc.services.mozilla.com/api/queue/v1/task/${DEP_TASK_ID}/artifacts/public/mozilla-voice-stt-${DS_VERSION}.tgz"
echo "https://community-tc.services.mozilla.com/api/queue/v1/task/${DEP_TASK_ID}/artifacts/public/mozilla_voice_stt-${DS_VERSION}.tgz"
}

View file

@ -3,7 +3,7 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.29519.87
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceSttUWP", "MozillaVoiceSttUWP\MozillaVoiceSttUWP.csproj", "{49AAC24D-6A76-4910-913A-94D2D67B6226}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechUWP", "DeepSpeechUWP\DeepSpeechUWP.csproj", "{49AAC24D-6A76-4910-913A-94D2D67B6226}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution

View file

View file

@ -1,7 +1,7 @@
<Application
x:Class="MozillaVoiceSttUWP.App"
x:Class="DeepSpeechUWP.App"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:local="using:MozillaVoiceSttUWP">
xmlns:local="using:DeepSpeechUWP">
</Application>

View file

@ -16,7 +16,7 @@ using Windows.UI.Xaml.Input;
using Windows.UI.Xaml.Media;
using Windows.UI.Xaml.Navigation;
namespace MozillaVoiceSttUWP
namespace DeepSpeechUWP
{
/// <summary>
/// Provides application-specific behavior to supplement the default Application class.

View file

Image file (binary): before 1.4 KiB, after 1.4 KiB

View file

Image file (binary): before 7.5 KiB, after 7.5 KiB

View file

Image file (binary): before 2.9 KiB, after 2.9 KiB

View file

Image file (binary): before 1.6 KiB, after 1.6 KiB

View file

Image file (binary): before 1.2 KiB, after 1.2 KiB

View file

Image file (binary): before 1.4 KiB, after 1.4 KiB

View file

Image file (binary): before 3.1 KiB, after 3.1 KiB

View file

@ -7,8 +7,8 @@
<ProjectGuid>{49AAC24D-6A76-4910-913A-94D2D67B6226}</ProjectGuid>
<OutputType>AppContainerExe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>MozillaVoiceSttUWP</RootNamespace>
<AssemblyName>MozillaVoiceSttUWP</AssemblyName>
<RootNamespace>DeepSpeechUWP</RootNamespace>
<AssemblyName>DeepSpeechUWP</AssemblyName>
<DefaultLanguage>en-US</DefaultLanguage>
<TargetPlatformIdentifier>UAP</TargetPlatformIdentifier>
<TargetPlatformVersion Condition=" '$(TargetPlatformVersion)' == '' ">10.0.18362.0</TargetPlatformVersion>
@ -158,14 +158,14 @@
</Page>
</ItemGroup>
<ItemGroup>
<PackageReference Include="MozillaVoiceSttClient">
<Version>0.9.0-alpha.5</Version>
<PackageReference Include="DeepSpeech">
<Version>0.8.0</Version>
</PackageReference>
<PackageReference Include="Microsoft.NETCore.UniversalWindowsPlatform">
<Version>6.2.9</Version>
</PackageReference>
<Reference Include="MozillaVoiceSttClient, Version=1.0.0.0, Culture=neutral, processorArchitecture=AMD64">
<HintPath>..\packages\MozillaVoiceSttClient.0.9.0-alpha.5\lib\net46\MozillaVoiceSttClient.dll</HintPath>
<Reference Include="DeepSpeechClient, Version=1.0.0.0, Culture=neutral, processorArchitecture=AMD64">
<HintPath>..\packages\DeepSpeech.0.8.0\lib\net46\DeepSpeechClient.dll</HintPath>
</Reference>
</ItemGroup>
<ItemGroup>
@ -187,4 +187,4 @@
<Target Name="AfterBuild">
</Target>
-->
</Project>
</Project>

View file

@ -1,8 +1,8 @@
<Page
x:Class="MozillaVoiceSttUWP.MainPage"
x:Class="DeepSpeechUWP.MainPage"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:local="using:MozillaVoiceSttUWP"
xmlns:local="using:DeepSpeechUWP"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
mc:Ignorable="d"
@ -15,7 +15,7 @@
<RowDefinition Height="40" />
</Grid.RowDefinitions>
<StackPanel Grid.Row="0">
<TextBlock FontSize="30" FontWeight="Bold">MozillaVoiceStt UWP Demo</TextBlock>
<TextBlock FontSize="30" FontWeight="Bold">DeepSpeech UWP Demo</TextBlock>
<TextBlock FontSize="20" Margin="0 4 0 40">powered by Audio Graph API</TextBlock>
<TextBlock Margin="0 0 0 10">Select an audio for transcription:</TextBlock>
<Grid ColumnSpacing="10">

View file

@ -1,5 +1,5 @@
using MozillaVoiceSttClient.Interfaces;
using MozillaVoiceSttClient.Models;
using DeepSpeechClient.Interfaces;
using DeepSpeechClient.Models;
using System;
using System.Collections.Concurrent;
using System.Diagnostics;
@ -17,7 +17,7 @@ using Windows.Storage;
using Windows.UI.Xaml;
using Windows.UI.Xaml.Controls;
namespace MozillaVoiceSttUWP
namespace DeepSpeechUWP
{
[ComImport]
[Guid("5B0D3235-4DBA-4D44-865E-8F1D0E4FD04D")]
@ -34,8 +34,8 @@ namespace MozillaVoiceSttUWP
private StorageFile audioFile;
private DeviceInformation selectedInputDevice;
private DeviceInformationCollection inputDevices;
private IMozillaVoiceSttClient client;
private MozillaVoiceSttStream stream;
private IDeepSpeech client;
private DeepSpeechStream stream;
private MediaEncodingProfile encoding;
private AudioGraph graph;
@ -44,7 +44,7 @@ namespace MozillaVoiceSttUWP
public MainPage()
{
this.InitializeComponent();
InitMozillaVoiceStt();
InitDeepSpeech();
ListAudioInputDevices();
InitAudioGraph();
}
@ -61,14 +61,14 @@ namespace MozillaVoiceSttUWP
}
}
private void InitMozillaVoiceStt()
private void InitDeepSpeech()
{
string projectFolder = Directory.GetCurrentDirectory();
string modelsFolder = Path.Combine(projectFolder, "models");
string acousticModelPath = Path.Combine(modelsFolder, "deepspeech-0.8.0-models.pbmm");
string scorerPath = Path.Combine(modelsFolder, "deepspeech-0.8.0-models.scorer");
client = new MozillaVoiceSttClient.MozillaVoiceSttModel(acousticModelPath);
client = new DeepSpeechClient.DeepSpeech(acousticModelPath);
client.EnableExternalScorer(scorerPath);
}

View file

@ -14,7 +14,7 @@
<mp:PhoneIdentity PhoneProductId="a79d1931-db08-441d-b5ce-1c9cf6b1c8ff" PhonePublisherId="00000000-0000-0000-0000-000000000000"/>
<Properties>
<DisplayName>MozillaVoiceSttUWP</DisplayName>
<DisplayName>DeepSpeechUWP</DisplayName>
<PublisherDisplayName>erikz</PublisherDisplayName>
<Logo>Assets\StoreLogo.png</Logo>
</Properties>
@ -30,12 +30,12 @@
<Applications>
<Application Id="App"
Executable="$targetnametoken$.exe"
EntryPoint="MozillaVoiceSttUWP.App">
EntryPoint="DeepSpeechUWP.App">
<uap:VisualElements
DisplayName="MozillaVoiceSttUWP"
DisplayName="DeepSpeechUWP"
Square150x150Logo="Assets\Square150x150Logo.png"
Square44x44Logo="Assets\Square44x44Logo.png"
Description="MozillaVoiceSttUWP"
Description="DeepSpeechUWP"
BackgroundColor="transparent">
<uap:DefaultTile Wide310x150Logo="Assets\Wide310x150Logo.png"/>
<uap:SplashScreen Image="Assets\SplashScreen.png" />

View file

@ -5,11 +5,11 @@ using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("MozillaVoiceSttUWP")]
[assembly: AssemblyTitle("DeepSpeechUWP")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("MozillaVoiceSttUWP")]
[assembly: AssemblyProduct("DeepSpeechUWP")]
[assembly: AssemblyCopyright("Copyright © 2020")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

View file

@ -20,7 +20,7 @@ def main(args):
parser.add_argument('--model', required=True,
help='Path to directory that contains all model files (output_graph and scorer)')
parser.add_argument('--stream', required=False, action='store_true',
help='To use mozilla voice stt streaming interface')
help='To use deepspeech streaming interface')
args = parser.parse_args()
if args.stream is True:
print("Opening mic for streaming")
@ -52,7 +52,7 @@ def main(args):
logging.debug("Saving Transcript @: %s" % waveFile.rstrip(".wav") + ".txt")
for i, segment in enumerate(segments):
# Run mozilla voice stt on the chunk that just completed VAD
# Run deepspeech on the chunk that just completed VAD
logging.debug("Processing chunk %002d" % (i,))
audio = np.frombuffer(segment, dtype=np.int16)
output = wavTranscriber.stt(model_retval[0], audio, sample_rate)
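
The "chunk that just completed VAD" comes from webrtcvad deciding where speech stops. A simplified, hedged sketch of that segmentation (the example itself uses a ring buffer; paths are placeholders):

```python
# Sketch: split a 16 kHz mono 16-bit WAV into voiced segments with webrtcvad
# and transcribe each one. Much simpler than the example's ring-buffer logic.
import wave

import numpy as np
import webrtcvad
import deepspeech

model = deepspeech.Model('deepspeech-0.8.0-models.pbmm')  # placeholder
vad = webrtcvad.Vad(1)  # aggressiveness 0-3

with wave.open('audio.wav', 'rb') as wav:                 # placeholder
    pcm = wav.readframes(wav.getnframes())

FRAME = int(16000 * 0.03) * 2  # 30 ms frames, 2 bytes per sample
segments, current = [], b''
for i in range(0, len(pcm) - FRAME, FRAME):
    frame = pcm[i:i + FRAME]
    if vad.is_speech(frame, 16000):
        current += frame
    elif current:                 # silence after speech: close the segment
        segments.append(current)
        current = b''
if current:
    segments.append(current)

for i, segment in enumerate(segments):
    audio = np.frombuffer(segment, dtype=np.int16)
    print('chunk %02d:' % i, model.stt(audio))
```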

View file

@ -93,7 +93,7 @@ class App(QMainWindow):
def __init__(self):
super().__init__()
self.title = 'Mozilla Voice STT Transcriber'
self.title = 'Deepspeech Transcriber'
self.left = 10
self.top = 10
self.width = 480
@ -114,7 +114,7 @@ class App(QMainWindow):
self.browseButton = QPushButton('Browse', self)
self.browseButton.setToolTip('Select a wav file')
self.modelsButton = QPushButton('Browse', self)
self.modelsButton.setToolTip('Select models folder')
self.modelsButton.setToolTip('Select deepspeech models folder')
self.transcribeWav = QPushButton('Transcribe Wav', self)
self.transcribeWav.setToolTip('Start Wav Transcription')
self.openMicrophone = QPushButton('Start Speaking', self)
@ -205,7 +205,7 @@ class App(QMainWindow):
@pyqtSlot()
def models_on_click(self):
logging.debug('Models Browse Button clicked')
self.dirName = QFileDialog.getExistingDirectory(self, "Select models directory")
self.dirName = QFileDialog.getExistingDirectory(self, "Select deepspeech models directory")
if self.dirName:
self.modelsBox.setText(self.dirName)
logging.debug(self.dirName)
@ -309,10 +309,10 @@ class App(QMainWindow):
@param Context: Is a tuple containing three objects
1. Speech samples, sctx
2. subprocess handle
3. Mozilla Voice STT model object
3. Deepspeech model object
'''
def micWorker(self, context, progress_callback):
# Mozilla Voice STT Streaming will be run from this method
# Deepspeech Streaming will be run from this method
logging.debug("Recording from your microphone")
while (not self.openMicrophone.isChecked()):
data = context[1].stdout.read(512)
@ -343,7 +343,7 @@ class App(QMainWindow):
self.show()
def wavWorker(self, waveFile, progress_callback):
# Mozilla Voice STT will be run from this method
# Deepspeech will be run from this method
logging.debug("Preparing for transcription...")
inference_time = 0.0
@ -353,7 +353,7 @@ class App(QMainWindow):
logging.debug("Saving Transcript @: %s" % waveFile.rstrip(".wav") + ".txt")
for i, segment in enumerate(segments):
# Run mozilla voice stt on the chunk that just completed VAD
# Run deepspeech on the chunk that just completed VAD
logging.debug("Processing chunk %002d" % (i,))
audio = np.frombuffer(segment, dtype=np.int16)
output = wavTranscriber.stt(self.model[0], audio, sample_rate)

View file

@ -1,3 +1,3 @@
mozilla_voice_stt==0.9.0a5
deepspeech==0.8.0
webrtcvad
pyqt5

View file

@ -8,7 +8,7 @@ pushd ${THIS}
source ../tests.sh
pip install --user $(get_python_wheel_url "$1")
pip install --user -r <(grep -v mozilla_voice_stt requirements.txt)
pip install --user -r <(grep -v deepspeech requirements.txt)
python audioTranscript_cmd.py \
--audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \

View file

@ -11,24 +11,24 @@ Load the pre-trained model into the memory
@param scorer: Scorer file
@Retval
Returns a list [MozillaVoiceStt Object, Model Load Time, Scorer Load Time]
Returns a list [DeepSpeech Object, Model Load Time, Scorer Load Time]
'''
def load_model(models, scorer):
model_load_start = timer()
mvs = mozilla_voice_stt.Model(models)
ds = mozilla_voice_stt.Model(models)
model_load_end = timer() - model_load_start
logging.debug("Loaded model in %0.3fs." % (model_load_end))
scorer_load_start = timer()
mvs.enableExternalScorer(scorer)
ds.enableExternalScorer(scorer)
scorer_load_end = timer() - scorer_load_start
logging.debug('Loaded external scorer in %0.3fs.' % (scorer_load_end))
return [mvs, model_load_end, scorer_load_end]
return [ds, model_load_end, scorer_load_end]
'''
Run Inference on input audio file
@param mvs: mozilla voice stt object
@param ds: Deepspeech object
@param audio: Input audio for running inference on
@param fs: Sample rate of the input audio file
@ -36,14 +36,14 @@ Run Inference on input audio file
Returns a list [Inference, Inference Time, Audio Length]
'''
def stt(mvs, audio, fs):
def stt(ds, audio, fs):
inference_time = 0.0
audio_length = len(audio) * (1 / fs)
# Run mozilla voice stt
# Run Deepspeech
logging.debug('Running inference...')
inference_start = timer()
output = mvs.stt(audio)
output = ds.stt(audio)
inference_end = timer() - inference_start
inference_time += inference_end
logging.debug('Inference took %0.3fs for %0.3fs audio file.' % (inference_end, audio_length))
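
Given those return conventions, a hedged sketch of how callers consume this module (the WAV path is a placeholder; `wavTranscriber` is the module shown in this diff):

```python
import wave

import numpy as np
import wavTranscriber  # the module this diff modifies

# load_model returns [model, model_load_time, scorer_load_time]
ds, model_load_time, scorer_load_time = wavTranscriber.load_model(
    'deepspeech-0.8.0-models.pbmm', 'deepspeech-0.8.0-models.scorer')

with wave.open('audio.wav', 'rb') as wav:
    fs = wav.getframerate()
    audio = np.frombuffer(wav.readframes(wav.getnframes()), dtype=np.int16)

# stt returns [transcript, inference_time, audio_length]
transcript, inference_time, audio_length = wavTranscriber.stt(ds, audio, fs)
print(transcript)
```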

View file

@ -1,7 +1,7 @@
# Web Microphone Websocket
This is an example of a ReactJS web application streaming microphone audio from the browser
to a NodeJS server and transmitting the Mozilla Voice STT results back to the browser.
to a NodeJS server and transmitting the DeepSpeech results back to the browser.
#### Download the pre-trained model (1.8GB):
@ -26,4 +26,4 @@ yarn start
```
node server.js
```
```

View file

@ -8,7 +8,7 @@
"@testing-library/user-event": "^7.1.2",
"chai": "^4.2.0",
"chai-http": "^4.3.0",
"@mozilla-voice/stt": "^0.9.0-alpha.5",
"deepspeech": "^0.8.0",
"defaults": "^1.0.3",
"mocha": "^6.1.4",
"node-vad": "^1.1.4",

View file

@ -24,7 +24,7 @@
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
<title>Mozilla Voice STT - Web Microphone Websocket Example</title>
<title>DeepSpeech - Web Microphone Websocket Example</title>
</head>
<body>
<noscript>You need to enable JavaScript to run this app.</noscript>

View file

@ -1,9 +1,9 @@
const http = require('http');
const socketIO = require('socket.io');
const mozillaVoiceStt = require('@mozilla-voice/stt');
const DeepSpeech = require('deepspeech');
const VAD = require('node-vad');
let DEEPSPEECH_MODEL = __dirname + '/deepspeech-0.8.0-models'; // path to english model directory
let DEEPSPEECH_MODEL = __dirname + '/deepspeech-0.8.0-models'; // path to deepspeech english model directory
let SILENCE_THRESHOLD = 200; // how many milliseconds of inactivity before processing the audio
@ -18,7 +18,7 @@ const vad = new VAD(VAD_MODE);
function createModel(modelDir) {
let modelPath = modelDir + '.pbmm';
let scorerPath = modelDir + '.scorer';
let model = new mozillaVoiceStt.Model(modelPath);
let model = new DeepSpeech.Model(modelPath);
model.enableExternalScorer(scorerPath);
return model;
}
@ -222,4 +222,4 @@ app.listen(SERVER_PORT, 'localhost', () => {
console.log('Socket server listening on:', SERVER_PORT);
});
module.exports = app;
module.exports = app;
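
The `SILENCE_THRESHOLD` pattern above (finalize the stream once enough milliseconds pass without voiced audio) is binding-agnostic. A hedged Python sketch of the same idea, with the frame source left as a hypothetical callback:

```python
# Sketch of server.js's silence-threshold pattern: feed voiced frames into a
# stream and finalize once SILENCE_THRESHOLD seconds pass without speech.
# `on_chunk` is a hypothetical callback receiving 20 ms 16 kHz mono frames.
import time

import numpy as np
import webrtcvad
import deepspeech

SILENCE_THRESHOLD = 0.2  # seconds, mirroring the 200 ms above

model = deepspeech.Model('deepspeech-0.8.0-models.pbmm')  # placeholder
vad = webrtcvad.Vad(3)
stream = model.createStream()
last_voice = time.monotonic()

def on_chunk(chunk: bytes) -> None:
    global stream, last_voice
    if vad.is_speech(chunk, 16000):
        stream.feedAudioContent(np.frombuffer(chunk, dtype=np.int16))
        last_voice = time.monotonic()
    elif time.monotonic() - last_voice > SILENCE_THRESHOLD:
        print(stream.finishStream())
        stream = model.createStream()  # start fresh for the next utterance
```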