This commit is contained in:
David Flanagan 2016-05-06 16:12:45 -07:00
Родитель d87fe54b25
Коммит 9339e7c43d
9 изменённых файлов: 753 добавлений и 1 удалений

4
.gitignore поставляемый
Просмотреть файл

@ -31,3 +31,7 @@ node_modules
# Optional REPL history
.node_repl_history
# Don't check in the config file or uploads dir
sentences.txt
uploads

Просмотреть файл

@ -1,2 +1,13 @@
# speecher
A webapp for collecting speech samples for voice recognition testing and training
This is a simple webapp for collecting speech samples for voice
recognition testing and training.
Running it should be as simple as issuing these commands on your
server:
```
> git clone git@github.com:mozilla/speecher.git
> cd speecher
> npm install
> node speecher.js
```

19
package.json Normal file
Просмотреть файл

@ -0,0 +1,19 @@
{
"name": "speecher",
"version": "1.0.0",
"description": "collect recorded speech samples from users",
"repository" : {
"type" : "git",
"url" : "https://github.com/mozilla/speecher"
},
"main": "speecher.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "David Flanagan",
"license": "MPL-2.0",
"dependencies": {
"body-parser": "^1.15.0",
"express": "^4.13.4"
}
}

105
public/audiorecorder.js Normal file
Просмотреть файл

@ -0,0 +1,105 @@
//
// This is a simple class for recording mono audio from a getUserMedia()
// microphone stream and converting it to a WAV-format blob. To use it, get a
// microphone stream with getUserMedia(), then pass that stream to the
// AudioRecorder() constructor. To start recording call the start() method. To
// stop recording, call the stop() method. The stop method returns a blob in
// WAV format. All the audio data is held in memory, in uncompressed form, and
// requires about 192kb of memory for each second of audio, so this class is
// not suitable for long recordings.
//
// By default, audio is collected in batches of 1024 samples (at about 40
// batches per second, though this depends on the platform's sampling rate).
// You can change the batch size by passing a different value as the optional
// second argument to the constructor. Note, however, that the batch size must
// be a power of two. If you set the onbatch property of an audiorecorder
// object then each batch (a Float32Array) will be passed to that function
// when it is collected.
//
// This code was inspired by, but simplified from this blog post
// http://typedarray.org/from-microphone-to-wav-with-getusermedia-and-web-audio/
//
(function(exports) {
  'use strict';

  // Create a recorder for the given getUserMedia() microphone stream.
  // batchSize (optional) is the ScriptProcessor buffer size; it must be
  // a power of two and defaults to 1024 samples.
  function AudioRecorder(microphone, batchSize) {
    this.context = new AudioContext();
    this.source = this.context.createMediaStreamSource(microphone);
    this.batchSize = batchSize || 1024;
    // In Firefox we don't need the one output channel, but we need
    // it for Chrome, even though it is unused.
    this.processor = this.context.createScriptProcessor(this.batchSize, 1, 1);
    this.batches = []; // batches of sample data from the script processor
    // Each time we get a batch of data, this function will be called.
    // We copy the typed array (the browser may reuse the underlying
    // buffer between events) and save it. We end up with a long
    // array of typed arrays.
    this.processor.addEventListener('audioprocess', function(e) {
      var data = e.inputBuffer.getChannelData(0);
      var copy = new Float32Array(data);
      this.batches.push(copy);
      if (this.onbatch) { // If the user has defined a callback, call it
        this.onbatch(copy);
      }
    }.bind(this));
  }

  // The microphone is live the entire time. To start recording we
  // connect the microphone stream to the processor node.
  AudioRecorder.prototype.start = function() {
    this.source.connect(this.processor);
    // For Chrome we also have to connect the processor to the
    // destination even though the processor does not produce any output
    this.processor.connect(this.context.destination);
  };

  // To stop recording, disconnect the microphone.
  // Then take the data we stored and convert to a WAV format blob.
  AudioRecorder.prototype.stop = function() {
    this.source.disconnect();
    this.processor.disconnect();
    var batches = this.batches;
    this.batches = [];
    return makeWAVBlob(batches, this.batchSize, this.context.sampleRate);
  };

  // Convert the sound samples we've collected into a WAV file:
  // a 44-byte PCM header followed by 16-bit little-endian samples.
  function makeWAVBlob(batches, batchSize, sampleRate) {
    var numSamples = batches.length * batchSize;
    // 44 byte WAV header plus two bytes per sample
    var blobSize = numSamples * 2 + 44;
    var bytes = new ArrayBuffer(blobSize);
    var view = new DataView(bytes);
    // Create WAV file header
    view.setUint32(0, 0x46464952, true);    // 'RIFF'
    view.setUint32(4, blobSize - 8, true);  // Size of rest of file
    view.setUint32(8, 0x45564157, true);    // 'WAVE'
    view.setUint32(12, 0x20746d66, true);   // 'fmt '
    view.setUint32(16, 16, true);           // 16 bytes of fmt view
    view.setUint16(20, 1, true);            // Audio is in PCM format
    view.setUint16(22, 1, true);            // One-channel (mono)
    view.setUint32(24, sampleRate, true);   // Samples per second
    view.setUint32(28, 2*sampleRate, true); // Bytes per second
    view.setUint16(32, 2, true);            // Block size
    view.setUint16(34, 16, true);           // Bits per sample
    view.setUint32(36, 0x61746164, true);   // 'data'
    view.setUint32(40, numSamples*2, true); // How many data bytes
    // Copy the samples to the file now
    var offset = 44;
    for(var i = 0; i < batches.length; i++) {
      var batch = batches[i];
      for(var j = 0; j < batch.length; j++) {
        // Clamp to [-1, 1] before scaling to 16 bits: Web Audio samples
        // can exceed the nominal range, and an out-of-range value passed
        // to setInt16() would wrap around, turning loud input into harsh
        // clicks instead of clean clipping.
        var floatSample = Math.max(-1, Math.min(1, batch[j]));
        // Negative samples scale to -32768..0, positive to 0..32767
        var intSample = floatSample < 0 ? floatSample * 0x8000
                                        : floatSample * 0x7FFF;
        view.setInt16(offset, intSample, true);
        offset += 2;
      }
    }
    return new Blob([bytes], { type: 'audio/wav' });
  }

  exports.AudioRecorder = AudioRecorder;
}(window));

124
public/index.html Normal file
Просмотреть файл

@ -0,0 +1,124 @@
<html>
<head>
<!-- XXX: do I need a viewport meta tag here? -->
<script defer src="audiorecorder.js"></script>
<script defer src="index.js"></script>
<style>
body {
font-size: 18px;
font-family: sans-serif
}
.screen p {
margin: 8px;
}
#consent-screen button {
font-size: 18px;
}
#error-message {
margin: 15px;
font-style: italic;
}
#instructions {
margin: 10px;
font-style: italic;
}
#sentence {
font-size: larger;
font-weight: bold;
margin: 20px;
}
#canvas {
width: 300px;
height: 300px;
}
#canvas.disabled {
background: center center no-repeat url(record.png);
opacity: 0.5;
}
#canvas.stopped {
background: center center no-repeat url(record.png);
}
#canvas.recording {
background: center center no-repeat url(stop.png);
}
#player {
margin: 10px;
}
#playback-screen button {
font-size: 30px;
margin: 10px;
padding: 10px;
}
#playback-screen button span.small {
font-size: 18px;
}
</style>
</head>
<body>
<div id="consent-screen" class="screen" hidden>
<p>
This website is used by Mozilla engineering to collect speech
samples to test and train our speech recognition engine. It
collects only speech recordings and does not associate them with
any personally identifying information.
</p>
<p>
By clicking the "I agree" button below, you are agreeing to
donate audio recordings of your voice and to
<a href="https://creativecommons.org/publicdomain/zero/1.0/">
place them in the public domain</a>. This means that you agree
to <em>waive all rights to the recordings worldwide under copyright
and database law, including moral and publicity rights and all
related and neighboring rights</em>.
</p>
<button id="disagree">I Disagree</button>
<button id="agree">I Agree</button>
</div>
<div id="record-screen" class="screen" hidden>
<div id="instructions">
Tap the microphone and read this sentence after the beep:
</div>
<div id="sentence"></div>
<canvas id="canvas" class="stopped" width=300 height=300></canvas>
</div>
<div id="playback-screen" class="screen" hidden>
<audio id="player" controls autoplay></audio><br/>
<button id="upload">
Upload audio<br/>
<span class="small">and place it in the public domain</span>
</button>
<br/>
<button id="discard">
Discard audio<br/>
<span class="small">and record another sentence</span>
</button>
<br/>
</div>
<div id="error-screen" class="screen" hidden>
<p>
This application cannot run because:
</p>
<p id="error-message"></p>
<p>Reload if you'd like to try again</p>
</div>
</body>
</html>

355
public/index.js Normal file
Просмотреть файл

@ -0,0 +1,355 @@
// The microphone stream we get from getUserMedia
var microphone;

// The sentences we want the user to read and their corresponding
// server-side directories that we upload them to. We fetch these
// from the server. See getSentences() and parseSentences().
var sentences = [], directories = [];

// The sentence we're currently recording, and its directory.
// These are picked at random in recordingScreen.show()
var currentSentence, currentDirectory;

// These are configurable constants:
var SILENCE_THRESHOLD = 0.1; // How quiet does it have to be to stop recording?
var SILENCE_DURATION = 1500; // For how many milliseconds?
var LOUD_THRESHOLD = 0.75;   // How loud shows as red in the levels
var BATCHSIZE = 2048;        // How many samples per recorded batch
var RECORD_BEEP_HZ = 800;    // Frequency and duration of beeps
var RECORD_BEEP_MS = 200;
var STOP_BEEP_HZ = 400;
var STOP_BEEP_MS = 300;

// These are some things that can go wrong:
var ERR_NO_CONSENT = 'You did not consent to recording. ' +
    'You must click the "I Agree" button in order to use this website.';
var ERR_NO_GUM = 'Your browser does not support audio recording. ' +
    'Try using a recent version of Firefox or Chrome.';
var ERR_NO_MIC = 'You did not allow this website to use the microphone. ' +
    'The website needs the microphone to record your voice.';
var ERR_UPLOAD_FAILED = 'Uploading your recording to the server failed. ' +
    'This may be a temporary problem. Please reload and try again.';
// This is the program startup sequence: obtain consent, acquire the
// microphone, fetch and parse the sentence list, then start the UI.
// Each step returns a Promise (or a plain value, which .then() wraps);
// a failure at any step short-circuits to displayErrorMessage().
getConsent()
  .then(getMicrophone)
  .then(rememberMicrophone)
  .then(getSentences)
  .then(parseSentences)
  .then(initializeAndRun)
  .catch(displayErrorMessage);
// Ask the user to agree to place the recordings in the public domain.
// They only have to agree once; the decision is persisted in localStorage.
// Resolves on agreement, rejects with ERR_NO_CONSENT on refusal.
function getConsent() {
  return new Promise(function(resolve, reject) {
    // A previously-stored consent lets us skip the screen entirely
    if (localStorage.consentGiven) {
      resolve();
      return;
    }
    // No stored consent: reveal the consent screen and wait for a click
    var screen = document.querySelector('#consent-screen');
    screen.hidden = false;
    document.querySelector('#agree').onclick = function() {
      localStorage.consentGiven = true; // Remember this consent
      screen.hidden = true;
      resolve();
    };
    document.querySelector('#disagree').onclick = function() {
      screen.hidden = true;
      reject(ERR_NO_CONSENT);
    };
  });
}
// Use getUserMedia() to get access to the user's microphone.
// This can fail because the browser does not support it, or
// because the user does not give permission.
function getMicrophone() {
  return new Promise(function(resolve, reject) {
    // Reject the promise with a 'permission denied' error code
    function deny() {
      reject(ERR_NO_MIC);
    }
    // Prefer the modern promise-based API, then fall back to the
    // older callback-based APIs (unprefixed, webkit, moz), in order.
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      navigator.mediaDevices.getUserMedia({audio: true}).then(resolve, deny);
      return;
    }
    if (navigator.getUserMedia) {
      navigator.getUserMedia({audio: true}, resolve, deny);
      return;
    }
    if (navigator.webkitGetUserMedia) {
      navigator.webkitGetUserMedia({audio: true}, resolve, deny);
      return;
    }
    if (navigator.mozGetUserMedia) {
      navigator.mozGetUserMedia({audio: true}, resolve, deny);
      return;
    }
    reject(ERR_NO_GUM); // No form of getUserMedia is available
  });
}
// Stash the microphone audio stream in the module-level `microphone`
// global so later startup steps and the recording screen can use it.
function rememberMicrophone(micStream) {
  microphone = micStream;
}
// Download sentences.json, which tells us which sentences to ask the
// user to read. Resolves to the parsed JSON body.
function getSentences() {
  return fetch('sentences.json').then(function(response) {
    return response.json();
  });
}
// Split the directory-to-sentence mapping from the server into the two
// parallel module-level arrays (directories[] and sentences[]) that the
// rest of the app indexes in lockstep.
function parseSentences(directoryToSentenceMap) {
  for (var directory in directoryToSentenceMap) {
    directories.push(directory);
    sentences.push(directoryToSentenceMap[directory]);
  }
}
// If anything goes wrong in the app startup sequence, this function
// is called to show the user what went wrong: it hides the consent
// screen, reveals the error screen, and fills in the message text.
function displayErrorMessage(error) {
  var consentScreen = document.querySelector('#consent-screen');
  var errorScreen = document.querySelector('#error-screen');
  consentScreen.hidden = true;
  errorScreen.hidden = false;
  document.querySelector('#error-message').textContent = error;
}
// Once the async initialization is complete, this is where the
// program really starts. It initializes the recording and playback
// screens, and sets up event handlers to switch back and forth between
// those screens until the user gets tired of making recordings.
function initializeAndRun() {
  // Wrap the two screen elements in the objects that encapsulate
  // their behavior.
  var recordElement = document.querySelector('#record-screen');
  var playbackElement = document.querySelector('#playback-screen');
  var recordScreen = new RecordingScreen(recordElement, microphone);
  var playbackScreen = new PlaybackScreen(playbackElement);

  // Display the recording screen. When needNewSentence is true (or we
  // have no sentence yet) pick a random sentence/directory pair;
  // otherwise keep the current sentence for another take.
  function showRecordingScreen(needNewSentence) {
    if (needNewSentence || !currentSentence) {
      var index = Math.floor(Math.random() * sentences.length);
      currentSentence = sentences[index];
      currentDirectory = directories[index];
    }
    // Hide the playback screen (releasing its audio) if it was shown
    playbackScreen.hide();
    recordScreen.show(currentSentence);
  }

  // POST a finished recording to the server; on any failure, tear down
  // both screens and show the upload-failed error message.
  function uploadRecording(directory, recording) {
    fetch('/upload/' + directory, { method: 'POST', body: recording })
      .then(function(response) {
        if (response.status !== 200) {
          playbackScreen.hide();
          recordScreen.hide();
          displayErrorMessage(ERR_UPLOAD_FAILED + ' ' + response.status +
                              ' ' + response.statusText);
        }
      })
      .catch(function() {
        playbackScreen.hide();
        recordScreen.hide();
        displayErrorMessage(ERR_UPLOAD_FAILED);
      });
  }

  // When a recording is complete, hand the blob to the playback screen
  recordElement.addEventListener('record', function(event) {
    recordScreen.hide();
    playbackScreen.show(event.detail);
  });

  // 'Upload' sends the take to the server and moves on to a new sentence
  playbackElement.addEventListener('upload', function(event) {
    uploadRecording(currentDirectory, event.detail);
    showRecordingScreen(true);
  });

  // 'Discard' returns to the recording screen for another take of the
  // same sentence
  playbackElement.addEventListener('discard', function() {
    showRecordingScreen(false);
  });

  // Finally, start the app off by displaying the recording screen
  showRecordingScreen(true);
}
// The RecordingScreen object has show() and hide() methods and fires
// a 'record' event on its DOM element when a recording has been made.
// The event's detail property carries the recorded WAV blob.
function RecordingScreen(element, microphone) {
  this.element = element;
  // Display the screen, showing the sentence the user should read
  this.show = function(sentence) {
    this.element.querySelector('#sentence').textContent = sentence;
    this.element.hidden = false;
  };
  this.hide = function() {
    this.element.hidden = true;
  };
  // This allows us to record audio from the microphone stream.
  // See audiorecorder.js
  var recorder = new AudioRecorder(microphone, BATCHSIZE);
  // Most of the state for this class is hidden away here in the constructor
  // and is not exposed outside of the class.
  // The main part of the recording screen is this canvas object
  // that displays a microphone icon, acts as a recording level indicator
  // and responds to clicks to start and stop recording
  var canvas = element.querySelector('canvas');
  var context = canvas.getContext('2d');
  var recording = false; // Are we currently recording?
  var lastSoundTime;     // When was the last time we heard a sound?
  // The canvas responds to clicks to start and stop recording.
  // It is 'disabled' (clicks ignored) while a beep is playing.
  canvas.addEventListener('click', function() {
    // Ignore clicks when we're not ready
    if (canvas.className === 'disabled')
      return;
    if (recording) {
      stopRecording();
    }
    else {
      startRecording();
    }
  });
  // Beep first and only start the recorder afterwards, so the
  // beep itself is not captured in the recording.
  function startRecording() {
    if (!recording) {
      recording = true;
      canvas.className = 'disabled'; // disabled 'till after the beep
      beep(RECORD_BEEP_HZ, RECORD_BEEP_MS).then(function() {
        lastSoundTime = performance.now();
        recorder.start();
        canvas.className = 'recording';
      });
    }
  }
  // Stop the recorder immediately (before the beep, so the stop beep
  // is not recorded), then notify listeners with a 'record' event
  // whose detail is the WAV blob.
  function stopRecording() {
    if (recording) {
      recording = false;
      canvas.className = 'disabled'; // disabled 'till after the beep
      var blob = recorder.stop();
      // Beep to tell the user the recording is done
      beep(STOP_BEEP_HZ, STOP_BEEP_MS).then(function() {
        canvas.className = 'stopped';
      });
      // Erase the canvas
      displayLevel(0);
      // Broadcast an event containing the recorded blob
      element.dispatchEvent(new CustomEvent('record', {
        detail: blob
      }));
    }
  }
  // This function is called each time the recorder receives a batch of
  // audio data. We use this to display recording levels and also to
  // detect the silence (SILENCE_DURATION ms below SILENCE_THRESHOLD)
  // that ends a recording.
  recorder.onbatch = function batchHandler(batch) {
    // What's the highest amplitude for this batch? (Ignoring negative values)
    var max = batch.reduce(function(max, val) { return val > max ? val : max; },
                           0.0);
    // If we haven't heard anything in a while, it may be time to
    // stop recording
    var now = performance.now();
    if (max < SILENCE_THRESHOLD) {
      if (now - lastSoundTime > SILENCE_DURATION) {
        stopRecording();
        return;
      }
    }
    else {
      lastSoundTime = now;
    }
    // Graphically display this recording level
    displayLevel(max);
  };
  // A WebAudio utility to do simple beeps. Returns a promise that
  // resolves when the beep finishes after `duration` milliseconds.
  // NOTE(review): this creates (and closes) a fresh AudioContext per
  // beep rather than reusing the recorder's context.
  function beep(hertz, duration) {
    return new Promise(function(resolve, reject) {
      var context = new AudioContext();
      var oscillator = context.createOscillator();
      oscillator.connect(context.destination);
      oscillator.frequency.value = hertz;
      oscillator.start();
      setTimeout(function() {
        oscillator.stop();
        oscillator.disconnect();
        context.close();
        resolve();
      }, duration);
    });
  }
  // Graphically display the recording level as a colored ring
  // around the microphone icon.
  function displayLevel(level) {
    requestAnimationFrame(function() {
      // Clear the canvas
      context.clearRect(0, 0, canvas.width, canvas.height);
      // Do nothing if the level is low
      if (level < SILENCE_THRESHOLD) return;
      // Otherwise, draw a circle whose radius and color depends on volume.
      // The 100 is because we're using a microphone icon that is 95x95
      var radius = 50 + level * (canvas.width-100) / 2;
      context.lineWidth = radius/5;
      context.beginPath();
      context.arc(canvas.width/2, canvas.height/2, radius, 0, 2*Math.PI);
      context.strokeStyle = (level > LOUD_THRESHOLD) ? 'red' : 'green';
      context.stroke();
    });
  }
}
// This simple class encapsulates the playback screen. It has
// show and hide methods, and fires 'upload' and 'discard' events
// depending on which button is clicked. The 'upload' event's detail
// property carries the recording blob that show() was given.
function PlaybackScreen(element) {
  this.element = element;
  this.player = element.querySelector('#player');
  // Display the screen and start playing the recording (a Blob)
  this.show = function(recording) {
    this.element.hidden = false;
    this.recording = recording;
    this.player.src = URL.createObjectURL(recording);
  };
  // Hide the screen and release the recording and its object URL
  this.hide = function() {
    this.element.hidden = true;
    this.recording = null;
    if (this.player.src) {
      URL.revokeObjectURL(this.player.src);
      // `delete this.player.src` would be a no-op here (src is an
      // accessor on the element's prototype, not an own property),
      // leaving the blob attached. Remove the attribute instead so
      // load() actually detaches the old recording.
      this.player.removeAttribute('src');
      this.player.load();
    }
  };
  element.querySelector('#upload').addEventListener('click', function() {
    element.dispatchEvent(new CustomEvent('upload', {detail: this.recording}));
  }.bind(this));
  element.querySelector('#discard').addEventListener('click', function() {
    element.dispatchEvent(new CustomEvent('discard'));
  });
}

Двоичные данные
public/record.png Normal file

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 2.1 KiB

Двоичные данные
public/stop.png Normal file

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 2.4 KiB

134
speecher.js Normal file
Просмотреть файл

@ -0,0 +1,134 @@
var fs = require('fs');
var express = require('express');
var bodyParser = require('body-parser');

var PORT = 80;                          // What port to listen on
var uploaddir = __dirname + '/uploads'; // Upload directory
var directoryToSentence = {};   // dirname to sentence
var directoryToFileNumber = {}; // dirname to next file number to use
var directories = [];           // all the directories

// Here's the program: synchronously parse sentences.txt and prepare
// the upload directories, then start handling HTTP requests.
readConfigFile();
startServer();
/*
 * Synchronous startup stuff before we start handling requests.
 * This reads the sentences.txt configuration file, creates directories
 * as needed, and figures out the next file number in each directory.
 * Exits the process if the config file is unreadable or defines no
 * sentences.
 */
function readConfigFile() {
  var configFile = __dirname + '/sentences.txt';
  try {
    fs.readFileSync(configFile, 'utf8')
      .trim()
      .split('\n')
      .forEach(function(line) {
        var trimmed = line.trim();
        if (trimmed === '' || trimmed[0] === '#') {
          return; // ignore blanks and comments
        }
        // Each line is a directory name followed by the sentence text
        var match = trimmed.match(/^(\w+)\s+(.*)$/);
        if (!match) {
          console.warn('Ignoring mis-formatted line in sentences.txt:',
                       line);
          return;
        }
        var directory = match[1];
        var sentence = match[2];
        if (directory in directoryToSentence) {
          console.warn('Ignoring line in sentences.txt because directory',
                       'is already in use:', line);
          return;
        }
        directoryToSentence[directory] = sentence;
        directories.push(directory);
      });
  }
  catch(e) {
    console.error('Error reading configuration file:', configFile,
                  '\n', e);
    process.exit(1);
  }
  if (directories.length === 0) {
    console.error('No sentences defined in sentences.txt. Exiting.');
    process.exit(1);
  }
  directories.forEach(function(directory) {
    try {
      var dirname = uploaddir + '/' + directory;
      if (fs.existsSync(dirname)) {
        // Directory exists. Go find out what the next filenumber is.
        // The pattern is anchored: an unanchored /\d+\.wav/ would also
        // match files like "backup-1.wav", whose parseInt() is NaN;
        // NaN poisons the numeric sort and can reset numbering to 0,
        // which would overwrite existing recordings.
        var filenumbers =
          fs.readdirSync(dirname)                                  // all files
            .filter(function(f) { return f.match(/^\d+\.wav$/); }) // NNNN.wav only
            .map(function(f) { return parseInt(f, 10); })          // to number
            .sort(function(a,b) { return b - a; });                // largest first
        // An empty directory gives undefined + 1 === NaN; fall back to 0
        directoryToFileNumber[directory] = (filenumbers[0] + 1) || 0;
      }
      else {
        // Directory does not exist. Create it and start with file 0
        fs.mkdirSync(dirname);
        directoryToFileNumber[directory] = 0;
      }
    }
    catch(e) {
      // This can happen, for example, if dirname is a file instead of
      // a directory or if there is a directory that is not readable
      console.warn('Error verifying directory', dirname,
                   'Ignoring that directory', e);
    }
  });
}
/*
 * Configure and start the Express HTTP server: static files from
 * public/, the sentence list as JSON, and WAV-file uploads.
 */
function startServer() {
  var app = express();
  // Serve static files in the public/ directory
  app.use(express.static('public'));
  // When the client issues a GET request for the list of sentences
  // create that dynamically from the data we parsed from the config file
  app.get('/sentences.json', function(request, response) {
    response.send(directoryToSentence);
  });
  // When we get POSTs, handle the body like this
  app.use(bodyParser.raw({
    type: 'audio/wav',
    limit: 2*1024*1024 // max file size 2mb
  }));
  // This is how we handle WAV file uploads
  app.post('/upload/:dir', function(request, response) {
    var dir = request.params.dir;
    var filenumber = directoryToFileNumber[dir];
    if (filenumber === undefined) { // Only known directories accept uploads
      response.status(404).send('Bad directory');
      return;
    }
    directoryToFileNumber[dir] = filenumber + 1;
    // Zero-pad the file number to four digits: 0000.wav, 0001.wav, ...
    var filename = String(filenumber);
    while(filename.length < 4) filename = '0' + filename;
    var path = uploaddir + '/' + dir + '/' + filename + '.wav';
    fs.writeFile(path, request.body, {}, function(err) {
      if (err) {
        // Report write failures with a 500 so the client's status
        // check can surface the error, instead of acknowledging an
        // upload that was never saved.
        console.warn(err);
        response.status(500).send('Failed to save your recording');
      }
      else {
        console.log('wrote file:', path);
        response.send('Thanks for your contribution!');
      }
    });
  });
  app.listen(PORT, function () {
    console.log('Listening on port', PORT);
  });
}