diff --git a/.gitignore b/.gitignore
index e920c16..67e4f6e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,7 @@ node_modules
# Optional REPL history
.node_repl_history
+
+# Don't check in the config file or uploads dir
+sentences.txt
+uploads
diff --git a/README.md b/README.md
index f26d528..4c75278 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,13 @@
# speecher
-A webapp for collecting speech samples for voice recognition testing and training
+This is a simple webapp for collecting speech samples for voice
+recognition testing and training.
+
+Running it should be as simple as issuing these commands on your
+server:
+
+```
+> git clone git@github.com:mozilla/speecher.git
+> cd speecher
+> npm install
+> node speecher.js
+```
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..259aa04
--- /dev/null
+++ b/package.json
@@ -0,0 +1,19 @@
+{
+ "name": "speecher",
+ "version": "1.0.0",
+ "description": "collect recorded speech samples from users",
+ "repository" : {
+ "type" : "git",
+ "url" : "https://github.com/mozilla/speecher"
+ },
+ "main": "speecher.js",
+ "scripts": {
+ "test": "echo \"Error: no test specified\" && exit 1"
+ },
+ "author": "David Flanagan",
+ "license": "MPL-2.0",
+ "dependencies": {
+ "body-parser": "^1.15.0",
+ "express": "^4.13.4"
+ }
+}
diff --git a/public/audiorecorder.js b/public/audiorecorder.js
new file mode 100644
index 0000000..ea21e4c
--- /dev/null
+++ b/public/audiorecorder.js
@@ -0,0 +1,105 @@
+//
+// This is a simple class for recording mono audio from a getUserMedia()
+// microphone stream and converting it to a WAV-format blob. To use it, get a
+// microphone stream with getUserMedia, then pass that stream to the
+// AudioRecorder() constructor. To start recording call the start method. To
+// stop recording, call the stop() method. The stop method returns a blob in
+// WAV format. All the audio data is held in memory, in uncompressed form, and
+// requires about 192kb of memory for each second of audio, so this class is
+// not suitable for long recordings.
+//
+// By default, audio is collected in batches of 1024 samples (at about 40
+// batches per second, though this depends on the platform's sampling rate).
+// You can change the batch size by passing a different value as the optional
+// second argument to the constructor. Note, however, that the batch size must
+// be a power of two. If you set the onbatch property of an audiorecorder
+// object then each batch (a Float32Array) will be passed to that function
+// when it is collected.
+//
+// This code was inspired by, but simplified from this blog post
+// http://typedarray.org/from-microphone-to-wav-with-getusermedia-and-web-audio/
+//
+(function(exports) {
+ 'use strict';
+
+ function AudioRecorder(microphone, batchSize) {
+ this.context = new AudioContext();
+ this.source = this.context.createMediaStreamSource(microphone);
+ this.batchSize = batchSize || 1024;
+ // In Firefox we don't need the one output channel, but we need
+ // it for Chrome, even though it is unused.
+ this.processor = this.context.createScriptProcessor(this.batchSize, 1, 1);
+ this.batches = []; // batches of sample data from the script processor
+
+ // Each time we get a batch of data, this function will be called
+ // We just copy the typed array and save it. We end up with a long
+ // array of typed arrays.
+ this.processor.addEventListener('audioprocess', function(e) {
+ var data = e.inputBuffer.getChannelData(0);
+ var copy = new Float32Array(data);
+ this.batches.push(copy);
+ if (this.onbatch) { // If the user has defined a callback, call it
+ this.onbatch(copy);
+ }
+ }.bind(this));
+ }
+
+ // The microphone is live the entire time. To start recording we
+ // connect the microphone stream to the processor node.
+ AudioRecorder.prototype.start = function() {
+ this.source.connect(this.processor);
+ // For Chrome we also have to connect the processor to the
+ // destination even though the processor does not produce any output
+ this.processor.connect(this.context.destination);
+ };
+
+ // To stop recording, disconnect the microphone.
+ // Then take the data we stored and convert to a WAV format blob
+ AudioRecorder.prototype.stop = function() {
+ this.source.disconnect();
+ this.processor.disconnect();
+ var batches = this.batches;
+ this.batches = [];
+ return makeWAVBlob(batches, this.batchSize, this.context.sampleRate);
+ };
+
+ // Convert the sound samples we've collected into a WAV file
+ function makeWAVBlob(batches, batchSize, sampleRate) {
+ var numSamples = batches.length * batchSize;
+ // 44 byte WAV header plus two bytes per sample
+ var blobSize = numSamples * 2 + 44;
+ var bytes = new ArrayBuffer(blobSize);
+ var view = new DataView(bytes);
+
+ // Create WAV file header
+ view.setUint32(0, 0x46464952, true); // 'RIFF'
+ view.setUint32(4, blobSize - 8, true); // Size of rest of file
+ view.setUint32(8, 0x45564157, true); // 'WAVE'
+ view.setUint32(12, 0x20746d66, true); // 'fmt '
+ view.setUint32(16, 16, true); // 16 bytes of fmt data
+ view.setUint16(20, 1, true); // Audio is in PCM format
+ view.setUint16(22, 1, true); // One-channel (mono)
+ view.setUint32(24, sampleRate, true); // Samples per second
+ view.setUint32(28, 2*sampleRate, true); // Bytes per second
+ view.setUint16(32, 2, true); // Block size
+ view.setUint16(34, 16, true); // Bits per sample
+ view.setUint32(36, 0x61746164, true); // 'data'
+ view.setUint32(40, numSamples*2, true); // How many data bytes
+
+ // Copy the samples to the file now
+ var offset = 44;
+ for(var i = 0; i < batches.length; i++) {
+ var batch = batches[i];
+ for(var j = 0; j < batch.length; j++) {
+ var floatSample = batch[j];
+ var intSample = floatSample * 0x7FFF; // convert to 16-bit signed int
+ view.setInt16(offset, intSample, true);
+ offset += 2;
+ }
+ }
+
+ return new Blob([bytes], { type: 'audio/wav' });
+ }
+
+ exports.AudioRecorder = AudioRecorder;
+}(window));
diff --git a/public/index.html b/public/index.html
new file mode 100644
index 0000000..fc46b04
--- /dev/null
+++ b/public/index.html
@@ -0,0 +1,124 @@
+
+
+
+
+
+
+
+
+
+
+ This website is used by Mozilla engineering to collect speech
+ samples to test and train our speech recognition engine. It
+ collects only speech recordings and does not associate them with
+ any personally identifying information.
+
+
+
+ By clicking the "I agree" button below, you are agreeing to
+ donate audio recordings of your voice and to
+
+ place them in the public domain. This means that you agree
+ to waive all rights to the recordings worldwide under copyright
+ and database law, including moral and publicity rights and all
+ related and neighboring rights.
+
+
+
+
+
+
+
+
+ Tap the microphone and read this sentence after the beep:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This application cannot run because:
+
+
+
+
+
+ Reload if you'd like to try again
+
+
+
diff --git a/public/index.js b/public/index.js
new file mode 100644
index 0000000..20ecbee
--- /dev/null
+++ b/public/index.js
@@ -0,0 +1,355 @@
+// The microphone stream we get from getUserMedia
+var microphone;
+
+// The sentences we want the user to read and their corresponding
+// server-side directories that we upload them to. We fetch these
+// from the server. See getSentences() and parseSentences().
+var sentences = [], directories = [];
+
+// The sentence we're currently recording, and its directory.
+// These are picked at random in recordingScreen.show()
+var currentSentence, currentDirectory;
+
+// These are configurable constants:
+var SILENCE_THRESHOLD = 0.1; // How quiet does it have to be to stop recording?
+var SILENCE_DURATION = 1500; // For how many milliseconds?
+var LOUD_THRESHOLD = 0.75; // How loud shows as red in the levels
+var BATCHSIZE = 2048; // How many samples per recorded batch
+var RECORD_BEEP_HZ = 800; // Frequency and duration of beeps
+var RECORD_BEEP_MS = 200;
+var STOP_BEEP_HZ = 400;
+var STOP_BEEP_MS = 300;
+
+// These are some things that can go wrong:
+var ERR_NO_CONSENT = 'You did not consent to recording. ' +
+ 'You must click the "I Agree" button in order to use this website.';
+var ERR_NO_GUM = 'Your browser does not support audio recording. ' +
+ 'Try using a recent version of Firefox or Chrome.';
+var ERR_NO_MIC = 'You did not allow this website to use the microphone. ' +
+ 'The website needs the microphone to record your voice.';
+var ERR_UPLOAD_FAILED = 'Uploading your recording to the server failed. ' +
+ 'This may be a temporary problem. Please reload and try again.';
+
+// This is the program startup sequence.
+getConsent()
+ .then(getMicrophone)
+ .then(rememberMicrophone)
+ .then(getSentences)
+ .then(parseSentences)
+ .then(initializeAndRun)
+ .catch(displayErrorMessage);
+
+// Ask the user to agree to place the recordings in the public domain.
+// They only have to agree once, and we remember using localStorage
+function getConsent() {
+ return new Promise(function(resolve, reject) {
+ // If the user has already consented, then we're done
+ if (localStorage.consentGiven) {
+ resolve();
+ return;
+ }
+ // Otherwise, display the consent screen and wait for a response
+ var consentScreen = document.querySelector('#consent-screen');
+ consentScreen.hidden = false;
+ document.querySelector('#agree').onclick = function() {
+ localStorage.consentGiven = true; // Remember this consent
+ consentScreen.hidden = true;
+ resolve();
+ };
+ document.querySelector('#disagree').onclick = function() {
+ consentScreen.hidden = true;
+ reject(ERR_NO_CONSENT);
+ };
+ });
+}
+
+// Use getUserMedia() to get access to the user's microphone.
+// This can fail because the browser does not support it, or
+// because the user does not give permission.
+function getMicrophone() {
+ return new Promise(function(resolve,reject) {
+ // Reject the promise with a 'permission denied' error code
+ function deny() { reject(ERR_NO_MIC); }
+
+ if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
+ navigator.mediaDevices.getUserMedia({audio: true}).then(resolve, deny);
+ }
+ else if (navigator.getUserMedia) {
+ navigator.getUserMedia({audio:true}, resolve, deny);
+ }
+ else if (navigator.webkitGetUserMedia) {
+ navigator.webkitGetUserMedia({audio:true}, resolve, deny);
+ }
+ else if (navigator.mozGetUserMedia) {
+ navigator.mozGetUserMedia({audio:true}, resolve, deny);
+ }
+ else {
+ reject(ERR_NO_GUM); // Browser does not support getUserMedia
+ }
+ });
+}
+
+// When we get the microphone audio stream, remember it in a global variable.
+function rememberMicrophone(stream) {
+ microphone = stream;
+}
+
+// Fetch the sentences.json file that tells us what sentences
+// to ask the user to read
+function getSentences() {
+ return fetch('sentences.json').then(function(r) { return r.json(); });
+}
+
+// Once we get the json file, break the keys and values into two
+// parallel arrays.
+function parseSentences(directoryToSentenceMap) {
+ for(var d in directoryToSentenceMap) {
+ directories.push(d);
+ sentences.push(directoryToSentenceMap[d]);
+ }
+}
+
+// If anything goes wrong in the app startup sequence, this function
+// is called to tell the user what went wrong
+function displayErrorMessage(error) {
+ document.querySelector('#consent-screen').hidden = true;
+ document.querySelector('#error-screen').hidden = false;
+ document.querySelector('#error-message').textContent = error;
+}
+
+// Once the async initialization is complete, this is where the
+// program really starts. It initializes the recording and playback
+// screens, and sets up event handlers to switch back and forth between
+// those screens until the user gets tired of making recordings.
+function initializeAndRun() {
+ // Get the DOM elements for the recording and playback screens
+ var recordingScreenElement = document.querySelector('#record-screen');
+ var playbackScreenElement = document.querySelector('#playback-screen');
+
+ // Create objects that encapsulate their functionality
+ // Then set up event handlers to coordinate the two screens
+ var recordingScreen = new RecordingScreen(recordingScreenElement, microphone);
+ var playbackScreen = new PlaybackScreen(playbackScreenElement);
+
+ // When a recording is complete, pass it to the playback screen
+ recordingScreenElement.addEventListener('record', function(event) {
+ recordingScreen.hide();
+ playbackScreen.show(event.detail);
+ });
+
+ // If the user clicks 'Upload' on the playback screen, do the upload
+ // and switch back to the recording screen for a new sentence
+ playbackScreenElement.addEventListener('upload', function(event) {
+ upload(currentDirectory, event.detail);
+ switchToRecordingScreen(true);
+ });
+
+ // If the user clicks 'Discard', switch back to the recording screen
+ // for another take of the same sentence
+ playbackScreenElement.addEventListener('discard', function() {
+ switchToRecordingScreen(false);
+ });
+
+ // Here's how we switch to the recording screen
+ function switchToRecordingScreen(needNewSentence) {
+ // Pick a random sentence if we don't have one or need a new one
+ if (needNewSentence || !currentSentence) {
+ var n = Math.floor(Math.random() * sentences.length);
+ currentSentence = sentences[n];
+ currentDirectory = directories[n];
+ }
+
+ // Hide the playback screen (and release its audio) if it was displayed
+ // Show the recording screen
+ playbackScreen.hide();
+ recordingScreen.show(currentSentence);
+ }
+
+ // Upload a recording using the fetch API to do an HTTP POST
+ function upload(directory, recording) {
+ fetch('/upload/' + directory, { method: 'POST', body: recording })
+ .then(function(response) {
+ if (response.status !== 200) {
+ playbackScreen.hide();
+ recordingScreen.hide();
+ displayErrorMessage(ERR_UPLOAD_FAILED + ' ' + response.status + ' ' +
+ response.statusText);
+ }
+ })
+ .catch(function() {
+ playbackScreen.hide();
+ recordingScreen.hide();
+ displayErrorMessage(ERR_UPLOAD_FAILED);
+ });
+ }
+
+ // Finally, we start the app off by displaying the recording screen
+ switchToRecordingScreen(true);
+}
+
+// The RecordingScreen object has show() and hide() methods and fires
+// a 'record' event on its DOM element when a recording has been made.
+function RecordingScreen(element, microphone) {
+ this.element = element;
+
+ this.show = function(sentence) {
+ this.element.querySelector('#sentence').textContent = sentence;
+ this.element.hidden = false;
+ };
+
+ this.hide = function() {
+ this.element.hidden = true;
+ };
+
+ // This allows us to record audio from the microphone stream.
+ // See audiorecorder.js
+ var recorder = new AudioRecorder(microphone, BATCHSIZE);
+
+ // Most of the state for this class is hidden away here in the constructor
+ // and is not exposed outside of the class.
+
+ // The main part of the recording screen is this canvas object
+ // that displays a microphone icon, acts as a recording level indicator
+ // and responds to clicks to start and stop recording
+ var canvas = element.querySelector('canvas');
+ var context = canvas.getContext('2d');
+
+ var recording = false; // Are we currently recording?
+ var lastSoundTime; // When was the last time we heard a sound?
+
+ // The canvas responds to clicks to start and stop recording
+ canvas.addEventListener('click', function() {
+ // Ignore clicks when we're not ready
+ if (canvas.className === 'disabled')
+ return;
+
+ if (recording) {
+ stopRecording();
+ }
+ else {
+ startRecording();
+ }
+ });
+
+ function startRecording() {
+ if (!recording) {
+ recording = true;
+ canvas.className = 'disabled'; // disabled 'till after the beep
+ beep(RECORD_BEEP_HZ, RECORD_BEEP_MS).then(function() {
+ lastSoundTime = performance.now();
+ recorder.start();
+ canvas.className = 'recording';
+ });
+ }
+ }
+
+ function stopRecording() {
+ if (recording) {
+ recording = false;
+ canvas.className = 'disabled'; // disabled 'till after the beep
+ var blob = recorder.stop();
+ // Beep to tell the user the recording is done
+ beep(STOP_BEEP_HZ, STOP_BEEP_MS).then(function() {
+ canvas.className = 'stopped';
+ });
+ // Erase the canvas
+ displayLevel(0);
+ // Broadcast an event containing the recorded blob
+ element.dispatchEvent(new CustomEvent('record', {
+ detail: blob
+ }));
+ }
+ }
+
+ // This function is called each time the recorder receives a batch of
+ // audio data. We use this to display recording levels and also to
+ // detect the silence that ends a recording
+ recorder.onbatch = function batchHandler(batch) {
+ // What's the highest amplitude for this batch? (Ignoring negative values)
+ var max = batch.reduce(function(max, val) { return val > max ? val : max; },
+ 0.0);
+
+ // If we haven't heard anything in a while, it may be time to
+ // stop recording
+ var now = performance.now();
+ if (max < SILENCE_THRESHOLD) {
+ if (now - lastSoundTime > SILENCE_DURATION) {
+ stopRecording();
+ return;
+ }
+ }
+ else {
+ lastSoundTime = now;
+ }
+
+ // Graphically display this recording level
+ displayLevel(max);
+ };
+
+ // A WebAudio utility to do simple beeps
+ function beep(hertz, duration) {
+ return new Promise(function(resolve, reject) {
+ var context = new AudioContext();
+ var oscillator = context.createOscillator();
+ oscillator.connect(context.destination);
+ oscillator.frequency.value = hertz;
+ oscillator.start();
+ setTimeout(function() {
+ oscillator.stop();
+ oscillator.disconnect();
+ context.close();
+ resolve();
+ }, duration);
+ });
+ }
+
+ // Graphically display the recording level
+ function displayLevel(level) {
+ requestAnimationFrame(function() {
+ // Clear the canvas
+ context.clearRect(0, 0, canvas.width, canvas.height);
+ // Do nothing if the level is low
+ if (level < SILENCE_THRESHOLD) return;
+ // Otherwise, draw a circle whose radius and color depends on volume.
+ // The 100 is because we're using a microphone icon that is 95x95
+ var radius = 50 + level * (canvas.width-100) / 2;
+ context.lineWidth = radius/5;
+ context.beginPath();
+ context.arc(canvas.width/2, canvas.height/2, radius, 0, 2*Math.PI);
+ context.strokeStyle = (level > LOUD_THRESHOLD) ? 'red' : 'green';
+ context.stroke();
+ });
+ }
+}
+
+// This simple class encapsulates the playback screen. It has
+// show and hide methods, and fires 'upload' and 'discard' events
+// depending on which button is clicked.
+function PlaybackScreen(element) {
+ this.element = element;
+ this.player = element.querySelector('#player');
+
+ this.show = function(recording) {
+ this.element.hidden = false;
+ this.recording = recording;
+ this.player.src = URL.createObjectURL(recording);
+ };
+
+ this.hide = function() {
+ this.element.hidden = true;
+ this.recording = null;
+ if (this.player.src) {
+ URL.revokeObjectURL(this.player.src);
+ delete this.player.src;
+ this.player.load();
+ }
+ };
+
+ element.querySelector('#upload').addEventListener('click', function() {
+ element.dispatchEvent(new CustomEvent('upload', {detail: this.recording}));
+ }.bind(this));
+
+ element.querySelector('#discard').addEventListener('click', function() {
+ element.dispatchEvent(new CustomEvent('discard'));
+ });
+}
diff --git a/public/record.png b/public/record.png
new file mode 100644
index 0000000..39ce037
Binary files /dev/null and b/public/record.png differ
diff --git a/public/stop.png b/public/stop.png
new file mode 100644
index 0000000..7faedd3
Binary files /dev/null and b/public/stop.png differ
diff --git a/speecher.js b/speecher.js
new file mode 100644
index 0000000..6586256
--- /dev/null
+++ b/speecher.js
@@ -0,0 +1,134 @@
+var fs = require('fs');
+var express = require('express');
+var bodyParser = require('body-parser');
+
+var PORT = 80; // What port to listen on
+var uploaddir = __dirname + '/uploads'; // Upload directory
+var directoryToSentence = {}; // dirname to sentence
+var directoryToFileNumber = {}; // dirname to next file number to use
+var directories = []; // all the directories
+
+// Here's the program:
+readConfigFile();
+startServer();
+
+/*
+ * Synchronous startup stuff before we start handling requests.
+ * This reads the sentences.txt configuration file, creates directories
+ * as needed, and figures out the next file number in each directory.
+ */
+function readConfigFile() {
+ var configFile = __dirname + '/sentences.txt';
+
+ try {
+ fs.readFileSync(configFile, 'utf8')
+ .trim()
+ .split('\n')
+ .forEach(function(line) {
+ var trimmed = line.trim();
+ if (trimmed === '' || trimmed[0] === '#') {
+ return; // ignore blanks and comments
+ }
+ var match = trimmed.match(/^(\w+)\s+(.*)$/);
+ if (!match) {
+ console.warn('Ignoring mis-formatted line in sentences.txt:',
+ line);
+ return;
+ }
+ var directory = match[1];
+ var sentence = match[2];
+
+ if (directory in directoryToSentence) {
+ console.warn('Ignoring line in sentences.txt because directory',
+ 'is already in use:', line);
+ return;
+ }
+
+ directoryToSentence[directory] = sentence;
+ directories.push(directory);
+ });
+ }
+ catch(e) {
+ console.error('Error reading configuration file:', configFile,
+ '\n', e);
+ process.exit(1);
+ }
+
+ if (directories.length === 0) {
+ console.error('No sentences defined in sentences.txt. Exiting.');
+ process.exit(1);
+ }
+
+ directories.forEach(function(directory) {
+ try {
+ var dirname = uploaddir + '/' + directory;
+ if (fs.existsSync(dirname)) {
+ // Directory exists. Go find out what the next filenumber is
+ var filenumbers =
+ fs.readdirSync(dirname) // all files
+ .filter(function(f) { return f.match(/\d+\.wav/);}) // only .wav
+ .map(function(f) { return parseInt(f); }) // to number
+ .sort(function(a,b) { return b - a; }); // largest first
+ directoryToFileNumber[directory] = (filenumbers[0] + 1) || 0;
+ }
+ else {
+ // Directory does not exist. Create it and start with file 0
+ fs.mkdirSync(dirname);
+ directoryToFileNumber[directory] = 0;
+ }
+ }
+ catch(e) {
+ // This can happen, for example, if dirname is a file instead of
+ // a directory or if there is a directory that is not readable
+ console.warn('Error verifying directory', dirname,
+ 'Ignoring that directory', e);
+ }
+ });
+}
+
+function startServer() {
+ var app = express();
+
+ // Serve static files in the public/ directory
+ app.use(express.static('public'));
+
+ // When the client issues a GET request for the list of sentences
+ // create that dynamically from the data we parsed from the config file
+ app.get('/sentences.json', function(request, response) {
+ response.send(directoryToSentence);
+ });
+
+ // When we get POSTs, handle the body like this
+ app.use(bodyParser.raw({
+ type: 'audio/wav',
+ limit: 2*1024*1024 // max file size 2mb
+ }));
+
+ // This is how we handle WAV file uploads
+ app.post('/upload/:dir', function(request, response) {
+ var dir = request.params.dir;
+ var filenumber = directoryToFileNumber[dir];
+ if (filenumber !== undefined) { // Only if it is a known directory
+ directoryToFileNumber[dir] = filenumber + 1;
+ var filename = String(filenumber);
+ while(filename.length < 4) filename = '0' + filename;
+ var path = uploaddir + '/' + dir + '/' + filename + '.wav';
+ fs.writeFile(path, request.body, {}, function(err) {
+ response.send('Thanks for your contribution!');
+ if (err) {
+ console.warn(err);
+ }
+ else {
+ console.log('wrote file:', path);
+ }
+ });
+ }
+ else {
+ response.status(404).send('Bad directory');
+ }
+ });
+
+ app.listen(PORT, function () {
+ console.log('Listening on port', PORT);
+ });
+}