diff --git a/.gitignore b/.gitignore index e920c16..67e4f6e 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,7 @@ node_modules # Optional REPL history .node_repl_history + +# Don't check in the config file or uploads dir +sentences.txt +uploads diff --git a/README.md b/README.md index f26d528..4c75278 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,13 @@ # speecher -A webapp for collecting speech samples for voice recognition testing and training +This is a simple webapp for collecting speech samples for voice +recognition testing and training. + +Running it should be as simple as issuing these commands on your +server: + +``` +> git clone git@github.com:mozilla/speecher.git +> cd speecher +> npm install +> node speecher.js +``` diff --git a/package.json b/package.json new file mode 100644 index 0000000..259aa04 --- /dev/null +++ b/package.json @@ -0,0 +1,19 @@ +{ + "name": "speecher", + "version": "1.0.0", + "description": "collect recorded speech samples from users", + "repository" : { + "type" : "git", + "url" : "https://github.com/mozilla/speecher" + }, + "main": "speecher.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "author": "David Flanagan", + "license": "MPL-2.0", + "dependencies": { + "body-parser": "^1.15.0", + "express": "^4.13.4" + } +} diff --git a/public/audiorecorder.js b/public/audiorecorder.js new file mode 100644 index 0000000..ea21e4c --- /dev/null +++ b/public/audiorecorder.js @@ -0,0 +1,105 @@ +// +// This is a simple class for recording mono audio from a getUserMedia() +// microphone stream and converting it to a WAV-format blob. To use it, get a +// microphone stream with getUserMedia, then pass that stream to the +// AudioRecorder() constructor. To start recording call the start method. To +// stop recording, call the stop() method. The stop method returns a blob in +// WAV format. 
All the audio data is held in memory, in uncompressed form, and +// requires about 192kb of memory for each second of audio, so this class is +// not suitable for long recordings. +// +// By default, audio is collected in batches of 1024 samples (at about 40 +// batches per second, though this depends on the platform's sampling rate). +// You can change the batch size by passing a different value as the optional +// second argument to the constructor. Note, however, that the batch size must +// be a power of two. If you set the onbatch property of an audiorecorder +// object then each batch (a Float32Array) will be passed to that function +// when it is collected. +// +// This code was inspired by, but simplified from this blog post +// http://typedarray.org/from-microphone-to-wav-with-getusermedia-and-web-audio/ +// +(function(exports) { + 'use strict'; + + function AudioRecorder(microphone, batchSize) { + this.context = new AudioContext(); + this.source = this.context.createMediaStreamSource(microphone); + this.batchSize = batchSize || 1024; + // In Firefox we don't need the one output channel, but we need + // it for Chrome, even though it is unused. + this.processor = this.context.createScriptProcessor(this.batchSize, 1, 1); + this.batches = []; // batches of sample data from the script processor + + // Each time we get a batch of data, this function will be called + // We just copy the typed array and save it. We end up with a long + // array of typed arrays. + this.processor.addEventListener('audioprocess', function(e) { + var data = e.inputBuffer.getChannelData(0); + var copy = new Float32Array(data); + this.batches.push(copy); + if (this.onbatch) { // If the user has defined a callback, call it + this.onbatch(copy); + } + }.bind(this)); + } + + // The microphone is live the entire time. To start recording we + // connect the microphone stream to the processor node. 
+ AudioRecorder.prototype.start = function() { + this.source.connect(this.processor); + // For Chrome we also have to connect the processor to the + // destination even though the processor does not produce any output + this.processor.connect(this.context.destination); + }; + + // To stop recording, disconnect the microphone. + // Then take the data we stored and convert to a WAV format blob + AudioRecorder.prototype.stop = function() { + this.source.disconnect(); + this.processor.disconnect(); + var batches = this.batches; + this.batches = []; + return makeWAVBlob(batches, this.batchSize, this.context.sampleRate); + }; + + // Convert the sound samples we've collected into a WAV file + function makeWAVBlob(batches, batchSize, sampleRate) { + var numSamples = batches.length * batchSize; + // 44 byte WAV header plus two bytes per sample + var blobSize = numSamples * 2 + 44; + var bytes = new ArrayBuffer(blobSize); + var view = new DataView(bytes); + + // Create WAV file header + view.setUint32(0, 0x46464952, true); // 'RIFF' + view.setUint32(4, blobSize - 8, true); // Size of rest of file + view.setUint32(8, 0x45564157, true); // 'WAVE' + view.setUint32(12, 0x20746d66, true); // 'fmt ' + view.setUint32(16, 16, true); // 16 bytes of fmt view + view.setUint16(20, 1, true); // Audio is in PCM format + view.setUint16(22, 1, true); // One-channel (mono) + view.setUint32(24, sampleRate, true); // Samples per second + view.setUint32(28, 2*sampleRate, true); // Bytes per second + view.setUint16(32, 2, true); // Block size + view.setUint16(34, 16, true); // Bits per sample + view.setUint32(36, 0x61746164, true); // 'data' + view.setUint32(40, numSamples*2, true); // How many data bytes + + // Copy the samples to the file now + var offset = 44; + for(var i = 0; i < batches.length; i++) { + var batch = batches[i]; + for(var j = 0; j < batch.length; j++) { + var floatSample = batch[j]; + var intSample = floatSample * 0x7FFF; // convert to 16-bit signed int + 
view.setInt16(offset, intSample, true); + offset += 2; + } + } + + return new Blob([bytes], { type: 'audio/wav' }); + } + + exports.AudioRecorder = AudioRecorder; +}(window)); diff --git a/public/index.html b/public/index.html new file mode 100644 index 0000000..fc46b04 --- /dev/null +++ b/public/index.html @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + + + + diff --git a/public/index.js b/public/index.js new file mode 100644 index 0000000..20ecbee --- /dev/null +++ b/public/index.js @@ -0,0 +1,355 @@ +// The microphone stream we get from getUserMedia +var microphone; + +// The sentences we want the user to read and their corresponding +// server-side directories that we upload them to. We fetch these +// from the server. See getSentences() and parseSentences(). +var sentences = [], directories = []; + +// The sentence we're currently recording, and its directory. +// These are picked at random in recordingScreen.show() +var currentSentence, currentDirectory; + +// These are configurable constants: +var SILENCE_THRESHOLD = 0.1; // How quiet does it have to be to stop recording? +var SILENCE_DURATION = 1500; // For how many milliseconds? +var LOUD_THRESHOLD = 0.75; // How loud shows as red in the levels +var BATCHSIZE = 2048; // How many samples per recorded batch +var RECORD_BEEP_HZ = 800; // Frequency and duration of beeps +var RECORD_BEEP_MS = 200; +var STOP_BEEP_HZ = 400; +var STOP_BEEP_MS = 300; + +// These are some things that can go wrong: +var ERR_NO_CONSENT = 'You did not consent to recording. ' + + 'You must click the "I Agree" button in order to use this website.'; +var ERR_NO_GUM = 'Your browser does not support audio recording. ' + + 'Try using a recent version of Firefox or Chrome.'; +var ERR_NO_MIC = 'You did not allow this website to use the microphone. ' + + 'The website needs the microphone to record your voice.'; +var ERR_UPLOAD_FAILED = 'Uploading your recording to the server failed. ' + + 'This may be a temporary problem. 
Please reload and try again.'; + +// This is the program startup sequence. +getConsent() + .then(getMicrophone) + .then(rememberMicrophone) + .then(getSentences) + .then(parseSentences) + .then(initializeAndRun) + .catch(displayErrorMessage); + +// Ask the user to agree to place the recordings in the public domain. +// They only have to agree once, and we remember using localStorage +function getConsent() { + return new Promise(function(resolve, reject) { + // If the user has already consented, then we're done + if (localStorage.consentGiven) { + resolve(); + return; + } + // Otherwise, display the consent screen and wait for a response + var consentScreen = document.querySelector('#consent-screen'); + consentScreen.hidden = false; + document.querySelector('#agree').onclick = function() { + localStorage.consentGiven = true; // Remember this consent + consentScreen.hidden = true; + resolve(); + }; + document.querySelector('#disagree').onclick = function() { + consentScreen.hidden = true; + reject(ERR_NO_CONSENT); + }; + }); +} + +// Use getUserMedia() to get access to the user's microphone. +// This can fail because the browser does not support it, or +// because the user does not give permission. 
+function getMicrophone() { + return new Promise(function(resolve,reject) { + // Reject the promise with a 'permission denied' error code + function deny() { reject(ERR_NO_MIC); } + + if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) { + navigator.mediaDevices.getUserMedia({audio: true}).then(resolve, deny); + } + else if (navigator.getUserMedia) { + navigator.getUserMedia({audio:true}, resolve, deny); + } + else if (navigator.webkitGetUserMedia) { + navigator.webkitGetUserMedia({audio:true}, resolve, deny); + } + else if (navigator.mozGetUserMedia) { + navigator.mozGetUserMedia({audio:true}, resolve, deny); + } + else { + reject(ERR_NO_GUM); // Browser does not support getUserMedia + } + }); +} + +// When we get the microphone audio stream, remember it in a global variable. +function rememberMicrophone(stream) { + microphone = stream; +} + +// Fetch the sentences.json file that tell us what sentences +// to ask the user to read +function getSentences() { + return fetch('sentences.json').then(function(r) { return r.json(); }); +} + +// Once we get the json file, break the keys and values into two +// parallel arrays. +function parseSentences(directoryToSentenceMap) { + for(var d in directoryToSentenceMap) { + directories.push(d); + sentences.push(directoryToSentenceMap[d]); + } +} + +// If anything goes wrong in the app startup sequence, this function +// is called to tell the user what went wrong +function displayErrorMessage(error) { + document.querySelector('#consent-screen').hidden = true; + document.querySelector('#error-screen').hidden = false; + document.querySelector('#error-message').textContent = error; +} + +// Once the async initialization is complete, this is where the +// program really starts. It initializes the recording and playback +// screens, and sets up event handlers to switch back and forth between +// those screens until the user gets tired of making recordings. 
+function initializeAndRun() { + // Get the DOM elements for the recording and playback screens + var recordingScreenElement = document.querySelector('#record-screen'); + var playbackScreenElement = document.querySelector('#playback-screen'); + + // Create objects that encapsulate their functionality + // Then set up event handlers to coordinate the two screens + var recordingScreen = new RecordingScreen(recordingScreenElement, microphone); + var playbackScreen = new PlaybackScreen(playbackScreenElement); + + // When a recording is complete, pass it to the playback screen + recordingScreenElement.addEventListener('record', function(event) { + recordingScreen.hide(); + playbackScreen.show(event.detail); + }); + + // If the user clicks 'Upload' on the playback screen, do the upload + // and switch back to the recording screen for a new sentence + playbackScreenElement.addEventListener('upload', function(event) { + upload(currentDirectory, event.detail); + switchToRecordingScreen(true); + }); + + // If the user clicks 'Discard', switch back to the recording screen + // for another take of the same sentence + playbackScreenElement.addEventListener('discard', function() { + switchToRecordingScreen(false); + }); + + // Here's how we switch to the recording screen + function switchToRecordingScreen(needNewSentence) { + // Pick a random sentence if we don't have one or need a new one + if (needNewSentence || !currentSentence) { + var n = Math.floor(Math.random() * sentences.length); + currentSentence = sentences[n]; + currentDirectory = directories[n]; + } + + // Hide the playback screen (and release its audio) if it was displayed + // Show the recording screen + playbackScreen.hide(); + recordingScreen.show(currentSentence); + } + + // Upload a recording using the fetch API to do an HTTP POST + function upload(directory, recording) { + fetch('/upload/' + directory, { method: 'POST', body: recording }) + .then(function(response) { + if (response.status !== 200) { + 
playbackScreen.hide(); + recordingScreen.hide(); + displayErrorMessage(ERR_UPLOAD_FAILED + ' ' + response.status + ' ' + + response.statusText); + } + }) + .catch(function() { + playbackScreen.hide(); + recordingScreen.hide(); + displayErrorMessage(ERR_UPLOAD_FAILED); + }); + } + + // Finally, we start the app off by displaying the recording screen + switchToRecordingScreen(true); +} + +// The RecordingScreen object has show() and hide() methods and fires +// a 'record' event on its DOM element when a recording has been made. +function RecordingScreen(element, microphone) { + this.element = element; + + this.show = function(sentence) { + this.element.querySelector('#sentence').textContent = sentence; + this.element.hidden = false; + }; + + this.hide = function() { + this.element.hidden = true; + }; + + // This allows us to record audio from the microphone stream. + // See audiorecorder.js + var recorder = new AudioRecorder(microphone, BATCHSIZE); + + // Most of the state for this class is hidden away here in the constructor + // and is not exposed outside of the class. + + // The main part of the recording screen is this canvas object + // that displays a microphone icon, acts as a recording level indicator + // and responds to clicks to start and stop recording + var canvas = element.querySelector('canvas'); + var context = canvas.getContext('2d'); + + var recording = false; // Are we currently recording? + var lastSoundTime; // When was the last time we heard a sound? 
+ + // The canvas responds to clicks to start and stop recording + canvas.addEventListener('click', function() { + // Ignore clicks when we're not ready + if (canvas.className === 'disabled') + return; + + if (recording) { + stopRecording(); + } + else { + startRecording(); + } + }); + + function startRecording() { + if (!recording) { + recording = true; + canvas.className = 'disabled'; // disabled 'till after the beep + beep(RECORD_BEEP_HZ, RECORD_BEEP_MS).then(function() { + lastSoundTime = performance.now(); + recorder.start(); + canvas.className = 'recording'; + }); + } + } + + function stopRecording() { + if (recording) { + recording = false; + canvas.className = 'disabled'; // disabled 'till after the beep + var blob = recorder.stop(); + // Beep to tell the user the recording is done + beep(STOP_BEEP_HZ, STOP_BEEP_MS).then(function() { + canvas.className = 'stopped'; + }); + // Erase the canvas + displayLevel(0); + // Broadcast an event containing the recorded blob + element.dispatchEvent(new CustomEvent('record', { + detail: blob + })); + } + } + + // This function is called each time the recorder receives a batch of + // audio data. We use this to display recording levels and also to + // detect the silence that ends a recording + recorder.onbatch = function batchHandler(batch) { + // What's the highest amplitude for this batch? (Ignoring negative values) + var max = batch.reduce(function(max, val) { return val > max ? 
val : max; }, + 0.0); + + // If we haven't heard anything in a while, it may be time to + // stop recording + var now = performance.now(); + if (max < SILENCE_THRESHOLD) { + if (now - lastSoundTime > SILENCE_DURATION) { + stopRecording(); + return; + } + } + else { + lastSoundTime = now; + } + + // Graphically display this recording level + displayLevel(max); + }; + + // A WebAudio utility to do simple beeps + function beep(hertz, duration) { + return new Promise(function(resolve, reject) { + var context = new AudioContext(); + var oscillator = context.createOscillator(); + oscillator.connect(context.destination); + oscillator.frequency.value = hertz; + oscillator.start(); + setTimeout(function() { + oscillator.stop(); + oscillator.disconnect(); + context.close(); + resolve(); + }, duration); + }); + } + + // Graphically display the recording level + function displayLevel(level) { + requestAnimationFrame(function() { + // Clear the canvas + context.clearRect(0, 0, canvas.width, canvas.height); + // Do nothing if the level is low + if (level < SILENCE_THRESHOLD) return; + // Otherwise, draw a circle whose radius and color depends on volume. + // The 100 is because we're using a microphone icon that is 95x95 + var radius = 50 + level * (canvas.width-100) / 2; + context.lineWidth = radius/5; + context.beginPath(); + context.arc(canvas.width/2, canvas.height/2, radius, 0, 2*Math.PI); + context.strokeStyle = (level > LOUD_THRESHOLD) ? 'red' : 'green'; + context.stroke(); + }); + } +} + +// This simple class encapsulates the playback screen. It has +// show and hide methods, and fires 'upload' and 'discard' events +// depending on which button is clicked. 
+function PlaybackScreen(element) { + this.element = element; + this.player = element.querySelector('#player'); + + this.show = function(recording) { + this.element.hidden = false; + this.recording = recording; + this.player.src = URL.createObjectURL(recording); + }; + + this.hide = function() { + this.element.hidden = true; + this.recording = null; + if (this.player.src) { + URL.revokeObjectURL(this.player.src); + delete this.player.src; + this.player.load(); + } + }; + + element.querySelector('#upload').addEventListener('click', function() { + element.dispatchEvent(new CustomEvent('upload', {detail: this.recording})); + }.bind(this)); + + element.querySelector('#discard').addEventListener('click', function() { + element.dispatchEvent(new CustomEvent('discard')); + }); +} diff --git a/public/record.png b/public/record.png new file mode 100644 index 0000000..39ce037 Binary files /dev/null and b/public/record.png differ diff --git a/public/stop.png b/public/stop.png new file mode 100644 index 0000000..7faedd3 Binary files /dev/null and b/public/stop.png differ diff --git a/speecher.js b/speecher.js new file mode 100644 index 0000000..6586256 --- /dev/null +++ b/speecher.js @@ -0,0 +1,134 @@ +var fs = require('fs'); +var express = require('express'); +var bodyParser = require('body-parser'); + +var PORT = 80; // What port to listen on +var uploaddir = __dirname + '/uploads'; // Upload directory +var directoryToSentence = {}; // dirname to sentence +var directoryToFileNumber = {}; // dirname to next file number to use +var directories = []; // all the directories + +// Here's the program: +readConfigFile(); +startServer(); + +/* + * Synchronous startup stuff before we start handling requests. + * This reads the sentences.txt configuration file, creates directories + * as needed, and figures out the next file number in each directory. 
+ */ +function readConfigFile() { + var configFile = __dirname + '/sentences.txt'; + + try { + fs.readFileSync(configFile, 'utf8') + .trim() + .split('\n') + .forEach(function(line) { + var trimmed = line.trim(); + if (trimmed === '' || trimmed[0] === '#') { + return; // ignore blanks and comments + } + var match = trimmed.match(/^(\w+)\s+(.*)$/); + if (!match) { + console.warn('Ignoring mis-formatted line in sentences.txt:', + line); + return; + } + var directory = match[1]; + var sentence = match[2]; + + if (directory in directoryToSentence) { + console.warn('Ignoring line in sentences.txt because directory', + 'is already in use:', line); + return; + } + + directoryToSentence[directory] = sentence; + directories.push(directory); + }); + } + catch(e) { + console.error('Error reading configuration file:', configFile, + '\n', e); + process.exit(1); + } + + if (directories.length === 0) { + console.error('No sentences defined in sentences.txt. Exiting.'); + process.exit(1); + } + + directories.forEach(function(directory) { + try { + var dirname = uploaddir + '/' + directory; + if (fs.existsSync(dirname)) { + // Directory exists. Go find out what the next filenumber is + var filenumbers = + fs.readdirSync(dirname) // all files + .filter(function(f) { return f.match(/\d+\.wav/);}) // only .wav + .map(function(f) { return parseInt(f); }) // to number + .sort(function(a,b) { return b - a; }); // largest first + directoryToFileNumber[directory] = (filenumbers[0] + 1) || 0; + } + else { + // Directory does not exist. 
Create it and start with file 0 + fs.mkdirSync(dirname); + directoryToFileNumber[directory] = 0; + } + } + catch(e) { + // This can happen, for example, if dirname is a file instead of + // a directory or if there is a directory that is not readable + console.warn('Error verifying directory', dirname, + 'Ignoring that directory', e); + } + }); +} + +function startServer() { + var app = express(); + + // Serve static files in the public/ directory + app.use(express.static('public')); + + // When the client issues a GET request for the list of sentences + // create that dynamically from the data we parsed from the config file + app.get('/sentences.json', function(request, response) { + response.send(directoryToSentence); + }); + + // When we get POSTs, handle the body like this + app.use(bodyParser.raw({ + type: 'audio/wav', + limit: 2*1024*1024 // max file size 2mb + })); + + // This is how we handle WAV file uploads + app.post('/upload/:dir', function(request, response) { + var dir = request.params.dir; + var filenumber = directoryToFileNumber[dir]; + if (filenumber !== undefined) { // Only if it is a known directory + directoryToFileNumber[dir] = filenumber + 1; + var filename = String(filenumber); + while(filename.length < 4) filename = '0' + filename; + var path = uploaddir + '/' + dir + '/' + filename + '.wav'; + fs.writeFile(path, request.body, {}, function(err) { + response.send('Thanks for your contribution!'); + if (err) { + console.warn(err); + } + else { + console.log('wrote file:', path); + } + }); + } + else { + response.status(404).send('Bad directory'); + } + }); + + app.listen(PORT, function () { + console.log('Listening on port', PORT); + }); +}