mirror of https://github.com/scratchfoundation/scratch-vm.git
Initial version of the speech extension. There is certainly still some stuff to figure out but the core of it is here.
parent a85f641d25
commit e7987a6a4c
5 changed files with 760 additions and 0 deletions
package.json
@@ -37,6 +37,7 @@
   "canvas-toBlob": "1.0.0",
   "copy-webpack-plugin": "^4.5.1",
   "decode-html": "2.0.0",
+  "diff-match-patch": "^1.0.0",
   "escape-html": "1.0.3",
   "eslint": "^4.5.0",
   "eslint-config-scratch": "^5.0.0",
src/extensions/scratch3_speech/assets/speech-rec-end.mp3 (new file, binary file not shown)
src/extensions/scratch3_speech/assets/speech-rec-start.mp3 (new file, binary file not shown)
src/extensions/scratch3_speech/index.js (new file, 755 lines)
@@ -0,0 +1,755 @@
const ArgumentType = require('../../extension-support/argument-type');
const Cast = require('../../util/cast');
const BlockType = require('../../extension-support/block-type');
const log = require('../../util/log');
const DiffMatchPatch = require('diff-match-patch');

/**
 * Url of icon to be displayed at the left edge of each extension block.
 * TODO: Find the final icon. Replace it with the right format. data URI?
 * @type {string}
 */
const iconURI = 'https://www.gstatic.com/images/icons/material/system/1x/mic_white_24dp.png';

/**
 * Url of icon to be displayed in the toolbox menu for the extension category.
 * TODO: Find the final icon. Replace it with the right format. data URI?
 * @type {string}
 */
const menuIconURI = 'https://www.gstatic.com/images/icons/material/system/1x/mic_grey600_24dp.png';

/**
 * The url of the speech server.
 * @type {string}
 */
const serverURL = 'wss://speech.scratch.mit.edu';

/**
 * The amount of time to wait between when we stop sending speech data to the server and when
 * we expect the transcription result marked with isFinal: true to come back from the server.
 * @type {number}
 */
const finalResponseTimeoutDurationMs = 3000;

/**
 * The maximum amount of time the Listen and Wait block will listen before timing out.
 * Currently set to 10sec. This should not exceed the speech api limit (60sec) without redoing how
 * we stream the microphone data.
 * @type {number}
 */
const listenAndWaitBlockTimeoutMs = 10000;

/**
 * The start and stop sounds, loaded as static assets.
 * @type {object}
 */
let assetData = {};
try {
    assetData = require('./manifest');
} catch (e) {
    // Non-webpack environment, don't worry about assets.
}

class Scratch3SpeechBlocks {
    constructor (runtime) {
        /**
         * The runtime instantiating this block package.
         * @type {Runtime}
         */
        this.runtime = runtime;

        /**
         * An array of phrases from the [when I hear] hat blocks.
         * This list is sent to the speech api to seed the recognition engine
         * and is used to decide whether the transcription results match.
         * @type {Array}
         * @private
         */
        this._phraseList = [];

        /**
         * The most recent transcription result received from the speech API that we decided to keep.
         * This is the value returned by the reporter block.
         * @type {String}
         * @private
         */
        this._currentUtterance = null;

        /**
         * Similar to _currentUtterance, but set back to '' at the beginning of each listen block.
         * Used to get the hat blocks to edge trigger. In order to detect someone saying
         * the same thing twice in two subsequent listen and wait blocks
         * and still trigger the hat, we need this to go from
         * '' at the beginning of the listen block to '<transcription value>' at the end.
         * @type {string}
         * @private
         */
        this._utteranceForEdgeTrigger = null;

        /**
         * The list of queued `resolve` callbacks for 'Listen and Wait' blocks.
         * We only listen for one utterance at a time. We may encounter multiple
         * 'Listen and wait' blocks that tell us to start listening. If one starts
         * and hasn't received results back yet, any further ones we encounter
         * will all resolve when we get the next acceptable transcription result back.
         * @type {!Array}
         * @private
         */
        this._speechPromises = [];

        /**
         * The id of the timeout that will run if we start listening and don't get any
         * transcription results back, e.g. because we didn't hear anything.
         * @type {number}
         * @private
         */
        this._speechTimeoutId = null;

        /**
         * The id of the timeout that runs after we're done listening while we're
         * still waiting for a potential isFinal:true transcription result to come back.
         * @type {number}
         * @private
         */
        this._speechFinalResponseTimeout = null;

        /**
         * The ScriptProcessorNode hooked up to the audio context.
         * @type {ScriptProcessorNode}
         * @private
         */
        this._scriptNode = null;

        /**
         * The socket used to communicate with the speech server to send microphone data
         * and receive transcription results.
         * @type {WebSocket}
         * @private
         */
        this._socket = null;

        /**
         * The AudioContext used to manage the microphone.
         * @type {AudioContext}
         * @private
         */
        this._context = null;

        /**
         * MediaStreamAudioSourceNode to handle microphone data.
         * @type {MediaStreamAudioSourceNode}
         * @private
         */
        this._sourceNode = null;

        /**
         * A Promise whose fulfillment handler receives a MediaStream object when the microphone has been obtained.
         * @type {Promise}
         * @private
         */
        this._audioPromise = null;

        /**
         * Audio buffer for the sound to indicate that listening has started.
         * @type {AudioBuffer}
         * @private
         */
        this._startSoundBuffer = null;

        /**
         * Audio buffer for the sound to indicate that listening has ended.
         * @type {AudioBuffer}
         * @private
         */
        this._endSoundBuffer = null;

        /**
         * Diff Match Patch is used to do some fuzzy matching of the transcription results
         * with what is in the hat blocks.
         */
        this._dmp = new DiffMatchPatch();
        // Threshold for diff match patch to use: (0.0 = perfection, 1.0 = very loose).
        this._dmp.Match_Threshold = 0.3;
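        // With the threshold at 0.3, a transcript that differs from a hat phrase
        // by roughly a character or two out of every ten can still count as a match.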

        // Come back and figure out which of these I really need.
        this._newSocketCallback = this._newSocketCallback.bind(this);
        this._setupSocketCallback = this._setupSocketCallback.bind(this);
        this._socketMessageCallback = this._socketMessageCallback.bind(this);
        this._processAudioCallback = this._processAudioCallback.bind(this);
        this._onTranscriptionFromServer = this._onTranscriptionFromServer.bind(this);
        this._resetListening = this._resetListening.bind(this);
        this._stopTranscription = this._stopTranscription.bind(this);

        this.runtime.on('PROJECT_STOP_ALL', this._resetListening);

        // Load in the start and stop listening indicator sounds.
        this._loadUISounds();
    }

    /**
     * Load the UI sounds played when listening starts and stops.
     * @private
     */
    _loadUISounds () {
        const startSoundBuffer = assetData['speech-rec-start.mp3'].buffer;
        this._decodeSound(startSoundBuffer).then(buffer => {
            this._startSoundBuffer = buffer;
        });

        const endSoundBuffer = assetData['speech-rec-end.mp3'].buffer;
        this._decodeSound(endSoundBuffer).then(buffer => {
            this._endSoundBuffer = buffer;
        });
    }

    /**
     * Decode a sound and return a promise with the audio buffer.
     * @param {ArrayBuffer} soundBuffer - a buffer containing the encoded audio.
     * @return {Promise} - a promise which will resolve once the sound has decoded.
     * @private
     */
    _decodeSound (soundBuffer) {
        const context = this.runtime.audioEngine && this.runtime.audioEngine.audioContext;

        if (!context) {
            return Promise.reject(new Error('No Audio Context Detected'));
        }

        // Check for newer promise-based API
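        // (With the promise-based API the success/error callbacks are optional and
        // don't count toward decodeAudioData.length, so it reports 1; the older
        // callback-only API reports 3.)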
        if (context.decodeAudioData.length === 1) {
            return context.decodeAudioData(soundBuffer);
        } else { // eslint-disable-line no-else-return
            // Fall back to callback API
            return new Promise((resolve, reject) =>
                context.decodeAudioData(soundBuffer,
                    buffer => resolve(buffer),
                    error => reject(error)
                )
            );
        }
    }

    /**
     * Play the given sound.
     * @param {AudioBuffer} buffer The audio buffer to play.
     * @returns {Promise} A promise that resolves when the sound is done playing.
     * @private
     */
    _playSound (buffer) {
        if (this.runtime.audioEngine === null) return Promise.resolve();
        const context = this.runtime.audioEngine.audioContext;
        const bufferSource = context.createBufferSource();
        bufferSource.buffer = buffer;
        bufferSource.connect(this.runtime.audioEngine.input);
        bufferSource.start();
        return new Promise(resolve => {
            bufferSource.onended = () => {
                resolve();
            };
        });
    }

    /**
     * Scans all the 'When I hear' hat blocks for each sprite and pulls out the text. The list
     * is sent off to the speech recognition server as hints. This *only* reads the value out of
     * the hat block shadow. If a block is dropped on top of the shadow, it is skipped.
     * @returns {Array} list of strings from the hat blocks in the project.
     * @private
     */
    _scanBlocksForPhraseList () {
        const words = [];
        // For each target, walk through the top level blocks and check whether
        // they are speech hat/when I hear blocks.
        this.runtime.targets.forEach(target => {
            target.blocks._scripts.forEach(id => {
                const b = target.blocks.getBlock(id);
                if (b.opcode === 'speech.whenIHearHat') {
                    // Grab the text from the hat block's shadow.
                    const inputId = b.inputs.PHRASE.block;
                    const inputBlock = target.blocks.getBlock(inputId);
                    // Only grab the value from text blocks. This means we'll
                    // miss some, e.g. values in variables or other reporters.
                    if (inputBlock.opcode === 'text') {
                        const word = target.blocks.getBlock(inputId).fields.TEXT.value;
                        words.push(word);
                    }
                }
            });
        });
        return words;
    }

    /**
     * Resets all things related to listening. Called on the red stop sign button:
     * - suspends audio processing
     * - closes the socket with the speech server
     * - clears out any remaining speech blocks that are waiting.
     * @private
     */
    _resetListening () {
        this._stopListening();
        this._closeWebsocket();
        this._resolveSpeechPromises();
    }

    /**
     * Close the connection to the socket server if it is open.
     * @private
     */
    _closeWebsocket () {
        if (this._socket && this._socket.readyState === this._socket.OPEN) {
            this._socket.close();
        }
    }

    /**
     * Call to suspend getting data from the microphone.
     * @private
     */
    _stopListening () {
        // Note that this can be called before any Listen And Wait block did setup,
        // so check that things exist before disconnecting them.
        if (this._context) {
            this._context.suspend();
        }
        // This is called on green flag to reset things that may never have existed
        // in the first place. Do a bunch of checks.
        if (this._scriptNode) {
            this._scriptNode.disconnect();
        }
        if (this._sourceNode) {
            this._sourceNode.disconnect();
        }
    }

    /**
     * Resolves all the speech promises we've accumulated so far and empties out the list.
     * @private
     */
    _resolveSpeechPromises () {
        for (let i = 0; i < this._speechPromises.length; i++) {
            const resFn = this._speechPromises[i];
            resFn();
        }
        this._speechPromises = [];
    }

    /**
     * Called when we want to stop listening (e.g. when a listen block times out)
     * but we still want to wait a little to see if we get any transcription results
     * back before yielding the block execution.
     * @private
     */
    _stopTranscription () {
        this._stopListening();
        if (this._socket && this._socket.readyState === this._socket.OPEN) {
            this._socket.send('stopTranscription');
        }
        // Give it a couple of seconds to respond before giving up and assuming nothing else will come back.
        this._speechFinalResponseTimeout = setTimeout(this._resetListening, finalResponseTimeoutDurationMs);
    }

    /**
     * Decides whether to keep a given transcription result.
     * @param {number} fuzzyMatchIndex Index of the fuzzy match or -1 if there is no match.
     * @param {object} result The json object representing the transcription result.
     * @param {string} normalizedTranscript The transcription text used for matching (i.e. lowercased, no punctuation).
     * @returns {boolean} true if the result is good enough to be kept.
     * @private
     */
    _shouldKeepResult (fuzzyMatchIndex, result, normalizedTranscript) {
        // The threshold above which we decide transcription results are unlikely to change again.
        // See https://cloud.google.com/speech-to-text/docs/basics#streaming_responses.
        const stabilityThreshold = 0.85;
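        // Stability is the speech api's 0-1 estimate of how likely an interim
        // result is to change; e.g. a result at 0.9 is very unlikely to be revised.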

        // For responsiveness of the When I Hear hat blocks, sometimes we want to keep results that are not
        // yet marked 'isFinal' by the speech api. Here are some signals we use.

        // If the result from the speech api isn't very stable and we only had a fuzzy match, we don't want to use it.
        const shouldKeepFuzzyMatch = fuzzyMatchIndex !== -1 && result.stability > stabilityThreshold;

        // If the result is in the phraseList (i.e. it matches one of the 'When I Hear' blocks), we keep it.
        // This might be aggressive... but so far seems to be a good thing.
        const shouldKeepPhraseListMatch = this._phraseList.includes(normalizedTranscript);

        if (!result.isFinal && !shouldKeepPhraseListMatch && !shouldKeepFuzzyMatch) {
            return false;
        }
        return true;
    }

    /**
     * Normalizes text a bit to facilitate matching: lowercases, removes some punctuation and whitespace.
     * @param {string} text The text to normalize.
     * @returns {string} The normalized text.
     * @private
     */
    _normalizeText (text) {
        text = Cast.toString(text).toLowerCase();
        text = text.replace(/[.?!]/g, '');
        text = text.trim();
        return text;
    }

    /**
     * Call into the diff match patch library to compute whether there is a fuzzy match.
     * @param {string} text The text to search in.
     * @param {string} pattern The pattern to look for in text.
     * @returns {number} The index of the match or -1 if there isn't one.
     * @private
     */
    _computeFuzzyMatch (text, pattern) {
        // Don't bother matching if either is null.
        if (!pattern || !text) {
            return -1;
        }
        let match = -1;
        try {
            // Look for the pattern in the text starting at position 0.
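            // match_main returns the index in `text` where `pattern` best matches,
            // subject to Match_Threshold, or -1 when nothing is close enough.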
            match = this._dmp.match_main(text, pattern, 0);
        } catch (e) {
            // This can happen if the text or pattern gets too long. If so, just substring match.
            return text.indexOf(pattern);
        }
        return match;
    }

    /**
     * Processes the results we get back from the speech server. Decides whether the results
     * are good enough to keep. If they are, resolves the 'Listen and Wait' block promises and cleans up.
     * @param {object} result The transcription result.
     * @private
     */
    _processTranscriptionResult (result) {
        log.info(`Got result: ${JSON.stringify(result)}`);
        const transcriptionResult = this._normalizeText(result.alternatives[0].transcript);

        // Waiting for an exact match is not satisfying. It makes it hard to catch
        // things like homonyms or things that sound similar, e.g. "let us" vs "lettuce". Using the fuzzy
        // matching helps more aggressively match the phrases that are in the "When I hear" hat blocks.
        const phrases = this._phraseList.join(' ');
        const fuzzyMatchIndex = this._computeFuzzyMatch(phrases, transcriptionResult);

        let fuzzyMatchResult = null;
        if (fuzzyMatchIndex !== -1) {
            fuzzyMatchResult = transcriptionResult.substring(fuzzyMatchIndex, fuzzyMatchIndex + phrases.length);
        }

        // If the result isn't good enough yet, return without saving and resolving the promises.
        if (!this._shouldKeepResult(fuzzyMatchIndex, result, transcriptionResult)) {
            return;
        }

        // TODO: Decide whether this is the right thing.
        // This sets the currentUtterance (which is returned by the reporter) to the fuzzy match if we had one.
        // That means it'll often get set to a phrase from one of the 'when I hear' blocks instead of the
        // full phrase that the user said.
        if (fuzzyMatchResult) {
            this._currentUtterance = fuzzyMatchResult;
        } else {
            this._currentUtterance = transcriptionResult;
        }
        log.info(`Keeping result: ${this._currentUtterance}`);
        this._utteranceForEdgeTrigger = transcriptionResult;

        // We're done listening, so resolve all the promises and reset everything so we're ready for next time.
        this._resetListening();

        // We got results, so clear out the timeouts.
        if (this._speechTimeoutId) {
            clearTimeout(this._speechTimeoutId);
            this._speechTimeoutId = null;
        }
        if (this._speechFinalResponseTimeout) {
            clearTimeout(this._speechFinalResponseTimeout);
            this._speechFinalResponseTimeout = null;
        }
    }

    /**
     * Handle a message from the socket. It contains transcription results.
     * @param {MessageEvent} e The message event containing data from the speech server.
     * @private
     */
    _onTranscriptionFromServer (e) {
        let result = null;
        try {
            result = JSON.parse(e.data);
        } catch (ex) {
            log.error(`Problem parsing JSON, continuing: ${ex}`);
            // TODO: Question - should we kill listening and continue?
            return;
        }
        this._processTranscriptionResult(result);
    }

    /**
     * Decide whether the pattern given matches the text. Uses fuzzy matching.
     * @param {string} pattern The pattern to look for. Usually this is the transcription result.
     * @param {string} text The text to look in. Usually this is the set of phrases from the when I hear blocks.
     * @returns {boolean} true if there is a fuzzy match.
     * @private
     */
    _speechMatches (pattern, text) {
        pattern = this._normalizeText(pattern);
        text = this._normalizeText(text);
        const match = this._computeFuzzyMatch(text, pattern);
        return match !== -1;
    }

    /**
     * Kick off the listening process.
     * @private
     */
    _startListening () {
        // If we've already set up the context, we can resume instead of doing all the setup again.
        if (this._context) {
            this._resumeListening();
        } else {
            this._initListening();
        }
        // Force the block to time out if we don't get any results back or the user didn't say anything.
        this._speechTimeoutId = setTimeout(this._stopTranscription, listenAndWaitBlockTimeoutMs);
    }

    /**
     * Resume listening for audio and re-open the socket to send data.
     * @private
     */
    _resumeListening () {
        this._context.resume();
        this._newWebsocket();
    }

    /**
     * Does all the setup to get microphone data and initializes the web socket
     * used to send that data to the speech server.
     * @private
     */
    _initListening () {
        this._initializeMicrophone();
        this._initScriptNode();
        this._newWebsocket();
    }

    /**
     * Initialize the audio context and connect the microphone.
     * @private
     */
    _initializeMicrophone () {
        this._context = new AudioContext();
        this._audioPromise = navigator.mediaDevices.getUserMedia({
            audio: {
                echoCancellation: true,
                channelCount: 1,
                sampleRate: {
                    ideal: 16000
                },
                sampleSize: 16
            }
        });
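        // Browsers treat the ideal 16kHz sample rate as a hint and may ignore it;
        // the context's actual sample rate is what gets sent in the socket setup message.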

        const tempContext = this._context;
        this._audioPromise.then(micStream => {
            const microphone = tempContext.createMediaStreamSource(micStream);
            const analyser = tempContext.createAnalyser();
            microphone.connect(analyser);
        }).catch(e => {
            log.error(`Problem connecting to microphone: ${e}`);
        });
    }

    /**
     * Sets up the script processor node that forwards raw microphone data over the websocket.
     * @private
     */
    _initScriptNode () {
        // Create a node that sends raw bytes across the websocket.
        this._scriptNode = this._context.createScriptProcessor(4096, 1, 1);
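        // 4096-sample buffers, mono in, mono out; at a 48kHz context this fires
        // roughly every 85ms. ScriptProcessorNode is deprecated in favor of
        // AudioWorklet, but it was the widely supported option at the time.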
        this._scriptNode.addEventListener('audioprocess', this._processAudioCallback);
    }

    /**
     * Callback called when it is time to set up the new web socket.
     * @param {Function} resolve - function to call when the web socket opens successfully.
     * @param {Function} reject - function to call if opening the web socket fails.
     * @private
     */
    _newSocketCallback (resolve, reject) {
        this._socket = new WebSocket(serverURL);
        this._socket.addEventListener('open', resolve);
        this._socket.addEventListener('error', reject);
    }

    /**
     * Callback called once we've initially established that the web socket is open and working.
     * Sets up the callback for subsequent messages (i.e. transcription results) and
     * connects to the script node to get data.
     * @private
     */
    _socketMessageCallback () {
        this._socket.addEventListener('message', this._onTranscriptionFromServer);
        this._startByteStream();
    }

    /**
     * Sets up the callback for when the socket and audio are initialized.
     * @private
     */
    _newWebsocket () {
        const websocketPromise = new Promise(this._newSocketCallback);
        Promise.all([this._audioPromise, websocketPromise])
            .then(this._setupSocketCallback)
            .catch(e => {
                log.error(`Problem with setup: ${e}`);
            });
    }

    /**
     * Callback to handle the initial setting up of a socket.
     * Currently we send a setup message (the sample rate and the phrase list) but it might
     * be useful to send more data so we can do quota stuff.
     * @param {Array} values The resolved values from the audio and websocket promises:
     *     the microphone media stream and the websocket 'open' event.
     * @private
     */
    _setupSocketCallback (values) {
        this._micStream = values[0];
        this._socket = values[1].target;

        this._socket.addEventListener('error', e => {
            log.error(`Error from web socket: ${e}`);
        });

        // Send the initial configuration message. When the server acknowledges
        // it, start streaming the audio bytes to the server and listening for
        // transcriptions.
        this._socket.addEventListener('message', this._socketMessageCallback, {once: true});
        this._socket.send(JSON.stringify(
            {
                sampleRate: this._context.sampleRate,
                phrases: this._phraseList
            }
        ));
    }

    /**
     * Do setup so we can start streaming mic data.
     * @private
     */
    _startByteStream () {
        // Hook up the scriptNode to the mic.
        this._sourceNode = this._context.createMediaStreamSource(this._micStream);
        this._sourceNode.connect(this._scriptNode);
        this._scriptNode.connect(this._context.destination);
    }

    /**
     * Called when we have data from the microphone. Takes that data and ships
     * it off to the speech server for transcription.
     * @param {AudioProcessingEvent} e The event with audio data in it.
     * @private
     */
    _processAudioCallback (e) {
        if (this._socket.readyState === WebSocket.CLOSED ||
            this._socket.readyState === WebSocket.CLOSING) {
            log.error(`Not sending data because not in ready state. State: ${this._socket.readyState}`);
            // TODO: Should we stop trying and reset state so it might work next time?
            return;
        }
        // Need the maximum value for 16-bit signed samples, to convert from float.
        const MAX_INT = Math.pow(2, 16 - 1) - 1;
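        // MAX_INT is 2^15 - 1 = 32767; e.g. a float sample of 0.5 scales to
        // 16383 (Int16Array.from truncates the scaled values toward zero).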
        const floatSamples = e.inputBuffer.getChannelData(0);
        // The samples are floats in range [-1, 1]. Convert to 16-bit signed integer.
        this._socket.send(Int16Array.from(floatSamples.map(n => n * MAX_INT)));
    }

    /**
     * The key to load & store a target's speech-related state.
     * @type {string}
     */
    static get STATE_KEY () {
        return 'Scratch.speech';
    }

    /**
     * @returns {object} Metadata for this extension and its blocks.
     */
    getInfo () {
        return {
            id: 'speech',
            name: 'Google Speech',
            menuIconURI: menuIconURI,
            blockIconURI: iconURI,
            blocks: [
                {
                    opcode: 'listenAndWait',
                    text: 'Listen and Wait',
                    blockType: BlockType.COMMAND
                },
                {
                    opcode: 'whenIHearHat',
                    text: 'When I hear [PHRASE]',
                    blockType: BlockType.HAT,
                    arguments: {
                        PHRASE: {
                            type: ArgumentType.STRING,
                            defaultValue: 'cat'
                        }
                    }
                },
                {
                    opcode: 'getSpeech',
                    text: 'speech',
                    blockType: BlockType.REPORTER
                }
            ]
        };
    }

    /**
     * Start the listening process if it isn't already in progress, playing a sound to indicate
     * when it starts and stops.
     * @return {Promise} A promise that will resolve when listening is complete.
     */
    listenAndWait () {
        // TODO: Look into the timing of when to start the sound. There currently seems
        // to be some lag between when the sound starts and when the socket message
        // callback is received. Perhaps we should play the sound after the socket is set up.
        // TODO: Question - Should we only play the sound if listening isn't already in progress?
        return this._playSound(this._startSoundBuffer).then(() => {
            this._phraseList = this._scanBlocksForPhraseList();
            this._utteranceForEdgeTrigger = '';
            const speechPromise = new Promise(resolve => {
                const listeningInProgress = this._speechPromises.length > 0;
                this._speechPromises.push(resolve);
                if (!listeningInProgress) {
                    this._startListening();
                }
            });
            return speechPromise.then(() => this._playSound(this._endSoundBuffer));
        });
    }

    /**
     * An edge-triggered hat block to listen for a specific phrase.
     * @param {object} args - the block arguments.
     * @return {boolean} true if the phrase matches what was transcribed.
     */
    whenIHearHat (args) {
        return this._speechMatches(args.PHRASE, this._utteranceForEdgeTrigger);
    }

    /**
     * Reporter for the last heard phrase/utterance.
     * @return {string} The latest thing we heard from a listen and wait block.
     */
    getSpeech () {
        return this._currentUtterance;
    }
}

module.exports = Scratch3SpeechBlocks;
src/extensions/scratch3_speech/manifest.js (new file, 4 lines)
@@ -0,0 +1,4 @@
module.exports = {
    'speech-rec-start.mp3': require('!buffer-loader!./assets/speech-rec-start.mp3'),
    'speech-rec-end.mp3': require('!buffer-loader!./assets/speech-rec-end.mp3')
};