// scratch-vm/src/extensions/scratch3_text2speech/index.js

const formatMessage = require('format-message');
const nets = require('nets');
const ArgumentType = require('../../extension-support/argument-type');
const BlockType = require('../../extension-support/block-type');
const Cast = require('../../util/cast');
const MathUtil = require('../../util/math-util');
const Clone = require('../../util/clone');
const log = require('../../util/log');
/**
 * Icon svg to be displayed in the blocks category menu, encoded as a data URI.
 * NOTE(review): the value is empty in this copy of the file - presumably the
 * data URI was stripped; confirm against the original asset.
 * @type {string}
 */
// eslint-disable-next-line max-len
const menuIconURI = '';

/**
 * Icon svg to be displayed at the left edge of each extension block, encoded as a data URI.
 * NOTE(review): the value is empty in this copy of the file - presumably the
 * data URI was stripped; confirm against the original asset.
 * @type {string}
 */
// eslint-disable-next-line max-len
const blockIconURI = '';

/**
 * The url of the synthesis server.
 * @type {string}
 */
const SERVER_HOST = 'https://synthesis-service.scratch.mit.edu';

/**
 * How long to wait in ms before timing out requests to synthesis server.
 * @type {number}
 */
const SERVER_TIMEOUT = 10000; // 10 seconds

/**
 * Volume for playback of speech sounds, as a percentage.
 * @type {number}
 */
const SPEECH_VOLUME = 250;

/**
 * An id for one of the voices.
 * @type {string}
 */
const ALTO_ID = 'ALTO';

/**
 * An id for one of the voices.
 * @type {string}
 */
const TENOR_ID = 'TENOR';

/**
 * An id for one of the voices.
 * @type {string}
 */
const SQUEAK_ID = 'SQUEAK';

/**
 * An id for one of the voices.
 * @type {string}
 */
const GIANT_ID = 'GIANT';

/**
 * An id for one of the voices.
 * @type {string}
 */
const KITTEN_ID = 'KITTEN';

/**
 * Language ids. These are the keys of the language table and the values of
 * the "set language" menu; they are not locale codes.
 * @type {string}
 */
const ENGLISH_ID = 'ENGLISH';
const CHINESE_ID = 'CHINESE';
const TURKISH_ID = 'TURKISH';
const JAPANESE_ID = 'JAPANESE';

/**
 * Playback rate for the tenor voice, for cases where we have only a female gender voice.
 * @type {number}
 */
const FEMALE_TENOR_RATE = 0.89; // -2 semitones

/**
 * Playback rate for the giant voice, for cases where we have only a female gender voice.
 * @type {number}
 */
const FEMALE_GIANT_RATE = 0.79; // -4 semitones
/**
 * Class for the text2speech blocks.
 *
 * Languages are identified internally by the language ids that key
 * LANGUAGE_INFO (e.g. 'ENGLISH'). Locale codes coming from the editor, the
 * browser or a previously stored stage value (e.g. 'en', 'ja-Hira', 'en-US')
 * are resolved to a language id before they are stored or used.
 * @constructor
 */
class Scratch3Text2SpeechBlocks {
    /**
     * @param {Runtime} runtime - the runtime instantiating this block package.
     */
    constructor (runtime) {
        /**
         * The runtime instantiating this block package.
         * @type {Runtime}
         */
        this.runtime = runtime;

        /**
         * Map of soundPlayers by sound id.
         * @type {Map<string, SoundPlayer>}
         */
        this._soundPlayers = new Map();

        // Bind the handlers once so the runtime can call them detached.
        this._stopAllSpeech = this._stopAllSpeech.bind(this);
        this._onTargetCreated = this._onTargetCreated.bind(this);
        if (this.runtime) {
            this.runtime.on('PROJECT_STOP_ALL', this._stopAllSpeech);
            this.runtime.on('targetWasCreated', this._onTargetCreated);
        }
    }

    /**
     * An object with info for each voice, keyed by voice id.
     * Rebuilt on every access so the names follow the current editor locale.
     * @type {object.<string, {name: string, gender: string, playbackRate: number}>}
     */
    get VOICE_INFO () {
        return {
            [ALTO_ID]: {
                name: formatMessage({
                    id: 'text2speech.alto',
                    default: 'alto',
                    description: 'Name for a voice with ambiguous gender.'
                }),
                gender: 'female',
                playbackRate: 1
            },
            [TENOR_ID]: {
                name: formatMessage({
                    id: 'text2speech.tenor',
                    default: 'tenor',
                    description: 'Name for a voice with ambiguous gender.'
                }),
                gender: 'male',
                playbackRate: 1
            },
            [SQUEAK_ID]: {
                name: formatMessage({
                    id: 'text2speech.squeak',
                    default: 'squeak',
                    description: 'Name for a funny voice with a high pitch.'
                }),
                gender: 'female',
                playbackRate: 1.19 // +3 semitones
            },
            [GIANT_ID]: {
                name: formatMessage({
                    id: 'text2speech.giant',
                    default: 'giant',
                    description: 'Name for a funny voice with a low pitch.'
                }),
                gender: 'male',
                playbackRate: 0.84 // -3 semitones
            },
            [KITTEN_ID]: {
                name: formatMessage({
                    id: 'text2speech.kitten',
                    default: 'kitten',
                    description: 'A baby cat.'
                }),
                gender: 'female',
                playbackRate: 1.41 // +6 semitones
            }
        };
    }

    /**
     * An object with info for each supported language, keyed by language id.
     * Each entry has a display name, the editor locale codes that map to the
     * language, the Amazon Polly locale to request, and an optional
     * `singleGender` flag for languages that only have a female voice.
     * @type {object.<string, {name: string, locales: Array.<string>, pollyLocale: string, singleGender: ?boolean}>}
     */
    get LANGUAGE_INFO () {
        // TODO: port the remaining languages into this table. Editor locale ->
        // Polly locale (* = single gender voice only):
        //   cy -> cy-GB*, da -> da-DK, nl -> nl-NL, fr -> fr-FR, de -> de-DE,
        //   hi -> en-IN*, is -> is-IS, it -> it-IT, ko -> ko-KR*, no -> nb-NO*,
        //   pl -> pl-PL, pt-br -> pt-BR, pt -> pt-PT, ro -> ro-RO*, ru -> ru-RU,
        //   es -> es-ES, es-419 -> es-US, sv -> sv-SE*
        return {
            [CHINESE_ID]: {
                name: 'Chinese (Mandarin)',
                singleGender: true,
                locales: ['zh-cn', 'zh-tw'],
                pollyLocale: 'cmn-CN'
            },
            [ENGLISH_ID]: {
                name: 'English',
                locales: ['en'],
                pollyLocale: 'en-US'
            },
            [JAPANESE_ID]: {
                name: 'Japanese',
                locales: ['ja', 'ja-Hira'],
                pollyLocale: 'ja-JP'
            },
            [TURKISH_ID]: {
                name: 'Turkish',
                singleGender: true,
                locales: ['tr'],
                pollyLocale: 'tr-TR'
            }
        };
    }

    /**
     * The key to load & store a target's text2speech state.
     * @return {string} The key.
     */
    static get STATE_KEY () {
        return 'Scratch.text2speech';
    }

    /**
     * The default state, to be used when a target has no existing state.
     * @type {Text2SpeechState}
     */
    static get DEFAULT_TEXT2SPEECH_STATE () {
        return {
            voiceId: ALTO_ID
        };
    }

    /**
     * A default language to use for speech synthesis.
     * This is a language id (a key of LANGUAGE_INFO) so that it is always a
     * valid lookup key and a valid "set language" menu value.
     * @type {string}
     */
    get DEFAULT_LANGUAGE () {
        return ENGLISH_ID;
    }

    /**
     * Resolve a language id or a locale code to a language id.
     * A language id is returned as is. A locale code is matched
     * case-insensitively against each language's `locales`, first in full
     * ('ja-Hira') and then by its primary subtag ('en-US' -> 'en').
     * @param {*} languageCode - a language id or locale code.
     * @returns {?string} the matching language id, or null if there is none.
     * @private
     */
    _getLanguageId (languageCode) {
        if (typeof languageCode !== 'string' || languageCode === '') return null;

        const languageInfo = this.LANGUAGE_INFO;
        const languageIds = Object.keys(languageInfo);
        if (languageIds.includes(languageCode)) return languageCode;

        const findByLocale = candidate => languageIds.find(id =>
            languageInfo[id].locales.some(locale => locale.toLowerCase() === candidate)
        );
        const fullLocale = languageCode.toLowerCase();
        const primarySubtag = fullLocale.split(/[-_]/)[0];
        return findByLocale(fullLocale) || findByLocale(primarySubtag) || null;
    }

    /**
     * @param {Target} target - collect state for this target.
     * @returns {Text2SpeechState} the mutable state associated with that target. This will be created if necessary.
     * @private
     */
    _getState (target) {
        let state = target.getCustomState(Scratch3Text2SpeechBlocks.STATE_KEY);
        if (!state) {
            state = Clone.simple(Scratch3Text2SpeechBlocks.DEFAULT_TEXT2SPEECH_STATE);
            target.setCustomState(Scratch3Text2SpeechBlocks.STATE_KEY, state);
        }
        return state;
    }

    /**
     * When a Target is cloned, clone the state.
     * @param {Target} newTarget - the newly created target.
     * @param {Target} [sourceTarget] - the target used as a source for the new clone, if any.
     * @listens Runtime#event:targetWasCreated
     * @private
     */
    _onTargetCreated (newTarget, sourceTarget) {
        if (sourceTarget) {
            const state = sourceTarget.getCustomState(Scratch3Text2SpeechBlocks.STATE_KEY);
            if (state) {
                newTarget.setCustomState(Scratch3Text2SpeechBlocks.STATE_KEY, Clone.simple(state));
            }
        }
    }

    /**
     * @returns {object} metadata for this extension and its blocks.
     */
    getInfo () {
        // Only localize the default input to the "speak" block if we are in a
        // supported language.
        let defaultTextToSpeak = 'hello';
        if (this.isSupportedLanguage(this.getEditorLanguage())) {
            defaultTextToSpeak = formatMessage({
                id: 'text2speech.defaultTextToSpeak',
                default: 'hello',
                description: 'hello: the default text to speak'
            });
        }

        return {
            id: 'text2speech',
            name: 'Text to Speech',
            blockIconURI: blockIconURI,
            menuIconURI: menuIconURI,
            blocks: [
                {
                    opcode: 'speakAndWait',
                    text: formatMessage({
                        id: 'text2speech.speakAndWaitBlock',
                        default: 'speak [WORDS]',
                        description: 'Speak some words.'
                    }),
                    blockType: BlockType.COMMAND,
                    arguments: {
                        WORDS: {
                            type: ArgumentType.STRING,
                            defaultValue: defaultTextToSpeak
                        }
                    }
                },
                {
                    opcode: 'setVoice',
                    text: formatMessage({
                        id: 'text2speech.setVoiceBlock',
                        default: 'set voice to [VOICE]',
                        description: 'Set the voice for speech synthesis.'
                    }),
                    blockType: BlockType.COMMAND,
                    arguments: {
                        VOICE: {
                            type: ArgumentType.STRING,
                            menu: 'voices',
                            defaultValue: ALTO_ID
                        }
                    }
                },
                {
                    opcode: 'setLanguage',
                    text: formatMessage({
                        id: 'text2speech.setLanguageBlock',
                        default: 'set language to [LANGUAGE]',
                        description: 'Set the language for speech synthesis.'
                    }),
                    blockType: BlockType.COMMAND,
                    arguments: {
                        LANGUAGE: {
                            type: ArgumentType.STRING,
                            menu: 'languages',
                            // A language id, so it matches one of the menu values.
                            defaultValue: this.getCurrentLanguage()
                        }
                    }
                }
            ],
            menus: {
                voices: this.getVoiceMenu(),
                languages: this.getLanguageMenu()
            }
        };
    }

    /**
     * Get the language code currently set in the editor, or fall back to the
     * browser locale, or finally to the default language.
     * @return {string} a locale code, or the default language id.
     */
    getEditorLanguage () {
        const editorLocale = formatMessage.setup().locale;
        if (editorLocale) return editorLocale;

        // `navigator` is not defined outside of a browser.
        if (typeof navigator !== 'undefined') {
            const browserLocale = navigator.language || navigator.userLanguage;
            if (browserLocale) return browserLocale;
        }
        return this.DEFAULT_LANGUAGE;
    }

    /**
     * Get the language for speech synthesis.
     * The value stored on the stage is normalized first: if it is unset or
     * unsupported it is replaced using the editor language, and if it is a
     * locale code it is replaced by the matching language id.
     * @returns {string} a language id (a key of LANGUAGE_INFO).
     */
    getCurrentLanguage () {
        const stage = this.runtime.getTargetForStage();
        if (!stage) return this.DEFAULT_LANGUAGE;

        const stored = stage.textToSpeechLanguage;
        if (!Object.keys(this.LANGUAGE_INFO).includes(stored)) {
            this.setCurrentLanguage(
                this.isSupportedLanguage(stored) ? stored : this.getEditorLanguage()
            );
        }
        return stage.textToSpeechLanguage;
    }

    /**
     * Set the language for speech synthesis.
     * It is stored in the stage so it can be saved and loaded with the project.
     * An unsupported code leaves the current language in place; if there is
     * no valid current language either, the default language is stored.
     * @param {string} languageCode a language id or locale code to set.
     */
    setCurrentLanguage (languageCode) {
        const stage = this.runtime.getTargetForStage();
        if (!stage) return;

        const requestedId = this._getLanguageId(languageCode);
        const currentId = this._getLanguageId(stage.textToSpeechLanguage);
        stage.textToSpeechLanguage = requestedId || currentId || this.DEFAULT_LANGUAGE;
    }

    /**
     * Check if a language is supported by the speech synthesis service.
     * @param {string} languageCode the language id or locale code to check.
     * @returns {boolean} true if the code resolves to a supported language.
     */
    isSupportedLanguage (languageCode) {
        return this._getLanguageId(languageCode) !== null;
    }

    /**
     * Get the menu of voices for the "set voice" block.
     * @return {array} the text and value for each menu item.
     */
    getVoiceMenu () {
        const voiceInfo = this.VOICE_INFO;
        return Object.keys(voiceInfo).map(voiceId => ({
            text: voiceInfo[voiceId].name,
            value: voiceId
        }));
    }

    /**
     * Get the menu of languages for the "set language" block.
     * @return {array} the text and value for each menu item.
     */
    getLanguageMenu () {
        const languageInfo = this.LANGUAGE_INFO;
        return Object.keys(languageInfo).map(languageId => ({
            text: languageInfo[languageId].name,
            value: languageId
        }));
    }

    /**
     * Set the voice for speech synthesis for this sprite.
     * @param {object} args Block arguments
     * @param {object} util Utility object provided by the runtime.
     */
    setVoice (args, util) {
        const state = this._getState(util.target);
        const voiceIds = Object.keys(this.VOICE_INFO);
        let voice = args.VOICE;

        // If the arg is a dropped number, treat it as a voice index
        let voiceNum = parseInt(voice, 10);
        if (!isNaN(voiceNum)) {
            voiceNum -= 1; // Treat dropped args as one-indexed
            voiceNum = MathUtil.wrapClamp(voiceNum, 0, voiceIds.length - 1);
            voice = voiceIds[voiceNum];
        }

        // Only set the voice if the arg is a valid voice id.
        if (voiceIds.includes(voice)) {
            state.voiceId = voice;
        }
    }

    /**
     * Set the language for speech synthesis.
     * @param {object} args Block arguments
     */
    setLanguage (args) {
        this.setCurrentLanguage(args.LANGUAGE);
    }

    /**
     * Stop all currently playing speech sounds.
     */
    _stopAllSpeech () {
        this._soundPlayers.forEach(player => {
            player.stop();
        });
    }

    /**
     * Convert the provided text into a sound file and then play the file.
     * Failures (bad text, request error, non-200 response, decode error) are
     * logged and resolve the promise so the calling script is never stuck.
     * @param {object} args Block arguments
     * @param {object} util Utility object provided by the runtime.
     * @return {Promise} A promise that resolves after playing the sound
     */
    speakAndWait (args, util) {
        // Cast input to string
        let words = Cast.toString(args.WORDS);

        // getCurrentLanguage always returns a key of LANGUAGE_INFO.
        const languageInfo = this.LANGUAGE_INFO[this.getCurrentLanguage()];
        let locale = languageInfo.pollyLocale;

        const state = this._getState(util.target);
        const voiceInfo = this.VOICE_INFO[state.voiceId];
        let gender = voiceInfo.gender;
        let playbackRate = voiceInfo.playbackRate;

        // Languages with a single voice only have a female one; approximate
        // the male voices by pitching that voice down.
        if (languageInfo.singleGender) {
            gender = 'female';
            if (state.voiceId === TENOR_ID) {
                playbackRate = FEMALE_TENOR_RATE;
            }
            if (state.voiceId === GIANT_ID) {
                playbackRate = FEMALE_GIANT_RATE;
            }
        }

        // The kitten always meows, in English.
        if (state.voiceId === KITTEN_ID) {
            words = words.replace(/\S+/g, 'meow');
            locale = 'en-US';
        }

        // Limit the text length. Cutting between the two halves of a
        // surrogate pair would leave a lone high surrogate, which
        // encodeURIComponent rejects, so drop it.
        let text = words.substring(0, 128);
        const lastCodeUnit = text.charCodeAt(text.length - 1);
        if (lastCodeUnit >= 0xD800 && lastCodeUnit <= 0xDBFF) {
            text = text.slice(0, -1);
        }

        let encodedText;
        try {
            encodedText = encodeURIComponent(text);
        } catch (e) {
            // The text contains a lone surrogate and cannot be sent.
            log.warn(e);
            return Promise.resolve();
        }

        // Build up URL
        let path = `${SERVER_HOST}/synth`;
        path += `?locale=${locale}`;
        path += `&gender=${gender}`;
        path += `&text=${encodedText}`;

        // Perform HTTP request to get audio file
        return new Promise(resolve => {
            nets({
                url: path,
                timeout: SERVER_TIMEOUT
            }, (err, res, body) => {
                if (err) {
                    log.warn(err);
                    return resolve();
                }

                if (res.statusCode !== 200) {
                    log.warn(res.statusCode);
                    return resolve();
                }

                // Play the sound
                const sound = {
                    data: {
                        buffer: body.buffer
                    }
                };
                this.runtime.audioEngine.decodeSoundPlayer(sound).then(soundPlayer => {
                    this._soundPlayers.set(soundPlayer.id, soundPlayer);

                    soundPlayer.setPlaybackRate(playbackRate);

                    // Increase the volume
                    const engine = this.runtime.audioEngine;
                    const chain = engine.createEffectChain();
                    chain.set('volume', SPEECH_VOLUME);
                    soundPlayer.connect(chain);

                    // Listen before playing so the stop event cannot be missed.
                    soundPlayer.on('stop', () => {
                        this._soundPlayers.delete(soundPlayer.id);
                        resolve();
                    });
                    soundPlayer.play();
                }).catch(decodeErr => {
                    // Without this the promise would never settle and the
                    // script would wait forever.
                    log.warn(decodeErr);
                    resolve();
                });
            });
        });
    }
}
// The extension class is the module's only export.
module.exports = Scratch3Text2SpeechBlocks;