feat: add IBM Speech to Text

pull/73/head
dessant 6 years ago
parent f51c3b8d2a
commit 58f9106f41

@ -29,6 +29,16 @@
"description": "Value of the option."
},
"optionValue_speechService_ibmSpeechApi": {
"message": "IBM Speech to Text",
"description": "Value of the option."
},
"inputLabel_url": {
"message": "URL",
"description": "Placeholder of the input."
},
"inputLabel_apiKey": {
"message": "API key",
"description": "Placeholder of the input."
@ -70,6 +80,12 @@
"description": "Error message."
},
"error_missingApiUrl": {
"message":
"API URL missing. Visit the options page to configure the service.",
"description": "Error message."
},
"error_missingApiKey": {
"message":
"API key missing. Visit the options page to configure the service.",

@ -17,6 +17,18 @@
:label="getText('inputLabel_apiKey')">
</v-textfield>
</div>
<div class="option text-field"
v-if="options.speechService === 'ibmSpeechApi'">
<v-textfield v-model="options.ibmSpeechApiUrl"
:label="getText('inputLabel_url')">
</v-textfield>
</div>
<div class="option text-field"
v-if="options.speechService === 'ibmSpeechApi'">
<v-textfield v-model="options.ibmSpeechApiKey"
:label="getText('inputLabel_apiKey')">
</v-textfield>
</div>
</div>
</div>
</div>
@ -42,12 +54,18 @@ export default {
dataLoaded: false,
selectOptions: getOptionLabels({
speechService: ['googleSpeechApiDemo', 'googleSpeechApi']
speechService: [
'googleSpeechApiDemo',
'googleSpeechApi',
'ibmSpeechApi'
]
}),
options: {
speechService: '',
googleSpeechApiKey: ''
googleSpeechApiKey: '',
ibmSpeechApiUrl: '',
ibmSpeechApiKey: ''
}
};
},
@ -62,6 +80,13 @@ export default {
for (const option of Object.keys(this.options)) {
this.options[option] = options[option];
this.$watch(`options.${option}`, async function(value) {
if (
['googleSpeechApiKey', 'ibmSpeechApiUrl', 'ibmSpeechApiKey'].includes(
option
)
) {
value = value.trim();
}
await storage.set({[option]: value}, 'sync');
});
}

@ -3,7 +3,10 @@ import audioBufferToWav from 'audiobuffer-to-wav';
import storage from 'storage/storage';
import {getText, waitForElement, arrayBufferToBase64} from 'utils/common';
import {captchaGoogleSpeechApiLangCodes} from 'utils/data';
import {
captchaGoogleSpeechApiLangCodes,
captchaIbmSpeechApiLangCodes
} from 'utils/data';
let solverRunning = false;
@ -61,9 +64,7 @@ async function prepareAudio(audio) {
// discard 1 second noise from beginning/end
source.start(0, 1, data.duration - 2);
return arrayBufferToBase64(
audioBufferToWav(await offlineCtx.startRendering())
);
return audioBufferToWav(await offlineCtx.startRendering());
}
function dispatchEnter(node) {
@ -116,6 +117,7 @@ async function solve() {
audioUrl = result.audioUrl;
}
const lang = document.documentElement.lang;
const audioRsp = await fetch(audioUrl, {referrer: ''});
const audioContent = await prepareAudio(await audioRsp.arrayBuffer());
@ -143,13 +145,11 @@ async function solve() {
const data = {
audio: {
content: audioContent
content: arrayBufferToBase64(audioContent)
},
config: {
encoding: 'LINEAR16',
languageCode:
captchaGoogleSpeechApiLangCodes[document.documentElement.lang] ||
'en-US',
languageCode: captchaGoogleSpeechApiLangCodes[lang] || 'en-US',
model: 'default',
sampleRateHertz: 16000
}
@ -161,9 +161,54 @@ async function solve() {
body: JSON.stringify(data)
});
if (rsp.status !== 200) {
throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`);
}
const results = (await rsp.json()).results;
if (results) {
solution = results[0].alternatives[0].transcript;
solution = results[0].alternatives[0].transcript.trim();
}
}
if (speechService === 'ibmSpeechApi') {
const {
ibmSpeechApiUrl: apiUrl,
ibmSpeechApiKey: apiKey
} = await storage.get(['ibmSpeechApiUrl', 'ibmSpeechApiKey'], 'sync');
if (!apiUrl) {
browser.runtime.sendMessage({
id: 'notification',
messageId: 'error_missingApiUrl'
});
return;
}
if (!apiKey) {
browser.runtime.sendMessage({
id: 'notification',
messageId: 'error_missingApiKey'
});
return;
}
const model = captchaIbmSpeechApiLangCodes[lang] || 'en-US_BroadbandModel';
const rsp = await fetch(`${apiUrl}?model=${model}&profanity_filter=false`, {
referrer: '',
mode: 'cors',
method: 'POST',
headers: {
Authorization: 'Basic ' + window.btoa('apiKey:' + apiKey)
},
body: new Blob([audioContent], {type: 'audio/wav'})
});
if (rsp.status !== 200) {
throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`);
}
const results = (await rsp.json()).results;
if (results && results.length) {
solution = results[0].alternatives[0].transcript.trim();
}
}

@ -0,0 +1,28 @@
import browser from 'webextension-polyfill';
const message = 'Add IBM Speech to Text';
const revision = 'ONiJBs00o';
const downRevision = 'UoT3kGyBH';
const storage = browser.storage.local;
/**
 * Applies this revision: writes empty defaults for the two IBM Speech to Text
 * options and records `revision` as the current storage version, all in a
 * single `storage.set` call.
 *
 * @returns {Promise} the promise returned by `storage.set`.
 */
async function upgrade() {
  return storage.set({
    ibmSpeechApiUrl: '',
    ibmSpeechApiKey: '',
    storageVersion: revision
  });
}
/**
 * Reverts this revision: removes the two IBM Speech to Text options, then
 * records `downRevision` as the current storage version.
 *
 * The removal is awaited before the version is written, so the version only
 * changes once the keys are gone.
 *
 * @returns {Promise} the promise returned by `storage.set`.
 */
async function downgrade() {
  const obsoleteKeys = ['ibmSpeechApiUrl', 'ibmSpeechApiKey'];
  await storage.remove(obsoleteKeys);

  return storage.set({storageVersion: downRevision});
}
export {message, revision, upgrade, downgrade};

@ -1 +1 @@
{"versions": ["UoT3kGyBH"]}
{"versions": ["UoT3kGyBH", "ONiJBs00o"]}

@ -0,0 +1,28 @@
import browser from 'webextension-polyfill';
const message = 'Add IBM Speech to Text';
const revision = 'ONiJBs00o';
const downRevision = 'UoT3kGyBH';
const storage = browser.storage.sync;
/**
 * Upgrade step for this revision.
 *
 * Seeds the IBM Speech to Text URL and API key options with empty strings and
 * stamps the storage area with `revision`, using one `storage.set` call.
 *
 * @returns {Promise} whatever `storage.set` resolves to.
 */
async function upgrade() {
  const defaults = {ibmSpeechApiUrl: '', ibmSpeechApiKey: ''};

  return storage.set({...defaults, storageVersion: revision});
}
}
/**
 * Downgrade step for this revision.
 *
 * Deletes the IBM Speech to Text URL and API key options and, once that has
 * completed, stamps the storage area with `downRevision`.
 *
 * @returns {Promise} whatever `storage.set` resolves to.
 */
async function downgrade() {
  await storage.remove(['ibmSpeechApiUrl', 'ibmSpeechApiKey']);

  const versionStamp = {storageVersion: downRevision};
  return storage.set(versionStamp);
}
export {message, revision, upgrade, downgrade};

@ -1 +1 @@
{"versions": ["UoT3kGyBH"]}
{"versions": ["UoT3kGyBH", "ONiJBs00o"]}

@ -1,6 +1,11 @@
import browser from 'webextension-polyfill';
const optionKeys = ['speechService', 'googleSpeechApiKey'];
const optionKeys = [
'speechService',
'googleSpeechApiKey',
'ibmSpeechApiUrl',
'ibmSpeechApiKey'
];
// https://developers.google.com/recaptcha/docs/language
// https://cloud.google.com/speech-to-text/docs/languages
@ -77,4 +82,29 @@ const captchaGoogleSpeechApiLangCodes = {
zu: 'zu-ZA'
};
export {optionKeys, captchaGoogleSpeechApiLangCodes};
// Maps a language code to the IBM Speech to Text model name to request.
// Several regional codes reuse the model of a related locale.
// https://cloud.ibm.com/apidocs/speech-to-text#recognize-audio
const captchaIbmSpeechApiLangCodes = {
  // Arabic
  'ar': 'ar-AR_BroadbandModel',

  // Chinese: both codes use the zh-CN model
  'zh-CN': 'zh-CN_BroadbandModel',
  'zh-TW': 'zh-CN_BroadbandModel',

  // English
  'en-GB': 'en-GB_BroadbandModel',
  'en': 'en-US_BroadbandModel',

  // French: fr-CA uses the fr-FR model
  'fr': 'fr-FR_BroadbandModel',
  'fr-CA': 'fr-FR_BroadbandModel',

  // German: de-AT and de-CH use the de-DE model
  'de': 'de-DE_BroadbandModel',
  'de-AT': 'de-DE_BroadbandModel',
  'de-CH': 'de-DE_BroadbandModel',

  // Japanese and Korean
  'ja': 'ja-JP_BroadbandModel',
  'ko': 'ko-KR_BroadbandModel',

  // Portuguese: every variant uses the pt-BR model
  'pt': 'pt-BR_BroadbandModel',
  'pt-BR': 'pt-BR_BroadbandModel',
  'pt-PT': 'pt-BR_BroadbandModel',

  // Spanish: es-419 uses the es-ES model
  'es': 'es-ES_BroadbandModel',
  'es-419': 'es-ES_BroadbandModel'
};
export {
optionKeys,
captchaGoogleSpeechApiLangCodes,
captchaIbmSpeechApiLangCodes
};

Loading…
Cancel
Save