feat: add IBM Speech to Text

6 years ago · 58f9106f41
parent f51c3b8d2a
commit 58f9106f41
8 changed files with 187 additions and 15 deletions
--- a/src/_locales/en/messages.json
+++ b/src/_locales/en/messages.json
@ -29,6 +29,16 @@
    "description": "Value of the option."
  },

+  "optionValue_speechService_ibmSpeechApi": {
+    "message": "IBM Speech to Text",
+    "description": "Value of the option."
+  },
+
+  "inputLabel_url": {
+    "message": "URL",
+    "description": "Placeholder of the input."
+  },
+
  "inputLabel_apiKey": {
    "message": "API key",
    "description": "Placeholder of the input."
@ -70,6 +80,12 @@
    "description": "Error message."
  },

+  "error_missingApiUrl": {
+    "message":
+      "API URL missing. Visit the options page to configure the service.",
+    "description": "Error message."
+  },
+
  "error_missingApiKey": {
    "message":
      "API key missing. Visit the options page to configure the service.",
--- a/src/options/App.vue
+++ b/src/options/App.vue
@ -17,6 +17,18 @@
            :label="getText('inputLabel_apiKey')">
        </v-textfield>
      </div>
+      <div class="option text-field"
+          v-if="options.speechService === 'ibmSpeechApi'">
+        <v-textfield v-model="options.ibmSpeechApiUrl"
+            :label="getText('inputLabel_url')">
+        </v-textfield>
+      </div>
+      <div class="option text-field"
+          v-if="options.speechService === 'ibmSpeechApi'">
+        <v-textfield v-model="options.ibmSpeechApiKey"
+            :label="getText('inputLabel_apiKey')">
+        </v-textfield>
+      </div>
    </div>
  </div>
 </div>
@ -42,12 +54,18 @@ export default {
      dataLoaded: false,

      selectOptions: getOptionLabels({
-        speechService: ['googleSpeechApiDemo', 'googleSpeechApi']
+        speechService: [
+          'googleSpeechApiDemo',
+          'googleSpeechApi',
+          'ibmSpeechApi'
+        ]
      }),

      options: {
        speechService: '',
-        googleSpeechApiKey: ''
+        googleSpeechApiKey: '',
+        ibmSpeechApiUrl: '',
+        ibmSpeechApiKey: ''
      }
    };
  },
@ -62,6 +80,13 @@ export default {
    for (const option of Object.keys(this.options)) {
      this.options[option] = options[option];
      this.$watch(`options.${option}`, async function(value) {
+        if (
+          ['googleSpeechApiKey', 'ibmSpeechApiUrl', 'ibmSpeechApiKey'].includes(
+            option
+          )
+        ) {
+          value = value.trim();
+        }
        await storage.set({[option]: value}, 'sync');
      });
    }
--- a/src/solve/main.js
+++ b/src/solve/main.js
@ -3,7 +3,10 @@ import audioBufferToWav from 'audiobuffer-to-wav';

 import storage from 'storage/storage';
 import {getText, waitForElement, arrayBufferToBase64} from 'utils/common';
-import {captchaGoogleSpeechApiLangCodes} from 'utils/data';
+import {
+  captchaGoogleSpeechApiLangCodes,
+  captchaIbmSpeechApiLangCodes
+} from 'utils/data';

 let solverRunning = false;

@ -61,9 +64,7 @@ async function prepareAudio(audio) {
  // discard 1 second noise from beginning/end
  source.start(0, 1, data.duration - 2);

-  return arrayBufferToBase64(
-    audioBufferToWav(await offlineCtx.startRendering())
-  );
+  return audioBufferToWav(await offlineCtx.startRendering());
 }

 function dispatchEnter(node) {
@ -116,6 +117,7 @@ async function solve() {
    audioUrl = result.audioUrl;
  }

+  const lang = document.documentElement.lang;
  const audioRsp = await fetch(audioUrl, {referrer: ''});
  const audioContent = await prepareAudio(await audioRsp.arrayBuffer());

@ -143,13 +145,11 @@ async function solve() {

    const data = {
      audio: {
-        content: audioContent
+        content: arrayBufferToBase64(audioContent)
      },
      config: {
        encoding: 'LINEAR16',
-        languageCode:
-          captchaGoogleSpeechApiLangCodes[document.documentElement.lang] ||
-          'en-US',
+        languageCode: captchaGoogleSpeechApiLangCodes[lang] || 'en-US',
        model: 'default',
        sampleRateHertz: 16000
      }
@ -161,9 +161,54 @@ async function solve() {
      body: JSON.stringify(data)
    });

+    if (rsp.status !== 200) {
+      throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`);
+    }
+
    const results = (await rsp.json()).results;
    if (results) {
-      solution = results[0].alternatives[0].transcript;
+      solution = results[0].alternatives[0].transcript.trim();
+    }
+  }
+
+  if (speechService === 'ibmSpeechApi') {
+    const {
+      ibmSpeechApiUrl: apiUrl,
+      ibmSpeechApiKey: apiKey
+    } = await storage.get(['ibmSpeechApiUrl', 'ibmSpeechApiKey'], 'sync');
+    if (!apiUrl) {
+      browser.runtime.sendMessage({
+        id: 'notification',
+        messageId: 'error_missingApiUrl'
+      });
+      return;
+    }
+    if (!apiKey) {
+      browser.runtime.sendMessage({
+        id: 'notification',
+        messageId: 'error_missingApiKey'
+      });
+      return;
+    }
+    const model = captchaIbmSpeechApiLangCodes[lang] || 'en-US_BroadbandModel';
+
+    const rsp = await fetch(`${apiUrl}?model=${model}&profanity_filter=false`, {
+      referrer: '',
+      mode: 'cors',
+      method: 'POST',
+      headers: {
+        Authorization: 'Basic ' + window.btoa('apiKey:' + apiKey)
+      },
+      body: new Blob([audioContent], {type: 'audio/wav'})
+    });
+
+    if (rsp.status !== 200) {
+      throw new Error(`API response: ${rsp.status}, ${await rsp.text()}`);
+    }
+
+    const results = (await rsp.json()).results;
+    if (results && results.length) {
+      solution = results[0].alternatives[0].transcript.trim();
    }
  }

--- a/src/storage/versions/local/ONiJBs00o.js
+++ b/src/storage/versions/local/ONiJBs00o.js
@ -0,0 +1,28 @@
+import browser from 'webextension-polyfill';
+
+const message = 'Add IBM Speech to Text';
+
+const revision = 'ONiJBs00o';
+const downRevision = 'UoT3kGyBH';
+
+const storage = browser.storage.local;
+
+async function upgrade() { // Migration step: adds the two IBM Speech to Text options and stamps this revision.
+  const changes = {
+    ibmSpeechApiUrl: '', // both new options start out as empty strings
+    ibmSpeechApiKey: ''
+  };
+
+  changes.storageVersion = revision; // written together with the new keys in a single set() call
+  return storage.set(changes); // `storage` is browser.storage.local in this module
+}
+
+async function downgrade() { // Reverse of upgrade(): removes the IBM options and restores the previous revision id.
+  const changes = {};
+  await storage.remove(['ibmSpeechApiUrl', 'ibmSpeechApiKey']); // keys are removed before the version marker is rewritten
+
+  changes.storageVersion = downRevision; // downRevision is the revision this migration was built on
+  return storage.set(changes); // `storage` is browser.storage.local in this module
+}
+
+export {message, revision, upgrade, downgrade};
--- a/src/storage/versions/local/versions.json
+++ b/src/storage/versions/local/versions.json
@ -1 +1 @@
-{"versions": ["UoT3kGyBH"]}
+{"versions": ["UoT3kGyBH", "ONiJBs00o"]}
--- a/src/storage/versions/sync/ONiJBs00o.js
+++ b/src/storage/versions/sync/ONiJBs00o.js
@ -0,0 +1,28 @@
+import browser from 'webextension-polyfill';
+
+const message = 'Add IBM Speech to Text';
+
+const revision = 'ONiJBs00o';
+const downRevision = 'UoT3kGyBH';
+
+const storage = browser.storage.sync;
+
+async function upgrade() { // Migration step: adds the two IBM Speech to Text options and stamps this revision.
+  const changes = {
+    ibmSpeechApiUrl: '', // both new options start out as empty strings
+    ibmSpeechApiKey: ''
+  };
+
+  changes.storageVersion = revision; // written together with the new keys in a single set() call
+  return storage.set(changes); // `storage` is browser.storage.sync in this module
+}
+
+async function downgrade() { // Reverse of upgrade(): removes the IBM options and restores the previous revision id.
+  const changes = {};
+  await storage.remove(['ibmSpeechApiUrl', 'ibmSpeechApiKey']); // keys are removed before the version marker is rewritten
+
+  changes.storageVersion = downRevision; // downRevision is the revision this migration was built on
+  return storage.set(changes); // `storage` is browser.storage.sync in this module
+}
+
+export {message, revision, upgrade, downgrade};
--- a/src/storage/versions/sync/versions.json
+++ b/src/storage/versions/sync/versions.json
@ -1 +1 @@
-{"versions": ["UoT3kGyBH"]}
+{"versions": ["UoT3kGyBH", "ONiJBs00o"]}
--- a/src/utils/data.js
+++ b/src/utils/data.js
@ -1,6 +1,11 @@
 import browser from 'webextension-polyfill';

-const optionKeys = ['speechService', 'googleSpeechApiKey'];
+const optionKeys = [
+  'speechService',
+  'googleSpeechApiKey',
+  'ibmSpeechApiUrl',
+  'ibmSpeechApiKey'
+];

 // https://developers.google.com/recaptcha/docs/language
 // https://cloud.google.com/speech-to-text/docs/languages
@ -77,4 +82,29 @@ const captchaGoogleSpeechApiLangCodes = {
  zu: 'zu-ZA'
 };

-export {optionKeys, captchaGoogleSpeechApiLangCodes};
+// https://cloud.ibm.com/apidocs/speech-to-text#recognize-audio
+const captchaIbmSpeechApiLangCodes = { // language code -> IBM Speech to Text model name
+  ar: 'ar-AR_BroadbandModel',
+  'zh-CN': 'zh-CN_BroadbandModel',
+  'zh-TW': 'zh-CN_BroadbandModel', // mapped to the zh-CN model
+  'en-GB': 'en-GB_BroadbandModel',
+  en: 'en-US_BroadbandModel',
+  fr: 'fr-FR_BroadbandModel',
+  'fr-CA': 'fr-FR_BroadbandModel', // mapped to the fr-FR model
+  de: 'de-DE_BroadbandModel',
+  'de-AT': 'de-DE_BroadbandModel', // mapped to the de-DE model
+  'de-CH': 'de-DE_BroadbandModel', // mapped to the de-DE model
+  ja: 'ja-JP_BroadbandModel',
+  ko: 'ko-KR_BroadbandModel',
+  pt: 'pt-BR_BroadbandModel',
+  'pt-BR': 'pt-BR_BroadbandModel',
+  'pt-PT': 'pt-BR_BroadbandModel', // mapped to the pt-BR model
+  es: 'es-ES_BroadbandModel',
+  'es-419': 'es-ES_BroadbandModel' // mapped to the es-ES model
+};
+
+export {
+  optionKeys,
+  captchaGoogleSpeechApiLangCodes,
+  captchaIbmSpeechApiLangCodes
+};