Using patched espeak-ng

1 year ago · cbed19e1b4
parent 5b64824eea
commit cbed19e1b4
10 changed files with 50 additions and 23 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,6 +5,7 @@ tmp/

 *.py[cod]
 *.egg
+*.egg-info/
 build
 htmlcov

--- a/13
+++ b/13
@ -14,8 +14,8 @@ ENV DEBIAN_FRONTEND=noninteractive
 WORKDIR /build

 # Build minimal version of espeak-ng
-ADD lib/espeak-ng-1.51.tar.gz ./
-RUN cd espeak-ng-1.51 && \
+ADD lib/espeak-ng-1.52-patched.tar.gz ./
+RUN cd espeak-ng && \
    ./autogen.sh && \
    ./configure \
        --without-pcaudiolib \
@ -30,9 +30,10 @@ RUN cd espeak-ng-1.51 && \
    make install

 # Copy onnxruntime library
-COPY lib/ ./lib/
-RUN mkdir -p /usr/local/include/onnxruntime && \
-    tar -C /usr/local/include/onnxruntime \
+COPY lib/onnxruntime-linux-*.tgz ./lib/
+RUN export ONNX_DIR="./lib/Linux-$(uname -m)" && \
+    mkdir -p "${ONNX_DIR}" && \
+    tar -C "${ONNX_DIR}" \
        --strip-components 1 \
        -xvf "lib/onnxruntime-linux-${TARGETARCH}${TARGETVARIANT}.tgz"

@ -49,7 +50,7 @@ WORKDIR /dist
 RUN mkdir -p piper && \
    cp -d /usr/lib64/libespeak-ng.so* ./piper/ && \
    cp -dR /usr/share/espeak-ng-data ./piper/ && \
-    cp -d /usr/local/include/onnxruntime/lib/libonnxruntime.so.* ./piper/ && \
+    find /build/lib/ -name 'libonnxruntime.so.*' -exec cp -d {} ./piper/ \; && \
    cp /build/build/piper ./piper/ && \
    tar -czf "piper_${TARGETARCH}${TARGETVARIANT}.tar.gz" piper/

--- a/Dockerfile.dockerignore
+++ b/Dockerfile.dockerignore
@ -1,4 +1,5 @@
 *
 !Makefile
 !src/cpp/
-!lib/
+!lib/onnxruntime*.tgz
+!lib/espeak-ng*.tar.gz
--- a/README.md
+++ b/README.md
@ -39,7 +39,9 @@ Download a release:
 * [amd64](https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_amd64.tar.gz) (desktop Linux)
 * [arm64](https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_arm64.tar.gz) (Raspberry Pi 4)

-If you want to build from source, see the [Makefile](Makefile) and [C++ source](src/cpp). Last tested with [onnxruntime](https://github.com/microsoft/onnxruntime) 1.13.1.
+If you want to build from source, see the [Makefile](Makefile) and [C++ source](src/cpp). Piper depends on a patched `espeak-ng` in [lib](lib).
+
+Last tested with [onnxruntime](https://github.com/microsoft/onnxruntime) 1.14.1.


 ## Usage
--- a/lib/.gitignore
+++ b/lib/.gitignore
@ -0,0 +1,2 @@
+espeak-ng/
+Linux-*/
--- a/lib/espeak-ng-1.52-patched.tar.gz
+++ b/lib/espeak-ng-1.52-patched.tar.gz
--- a/src/cpp/config.hpp
+++ b/src/cpp/config.hpp
@ -32,8 +32,10 @@ struct eSpeakConfig {
  // Characters that eSpeak uses to break apart paragraphs/sentences
  set<Phoneme> clauseBreakers{U'.', U'?', U'!', U',', U';', U':'};

-  // Characters that piper will use to split utterances
-  set<Phoneme> sentenceBreakers{U'.', U'?', U'!'};
+  Phoneme fullStop = U'.';
+  Phoneme comma = U',';
+  Phoneme question = U'?';
+  Phoneme exclamation = U'!';
 };

 struct PhonemizeConfig {
--- a/src/cpp/main.cpp
+++ b/src/cpp/main.cpp
@ -53,7 +53,8 @@ int main(int argc, char *argv[]) {
  RunConfig runConfig;
  parseArgs(argc, argv, runConfig);

-  auto exePath = filesystem::path(argv[0]);
+  // NOTE: This won't work for Windows (need GetModuleFileName)
+  auto exePath = filesystem::canonical("/proc/self/exe");
  piper::initialize(exePath.parent_path());

  piper::Voice voice;
--- a/src/cpp/phonemize.hpp
+++ b/src/cpp/phonemize.hpp
@ -15,6 +15,13 @@
 #include "config.hpp"
 #include "utf8.h"

+#define CLAUSE_INTONATION_FULL_STOP   0x00000000
+#define CLAUSE_INTONATION_COMMA       0x00001000
+#define CLAUSE_INTONATION_QUESTION    0x00002000
+#define CLAUSE_INTONATION_EXCLAMATION 0x00003000
+
+#define CLAUSE_TYPE_SENTENCE          0x00080000
+
 using namespace std;

 namespace piper {
@ -54,13 +61,15 @@ void phonemize(string text, PhonemizeConfig &phonemizeConfig,

  vector<Phoneme> *sentencePhonemes = nullptr;
  const char *inputTextPointer = textCopy.c_str();
-  size_t clauseBreakerIndex = 0;
+  int terminator = 0;

  while (inputTextPointer != NULL) {
+    // Modified espeak-ng API to get access to clause terminator
    string clausePhonemes(
-        espeak_TextToPhonemes((const void **)&inputTextPointer,
+        espeak_TextToPhonemes2((const void **)&inputTextPointer,
                              /*textmode*/ espeakCHARS_AUTO,
-                              /*phonememode = IPA*/ 0x02));
+                              /*phonememode = IPA*/ 0x02,
+                               &terminator));

    utf8::iterator phonemeIter(clausePhonemes.begin(), clausePhonemes.begin(),
                               clausePhonemes.end());
@ -74,17 +83,25 @@ void phonemize(string text, PhonemizeConfig &phonemizeConfig,
    }

    sentencePhonemes->insert(sentencePhonemes->end(), phonemeIter, phonemeEnd);
-    if (clauseBreakerIndex < textClauseBreakers.size()) {
-      auto clauseBreaker = textClauseBreakers[clauseBreakerIndex];
-      sentencePhonemes->push_back(clauseBreaker);
-      if (phonemizeConfig.eSpeak->sentenceBreakers.contains(clauseBreaker)) {
+
+    // Add appropriate punctuation depending on terminator type
+    int intonation = terminator & 0x0000F000;
+    if (intonation == CLAUSE_INTONATION_FULL_STOP) {
+      sentencePhonemes->push_back(phonemizeConfig.eSpeak->fullStop);
+    } else if (intonation == CLAUSE_INTONATION_COMMA) {
+      sentencePhonemes->push_back(phonemizeConfig.eSpeak->comma);
+    } else if (intonation == CLAUSE_INTONATION_QUESTION) {
+      sentencePhonemes->push_back(phonemizeConfig.eSpeak->question);
+    } else if (intonation == CLAUSE_INTONATION_EXCLAMATION) {
+      sentencePhonemes->push_back(phonemizeConfig.eSpeak->exclamation);
+    }
+
+    if ((terminator & CLAUSE_TYPE_SENTENCE) == CLAUSE_TYPE_SENTENCE) {
        // End of sentence
        sentencePhonemes = nullptr;
-      }
-
-      clauseBreakerIndex++;
    }
-  }
+
+  }  // while inputTextPointer != NULL

 } /* phonemize */

--- a/src/cpp/piper.hpp
+++ b/src/cpp/piper.hpp
@ -30,7 +30,7 @@ struct Voice {
 void initialize(std::filesystem::path cwd) {
  const char *dataPath = NULL;

-  auto cwdDataPath = cwd.append("espeak-ng-data");
+  auto cwdDataPath = std::filesystem::absolute(cwd.append("espeak-ng-data"));
  if (std::filesystem::is_directory(cwdDataPath)) {
    dataPath = cwdDataPath.c_str();
  }