Using patched espeak-ng

pull/85/head
Michael Hansen 1 year ago
parent 5b64824eea
commit cbed19e1b4

1
.gitignore vendored

@ -5,6 +5,7 @@ tmp/
*.py[cod]
*.egg
*.egg-info/
build
htmlcov

@ -14,8 +14,8 @@ ENV DEBIAN_FRONTEND=noninteractive
WORKDIR /build
# Build minimal version of espeak-ng
ADD lib/espeak-ng-1.51.tar.gz ./
RUN cd espeak-ng-1.51 && \
ADD lib/espeak-ng-1.52-patched.tar.gz ./
RUN cd espeak-ng && \
./autogen.sh && \
./configure \
--without-pcaudiolib \
@ -30,9 +30,10 @@ RUN cd espeak-ng-1.51 && \
make install
# Copy onnxruntime library
COPY lib/ ./lib/
RUN mkdir -p /usr/local/include/onnxruntime && \
tar -C /usr/local/include/onnxruntime \
COPY lib/onnxruntime-linux-*.tgz ./lib/
RUN export ONNX_DIR="./lib/Linux-$(uname -m)" && \
mkdir -p "${ONNX_DIR}" && \
tar -C "${ONNX_DIR}" \
--strip-components 1 \
-xvf "lib/onnxruntime-linux-${TARGETARCH}${TARGETVARIANT}.tgz"
@ -49,7 +50,7 @@ WORKDIR /dist
RUN mkdir -p piper && \
cp -d /usr/lib64/libespeak-ng.so* ./piper/ && \
cp -dR /usr/share/espeak-ng-data ./piper/ && \
cp -d /usr/local/include/onnxruntime/lib/libonnxruntime.so.* ./piper/ && \
find /build/lib/ -name 'libonnxruntime.so.*' -exec cp -d {} ./piper/ \; && \
cp /build/build/piper ./piper/ && \
tar -czf "piper_${TARGETARCH}${TARGETVARIANT}.tar.gz" piper/

@ -1,4 +1,5 @@
*
!Makefile
!src/cpp/
!lib/
!lib/onnxruntime*.tgz
!lib/espeak-ng*.tar.gz

@ -39,7 +39,9 @@ Download a release:
* [amd64](https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_amd64.tar.gz) (desktop Linux)
* [arm64](https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_arm64.tar.gz) (Raspberry Pi 4)
If you want to build from source, see the [Makefile](Makefile) and [C++ source](src/cpp). Last tested with [onnxruntime](https://github.com/microsoft/onnxruntime) 1.13.1.
If you want to build from source, see the [Makefile](Makefile) and [C++ source](src/cpp). Piper depends on a patched `espeak-ng` in [lib](lib).
Last tested with [onnxruntime](https://github.com/microsoft/onnxruntime) 1.14.1.
## Usage

2
lib/.gitignore vendored

@ -0,0 +1,2 @@
espeak-ng/
Linux-*/

@ -32,8 +32,10 @@ struct eSpeakConfig {
// Characters that eSpeak uses to break apart paragraphs/sentences
set<Phoneme> clauseBreakers{U'.', U'?', U'!', U',', U';', U':'};
// Characters that piper will use to split utterances
set<Phoneme> sentenceBreakers{U'.', U'?', U'!'};
Phoneme fullStop = U'.';
Phoneme comma = U',';
Phoneme question = U'?';
Phoneme exclamation = U'!';
};
struct PhonemizeConfig {

@ -53,7 +53,8 @@ int main(int argc, char *argv[]) {
RunConfig runConfig;
parseArgs(argc, argv, runConfig);
auto exePath = filesystem::path(argv[0]);
// NOTE: This won't work for Windows (need GetModuleFileName)
auto exePath = filesystem::canonical("/proc/self/exe");
piper::initialize(exePath.parent_path());
piper::Voice voice;

@ -15,6 +15,13 @@
#include "config.hpp"
#include "utf8.h"
#define CLAUSE_INTONATION_FULL_STOP 0x00000000
#define CLAUSE_INTONATION_COMMA 0x00001000
#define CLAUSE_INTONATION_QUESTION 0x00002000
#define CLAUSE_INTONATION_EXCLAMATION 0x00003000
#define CLAUSE_TYPE_SENTENCE 0x00080000
using namespace std;
namespace piper {
@ -54,13 +61,15 @@ void phonemize(string text, PhonemizeConfig &phonemizeConfig,
vector<Phoneme> *sentencePhonemes = nullptr;
const char *inputTextPointer = textCopy.c_str();
size_t clauseBreakerIndex = 0;
int terminator = 0;
while (inputTextPointer != NULL) {
// Modified espeak-ng API to get access to clause terminator
string clausePhonemes(
espeak_TextToPhonemes((const void **)&inputTextPointer,
espeak_TextToPhonemes2((const void **)&inputTextPointer,
/*textmode*/ espeakCHARS_AUTO,
/*phonememode = IPA*/ 0x02));
/*phonememode = IPA*/ 0x02,
&terminator));
utf8::iterator phonemeIter(clausePhonemes.begin(), clausePhonemes.begin(),
clausePhonemes.end());
@ -74,17 +83,25 @@ void phonemize(string text, PhonemizeConfig &phonemizeConfig,
}
sentencePhonemes->insert(sentencePhonemes->end(), phonemeIter, phonemeEnd);
if (clauseBreakerIndex < textClauseBreakers.size()) {
auto clauseBreaker = textClauseBreakers[clauseBreakerIndex];
sentencePhonemes->push_back(clauseBreaker);
if (phonemizeConfig.eSpeak->sentenceBreakers.contains(clauseBreaker)) {
// Add appropriate punctuation depending on terminator type
int intonation = terminator & 0x0000F000;
if (intonation == CLAUSE_INTONATION_FULL_STOP) {
sentencePhonemes->push_back(phonemizeConfig.eSpeak->fullStop);
} else if (intonation == CLAUSE_INTONATION_COMMA) {
sentencePhonemes->push_back(phonemizeConfig.eSpeak->comma);
} else if (intonation == CLAUSE_INTONATION_QUESTION) {
sentencePhonemes->push_back(phonemizeConfig.eSpeak->question);
} else if (intonation == CLAUSE_INTONATION_EXCLAMATION) {
sentencePhonemes->push_back(phonemizeConfig.eSpeak->exclamation);
}
if ((terminator & CLAUSE_TYPE_SENTENCE) == CLAUSE_TYPE_SENTENCE) {
// End of sentence
sentencePhonemes = nullptr;
}
clauseBreakerIndex++;
}
}
} // while inputTextPointer != NULL
} /* phonemize */

@ -30,7 +30,7 @@ struct Voice {
void initialize(std::filesystem::path cwd) {
const char *dataPath = NULL;
auto cwdDataPath = cwd.append("espeak-ng-data");
auto cwdDataPath = std::filesystem::absolute(cwd.append("espeak-ng-data"));
if (std::filesystem::is_directory(cwdDataPath)) {
dataPath = cwdDataPath.c_str();
}

Loading…
Cancel
Save