mirror of https://github.com/rhasspy/piper
Starting on Python inference
parent
0ddfb082be
commit
b21d815298
@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env bash
#
# Recreates the Python virtual environment from scratch and installs the
# packages listed in requirements.txt.
#
# Environment overrides:
#   venv    Path of the virtual environment
#           (default: .venv in the parent of this script's directory)
#   PYTHON  Python interpreter used to create it (default: python3)
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

# Quoted so an interpreter path containing spaces is not word-split; this
# matches how ${PYTHON} is invoked when the environment is created below.
python_version="$("${PYTHON}" --version)"

# Create virtual environment
echo "Creating virtual environment at ${venv} (${python_version})"
# Any existing environment is deleted first so the install starts clean
rm -rf "${venv}"
"${PYTHON}" -m venv "${venv}"
source "${venv}/bin/activate"

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install --upgrade pip
pip3 install --upgrade wheel setuptools

pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"
|
@ -0,0 +1,6 @@
|
||||
[settings]
|
||||
multi_line_output=3
|
||||
include_trailing_comma=True
|
||||
force_grid_wrap=0
|
||||
use_parentheses=True
|
||||
line_length=88
|
@ -0,0 +1,134 @@
|
||||
import io
|
||||
import json
|
||||
import wave
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import List, Mapping, Optional, Sequence, Union
|
||||
|
||||
import numpy as np
|
||||
import onnxruntime
|
||||
from espeak_phonemizer import Phonemizer
|
||||
|
||||
# Symbol placed in front of the phonemes before they are mapped to ids
_BOS = "^"
# Symbol whose ids are appended once, after all phonemes
_EOS = "$"
# Symbol whose ids are inserted after the ids of every phoneme
_PAD = "_"
|
||||
|
||||
|
||||
@dataclass
class LarynxConfig:
    """Voice settings read from a model's JSON config file by ``load_config``."""

    # Top-level "num_symbols" entry of the config
    num_symbols: int
    # Top-level "num_speakers" entry of the config
    num_speakers: int
    # config["audio"]["sample_rate"]; used as the frame rate of the WAV output
    sample_rate: int
    # config["espeak"]["voice"]; passed to the Phonemizer constructor
    espeak_voice: str
    # Used by synthesize() when no length_scale argument is given
    length_scale: float
    # Used by synthesize() when no noise_scale argument is given
    noise_scale: float
    # Used by synthesize() when no noise_w argument is given
    noise_w: float
    # Maps each phoneme symbol to the sequence of ids fed to the model
    phoneme_id_map: Mapping[str, Sequence[int]]
|
||||
|
||||
|
||||
class Larynx:
    """Text-to-speech voice: phonemizes text and runs it through an Onnx model."""

    def __init__(
        self,
        model_path: Union[str, Path],
        config_path: Optional[Union[str, Path]] = None,
        use_cuda: bool = False,
    ):
        """Load the voice config, the phonemizer, and the Onnx inference session.

        Args:
            model_path: Path to the Onnx model file.
            config_path: Path to the JSON config; when omitted,
                ``<model_path>.json`` is used.
            use_cuda: Request the CUDA execution provider instead of the
                onnxruntime default providers.
        """
        resolved_config = f"{model_path}.json" if config_path is None else config_path
        self.config = load_config(resolved_config)
        self.phonemizer = Phonemizer(self.config.espeak_voice)

        providers = ["CUDAExecutionProvider"] if use_cuda else None
        self.model = onnxruntime.InferenceSession(
            str(model_path),
            sess_options=onnxruntime.SessionOptions(),
            providers=providers,
        )

    def synthesize(
        self,
        text: str,
        speaker_id: Optional[int] = None,
        length_scale: Optional[float] = None,
        noise_scale: Optional[float] = None,
        noise_w: Optional[float] = None,
    ) -> bytes:
        """Synthesize WAV audio from text.

        Args:
            text: Text to speak.
            speaker_id: Speaker index passed to the model as ``sid``; ``None``
                passes no speaker array.
            length_scale: Overrides the config's length scale when given.
            noise_scale: Overrides the config's noise scale when given.
            noise_w: Overrides the config's noise width when given.

        Returns:
            The bytes of a mono, 16-bit WAV file at the config's sample rate.
        """
        settings = self.config

        # Arguments left as None fall back to the voice's configured values
        length_scale = settings.length_scale if length_scale is None else length_scale
        noise_scale = settings.noise_scale if noise_scale is None else noise_scale
        noise_w = settings.noise_w if noise_w is None else noise_w

        phonemes_str = self.phonemizer.phonemize(text)
        id_map = settings.phoneme_id_map

        # Id layout: BOS, pad, phoneme, pad, phoneme, pad, ..., EOS
        phoneme_ids: List[int] = []
        for symbol in (_BOS, *phonemes_str):
            phoneme_ids += id_map[symbol]
            phoneme_ids += id_map[_PAD]
        phoneme_ids += id_map[_EOS]

        # The model takes a batch of one sequence plus that sequence's length
        ids_batch = np.array(phoneme_ids, dtype=np.int64)[np.newaxis, ...]
        ids_lengths = np.array([ids_batch.shape[1]], dtype=np.int64)
        scales = np.array([noise_scale, length_scale, noise_w], dtype=np.float32)
        sid = None if speaker_id is None else np.array([speaker_id], dtype=np.int64)

        # Synthesize through Onnx
        feed = {
            "input": ids_batch,
            "input_lengths": ids_lengths,
            "scales": scales,
            "sid": sid,
        }
        outputs = self.model.run(None, feed)
        samples = outputs[0].squeeze((0, 1))
        samples = audio_float_to_int16(samples.squeeze())

        # Convert to WAV
        with io.BytesIO() as wav_io:
            with wave.open(wav_io, "wb") as wav_file:
                wav_file.setnchannels(1)
                wav_file.setsampwidth(2)
                wav_file.setframerate(settings.sample_rate)
                wav_file.writeframes(samples.tobytes())

            # Read the buffer only after the WAV writer has been closed
            return wav_io.getvalue()
|
||||
|
||||
|
||||
def load_config(config_path: Union[str, Path]) -> LarynxConfig:
    """Read a voice's JSON config file into a ``LarynxConfig``.

    The optional "inference" section supplies the default synthesis scales;
    entries missing from it fall back to noise_scale=0.667, length_scale=1.0
    and noise_w=0.8. All other keys are required and raise ``KeyError`` when
    absent.
    """
    with open(config_path, "r", encoding="utf-8") as config_file:
        raw = json.load(config_file)

    inference = raw.get("inference", {})

    settings = {
        "num_symbols": raw["num_symbols"],
        "num_speakers": raw["num_speakers"],
        "sample_rate": raw["audio"]["sample_rate"],
        "espeak_voice": raw["espeak"]["voice"],
        "noise_scale": inference.get("noise_scale", 0.667),
        "length_scale": inference.get("length_scale", 1.0),
        "noise_w": inference.get("noise_w", 0.8),
        "phoneme_id_map": raw["phoneme_id_map"],
    }
    return LarynxConfig(**settings)
|
||||
|
||||
|
||||
def audio_float_to_int16(
    audio: np.ndarray, max_wav_value: float = 32767.0
) -> np.ndarray:
    """Normalize audio and convert to int16 range.

    The signal is scaled so that its largest absolute sample maps to
    ``max_wav_value``. Peaks below 0.01 are treated as 0.01, which caps the
    gain applied to near-silent input.

    Args:
        audio: Floating-point samples.
        max_wav_value: Magnitude the peak sample is scaled to.

    Returns:
        An int16 array with the same shape as ``audio``. An empty input
        yields an empty int16 array.
    """
    if audio.size == 0:
        # np.max raises ValueError on a zero-size array; nothing to scale
        return audio.astype("int16")

    peak = max(0.01, np.max(np.abs(audio)))
    audio_norm = audio * (max_wav_value / peak)
    audio_norm = np.clip(audio_norm, -max_wav_value, max_wav_value)
    return audio_norm.astype("int16")
|
@ -0,0 +1,19 @@
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from . import Larynx
|
||||
|
||||
|
||||
def main() -> None:
    """Read text from stdin, synthesize it, and write the WAV bytes to stdout.

    Command-line options:
        -m/--model: Path to the Onnx model file (required).
        --cuda: Use the GPU for inference.
    """
    parser = argparse.ArgumentParser()
    # Required: without a model path the voice cannot be loaded, so fail with
    # a usage error here instead of a missing-file error later on
    parser.add_argument(
        "-m", "--model", required=True, help="Path to Onnx model file"
    )
    parser.add_argument("--cuda", action="store_true", help="Use GPU")
    args = parser.parse_args()

    voice = Larynx(args.model, use_cuda=args.cuda)
    wav_bytes = voice.synthesize(sys.stdin.read())

    # WAV data is binary, so bypass the text layer of stdout
    sys.stdout.buffer.write(wav_bytes)
|
||||
|
||||
|
||||
# Run the command-line interface only when this module is executed directly
if __name__ == "__main__":
    main()
|
@ -0,0 +1,4 @@
|
||||
[mypy]
|
||||
|
||||
[mypy-onnxruntime.*]
|
||||
ignore_missing_imports = True
|
@ -0,0 +1,40 @@
|
||||
[MESSAGES CONTROL]
|
||||
disable=
|
||||
format,
|
||||
abstract-class-little-used,
|
||||
abstract-method,
|
||||
cyclic-import,
|
||||
duplicate-code,
|
||||
global-statement,
|
||||
import-outside-toplevel,
|
||||
inconsistent-return-statements,
|
||||
locally-disabled,
|
||||
not-context-manager,
|
||||
redefined-variable-type,
|
||||
too-few-public-methods,
|
||||
too-many-arguments,
|
||||
too-many-branches,
|
||||
too-many-instance-attributes,
|
||||
too-many-lines,
|
||||
too-many-locals,
|
||||
too-many-public-methods,
|
||||
too-many-return-statements,
|
||||
too-many-statements,
|
||||
too-many-boolean-expressions,
|
||||
unnecessary-pass,
|
||||
unused-argument,
|
||||
broad-except,
|
||||
too-many-nested-blocks,
|
||||
invalid-name,
|
||||
unused-import,
|
||||
no-self-use,
|
||||
fixme,
|
||||
useless-super-delegation,
|
||||
missing-module-docstring,
|
||||
missing-class-docstring,
|
||||
missing-function-docstring,
|
||||
import-error,
|
||||
relative-beyond-top-level
|
||||
|
||||
[FORMAT]
|
||||
expected-line-ending-format=LF
|
@ -0,0 +1,2 @@
|
||||
espeak-phonemizer>=1.1.0,<2
|
||||
onnxruntime~=1.11.0
|
@ -0,0 +1,7 @@
|
||||
black==22.3.0
|
||||
coverage==5.0.4
|
||||
flake8==3.7.9
|
||||
mypy==0.910
|
||||
pylint==2.10.2
|
||||
pytest==5.4.1
|
||||
pytest-cov==2.8.1
|
@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env bash

# Formats the Python package with black and isort, then checks it with
# flake8, pylint, and mypy. Because of `set -e`, the first tool that fails
# ends the script.

set -eo pipefail

# Locate the repository root relative to this script
this_dir="$( cd "$( dirname "$0" )" && pwd )"
base_dir="$(realpath "${this_dir}/..")"

# Virtual environment location (overridable through $venv)
: "${venv:=${base_dir}/.venv}"

# Use the virtual environment's tools when it exists
if [[ -d "${venv}" ]]; then
    . "${venv}/bin/activate"
fi

python_files=("${base_dir}/larynx")

# Formatters run first, then the checkers, all on the same paths
for tool in black isort flake8 pylint mypy; do
    "${tool}" "${python_files[@]}"
done
|
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
# Runs the larynx Python module with all command-line arguments forwarded,
# using the virtual environment when one exists.
set -eo pipefail

# Locate the repository root relative to this script
this_dir="$( cd "$( dirname "$0" )" && pwd )"
base_dir="$(realpath "${this_dir}/..")"

# Virtual environment location (overridable through $venv)
: "${venv:=${base_dir}/.venv}"

# Activate the virtual environment only if it has been created
if [[ -d "${venv}" ]]; then
    . "${venv}/bin/activate"
fi

python3 -m larynx "$@"
|
@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env bash
#
# Recreates the Python virtual environment from scratch and installs the
# packages listed in requirements.txt.
#
# Environment overrides:
#   venv    Path of the virtual environment
#           (default: .venv in the parent of this script's directory)
#   PYTHON  Python interpreter used to create it (default: python3)
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

# Quoted so an interpreter path containing spaces is not word-split; this
# matches how ${PYTHON} is invoked when the environment is created below.
python_version="$("${PYTHON}" --version)"

# Create virtual environment
echo "Creating virtual environment at ${venv} (${python_version})"
# Any existing environment is deleted first so the install starts clean
rm -rf "${venv}"
"${PYTHON}" -m venv "${venv}"
source "${venv}/bin/activate"

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install --upgrade pip
pip3 install --upgrade wheel setuptools

pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"
|
@ -0,0 +1,22 @@
|
||||
[flake8]
|
||||
# To work with Black
|
||||
max-line-length = 88
|
||||
# E501: line too long
|
||||
# W503: Line break occurred before a binary operator
|
||||
# E203: Whitespace before ':'
|
||||
# D202 No blank lines allowed after function docstring
|
||||
# W504 line break after binary operator
|
||||
ignore =
|
||||
E501,
|
||||
W503,
|
||||
E203,
|
||||
D202,
|
||||
W504
|
||||
|
||||
[isort]
|
||||
multi_line_output = 3
|
||||
include_trailing_comma=True
|
||||
force_grid_wrap=0
|
||||
use_parentheses=True
|
||||
line_length=88
|
||||
indent = " "
|
Loading…
Reference in New Issue