mirror of https://github.com/rhasspy/piper
Basic HTTP server
parent
3d1f4463cf
commit
2fa4c2c139
@ -0,0 +1,27 @@
|
||||
# Piper HTTP Server
|
||||
|
||||
Install the requirements into your virtual environment:
|
||||
|
||||
```sh
|
||||
.venv/bin/pip3 install -r requirements_http.txt
|
||||
```
|
||||
|
||||
Run the web server:
|
||||
|
||||
```sh
|
||||
.venv/bin/python3 -m piper.http_server --model ...
|
||||
```
|
||||
|
||||
See `--help` for more options.
|
||||
|
||||
Using a `GET` request:
|
||||
|
||||
```sh
|
||||
curl -G --data-urlencode 'text=This is a test.' -o test.wav 'localhost:5000'
|
||||
```
|
||||
|
||||
Using a `POST` request:
|
||||
|
||||
```sh
|
||||
curl -X POST -H 'Content-Type: text/plain' --data 'This is a test.' -o test.wav 'localhost:5000'
|
||||
```
|
@ -0,0 +1,127 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import io
|
||||
import logging
|
||||
import wave
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from flask import Flask, request
|
||||
|
||||
from . import PiperVoice
|
||||
from .download import ensure_voice_exists, find_voice, get_voices
|
||||
|
||||
# Module-wide logger. Calling getLogger() without a name returns the root
# logger, so messages logged here use the root logger's level and handlers.
_LOGGER = logging.getLogger()
|
||||
|
||||
|
||||
def main() -> None:
    """Parse command-line options, load a Piper voice and serve it over HTTP.

    The server exposes a single route, ``/``:

    * ``GET /?text=...`` synthesizes the ``text`` query parameter.
    * ``POST /`` synthesizes the request body, decoded as UTF-8.

    On success the response body is a complete WAV file sent with the
    ``audio/wav`` content type. Requests with no usable text (missing, blank,
    or not valid UTF-8) are answered with HTTP 400 instead of an unhandled
    exception.

    If ``--model`` does not point to an existing file, it is passed to
    ``ensure_voice_exists`` / ``find_voice`` so the voice can be located in
    (or downloaded into) the data directories.

    This function blocks in ``app.run`` until the server is stopped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", default="0.0.0.0", help="HTTP server host")
    parser.add_argument("--port", type=int, default=5000, help="HTTP server port")
    #
    parser.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
    parser.add_argument("-c", "--config", help="Path to model config file")
    #
    parser.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
    parser.add_argument(
        "--length-scale", "--length_scale", type=float, help="Phoneme length"
    )
    parser.add_argument(
        "--noise-scale", "--noise_scale", type=float, help="Generator noise"
    )
    parser.add_argument(
        "--noise-w", "--noise_w", type=float, help="Phoneme width noise"
    )
    #
    parser.add_argument("--cuda", action="store_true", help="Use GPU")
    #
    parser.add_argument(
        "--sentence-silence",
        "--sentence_silence",
        type=float,
        default=0.0,
        help="Seconds of silence after each sentence",
    )
    #
    parser.add_argument(
        "--data-dir",
        "--data_dir",
        action="append",
        default=[str(Path.cwd())],
        help="Data directory to check for downloaded models (default: current directory)",
    )
    parser.add_argument(
        "--download-dir",
        "--download_dir",
        help="Directory to download voices into (default: first data dir)",
    )
    #
    parser.add_argument(
        "--update-voices",
        action="store_true",
        help="Download latest voices.json during startup",
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
    _LOGGER.debug(args)

    if not args.download_dir:
        # Download to first data directory by default
        args.download_dir = args.data_dir[0]

    # Download voice if file doesn't exist
    model_path = Path(args.model)
    if not model_path.exists():
        # Load voice info
        voices_info = get_voices(args.download_dir, update_voices=args.update_voices)

        # Resolve aliases for backwards compatibility with old voice names
        aliases_info: Dict[str, Any] = {}
        for voice_info in voices_info.values():
            for voice_alias in voice_info.get("aliases", []):
                aliases_info[voice_alias] = {"_is_alias": True, **voice_info}

        voices_info.update(aliases_info)
        ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
        args.model, args.config = find_voice(args.model, args.data_dir)

    # Load voice
    voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)

    # Synthesis options are fixed at startup and reused for every request.
    synthesize_args = {
        "speaker_id": args.speaker,
        "length_scale": args.length_scale,
        "noise_scale": args.noise_scale,
        "noise_w": args.noise_w,
        "sentence_silence": args.sentence_silence,
    }

    # Create web server
    app = Flask(__name__)

    @app.route("/", methods=["GET", "POST"])
    def app_synthesize() -> Any:
        """Synthesize the requested text and return it as a WAV response."""
        if request.method == "POST":
            try:
                text = request.data.decode("utf-8")
            except UnicodeDecodeError:
                # Malformed body is a client error, not a server failure.
                return ("Request body must be UTF-8 text", 400)
        else:
            text = request.args.get("text", "")

        text = text.strip()
        if not text:
            # Report the missing input to the client as 400 Bad Request
            # rather than raising, which would surface as a 500.
            return ("No text provided", 400)

        _LOGGER.debug("Synthesizing text: %s", text)
        with io.BytesIO() as wav_io:
            with wave.open(wav_io, "wb") as wav_file:
                voice.synthesize(text, wav_file, **synthesize_args)

            # Read the buffer only after the wave writer is closed so the
            # WAV header has been finalized.
            wav_bytes = wav_io.getvalue()

        # Returning bare bytes would be served as text/html; label it as WAV.
        return (wav_bytes, 200, {"Content-Type": "audio/wav"})

    app.run(host=args.host, port=args.port)
|
||||
|
||||
|
||||
# Start the HTTP server only when this module is executed as a script
# (e.g. ``python3 -m piper.http_server``), not when it is imported.
if __name__ == "__main__":
    main()
|
@ -0,0 +1 @@
|
||||
flask>=3,<4
|
Loading…
Reference in New Issue