Basic HTTP server

5 months ago · 2fa4c2c139
parent 3d1f4463cf
commit 2fa4c2c139
4 changed files with 156 additions and 1 deletions
--- a/src/python_run/README_http.md
+++ b/src/python_run/README_http.md
@ -0,0 +1,27 @@
+# Piper HTTP Server
+
+Install the requirements into your virtual environment:
+
+```sh
+.venv/bin/pip3 install -r requirements_http.txt
+```
+
+Run the web server:
+
+```sh
+.venv/bin/python3 -m piper.http_server --model ...
+```
+
+See `--help` for more options.
+
+Using a `GET` request:
+
+```sh
+curl -G --data-urlencode 'text=This is a test.' -o test.wav 'localhost:5000'
+```
+
+Using a `POST` request:
+
+```sh
+curl -X POST -H 'Content-Type: text/plain' --data 'This is a test.' -o test.wav 'localhost:5000'
+```
--- a/src/python_run/piper/http_server.py
+++ b/src/python_run/piper/http_server.py
@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+import argparse
+import io
+import logging
+import wave
+from pathlib import Path
+from typing import Any, Dict
+
+from flask import Flask, request
+
+from . import PiperVoice
+from .download import ensure_voice_exists, find_voice, get_voices
+
+_LOGGER = logging.getLogger()
+
+
+def main() -> None:
+    """Parse CLI options, load a Piper voice, and serve synthesis over HTTP."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", default="0.0.0.0", help="HTTP server host")
+    parser.add_argument("--port", type=int, default=5000, help="HTTP server port")
+    # Model files
+    parser.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
+    parser.add_argument("-c", "--config", help="Path to model config file")
+    # Synthesis settings
+    parser.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
+    parser.add_argument(
+        "--length-scale", "--length_scale", type=float, help="Phoneme length"
+    )
+    parser.add_argument(
+        "--noise-scale", "--noise_scale", type=float, help="Generator noise"
+    )
+    parser.add_argument(
+        "--noise-w", "--noise_w", type=float, help="Phoneme width noise"
+    )
+    # Hardware
+    parser.add_argument("--cuda", action="store_true", help="Use GPU")
+    # Pauses
+    parser.add_argument(
+        "--sentence-silence",
+        "--sentence_silence",
+        type=float,
+        default=0.0,
+        help="Seconds of silence after each sentence",
+    )
+    # Voice download
+    parser.add_argument(
+        "--data-dir",
+        "--data_dir",
+        action="append",
+        default=[str(Path.cwd())],
+        help="Data directory to check for downloaded models (default: current directory)",
+    )
+    parser.add_argument(
+        "--download-dir",
+        "--download_dir",
+        help="Directory to download voices into (default: first data dir)",
+    )
+    parser.add_argument(
+        "--update-voices",
+        action="store_true",
+        help="Download latest voices.json during startup",
+    )
+    # Logging
+    parser.add_argument(
+        "--debug", action="store_true", help="Print DEBUG messages to console"
+    )
+    args = parser.parse_args()
+    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
+    _LOGGER.debug(args)
+
+    # Download to first data directory by default
+    args.download_dir = args.download_dir or args.data_dir[0]
+
+    # Download voice if file doesn't exist (voice info is only loaded in that case)
+    model_path = Path(args.model)
+    if not model_path.exists():
+        voices_info = get_voices(args.download_dir, update_voices=args.update_voices)
+
+        # Resolve aliases for backwards compatibility with old voice names
+        aliases_info: Dict[str, Any] = {}
+        for voice_info in voices_info.values():
+            for voice_alias in voice_info.get("aliases", []):
+                aliases_info[voice_alias] = {"_is_alias": True, **voice_info}
+
+        voices_info.update(aliases_info)
+        ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
+        args.model, args.config = find_voice(args.model, args.data_dir)
+
+    # Load voice
+    voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
+    synthesize_args = {
+        "speaker_id": args.speaker,
+        "length_scale": args.length_scale,
+        "noise_scale": args.noise_scale,
+        "noise_w": args.noise_w,
+        "sentence_silence": args.sentence_silence,
+    }
+
+    # Create web server
+    app = Flask(__name__)
+
+    @app.route("/", methods=["GET", "POST"])
+    def app_synthesize() -> Any:
+        """Synthesize the request text and respond with WAV audio, or 400 if empty."""
+        if request.method == "POST":
+            text = request.data.decode("utf-8")
+        else:
+            text = request.args.get("text", "")
+
+        text = text.strip()
+        if not text:
+            # Client mistake: reply 400 instead of raising (Flask would answer 500)
+            return "No text provided", 400
+
+        _LOGGER.debug("Synthesizing text: %s", text)
+        with io.BytesIO() as wav_io:
+            with wave.open(wav_io, "wb") as wav_file:
+                voice.synthesize(text, wav_file, **synthesize_args)
+            # Declare the payload type; Flask's default would be text/html
+            return wav_io.getvalue(), 200, {"Content-Type": "audio/wav"}
+
+    app.run(host=args.host, port=args.port)
+
+
+if __name__ == "__main__":
+    main()
--- a/src/python_run/requirements_http.txt
+++ b/src/python_run/requirements_http.txt
@ -0,0 +1 @@
+flask>=3,<4
--- a/src/python_run/setup.py
+++ b/src/python_run/setup.py
@ -33,7 +33,7 @@ setup(
        ]
    },
    install_requires=requirements,
-    extras_require={"gpu": ["onnxruntime-gpu>=1.11.0,<2"]},
+    extras_require={"gpu": ["onnxruntime-gpu>=1.11.0,<2"], "http": ["flask>=3,<4"]},
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",