pull/85/head
Michael Hansen 1 year ago
parent c53f739f5f
commit 28a45a2a46

@ -78,7 +78,7 @@ def synthesize(model, phoneme_ids, speaker_id, sample_rate) -> float:
)
end_time = time.monotonic_ns()
audio_sec = (len(audio) / 2) / sample_rate
audio_sec = len(audio) / sample_rate
infer_sec = (end_time - start_time) / 1e9
rtf = infer_sec / audio_sec

@ -107,7 +107,7 @@ def synthesize(session, phoneme_ids, speaker_id, sample_rate) -> float:
)[0].squeeze()
end_time = time.monotonic_ns()
audio_sec = (len(audio) / 2) / sample_rate
audio_sec = len(audio) / sample_rate
infer_sec = (end_time - start_time) / 1e9
rtf = infer_sec / audio_sec

@ -1,5 +1,6 @@
#!/usr/bin/env python3
import argparse
import logging
import json
import time
import statistics
@ -11,6 +12,8 @@ _NOISE_SCALE = 0.667
_LENGTH_SCALE = 1.0
_NOISE_W = 0.8
_LOGGER = logging.getLogger(__name__)
def main() -> None:
parser = argparse.ArgumentParser()
@ -19,6 +22,7 @@ def main() -> None:
)
parser.add_argument("-c", "--config", help="Path to model config file (.json)")
args = parser.parse_args()
logging.basicConfig(level=logging.DEBUG)
if not args.config:
args.config = f"{args.model}.json"
@ -81,10 +85,18 @@ def synthesize(model, phoneme_ids, speaker_id, sample_rate) -> float:
)
end_time = time.monotonic_ns()
audio_sec = (len(audio) / 2) / sample_rate
audio_sec = len(audio) / sample_rate
infer_sec = (end_time - start_time) / 1e9
rtf = infer_sec / audio_sec
_LOGGER.debug(
"Real-time factor: %s (infer=%s sec, audio=%s sec)",
rtf,
infer_sec,
audio_sec,
)
return infer_sec / audio_sec
return rtf
if __name__ == "__main__":

Loading…
Cancel
Save