diff --git a/.gitignore b/.gitignore index 3d75bbd0c..f6d9742f8 100644 --- a/.gitignore +++ b/.gitignore @@ -229,3 +229,6 @@ doku/* **/workspace_status.json .pytest_cache/ + +# tts model +*/bitbots_tts/model/* diff --git a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py index 8a8afd0c5..7b58ca46e 100755 --- a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py +++ b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py @@ -1,13 +1,14 @@ #!/usr/bin/env python3 -import os -import subprocess -import time +import io import traceback +import wave +from pathlib import Path +import numpy as np import rclpy -import requests -from ament_index_python import get_package_prefix +import sounddevice as sd +from piper import PiperVoice from rcl_interfaces.msg import Parameter, SetParametersResult from rclpy.callback_groups import MutuallyExclusiveCallbackGroup from rclpy.executors import MultiThreadedExecutor @@ -16,6 +17,12 @@ from bitbots_msgs.msg import Audio +# Load the Piper voice +bb_tts_dir = Path(__file__).parent.parent / "model" # TODO: check how to get nice relative paths +model_path = bb_tts_dir / "en_US-lessac-medium.onnx" +config_path = bb_tts_dir / "en_US-lessac-medium.onnx.json" +voice = PiperVoice.load(model_path, config_path=config_path, use_cuda=False) + def speak(text: str, publisher: Publisher, priority: int = 20, speaking_active: bool = True) -> None: """Utility method which can be used by other classes to easily publish a message.""" @@ -27,10 +34,35 @@ def speak(text: str, publisher: Publisher, priority: int = 20, speaking_active: def say(text: str) -> None: - """Start the shell `say.sh` script to output given text with mimic3. Beware: this is blocking.""" - script_path = os.path.join(get_package_prefix("bitbots_tts"), "lib/bitbots_tts/say.sh") - process = subprocess.Popen((script_path, text)) - process.wait() + """Use piper for speech synthesis and audio playback. 
+ This is also used for speaking the IP address during startup.""" + synthesize_args = { + "length_scale": 1.0, # Phoneme length, if lower -> faster + "noise_scale": 0.667, # Generator noise, if lower -> more robotic + "noise_w": 0.8, # Phoneme width noise, if lower -> more robotic + "sentence_silence": 0.1, # seconds of silence after each sentence + } + with io.BytesIO() as buffer: + with wave.open(buffer, "wb") as wav_file: + voice.synthesize(text, wav_file, **synthesize_args) + + buffer.seek(0) + with wave.open(buffer, "rb") as wav: + framerate = wav.getframerate() + sampwidth = wav.getsampwidth() + nchannels = wav.getnchannels() + nframes = wav.getnframes() + audio_bytes = wav.readframes(nframes) + + # bytes to np array + dtype_map = {1: np.int8, 2: np.int16, 4: np.int32} + if sampwidth not in dtype_map: + raise ValueError(f"Unsupported sample width: {sampwidth}") + audio = np.frombuffer(audio_bytes, dtype=dtype_map[sampwidth]) + if nchannels > 1: + audio = audio.reshape(-1, nchannels) + + sd.play(audio, samplerate=framerate, blocking=True) class Speaker(Node): @@ -62,17 +94,6 @@ def __init__(self) -> None: # Subscribe to the speak topic self.create_subscription(Audio, "speak", self.speak_cb, 10, callback_group=MutuallyExclusiveCallbackGroup()) - # Wait for the mimic server to start - while True: - try: - requests.get("http://localhost:59125") - break - except requests.exceptions.ConnectionError: - # log once per second that the server is not yet available - self.get_logger().info("Waiting for mimic server to start...", throttle_duration_sec=2.0) - time.sleep(0.5) - pass - # Start processing the queue self.create_timer(0.1, self.run_speaker, callback_group=MutuallyExclusiveCallbackGroup()) diff --git a/bitbots_misc/bitbots_tts/setup.py b/bitbots_misc/bitbots_tts/setup.py index 29cedacd8..fc7b93835 100644 --- a/bitbots_misc/bitbots_tts/setup.py +++ b/bitbots_misc/bitbots_tts/setup.py @@ -12,6 +12,7 @@ ("share/ament_index/resource_index/packages", ["resource/" +
package_name]), ("share/" + package_name + "/config", glob.glob("config/*.yaml")), ("share/" + package_name + "/launch", glob.glob("launch/*.launch")), + ("share/" + package_name + "/model", glob.glob("model/*")), ], install_requires=[ "setuptools",