bit-bots · val-ba · Apr 14, 2025 · Apr 14, 2025 · Apr 14, 2025 · Apr 14, 2025
diff --git a/.gitignore b/.gitignore
@@ -229,3 +229,6 @@ doku/*
 **/workspace_status.json
 
 .pytest_cache/
+
+# tts model
+*/bitbots_tts/model/*
diff --git a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
@@ -1,13 +1,14 @@
 #!/usr/bin/env python3
 
-import os
-import subprocess
-import time
+import io
 import traceback
+import wave
+from pathlib import Path
 
+import numpy as np
 import rclpy
-import requests
-from ament_index_python import get_package_prefix
+import sounddevice as sd
+from piper import PiperVoice
 from rcl_interfaces.msg import Parameter, SetParametersResult
 from rclpy.callback_groups import MutuallyExclusiveCallbackGroup
 from rclpy.executors import MultiThreadedExecutor
@@ -16,6 +17,12 @@
 
 from bitbots_msgs.msg import Audio
 
+# Load the Piper voice
+bb_tts_dir = Path(__file__).parent.parent / "model"  # TODO: check how to get nice relative paths
+model_path = bb_tts_dir / "en_US-lessac-medium.onnx"
+config_path = bb_tts_dir / "en_US-lessac-medium.onnx.json"
+voice = PiperVoice.load(model_path, config_path=config_path, use_cuda=False)
+
 
 def speak(text: str, publisher: Publisher, priority: int = 20, speaking_active: bool = True) -> None:
     """Utility method which can be used by other classes to easily publish a message."""
@@ -27,10 +34,35 @@ def speak(text: str, publisher: Publisher, priority: int = 20, speaking_active:
 
 
 def say(text: str) -> None:
-    """Start the shell `say.sh` script to output given text with mimic3. Beware: this is blocking."""
-    script_path = os.path.join(get_package_prefix("bitbots_tts"), "lib/bitbots_tts/say.sh")
-    process = subprocess.Popen((script_path, text))
-    process.wait()
+    """Use piper for speech synthesis and audio playback.
+    This is also used for speaking the ip adress during startup."""
+    synthesize_args = {
+        "length_scale": 1.0,  # Phoneme length, if lower -> faster
+        "noise_scale": 0.667,  # Generator noise, if lower -> more robotic
+        "noise_w": 0.8,  # Phoneme width noise, if lower -> more robotic
+        "sentence_silence": 0.1,  # seconds of silence after each sentence
+    }
+    with io.BytesIO() as buffer:
+        with wave.open(buffer, "wb") as wav_file:
+            voice.synthesize(text, wav_file, **synthesize_args)
+
+        buffer.seek(0)
+        with wave.open(buffer, "rb") as wav:
+            framerate = wav.getframerate()
+            sampwidth = wav.getsampwidth()
+            nchannels = wav.getnchannels()
+            nframes = wav.getnframes()
+            audio_bytes = wav.readframes(nframes)
+
+            # bytes to np array
+            dtype_map = {1: np.int8, 2: np.int16, 4: np.int32}
+            if sampwidth not in dtype_map:
+                raise ValueError(f"Unsupported sample width: {sampwidth}")
+            audio = np.frombuffer(audio_bytes, dtype=dtype_map[sampwidth])
+            if nchannels > 1:
+                audio = audio.reshape(-1, nchannels)
+
+            sd.play(audio, samplerate=framerate, blocking=True)
 
 
 class Speaker(Node):
@@ -62,17 +94,6 @@ def __init__(self) -> None:
         # Subscribe to the speak topic
         self.create_subscription(Audio, "speak", self.speak_cb, 10, callback_group=MutuallyExclusiveCallbackGroup())
 
-        # Wait for the mimic server to start
-        while True:
-            try:
-                requests.get("http://localhost:59125")
-                break
-            except requests.exceptions.ConnectionError:
-                # log once per second that the server is not yet available
-                self.get_logger().info("Waiting for mimic server to start...", throttle_duration_sec=2.0)
-                time.sleep(0.5)
-                pass
-
         # Start processing the queue
         self.create_timer(0.1, self.run_speaker, callback_group=MutuallyExclusiveCallbackGroup())
 

diff --git a/bitbots_misc/bitbots_tts/setup.py b/bitbots_misc/bitbots_tts/setup.py
@@ -12,6 +12,7 @@
         ("share/ament_index/resource_index/packages", ["resource/" + package_name]),
         ("share/" + package_name + "/config", glob.glob("config/*.yaml")),
         ("share/" + package_name + "/launch", glob.glob("launch/*.launch")),
+        ("share/" + package_name + "/model", glob.glob("model/*")),
     ],
     install_requires=[
         "setuptools",