-
Notifications
You must be signed in to change notification settings - Fork 15
Replace mimic3 with piper #686
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -229,3 +229,6 @@ doku/* | |
**/workspace_status.json | ||
|
||
.pytest_cache/ | ||
|
||
# tts model | ||
*/bitbots_tts/model/* | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,14 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import os | ||
import subprocess | ||
import time | ||
import io | ||
import traceback | ||
import wave | ||
from pathlib import Path | ||
|
||
import numpy as np | ||
import rclpy | ||
import requests | ||
from ament_index_python import get_package_prefix | ||
import sounddevice as sd | ||
from piper import PiperVoice | ||
from rcl_interfaces.msg import Parameter, SetParametersResult | ||
from rclpy.callback_groups import MutuallyExclusiveCallbackGroup | ||
from rclpy.executors import MultiThreadedExecutor | ||
|
@@ -16,6 +17,12 @@ | |
|
||
from bitbots_msgs.msg import Audio | ||
|
||
# Load the Piper voice | ||
bb_tts_dir = Path(__file__).parent.parent / "model" # TODO: check how to get nice relative paths | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I mean, it depends on where you want to locate the model. The current solution should also work, but I would then use an absolute path to ensure that it will work when building without symlinks. |
||
model_path = bb_tts_dir / "en_US-lessac-medium.onnx" | ||
config_path = bb_tts_dir / "en_US-lessac-medium.onnx.json" | ||
voice = PiperVoice.load(model_path, config_path=config_path, use_cuda=False) | ||
|
||
|
||
def speak(text: str, publisher: Publisher, priority: int = 20, speaking_active: bool = True) -> None: | ||
"""Utility method which can be used by other classes to easily publish a message.""" | ||
|
@@ -27,10 +34,35 @@ def speak(text: str, publisher: Publisher, priority: int = 20, speaking_active: | |
|
||
|
||
def say(text: str) -> None: | ||
"""Start the shell `say.sh` script to output given text with mimic3. Beware: this is blocking.""" | ||
script_path = os.path.join(get_package_prefix("bitbots_tts"), "lib/bitbots_tts/say.sh") | ||
process = subprocess.Popen((script_path, text)) | ||
process.wait() | ||
"""Use piper for speech synthesis and audio playback. | ||
This is also used for speaking the ip adress during startup.""" | ||
synthesize_args = { | ||
"length_scale": 1.0, # Phoneme length, if lower -> faster | ||
"noise_scale": 0.667, # Generator noise, if lower -> more robotic | ||
"noise_w": 0.8, # Phoneme width noise, if lower -> more robotic | ||
"sentence_silence": 0.1, # seconds of silence after each sentence | ||
} | ||
with io.BytesIO() as buffer: | ||
with wave.open(buffer, "wb") as wav_file: | ||
voice.synthesize(text, wav_file, **synthesize_args) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This would mean that we will generate a new wav for every message. Have you tried how long the wav generation takes? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It is real time on my laptop, but I did not try it on the robot yet. Maybe we should cache them again though... just to be sure. |
||
|
||
buffer.seek(0) | ||
with wave.open(buffer, "rb") as wav: | ||
framerate = wav.getframerate() | ||
sampwidth = wav.getsampwidth() | ||
nchannels = wav.getnchannels() | ||
nframes = wav.getnframes() | ||
audio_bytes = wav.readframes(nframes) | ||
|
||
# bytes to np array | ||
dtype_map = {1: np.int8, 2: np.int16, 4: np.int32} | ||
if sampwidth not in dtype_map: | ||
raise ValueError(f"Unsupported sample width: {sampwidth}") | ||
audio = np.frombuffer(audio_bytes, dtype=dtype_map[sampwidth]) | ||
if nchannels > 1: | ||
audio = audio.reshape(-1, nchannels) | ||
|
||
sd.play(audio, samplerate=framerate, blocking=True) | ||
|
||
|
||
class Speaker(Node): | ||
|
@@ -62,17 +94,6 @@ def __init__(self) -> None: | |
# Subscribe to the speak topic | ||
self.create_subscription(Audio, "speak", self.speak_cb, 10, callback_group=MutuallyExclusiveCallbackGroup()) | ||
|
||
# Wait for the mimic server to start | ||
while True: | ||
try: | ||
requests.get("http://localhost:59125") | ||
break | ||
except requests.exceptions.ConnectionError: | ||
# log once per second that the server is not yet available | ||
self.get_logger().info("Waiting for mimic server to start...", throttle_duration_sec=2.0) | ||
time.sleep(0.5) | ||
pass | ||
|
||
# Start processing the queue | ||
self.create_timer(0.1, self.run_speaker, callback_group=MutuallyExclusiveCallbackGroup()) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would suggest
**/bitbots_tts/model/
to ignore the whole directory at any depth (in case we move it at some point).