Skip to content

Replace mimic3 with piper #686

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -229,3 +229,6 @@ doku/*
**/workspace_status.json

.pytest_cache/

# tts model
*/bitbots_tts/model/*
61 changes: 41 additions & 20 deletions bitbots_misc/bitbots_tts/bitbots_tts/tts.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
#!/usr/bin/env python3

import os
import subprocess
import time
import io
import traceback
import wave
from pathlib import Path

import numpy as np
import rclpy
import requests
from ament_index_python import get_package_prefix
import sounddevice as sd
from piper import PiperVoice
from rcl_interfaces.msg import Parameter, SetParametersResult
from rclpy.callback_groups import MutuallyExclusiveCallbackGroup
from rclpy.executors import MultiThreadedExecutor
Expand All @@ -16,6 +17,12 @@

from bitbots_msgs.msg import Audio

# Load the Piper voice
bb_tts_dir = Path(__file__).parent.parent / "model" # TODO: check how to get nice relative paths
model_path = bb_tts_dir / "en_US-lessac-medium.onnx"
config_path = bb_tts_dir / "en_US-lessac-medium.onnx.json"
voice = PiperVoice.load(model_path, config_path=config_path, use_cuda=False)


def speak(text: str, publisher: Publisher, priority: int = 20, speaking_active: bool = True) -> None:
"""Utility method which can be used by other classes to easily publish a message."""
Expand All @@ -27,10 +34,35 @@ def speak(text: str, publisher: Publisher, priority: int = 20, speaking_active:


def say(text: str) -> None:
"""Start the shell `say.sh` script to output given text with mimic3. Beware: this is blocking."""
script_path = os.path.join(get_package_prefix("bitbots_tts"), "lib/bitbots_tts/say.sh")
process = subprocess.Popen((script_path, text))
process.wait()
"""Use piper for speech synthesis and audio playback.
This is also used for speaking the IP address during startup."""
synthesize_args = {
"length_scale": 1.0, # Phoneme length, if lower -> faster
"noise_scale": 0.667, # Generator noise, if lower -> more robotic
"noise_w": 0.8, # Phoneme width noise, if lower -> more robotic
"sentence_silence": 0.1, # seconds of silence after each sentence
}
with io.BytesIO() as buffer:
with wave.open(buffer, "wb") as wav_file:
voice.synthesize(text, wav_file, **synthesize_args)

buffer.seek(0)
with wave.open(buffer, "rb") as wav:
framerate = wav.getframerate()
sampwidth = wav.getsampwidth()
nchannels = wav.getnchannels()
nframes = wav.getnframes()
audio_bytes = wav.readframes(nframes)

# bytes to np array
dtype_map = {1: np.int8, 2: np.int16, 4: np.int32}
if sampwidth not in dtype_map:
raise ValueError(f"Unsupported sample width: {sampwidth}")
audio = np.frombuffer(audio_bytes, dtype=dtype_map[sampwidth])
if nchannels > 1:
audio = audio.reshape(-1, nchannels)

sd.play(audio, samplerate=framerate, blocking=True)


class Speaker(Node):
Expand Down Expand Up @@ -62,17 +94,6 @@ def __init__(self) -> None:
# Subscribe to the speak topic
self.create_subscription(Audio, "speak", self.speak_cb, 10, callback_group=MutuallyExclusiveCallbackGroup())

# Wait for the mimic server to start
while True:
try:
requests.get("http://localhost:59125")
break
except requests.exceptions.ConnectionError:
# log at most once every two seconds that the server is not yet available
self.get_logger().info("Waiting for mimic server to start...", throttle_duration_sec=2.0)
time.sleep(0.5)
pass

# Start processing the queue
self.create_timer(0.1, self.run_speaker, callback_group=MutuallyExclusiveCallbackGroup())

Expand Down
1 change: 1 addition & 0 deletions bitbots_misc/bitbots_tts/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
("share/ament_index/resource_index/packages", ["resource/" + package_name]),
("share/" + package_name + "/config", glob.glob("config/*.yaml")),
("share/" + package_name + "/launch", glob.glob("launch/*.launch")),
("share/" + package_name + "/model", glob.glob("model/*")),
],
install_requires=[
"setuptools",
Expand Down
Loading