Skip to content

Commit e73997e

Browse files
committed
Wip openvoice
1 parent e49ea01 commit e73997e

File tree

11 files changed

+377
-3
lines changed

11 files changed

+377
-3
lines changed

.github/workflows/test-extra.yml

+22
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,28 @@ jobs:
123123
run: |
124124
make --jobs=5 --output-sync=target -C backend/python/parler-tts
125125
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
126+
127+
tests-openvoice:
  # Builds the OpenVoice Python backend and runs its test target.
  runs-on: ubuntu-latest
  steps:
    - name: Clone
      uses: actions/checkout@v4
      with:
        submodules: true
    - name: Dependencies
      run: |
        sudo apt-get update
        # -y keeps apt from stopping at a confirmation prompt in the non-interactive runner
        sudo apt-get install -y build-essential ffmpeg
        # Install UV
        curl -LsSf https://astral.sh/uv/install.sh | sh
        sudo apt-get install -y ca-certificates cmake curl patch python3-pip
        sudo apt-get install -y libopencv-dev
        pip install --user grpcio-tools==1.63.0

    - name: Test openvoice
      run: |
        make --jobs=5 --output-sync=target -C backend/python/openvoice
        make --jobs=5 --output-sync=target -C backend/python/openvoice test
126148
127149
tests-transformers-musicgen:
128150
runs-on: ubuntu-latest

Dockerfile

+4-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ ARG TARGETARCH
1212
ARG TARGETVARIANT
1313

1414
ENV DEBIAN_FRONTEND=noninteractive
15-
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
15+
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
1616

1717
ARG GO_TAGS="stablediffusion tinydream tts"
1818

@@ -305,6 +305,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
305305
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
306306
make -C backend/python/vall-e-x \
307307
; fi && \
308+
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
309+
make -C backend/python/openvoice \
310+
; fi && \
308311
if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
309312
make -C backend/python/petals \
310313
; fi && \

Makefile

+11-2
Original file line numberDiff line numberDiff line change
@@ -445,10 +445,10 @@ protogen-go-clean:
445445
$(RM) bin/*
446446

447447
.PHONY: protogen-python
448-
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
448+
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
449449

450450
.PHONY: protogen-python-clean
451-
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
451+
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
452452

453453
.PHONY: autogptq-protogen
454454
autogptq-protogen:
@@ -562,6 +562,14 @@ vall-e-x-protogen:
562562
vall-e-x-protogen-clean:
563563
$(MAKE) -C backend/python/vall-e-x protogen-clean
564564

565+
# Proxy targets for the OpenVoice Python backend. Each one strips its
# "openvoice-" prefix and runs the remaining target name (protogen or
# protogen-clean) in backend/python/openvoice.
.PHONY: openvoice-protogen openvoice-protogen-clean
openvoice-protogen openvoice-protogen-clean:
	$(MAKE) -C backend/python/openvoice $(patsubst openvoice-%,%,$@)
572+
565573
.PHONY: vllm-protogen
566574
vllm-protogen:
567575
$(MAKE) -C backend/python/vllm protogen
@@ -585,6 +593,7 @@ prepare-extra-conda-environments: protogen-python
585593
$(MAKE) -C backend/python/transformers-musicgen
586594
$(MAKE) -C backend/python/parler-tts
587595
$(MAKE) -C backend/python/vall-e-x
596+
$(MAKE) -C backend/python/openvoice
588597
$(MAKE) -C backend/python/exllama
589598
$(MAKE) -C backend/python/petals
590599
$(MAKE) -C backend/python/exllama2

backend/python/openvoice/Makefile

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Build helpers for the OpenVoice Python gRPC backend.
.DEFAULT_GOAL := install

# Generate the gRPC stubs, then run the backend's install script.
.PHONY: install
install: protogen
	bash install.sh

# Convenience alias: make sure both generated stub files exist.
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

# One protoc invocation writes BOTH stub files, so they are declared as a
# grouped target ("&:", GNU Make >= 4.3). A plain "a b:" header would define
# two independent rules that `make -j` may run at the same time.
# The .proto file is a prerequisite so the stubs are regenerated when it changes;
# protoc resolves "backend.proto" through the -I../.. include path.
backend_pb2_grpc.py backend_pb2.py &: ../../backend.proto
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

# Remove generated stubs plus the virtualenv and bytecode cache.
.PHONY: clean
clean: protogen-clean
	$(RM) -r venv __pycache__

# Run the backend's test script (requires the generated stubs).
.PHONY: test
test: protogen
	@echo "Testing openvoice..."
	bash test.sh
	@echo "openvoice tested."

backend/python/openvoice/backend.py

+158
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Extra gRPC server for OpenVoice models.
4+
"""
5+
from concurrent import futures
6+
7+
import argparse
8+
import signal
9+
import sys
10+
import os
11+
import torch
12+
from openvoice import se_extractor
13+
from openvoice.api import ToneColorConverter
14+
from melo.api import TTS
15+
16+
import time
17+
import backend_pb2
18+
import backend_pb2_grpc
19+
20+
import grpc
21+
22+
23+
# Number of seconds in one day (24 h * 60 min * 60 s).
_ONE_DAY_IN_SECONDS = 24 * 60 * 60

# Worker-thread count: taken from PYTHON_GRPC_MAX_WORKERS when that variable
# is present in the environment, otherwise 1.
MAX_WORKERS = int(os.getenv('PYTHON_GRPC_MAX_WORKERS', '1'))
27+
28+
# Implement the BackendServicer class with the service methods
29+
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer for the OpenVoice backend.

    This class implements the gRPC methods for the backend service: Health, LoadModel, and TTS.
    """
    def Health(self, request, context):
        """
        A gRPC method that returns the health status of the backend service.

        Args:
            request: A HealthRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Reply object that contains the health status of the backend service.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        A gRPC method that records the model settings and, when a reference
        audio file is configured, loads the tone color converter.

        Args:
            request: A LoadModelRequest object that contains the request parameters.
                The fields read here are Model, ModelFile, Type and AudioPath.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object that contains the result of the LoadModel operation.
            Any exception is caught and reported as success=False.
        """
        try:
            audio_path = request.AudioPath
            # A relative AudioPath is resolved against the directory that holds
            # request.ModelFile; an absolute AudioPath is used unchanged.
            if audio_path and request.ModelFile != "" and not os.path.isabs(audio_path):
                audio_path = os.path.join(os.path.dirname(request.ModelFile), audio_path)

            # Voice cloning is enabled only when a reference audio path was given.
            self.clonedVoice = audio_path != ""

            self.modelpath = request.ModelFile
            self.speaker = request.Type
            self.ClonedVoicePath = audio_path

            ckpt_converter = request.Model+'/converter'
            device = "cuda:0" if torch.cuda.is_available() else "cpu"
            self.device = device
            self.tone_color_converter = None
            if self.clonedVoice:
                self.tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
                self.tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')

        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def TTS(self, request, context):
        """
        A gRPC method that synthesizes request.text into the file request.dst.

        When LoadModel configured a reference audio file, the generated audio is
        additionally passed through the tone color converter, overwriting request.dst.

        Args:
            request: The TTS request; the fields read here are model, voice, text and dst.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object; success=False carries a message describing the failure.
        """
        model_name = request.model
        if model_name == "":
            return backend_pb2.Result(success=False, message="request.model is required")
        try:
            # Speed is adjustable
            speed = 1.0
            voice = request.voice if request.voice else "EN"
            # Inside a method body the bare name TTS resolves to the module-level
            # `from melo.api import TTS` import, not to this method.
            # NOTE(review): the model is constructed again on every request.
            model = TTS(language=voice, device=self.device)
            speaker_ids = model.hps.data.spk2id
            speaker_key = self.speaker
            for s in speaker_ids.keys():
                print(f"Speaker: {s} - ID: {speaker_ids[s]}", file=sys.stderr)
            if speaker_key not in speaker_ids.keys():
                # Report a readable error instead of letting a bare KeyError reach the generic handler.
                return backend_pb2.Result(
                    success=False,
                    message=f"Unknown speaker {speaker_key!r} for voice {voice!r}")
            speaker_id = speaker_ids[speaker_key]

            source_se = None
            if self.clonedVoice:
                # The base speaker embedding is only consumed by the tone color
                # converter, so it is loaded only when cloning is enabled. It is
                # loaded before synthesis so a missing file fails before dst is written.
                se_name = speaker_key.lower().replace('_', '-')
                source_se = torch.load(f'{self.modelpath}/base_speakers/ses/{se_name}.pth', map_location=self.device)

            model.tts_to_file(request.text, speaker_id, request.dst, speed=speed)

            if self.clonedVoice:
                reference_speaker = self.ClonedVoicePath
                target_se, audio_name = se_extractor.get_se(reference_speaker, self.tone_color_converter, vad=False)
                # Run the tone color converter
                encode_message = "@MyShell"
                self.tone_color_converter.convert(
                    audio_src_path=request.dst,
                    src_se=source_se,
                    tgt_se=target_se,
                    output_path=request.dst,
                    message=encode_message)

            print("[OpenVoice] TTS generated!", file=sys.stderr)
            print("[OpenVoice] TTS saved to", request.dst, file=sys.stderr)
            print(request, file=sys.stderr)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(success=True)
127+
128+
def serve(address):
    """
    Start the gRPC server on `address` and block until a termination signal arrives.

    Args:
        address: The address string passed to the server's add_insecure_port.
    """
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("[OpenVoice] Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        # Logged to stderr like every other message of this backend.
        print("[OpenVoice] Received termination signal. Shutting down...", file=sys.stderr)
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    # Keep the main thread alive; the process leaves through signal_handler.
    # No KeyboardInterrupt handling is needed: with a custom SIGINT handler
    # installed, Python no longer raises KeyboardInterrupt on Ctrl-C.
    while True:
        time.sleep(_ONE_DAY_IN_SECONDS)
150+
151+
# Script entry point: read --addr from the command line and start serving on it.
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.")
    options = cli.parse_args()
    print(f"[OpenVoice] startup: {options}", file=sys.stderr)
    serve(options.addr)

backend/python/openvoice/install.sh

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/bash
# Installs the Python requirements of the OpenVoice backend, then downloads the unidic data.
set -e

# Quoted so that a checkout path containing whitespace does not split the source path.
source "$(dirname "$0")/../common/libbackend.sh"

# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements

python -m unidic download
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
2+
intel-extension-for-pytorch
3+
torch
4+
optimum[openvino]
5+
grpcio==1.63.0
6+
protobuf
7+
librosa==0.9.1
8+
faster-whisper==0.9.0
9+
pydub==0.25.1
10+
wavmark==0.0.3
11+
numpy==1.22.0
12+
eng_to_ipa==0.0.2
13+
inflect==7.0.0
14+
unidecode==1.3.7
15+
whisper-timestamped==1.14.2
16+
openai
17+
python-dotenv
18+
pypinyin==0.50.0
19+
cn2an==0.5.22
20+
jieba==0.42.1
21+
gradio==3.48.0
22+
langid==1.1.6
23+
git+https://github.com/myshell-ai/MeloTTS.git
+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
grpcio==1.63.0
2+
protobuf
3+
librosa==0.9.1
4+
faster-whisper==0.9.0
5+
pydub==0.25.1
6+
wavmark==0.0.3
7+
numpy==1.22.0
8+
eng_to_ipa==0.0.2
9+
inflect==7.0.0
10+
unidecode==1.3.7
11+
whisper-timestamped==1.14.2
12+
openai
13+
python-dotenv
14+
pypinyin==0.50.0
15+
cn2an==0.5.22
16+
jieba==0.42.1
17+
gradio==3.48.0
18+
langid==1.1.6
19+
git+https://github.com/myshell-ai/MeloTTS.git
20+
git+https://github.com/myshell-ai/OpenVoice.git

backend/python/openvoice/run.sh

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/bash
# Launches the OpenVoice backend through the shared startBackend helper.

# Quoted so that a checkout path containing whitespace does not split the source path.
source "$(dirname "$0")/../common/libbackend.sh"

# "$@" forwards every argument exactly as received; an unquoted $@ would
# re-split arguments that contain whitespace.
startBackend "$@"

0 commit comments

Comments
 (0)