-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
115 lines (88 loc) · 3.52 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import gradio as gr
import whisper
from translate import Translator
from dotenv import dotenv_values
from elevenlabs.client import ElevenLabs
from elevenlabs import VoiceSettings
# Configuration: read key/value settings from the local ".env" file.
config = dotenv_values(".env")
# ElevenLabs API key used by the text-to-speech step. The lookup is a plain
# index, so a missing ELEVENLABS_API_KEY entry raises KeyError at import time.
ELEVENLABS_API_KEY = config["ELEVENLABS_API_KEY"]
# Target languages, in the order their audio files are returned:
# (language code, Spanish display name used in the console output).
_TARGET_LANGUAGES = (
    ("en", "Inglés"),
    ("it", "Italiano"),
    ("fr", "Francés"),
    ("ja", "Japonés"),
)

# Whisper model shared by all requests; loaded lazily on first use.
_whisper_model = None


def _get_whisper_model():
    """Return the shared Whisper "base" model, loading it on the first call."""
    global _whisper_model
    if _whisper_model is None:
        _whisper_model = whisper.load_model("base")
    return _whisper_model


def translator(audio_file):
    """Transcribe Spanish speech and return spoken translations of it.

    Pipeline: Whisper transcription -> text translation into every entry of
    ``_TARGET_LANGUAGES`` -> one synthesized audio file per language via
    ``text_to_speach``.

    Args:
        audio_file: Path of the recorded audio, as handed over by the Gradio
            audio input.

    Returns:
        A tuple with the saved audio file paths, in the order English,
        Italian, French, Japanese.

    Raises:
        gr.Error: If transcription or translation fails, if no speech was
            transcribed, or (raised by ``text_to_speach``) if audio
            generation fails.
    """
    # 1. Transcribe the speech.
    # Uses Whisper: https://github.com/openai/whisper
    # Online API alternative: https://www.assemblyai.com
    try:
        # Reuse the cached model instead of reloading it on every request.
        result = _get_whisper_model().transcribe(
            audio_file, language="Spanish", fp16=False)
        transcription = result["text"]
    except Exception as e:
        raise gr.Error(
            f"Se ha producido un error transcribiendo el texto: {str(e)}") from e

    print(f"Texto original: {transcription}")

    # With an empty transcript there is nothing to translate or to voice, so
    # stop here with a clear message instead of calling the remote services.
    if not transcription.strip():
        raise gr.Error("No se ha detectado voz en el audio")

    # 2. Translate the text.
    # Uses Translate: https://github.com/terryyin/translate-python
    try:
        translations = {
            code: Translator(
                from_lang="es", to_lang=code).translate(transcription)
            for code, _ in _TARGET_LANGUAGES
        }
    except Exception as e:
        raise gr.Error(
            f"Se ha producido un error traduciendo el texto: {str(e)}") from e

    for code, name in _TARGET_LANGUAGES:
        print(f"Texto traducido a {name}: {translations[code]}")

    # 3. Generate the translated audio.
    # Uses ElevenLabs: https://elevenlabs.io/docs/api-reference/getting-started
    return tuple(
        text_to_speach(translations[code], code)
        for code, _ in _TARGET_LANGUAGES
    )
def text_to_speach(text: str, language: str) -> str:
    """Synthesize ``text`` with ElevenLabs and save it as an MP3 file.

    Args:
        text: Text to be spoken.
        language: Language code of ``text`` (e.g. "en", "it"); it selects the
            voice model and names the output file.

    Returns:
        Path of the written file, ``audios/<language>.mp3``.

    Raises:
        gr.Error: If the ElevenLabs request or writing the file fails.
    """
    import os  # local import so this block does not depend on file-level imports

    # Per the ElevenLabs model documentation, "eleven_turbo_v2" is English-only;
    # other languages need the multilingual model to be pronounced correctly.
    model_id = "eleven_turbo_v2" if language == "en" else "eleven_multilingual_v2"

    output_dir = "audios"
    save_file_path = f"{output_dir}/{language}.mp3"

    try:
        # The output directory may not exist yet (e.g. on a fresh checkout).
        os.makedirs(output_dir, exist_ok=True)

        client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
        response = client.text_to_speech.convert(
            voice_id="pNInz6obpgDQGcFmaJgB",  # Adam
            optimize_streaming_latency="0",
            output_format="mp3_22050_32",
            text=text,
            model_id=model_id,
            voice_settings=VoiceSettings(
                stability=0.0,
                similarity_boost=0.0,
                style=0.0,
                use_speaker_boost=True,
            ),
        )

        # The response is iterated chunk by chunk; falsy chunks are skipped.
        with open(save_file_path, "wb") as f:
            for chunk in response:
                if chunk:
                    f.write(chunk)
    except Exception as e:
        raise gr.Error(
            f"Se ha producido un error creando el audio: {str(e)}") from e

    return save_file_path
# Gradio UI: one microphone recording (Spanish) in, one audio player per
# target language out. Outputs are listed in the order translator returns them.
web = gr.Interface(
    title="Traductor de voz",
    description="Traductor de voz con IA a varios idiomas",
    fn=translator,
    inputs=gr.Audio(label="Español", type="filepath", sources=["microphone"]),
    outputs=[
        gr.Audio(label=language_name)
        for language_name in ("Inglés", "Italiano", "Francés", "Japonés")
    ],
)

# Launch the interface.
web.launch()