# Spaces:
# Runtime error
# Runtime error
# File size: 2,514 Bytes
# e80214b 53ed5ad e80214b 53ed5ad e80214b 53ed5ad e80214b 53ed5ad |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import gradio as gr
from transformers import pipeline
from TTS.api import TTS
# -----------------------------
# 1. Load Speech-to-Text (Whisper)
# -----------------------------
# Whisper decodes audio in 30-second windows. Passing chunk_length_s makes the
# pipeline split longer recordings into windows and stitch the text back
# together, so clips longer than 30 seconds are transcribed in full.
stt = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3",
    chunk_length_s=30,
    device="cpu",  # inference runs on the CPU
)
# -----------------------------
# 2. Load Translation (M2M100)
# -----------------------------
# Checkpoint used for text-to-text translation.
TRANSLATION_MODEL_ID = "facebook/m2m100_418M"

# Module-level translation pipeline, created once when the module loads.
translator = pipeline(task="translation", model=TRANSLATION_MODEL_ID)
# Languages offered to the user, as (display name, language code) pairs.
# Order is significant: it is the order in which the names are listed.
# NOTE(review): confirm every code is accepted by both the translation and the
# speech-synthesis model before relying on it.
_LANGUAGE_TABLE = [
    ("English", "en"),
    ("French", "fr"),
    ("Kinyarwanda", "rw"),
    ("Swahili", "sw"),
    ("German", "de"),
    ("Spanish", "es"),
    ("Portuguese", "pt"),
    ("Italian", "it"),
    ("Chinese (Mandarin)", "zh"),
    ("Japanese", "ja"),
    ("Korean", "ko"),
    ("Arabic", "ar"),
    ("Russian", "ru"),
]

# Display name -> language code lookup.
languages = dict(_LANGUAGE_TABLE)
# -----------------------------
# 3. Load Text-to-Speech (XTTSv2)
# -----------------------------
# Identifier of the multilingual XTTS v2 voice model.
XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

# Module-level synthesizer, created once when the module loads.
# NOTE(review): Coqui may prompt for licence confirmation on first download
# (see the COQUI_TOS_AGREED environment variable) - confirm how the deployment
# handles this in a non-interactive environment.
tts = TTS(model_name=XTTS_MODEL_NAME)
# -----------------------------
# MAIN FUNCTION
# -----------------------------
# Translation-model code -> XTTS v2 code, where the two models disagree.
_TTS_LANGUAGE_ALIASES = {"zh": "zh-cn"}

# Language codes XTTS v2 can synthesize (per the Coqui XTTS documentation).
_TTS_LANGUAGES = frozenset(
    {
        "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru",
        "nl", "cs", "ar", "zh-cn", "hu", "ko", "ja", "hi",
    }
)


def process_audio(audio, target_lang, source_lang="English"):
    """Transcribe a recording, translate the text, and speak the translation.

    Args:
        audio: Path to the recorded or uploaded audio file.
        target_lang: Display name (a key of ``languages``) to translate into.
        source_lang: Display name (a key of ``languages``) of the language
            being spoken. The translation model must be told the source
            language explicitly; it is not detected from the audio.

    Returns:
        A ``(transcription, translation, audio_path)`` tuple. ``audio_path``
        is ``None`` when nothing was transcribed or when the speech model
        cannot synthesize the target language.

    Raises:
        gr.Error: If no audio was supplied, a language name is unknown, or the
            translation model does not support one of the languages.
    """
    if audio is None:
        raise gr.Error("Please record or upload an audio clip first.")
    if target_lang not in languages:
        raise gr.Error("Please choose a target language.")
    if source_lang not in languages:
        raise gr.Error(f"Unknown source language: {source_lang!r}")

    src_code = languages[source_lang]
    tgt_code = languages[target_lang]

    # Fail fast, before the expensive speech-recognition step, if the
    # translation tokenizer has no entry for either language code.
    try:
        translator.tokenizer.get_lang_id(src_code)
        translator.tokenizer.get_lang_id(tgt_code)
    except KeyError as err:
        raise gr.Error(
            f"The translation model does not support language code {err.args[0]!r}."
        ) from err

    # Step 1: STT
    text = stt(audio)["text"].strip()
    if not text:
        # Nothing was recognised, so there is nothing to translate or speak.
        return text, "", None

    # Step 2: Translate
    # The M2M100 pipeline needs both languages; it derives the forced
    # beginning-of-sentence token from tgt_lang itself.
    translation = translator(
        text,
        src_lang=src_code,
        tgt_lang=tgt_code,
    )[0]["translation_text"]

    # Step 3: TTS
    tts_code = _TTS_LANGUAGE_ALIASES.get(tgt_code, tgt_code)
    if tts_code not in _TTS_LANGUAGES:
        # Still return the text results when the voice model lacks the language.
        return text, translation, None

    output_audio_path = "output.wav"
    tts.tts_to_file(
        text=translation,
        file_path=output_audio_path,
        # XTTS needs a voice reference; reuse the input clip so the output
        # is spoken in the original speaker's voice.
        speaker_wav=audio,
        language=tts_code,
    )
    return text, translation, output_audio_path
# -----------------------------
# 4. Gradio UI
# -----------------------------
# Page layout: inputs on the first row, text results on the second, then the
# synthesized audio and the button that runs the whole pipeline.
with gr.Blocks(css="custom.css") as app:
    gr.Markdown("<h1>Multilingual Voice-to-Voice AI Translator</h1>")
    gr.Markdown(
        "Record or upload audio → AI Converts speech → Translates → Speaks output voice."
    )

    with gr.Row():
        # type="filepath" passes process_audio a path rather than raw samples.
        audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
        lang_input = gr.Dropdown(list(languages), label="Choose Target Language")

    with gr.Row():
        text_out = gr.Textbox(label="Transcribed Text")
        translation_out = gr.Textbox(label="Translated Text")

    audio_out = gr.Audio(label="AI Generated Voice Output")
    submit = gr.Button("Translate & Convert")

    # The three return values of process_audio fill the three output widgets
    # in the order listed here.
    submit.click(
        fn=process_audio,
        inputs=[audio_input, lang_input],
        outputs=[text_out, translation_out, audio_out],
    )

app.launch()
# |