import gradio as gr
from transformers import pipeline
from TTS.api import TTS
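# NOTE (assumption): this script expects gradio, transformers, torch, and the
# Coqui TTS package to be installed, plus ffmpeg on the PATH so the Whisper
# pipeline can decode uploaded audio files.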
# -----------------------------
# 1. Load Speech-to-Text (Whisper)
# -----------------------------
stt = pipeline(
"automatic-speech-recognition",
model="openai/whisper-large-v3",
device="cpu"
)
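# NOTE: whisper-large-v3 is heavy on CPU; a smaller checkpoint such as
# "openai/whisper-small" (or device=0 when a GPU is available) may be a more
# practical choice for an interactive demo.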
# -----------------------------
# 2. Load Translation (M2M100)
# -----------------------------
translator = pipeline(
"translation",
model="facebook/m2m100_418M"
)
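# NOTE: M2M100 also needs the source language. The tokenizer assumes English
# input by default; for other source languages, pass src_lang/tgt_lang to the
# pipeline call (or set translator.tokenizer.src_lang) before translating.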
# List of supported languages
languages = {
"English": "en",
"French": "fr",
"Kinyarwanda": "rw",
"Swahili": "sw",
"German": "de",
"Spanish": "es",
"Portuguese": "pt",
"Italian": "it",
"Chinese (Mandarin)": "zh",
"Japanese": "ja",
"Korean": "ko",
"Arabic": "ar",
"Russian": "ru",
}
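# NOTE (assumption): XTTS v2 supports a fixed list of languages (en, fr, de,
# es, pt, it, ru, ar, zh-cn, ja, ko, among others). Kinyarwanda ("rw") and
# Swahili ("sw") are not on that list, so the TTS step will reject them.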
# -----------------------------
# 3. Load Text-to-Speech (XTTSv2)
# -----------------------------
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
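# NOTE: XTTS v2 is a voice-cloning model: the checkpoint is downloaded on
# first use and every synthesis call needs a short reference recording of the
# voice to imitate (see speaker_wav in process_audio below).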
# -----------------------------
# MAIN FUNCTION
# -----------------------------
def process_audio(audio, target_lang):
    # Step 1: Speech-to-text with Whisper
    text = stt(audio)["text"]

    # Step 2: Translate into the chosen target language with M2M100
    lang_code = languages[target_lang]
    translation = translator(
        text,
        forced_bos_token_id=translator.tokenizer.get_lang_id(lang_code)
    )[0]["translation_text"]

    # Step 3: Text-to-speech with XTTS v2. XTTS v2 clones a voice from a
    # reference clip, so the input recording is reused as that reference here
    # (assumption: the clip is clean enough to clone from); passing
    # speaker_wav=None would leave the multi-speaker model without a voice.
    # XTTS v2 also expects "zh-cn" rather than "zh" for Mandarin.
    tts_lang = "zh-cn" if lang_code == "zh" else lang_code
    output_audio_path = "output.wav"
    tts.tts_to_file(
        text=translation,
        file_path=output_audio_path,
        speaker_wav=audio,
        language=tts_lang
    )
    return text, translation, output_audio_path
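# Quick local sanity check (assumption: a sample.wav file sits next to this
# script):
#   text, translated, wav_path = process_audio("sample.wav", "French")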
# -----------------------------
# 4. Gradio UI
# -----------------------------
with gr.Blocks(css="custom.css") as app:
    gr.Markdown("<h1>🌍 Multilingual Voice-to-Voice AI Translator</h1>")
    gr.Markdown("Record or upload audio → the AI transcribes the speech → translates it → speaks the result aloud.")

    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="🎤 Upload or Record Audio")
        lang_input = gr.Dropdown(list(languages.keys()), label="🌐 Choose Target Language")

    with gr.Row():
        text_out = gr.Textbox(label="📝 Transcribed Text")
        translation_out = gr.Textbox(label="🌍 Translated Text")
        audio_out = gr.Audio(label="🔊 AI-Generated Voice Output")

    submit = gr.Button("🚀 Translate & Convert")
    submit.click(
        fn=process_audio,
        inputs=[audio_input, lang_input],
        outputs=[text_out, translation_out, audio_out]
    )
app.launch()
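# On Hugging Face Spaces, app.launch() is enough; for a local run, a temporary
# public link can be opened with app.launch(share=True) instead.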