Spaces:
Runtime error
Runtime error
"""Gradio app: transcribe speech, translate it, and speak the translation."""

import gradio as gr
from transformers import pipeline
from TTS.api import TTS

# -----------------------------
# 1. Load Speech-to-Text (Whisper)
# -----------------------------
stt = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3",
    device="cpu"
)

# -----------------------------
# 2. Load Translation (M2M100)
# -----------------------------
translator = pipeline(
    "translation",
    model="facebook/m2m100_418M"
)

# List of supported languages (display name -> M2M100-style language code)
languages = {
    "English": "en",
    "French": "fr",
    "Kinyarwanda": "rw",
    "Swahili": "sw",
    "German": "de",
    "Spanish": "es",
    "Portuguese": "pt",
    "Italian": "it",
    "Chinese (Mandarin)": "zh",
    "Japanese": "ja",
    "Korean": "ko",
    "Arabic": "ar",
    "Russian": "ru",
}

# -----------------------------
# 3. Load Text-to-Speech (XTTSv2)
# -----------------------------
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")

# Codes from `languages` that XTTS v2 can speak, mapped to the spelling XTTS
# expects. Mandarin is "zh-cn" there; codes missing from this table (e.g.
# "rw", "sw") have no XTTS voice and produce text output only.
_XTTS_LANGUAGE_CODES = {
    "en": "en",
    "fr": "fr",
    "de": "de",
    "es": "es",
    "pt": "pt",
    "it": "it",
    "zh": "zh-cn",
    "ja": "ja",
    "ko": "ko",
    "ar": "ar",
    "ru": "ru",
}


# -----------------------------
# MAIN FUNCTION
# -----------------------------
def process_audio(audio, target_lang, source_lang="English"):
    """Transcribe an audio clip, translate the text, and synthesize speech.

    Args:
        audio: Path to the recorded/uploaded audio file (the UI uses
            ``gr.Audio(type="filepath")``). It is also used as the XTTS
            voice reference, so the output imitates the input speaker.
        target_lang: Display name of the target language; a key of
            ``languages``.
        source_lang: Display name of the language being spoken; a key of
            ``languages``. Defaults to "English" because the UI offers no
            source-language selector.

    Returns:
        A ``(transcript, translation, audio_path)`` tuple. ``audio_path`` is
        ``None`` when nothing was transcribed or when XTTS has no voice for
        the target language.

    Raises:
        gr.Error: If no audio or language was supplied, or the translation
            model does not know one of the two languages.
    """
    if not audio:
        raise gr.Error("Please record or upload an audio clip first.")
    if target_lang not in languages:
        raise gr.Error("Please choose a target language.")
    if source_lang not in languages:
        raise gr.Error(f"Unknown source language: {source_lang}")

    # Step 1: STT
    text = stt(audio)["text"]
    if not text.strip():
        # Nothing intelligible was heard; skip translation and synthesis.
        return text, "", None

    # Step 2: Translate
    lang_code = languages[target_lang]
    try:
        # M2M100 needs both language codes; the pipeline derives the forced
        # BOS token from tgt_lang itself.
        translation = translator(
            text,
            src_lang=languages[source_lang],
            tgt_lang=lang_code
        )[0]["translation_text"]
    except KeyError as err:
        # The tokenizer raises KeyError for a code it has no token for.
        raise gr.Error(
            f"The translation model does not support "
            f"{source_lang} -> {target_lang}."
        ) from err

    # Step 3: TTS
    tts_lang = _XTTS_LANGUAGE_CODES.get(lang_code)
    if tts_lang is None:
        return text, translation, None

    output_audio_path = "output.wav"
    tts.tts_to_file(
        text=translation,
        file_path=output_audio_path,
        # XTTS is multi-speaker and refuses to run without a voice reference.
        speaker_wav=audio,
        language=tts_lang
    )
    return text, translation, output_audio_path


# -----------------------------
# 4. Gradio UI
# -----------------------------
with gr.Blocks(css="custom.css") as app:
    gr.Markdown("<h1>π Multilingual Voice-to-Voice AI Translator</h1>")
    gr.Markdown("Record or upload audio β AI Converts speech β Translates β Speaks output voice.")

    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="π€ Upload or Record Audio")
        lang_input = gr.Dropdown(list(languages.keys()), label="π Choose Target Language")

    with gr.Row():
        text_out = gr.Textbox(label="π Transcribed Text")
        translation_out = gr.Textbox(label="π Translated Text")

    audio_out = gr.Audio(label="π AI Generated Voice Output")
    submit = gr.Button("π Translate & Convert")

    submit.click(
        fn=process_audio,
        inputs=[audio_input, lang_input],
        outputs=[text_out, translation_out, audio_out]
    )

app.launch()
53ed5ad
verified
| import gradio as gr | |
| from transformers import pipeline | |
| from TTS.api import TTS | |
# -----------------------------
# 1. Speech-to-Text: Whisper large-v3, pinned to the CPU
# -----------------------------
stt = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v3",
    device="cpu",
)

# -----------------------------
# 2. Translation: M2M100 (418M checkpoint)
# -----------------------------
translator = pipeline(task="translation", model="facebook/m2m100_418M")

# Display name -> language code, in the order shown in the UI dropdown.
languages = dict(
    [
        ("English", "en"),
        ("French", "fr"),
        ("Kinyarwanda", "rw"),
        ("Swahili", "sw"),
        ("German", "de"),
        ("Spanish", "es"),
        ("Portuguese", "pt"),
        ("Italian", "it"),
        ("Chinese (Mandarin)", "zh"),
        ("Japanese", "ja"),
        ("Korean", "ko"),
        ("Arabic", "ar"),
        ("Russian", "ru"),
    ]
)

# -----------------------------
# 3. Text-to-Speech: Coqui XTTS v2
# -----------------------------
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")
# -----------------------------
# MAIN FUNCTION
# -----------------------------
# Codes from `languages` that XTTS v2 can speak, mapped to the spelling XTTS
# expects. Mandarin is "zh-cn" there; codes missing from this table (e.g.
# "rw", "sw") have no XTTS voice and produce text output only.
_XTTS_LANGUAGE_CODES = {
    "en": "en",
    "fr": "fr",
    "de": "de",
    "es": "es",
    "pt": "pt",
    "it": "it",
    "zh": "zh-cn",
    "ja": "ja",
    "ko": "ko",
    "ar": "ar",
    "ru": "ru",
}


def process_audio(audio, target_lang, source_lang="English"):
    """Transcribe an audio clip, translate the text, and synthesize speech.

    Args:
        audio: Path to the recorded/uploaded audio file (the UI uses
            ``gr.Audio(type="filepath")``). It is also used as the XTTS
            voice reference, so the output imitates the input speaker.
        target_lang: Display name of the target language; a key of
            ``languages``.
        source_lang: Display name of the language being spoken; a key of
            ``languages``. Defaults to "English" because the UI offers no
            source-language selector.

    Returns:
        A ``(transcript, translation, audio_path)`` tuple. ``audio_path`` is
        ``None`` when nothing was transcribed or when XTTS has no voice for
        the target language.

    Raises:
        gr.Error: If no audio or language was supplied, or the translation
            model does not know one of the two languages.
    """
    if not audio:
        raise gr.Error("Please record or upload an audio clip first.")
    if target_lang not in languages:
        raise gr.Error("Please choose a target language.")
    if source_lang not in languages:
        raise gr.Error(f"Unknown source language: {source_lang}")

    # Step 1: STT
    text = stt(audio)["text"]
    if not text.strip():
        # Nothing intelligible was heard; skip translation and synthesis.
        return text, "", None

    # Step 2: Translate
    lang_code = languages[target_lang]
    try:
        # M2M100 needs both language codes; the pipeline derives the forced
        # BOS token from tgt_lang itself.
        translation = translator(
            text,
            src_lang=languages[source_lang],
            tgt_lang=lang_code,
        )[0]["translation_text"]
    except KeyError as err:
        # The tokenizer raises KeyError for a code it has no token for.
        raise gr.Error(
            f"The translation model does not support "
            f"{source_lang} -> {target_lang}."
        ) from err

    # Step 3: TTS
    tts_lang = _XTTS_LANGUAGE_CODES.get(lang_code)
    if tts_lang is None:
        return text, translation, None

    output_audio_path = "output.wav"
    tts.tts_to_file(
        text=translation,
        file_path=output_audio_path,
        # XTTS is multi-speaker and refuses to run without a voice reference.
        speaker_wav=audio,
        language=tts_lang,
    )
    return text, translation, output_audio_path
# -----------------------------
# 4. Gradio UI
# -----------------------------
# Components are created in display order: Gradio lays them out in the order
# they are instantiated inside the Blocks/Row contexts.
language_choices = list(languages.keys())

with gr.Blocks(css="custom.css") as app:
    # Page header and one-line description.
    gr.Markdown("<h1>π Multilingual Voice-to-Voice AI Translator</h1>")
    gr.Markdown("Record or upload audio β AI Converts speech β Translates β Speaks output voice.")

    # Inputs: the audio clip (passed to the handler as a file path) and the
    # target language.
    with gr.Row():
        audio_input = gr.Audio(
            type="filepath",
            label="π€ Upload or Record Audio",
        )
        lang_input = gr.Dropdown(
            language_choices,
            label="π Choose Target Language",
        )

    # Text outputs side by side, with the synthesized audio underneath.
    with gr.Row():
        text_out = gr.Textbox(label="π Transcribed Text")
        translation_out = gr.Textbox(label="π Translated Text")
    audio_out = gr.Audio(label="π AI Generated Voice Output")

    submit = gr.Button("π Translate & Convert")

    # The handler's three return values fill the three outputs in order.
    submit.click(
        process_audio,
        [audio_input, lang_input],
        [text_out, translation_out, audio_out],
    )

app.launch()