Mgolo committed
Commit d333245 · verified · 1 Parent(s): 80c5d50

Rename app (2).py to app.py

Files changed (1)
  1. app (2).py → app.py  +42 -72

app (2).py → app.py  RENAMED
@@ -1,4 +1,4 @@
- import streamlit as st
+ import gradio as gr
  from transformers import pipeline, MarianTokenizer, AutoModelForSeq2SeqLM
  import torch
  import unicodedata
@@ -6,7 +6,6 @@ import re
  import whisper
  import tempfile
  import os
-
  import nltk
  nltk.download('punkt')
  from nltk.tokenize import sent_tokenize
@@ -18,18 +17,15 @@ from bs4 import BeautifulSoup
  import markdown2
  import chardet

-
  # --- Device selection ---
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  # --- Load translation models ---
- @st.cache_resource
  def load_models():
      en_dar_model_path = "/LocaleNLP/English_Hausa"
      en_wol_model_path = "/LocaleNLP/eng_wolof"
      en_hau_model_path = "/LocaleNLP/English_Darija"

-
      en_dar_model = AutoModelForSeq2SeqLM.from_pretrained(en_dar_model_path).to(device)
      en_dar_tokenizer = MarianTokenizer.from_pretrained(en_dar_model_path)

@@ -45,7 +41,6 @@ def load_models():

      return en_dar_translator, en_hau_translator, en_wol_translator

- @st.cache_resource
  def load_whisper_model():
      return whisper.load_model("base")

@@ -54,7 +49,7 @@ def transcribe_audio(audio_path):
      return whisper_model.transcribe(audio_path)["text"]

  def translate(text, target_lang):
-     en_bm_translator, en_dar_translator, en_hau_translator, en_wol_translator = load_models()
+     en_dar_translator, en_hau_translator, en_wol_translator = load_models()

      if target_lang == "Darija (Morocco)":
          translator = en_dar_translator
@@ -66,7 +61,6 @@ def translate(text, target_lang):
          raise ValueError("Unsupported target language")

      lang_tag = {
-
          "Darija (Morocco)": ">>dar<<",
          "Hausa (Nigeria)": ">>hau<<",
          "Wolof (Senegal)": ">>wol<<"
@@ -124,70 +118,46 @@ def extract_text_from_file(uploaded_file):
      else:
          raise ValueError("Unsupported file type")

- # --- Main Streamlit App ---
- def main():
-     st.set_page_config(page_title="LocaleNLP Translator", layout="wide", initial_sidebar_state="expanded")
-
-     with st.sidebar:
-         st.image("localenpl5.jpeg", use_container_width=True)
-         st.markdown("""
-         <h3 style='text-align: left; color: #4B8BBE;'>🌐 Models Overview</h3>
-         At <b>LocaleNLP</b>, we develop AI-powered NLP tools for low-resource languages across Africa and Asia.
-         📌 This application showcases translation using custom MarianMT models trained on bilingual datasets in English and Bambara, Darija, Hausa, and Wolof.
-         """, unsafe_allow_html=True)
-
-     st.markdown("""<h4 style='text-align: center; color: #306998;'>Translate between English, Darija, Hausa and Wolof</h4>""", unsafe_allow_html=True)
-
-     col1, col2 = st.columns(2)
-     with col1:
-         input_mode = st.selectbox("Select input mode:", ("Text", "Audio", "File"))
-         st.markdown("""<hr>""", unsafe_allow_html=True)
-     with col2:
-         target_lang = st.selectbox("Select target language:", ("Darija (Morocco)", "Hausa (Nigeria)", "Wolof (Senegal)"))
-         st.markdown("""<hr>""", unsafe_allow_html=True)
-
-     col3, col4 = st.columns(2)
-     with col3:
-         input_text = ""
-         if input_mode == "Text":
-             input_text = st.text_area("✏️ Enter English text:", height=250)
-         elif input_mode == "Audio":
-             audio_file = st.file_uploader("🔊 Upload audio (.wav, .mp3, .m4a)", type=["wav", "mp3", "m4a"])
-             if audio_file:
-                 with tempfile.NamedTemporaryFile(delete=False, suffix=f".{audio_file.type.split('/')[-1]}") as tmp:
-                     tmp.write(audio_file.read())
-                     tmp_path = tmp.name
-                 with st.spinner("Transcribing..."):
-                     input_text = transcribe_audio(tmp_path)
-                 os.remove(tmp_path)
-                 st.text_area("📝 Transcribed Text:", value=input_text, height=150)
-         elif input_mode == "File":
-             uploaded_file = st.file_uploader("📄 Upload document (PDF, Word, HTML, Markdown, SRT)", type=["pdf", "docx", "html", "htm", "md", "srt", "txt"])
-             if uploaded_file:
-                 try:
-                     input_text = extract_text_from_file(uploaded_file)
-                     st.text_area("📃 Extracted Text:", value=input_text, height=200)
-                 except Exception as e:
-                     st.error(f"Error extracting text: {str(e)}")
-
-     with col4:
-         if input_text:
-             with st.spinner("Translating..."):
-                 translated_text = translate(input_text, target_lang)
-             st.write("Output in "+ target_lang+ " language")
-             st.success(translated_text)
-
-             # Optional download
-             st.download_button(
-                 label="💾 Download Translation",
-                 data=translated_text,
-                 file_name=f"translated_{target_lang.replace(' ', '_').lower()}.txt",
-                 mime="text/plain"
-             )
-         else:
-             st.info("Translation will appear here.")
+ # --- Main Gradio Function ---
+ def process(input_mode, target_lang, text_input, audio_input, file_input):
+     input_text = ""
+
+     if input_mode == "Text" and text_input:
+         input_text = text_input
+     elif input_mode == "Audio" and audio_input:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+             tmp.write(audio_input.read())
+             tmp_path = tmp.name
+         input_text = transcribe_audio(tmp_path)
+         os.remove(tmp_path)
+     elif input_mode == "File" and file_input:
+         input_text = extract_text_from_file(file_input)
+
+     if not input_text.strip():
+         return "", "No input text provided."
+
+     translated_text = translate(input_text, target_lang)
+     return input_text, translated_text
+
+ # --- Gradio Interface ---
+ with gr.Blocks() as demo:
+     gr.Markdown("## 🌐 LocaleNLP Translator — English ↔ Darija / Hausa / Wolof")
+
+     with gr.Row():
+         input_mode = gr.Dropdown(["Text", "Audio", "File"], label="Select input mode")
+         target_lang = gr.Dropdown(["Darija (Morocco)", "Hausa (Nigeria)", "Wolof (Senegal)"], label="Select target language")
+
+     with gr.Row():
+         text_input = gr.Textbox(label="Enter English text", lines=10)
+         audio_input = gr.Audio(type="file", label="Upload Audio")
+         file_input = gr.File(label="Upload Document")
+
+     with gr.Row():
+         extracted_text = gr.Textbox(label="Extracted / Transcribed Text", lines=10)
+         translated_output = gr.Textbox(label="Translated Text", lines=10)

-     st.markdown("""<hr><div style='text-align: center; color: #4B8BBE; font-size: 0.9rem'>LocaleNLP © 2025 • Empowering communities through AI and language</div>""", unsafe_allow_html=True)
+     run_btn = gr.Button("Translate")
+     run_btn.click(process, inputs=[input_mode, target_lang, text_input, audio_input, file_input], outputs=[extracted_text, translated_output])

  if __name__ == "__main__":
-     main()
+     demo.launch()
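
The diff elides the middle of load_models() (where the three translator objects are built from the loaded models and tokenizers) and the tail of translate() (where lang_tag is applied). Below is a minimal sketch of what those elided sections presumably look like, based only on the imports and the names visible in the context lines; the "translation" task string, the device argument, and the sentence-by-sentence loop are assumptions, not part of this commit.

    # Hypothetical reconstruction of the elided sections; only the names come
    # from the visible context lines, everything else is an assumption.
    def load_models():
        ...
        en_hau_model = AutoModelForSeq2SeqLM.from_pretrained(en_hau_model_path).to(device)
        en_hau_tokenizer = MarianTokenizer.from_pretrained(en_hau_model_path)

        en_wol_model = AutoModelForSeq2SeqLM.from_pretrained(en_wol_model_path).to(device)
        en_wol_tokenizer = MarianTokenizer.from_pretrained(en_wol_model_path)

        # Wrap each model/tokenizer pair in a Hugging Face translation pipeline.
        gpu = 0 if torch.cuda.is_available() else -1
        en_dar_translator = pipeline("translation", model=en_dar_model, tokenizer=en_dar_tokenizer, device=gpu)
        en_hau_translator = pipeline("translation", model=en_hau_model, tokenizer=en_hau_tokenizer, device=gpu)
        en_wol_translator = pipeline("translation", model=en_wol_model, tokenizer=en_wol_tokenizer, device=gpu)

        return en_dar_translator, en_hau_translator, en_wol_translator

    def translate(text, target_lang):
        ...
        # Marian/OPUS-style multi-target checkpoints expect a ">>lang<<" token in
        # front of the source text; translating sentence by sentence (nltk's
        # sent_tokenize is imported above) keeps each input within the model's
        # maximum length.
        tag = lang_tag[target_lang]
        sentences = sent_tokenize(text)
        translated = [translator(f"{tag} {s}")[0]["translation_text"] for s in sentences]
        return " ".join(translated)

One caveat if you try to run the sketch as-is: the model paths are copied verbatim from the diff, and their leading slash makes them look like local filesystem paths rather than LocaleNLP repo IDs on the Hub.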