Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,14 +20,14 @@ import chardet
|
|
| 20 |
# Device setup
|
| 21 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 22 |
|
| 23 |
-
# Load
|
| 24 |
translator = None
|
| 25 |
whisper_model = None
|
| 26 |
|
| 27 |
-
def
|
| 28 |
global translator
|
| 29 |
if translator is None:
|
| 30 |
-
model_name = "LocaleNLP/
|
| 31 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
|
| 32 |
tokenizer = MarianTokenizer.from_pretrained(model_name)
|
| 33 |
translator = pipeline("translation", model=model, tokenizer=tokenizer, device=0 if device.type == 'cuda' else -1)
|
|
@@ -91,8 +91,8 @@ def extract_text_from_file(uploaded_file):
|
|
| 91 |
raise ValueError("Unsupported file type")
|
| 92 |
|
| 93 |
def translate(text):
|
| 94 |
-
translator =
|
| 95 |
-
lang_tag = ">>
|
| 96 |
|
| 97 |
paragraphs = text.split("\n")
|
| 98 |
translated_output = []
|
|
@@ -136,8 +136,8 @@ def translate_and_return(text):
|
|
| 136 |
|
| 137 |
# Gradio UI components
|
| 138 |
with gr.Blocks() as demo:
|
| 139 |
-
gr.Markdown("## LocaleNLP English-to-
|
| 140 |
-
gr.Markdown("Upload English text, audio, or document to translate to
|
| 141 |
|
| 142 |
with gr.Row():
|
| 143 |
input_mode = gr.Radio(choices=["Text", "Audio", "File"], label="Select input mode", value="Text")
|
|
@@ -147,8 +147,8 @@ with gr.Blocks() as demo:
|
|
| 147 |
file_input = gr.File(file_types=['.pdf', '.docx', '.html', '.htm', '.md', '.srt', '.txt'], label="Upload document", visible=False)
|
| 148 |
|
| 149 |
extracted_text = gr.Textbox(label="Extracted / Transcribed Text", lines=10, interactive=False)
|
| 150 |
-
translate_button = gr.Button("Translate to
|
| 151 |
-
output_text = gr.Textbox(label="Translated
|
| 152 |
|
| 153 |
def update_visibility(mode):
|
| 154 |
return {
|
|
|
|
| 20 |
# Device setup
|
| 21 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 22 |
|
| 23 |
+
# Load Darija MarianMT model from HF hub (cached manually)
|
| 24 |
translator = None
|
| 25 |
whisper_model = None
|
| 26 |
|
| 27 |
+
def load_darija_model():
|
| 28 |
global translator
|
| 29 |
if translator is None:
|
| 30 |
+
model_name = "LocaleNLP/english_darija"
|
| 31 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
|
| 32 |
tokenizer = MarianTokenizer.from_pretrained(model_name)
|
| 33 |
translator = pipeline("translation", model=model, tokenizer=tokenizer, device=0 if device.type == 'cuda' else -1)
|
|
|
|
| 91 |
raise ValueError("Unsupported file type")
|
| 92 |
|
| 93 |
def translate(text):
|
| 94 |
+
translator = load_darija_model()
|
| 95 |
+
lang_tag = ">>dar<<"
|
| 96 |
|
| 97 |
paragraphs = text.split("\n")
|
| 98 |
translated_output = []
|
|
|
|
| 136 |
|
| 137 |
# Gradio UI components
|
| 138 |
with gr.Blocks() as demo:
|
| 139 |
+
gr.Markdown("## LocaleNLP English-to-Darija Translator")
|
| 140 |
+
gr.Markdown("Upload English text, audio, or document to translate to Darija using Localenlp model.")
|
| 141 |
|
| 142 |
with gr.Row():
|
| 143 |
input_mode = gr.Radio(choices=["Text", "Audio", "File"], label="Select input mode", value="Text")
|
|
|
|
| 147 |
file_input = gr.File(file_types=['.pdf', '.docx', '.html', '.htm', '.md', '.srt', '.txt'], label="Upload document", visible=False)
|
| 148 |
|
| 149 |
extracted_text = gr.Textbox(label="Extracted / Transcribed Text", lines=10, interactive=False)
|
| 150 |
+
translate_button = gr.Button("Translate to Darija")
|
| 151 |
+
output_text = gr.Textbox(label="Translated Darija Text", lines=10, interactive=False)
|
| 152 |
|
| 153 |
def update_visibility(mode):
|
| 154 |
return {
|