Spaces:
Sleeping
Sleeping
import difflib

import gradio as gr
import nltk
import spacy
import torch
from nltk.tokenize import sent_tokenize
from spacy import displacy
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Fetch the NLTK tokenizer data at start-up; both resource names are
# requested (presumably so sent_tokenize works across NLTK versions --
# TODO confirm which one the deployed version actually needs).
nltk.download("punkt")
nltk.download("punkt_tab")

# spaCy pipeline whose output is rendered with displaCy's "span" style.
# NOTE(review): assumes the pipeline is installed under this name and
# populates doc.spans -- verify against the packaged model.
nlp = spacy.load("en_test_L1_model")

# Seq2seq model that receives "gec: "-prefixed sentences and generates the
# corrected text; the tokenizer comes from the base "t5-small" checkpoint.
model = T5ForConditionalGeneration.from_pretrained("Unbabel/gec-t5_small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
def _render_word_diff(original, corrected):
    """Return an HTML word-level diff between *original* and *corrected*.

    Both strings are split on whitespace and compared with difflib.ndiff.
    Added words are wrapped in a green bold span, removed words in a red
    struck-through span, and unchanged words are emitted as-is. Every word
    is HTML-escaped and followed by a single space.
    """
    from html import escape  # local import keeps this block self-contained

    fragments = []
    for entry in difflib.ndiff(original.split(), corrected.split()):
        marker = entry[:2]
        word = escape(entry[2:])
        if marker == "+ ":
            fragments.append(
                f"<span style='color: green; font-weight: bold;'>{word}</span> "
            )
        elif marker == "- ":
            fragments.append(
                f"<span style='color: red; text-decoration: line-through;'>{word}</span> "
            )
        elif marker == "  ":
            fragments.append(word + " ")
        # Anything else is an ndiff "? " hint entry: it carries only
        # ^/-/+ guide characters for similar words, not a word from either
        # sentence, so it must not be rendered.
    return "".join(fragments)


def text_analysis(text):
    """Annotate and correct every sentence of *text*; return two HTML strings.

    The text is split into sentences with sent_tokenize. Each sentence is
    (1) run through the spaCy pipeline ``nlp`` and rendered with displaCy's
    "span" style inside a scrollable <div>, and (2) sent to the T5 model with
    a "gec: " prefix; the decoded output is diffed word-by-word against the
    input sentence.

    Args:
        text: Raw input text (one or more sentences).

    Returns:
        A 2-tuple ``(annotations_html, corrections_html)``. The first joins
        the per-sentence displaCy blocks with "<hr>" and always ends with a
        trailing "<hr>"; the second joins the per-sentence
        "<p><b>Corrected:</b> ...</p>" diff paragraphs with "<hr>".
    """
    annotated_blocks = []
    corrected_blocks = []

    for sentence in sent_tokenize(text):
        # Span visualisation of the sentence, wrapped so that wide or tall
        # renderings scroll instead of overflowing the page.
        doc = nlp(sentence)
        span_html = displacy.render(doc, style="span", options={"compact": True})
        annotated_blocks.append(
            "<div style='max-width:100%; max-height:360px; overflow:auto'>"
            + span_html
            + "</div>"
        )

        # Generate the corrected sentence; no gradients are needed for
        # inference.
        inputs = tokenizer("gec: " + sentence, return_tensors="pt")
        with torch.no_grad():
            outputs = model.generate(
                **inputs, max_length=128, num_beams=4, early_stopping=True
            )
        corrected_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)

        diff_html = _render_word_diff(sentence, corrected_sentence)
        corrected_blocks.append(f"<p><b>Corrected:</b> {diff_html}</p>")

    return "<hr>".join(annotated_blocks) + "<hr>", "<hr>".join(corrected_blocks)
# Gradio UI: a single text box feeds text_analysis, whose two returned HTML
# strings are shown in two HTML output components. The examples are
# pre-filled sample inputs offered to the user.
demo = gr.Interface(
    fn=text_analysis,
    inputs=gr.Textbox(placeholder="Enter sentence here..."),
    outputs=["html", "html"],
    examples=[
        [
            "Then there was a sharp decrease so by 2013 the worldwide outlay accounted for 214 billions. Moreother there is a huge difference between part of 60+ years people. It is clearly seen that in Yemen the share of children before 14 years tend to become less - from 50,1% in 2000 to 37% in 2050."
        ],
        [
            "In post - school 70 percent were the same men a postgraduate diploma and women undergraduate diploma. Parents can try to know friends of their child, so they will know what they are doing and who they are."
        ],
    ],
)

# Start the web server as soon as the script runs.
demo.launch()