# English Clause Analyzer — Streamlit app using spaCy and Graphviz.
import streamlit as st
import spacy
import graphviz
import pandas as pd
import base64
import shutil
import subprocess


@st.cache_resource
def _load_language_model():
    """Load the spaCy English model once per server process.

    Streamlit re-executes the entire script on every user interaction;
    without caching, the slow ``spacy.load`` call would repeat on each
    rerun. ``st.cache_resource`` keeps a single shared pipeline instance.
    """
    return spacy.load('en_core_web_md')


# Shared spaCy pipeline used by all analysis functions below.
nlp = _load_language_model()
def check_graphviz_installation():
    """Return True when the Graphviz ``dot`` executable is usable.

    First looks the binary up on PATH, then confirms it actually runs by
    invoking ``dot -V``; any subprocess failure counts as "not installed".
    """
    dot_path = shutil.which('dot')
    if dot_path is None:
        return False
    try:
        subprocess.run(['dot', '-V'], capture_output=True, check=True)
    except (subprocess.SubprocessError, OSError):
        return False
    return True
| def identify_clauses(doc): | |
| """ | |
| Identify clauses in the sentence using spaCy, correctly separating dependent and independent clauses | |
| """ | |
| clauses = [] | |
| # First identify all subordinate clauses and their spans | |
| subordinate_spans = [] | |
| for token in doc: | |
| if token.dep_ in ["ccomp", "xcomp", "advcl", "relcl"]: | |
| span = doc[token.left_edge.i:token.right_edge.i + 1] | |
| subordinate_spans.append({ | |
| "span": span, | |
| "type": { | |
| "ccomp": "Complement Clause", | |
| "xcomp": "Open Complement Clause", | |
| "advcl": "Adverbial Clause", | |
| "relcl": "Adjective Clause" | |
| }[token.dep_] | |
| }) | |
| # Find the root and construct the main clause by excluding subordinate spans | |
| root = None | |
| for token in doc: | |
| if token.dep_ == "ROOT": | |
| root = token | |
| break | |
| if root: | |
| # Get all tokens in the root's subtree | |
| main_clause_tokens = set(token for token in root.subtree) | |
| # Remove tokens that are part of subordinate clauses | |
| for sub_clause in subordinate_spans: | |
| for token in sub_clause["span"]: | |
| if token in main_clause_tokens: | |
| main_clause_tokens.remove(token) | |
| # Construct the main clause text from remaining tokens | |
| main_clause_text = " ".join(sorted([token.text for token in main_clause_tokens], | |
| key=lambda x: [t.i for t in doc if t.text == x][0])) | |
| main_clause_text = main_clause_text.strip().replace(",","").replace(".","") | |
| clauses.append({"Type": "Independent Clause", "Text": main_clause_text}) | |
| # Add the subordinate clauses | |
| for sub_clause in subordinate_spans: | |
| clauses.append({ | |
| "Type": sub_clause["type"], | |
| "Text": sub_clause["span"].text | |
| }) | |
| return clauses | |
def analyze_clause_functions(doc):
    """Describe the grammatical function of each clause-heading token.

    Scans the doc for dependency labels that introduce clauses and returns
    a list of ``{"Type": ..., "Function": ...}`` dicts in token order.
    """
    descriptions = {
        "ROOT": ("Independent Clause", "Express the primary action or state"),
        "ccomp": ("Complement Clause", "Acts as object of the main verb"),
        "xcomp": ("Open Complement Clause", "Predicate complement without its own subject"),
        "advcl": ("Adverbial Clause", "Modifies the verb like an adverb"),
        "relcl": ("Adjective Clause", "Modifies a noun like an adjective"),
    }
    functions = []
    for token in doc:
        entry = descriptions.get(token.dep_)
        if entry is not None:
            clause_type, clause_function = entry
            functions.append({"Type": clause_type, "Function": clause_function})
    return functions
def create_dependency_graph(doc):
    """Build a Graphviz digraph of the sentence's dependency tree.

    Returns ``None`` when the Graphviz binaries are missing; otherwise a
    ``graphviz.Digraph`` with one node per token (word plus POS tag) and
    one labelled edge per head -> dependent relation.
    """
    if not check_graphviz_installation():
        return None
    tree = graphviz.Digraph(comment='Dependency Tree')
    # One node per token, labelled with the word and its part of speech.
    for token in doc:
        tree.node(str(token.i), f"{token.text}\n({token.pos_})")
    # One edge per dependency relation, labelled with the relation name.
    for token in doc:
        if token.head is token:
            continue  # the root token has no incoming edge
        tree.edge(str(token.head.i), str(token.i), token.dep_)
    return tree
def get_graph_download_link(dot):
    """Return an HTML anchor that downloads the graph as a PDF.

    The rendered PDF bytes are embedded directly in the link as a base64
    data URI, so nothing is written to disk. On any failure an error
    message string is returned instead of a link.
    """
    try:
        # Render the graph to PDF bytes in memory and base64-encode them.
        pdf_bytes = dot.pipe(format='pdf')
        encoded = base64.b64encode(pdf_bytes).decode()
    except Exception as e:
        return f"Error generating download link: {str(e)}"
    return (
        f'<a href="data:application/pdf;base64,{encoded}" '
        f'download="syntax_tree.pdf">Download Syntax Tree (PDF)</a>'
    )
def _styled_table(df):
    """Render a DataFrame as a Streamlit table with the app's dark styling."""
    st.table(df.style.set_properties(**{
        'background-color': 'rgba(0,0,0,0.1)',
        'color': 'white'
    }))


def main():
    """Streamlit entry point: analyze an English sentence's clauses.

    Lays out a text input plus an Analyze button; on click shows the clause
    breakdown and clause functions (left column), the dependency syntax tree
    with a PDF download link (right column), and a part-of-speech table.
    """
    # Set page to wide mode for better visualization.
    st.set_page_config(layout="wide")
    st.markdown("<h1 style='text-align: center; color: white;'>English Clause Analyzer</h1>", unsafe_allow_html=True)
    st.write("Enter an English sentence to analyze its clauses, their functions, and syntax tree.")
    # Input text.
    text = st.text_area("Enter your sentence:", "When I arrived at the station, the train had already left.", height=100)
    if st.button("Analyze") and text:
        # Process the text.
        doc = nlp(text)
        # Create two columns for layout.
        col1, col2 = st.columns(2)
        with col1:
            # Clause breakdown, presented as a table.
            clauses = identify_clauses(doc)
            st.subheader("Clauses Analysis")  # plain string: was an f-string with no placeholders
            _styled_table(pd.DataFrame(clauses))
            # Clause functions table.
            functions = analyze_clause_functions(doc)
            st.subheader("Clause Functions")
            _styled_table(pd.DataFrame(functions))
        with col2:
            # Dependency visualization (requires the Graphviz binaries).
            st.subheader("Syntax Tree Visualization")
            if not check_graphviz_installation():
                st.error("Graphviz is not installed. Please install it using:")
                st.code("sudo apt-get install graphviz")
                st.markdown("After installation, restart the application.")
            else:
                dot = create_dependency_graph(doc)
                st.graphviz_chart(dot)
                # Add download button for the graph.
                st.markdown(get_graph_download_link(dot), unsafe_allow_html=True)
        # Part-of-speech tags for every token, below the two columns.
        st.subheader("Part-of-Speech Analysis")
        pos_data = [{"Word": token.text, "Part of Speech": token.pos_,
                     "Description": spacy.explain(token.pos_)} for token in doc]
        _styled_table(pd.DataFrame(pos_data))


if __name__ == "__main__":
    main()