# English Clause Analyzer — Streamlit app using spaCy and Graphviz.
import streamlit as st
import spacy
import graphviz
import pandas as pd
import base64
import shutil
import subprocess


@st.cache_resource
def _load_language_model():
    """Load the spaCy English model once per server process.

    Streamlit re-executes the entire script on every user interaction;
    without caching, the slow ``spacy.load`` call would repeat on each
    rerun. ``st.cache_resource`` keeps a single shared pipeline instance.
    """
    return spacy.load('en_core_web_md')


# Shared spaCy pipeline used by all analysis functions below.
nlp = _load_language_model()
def check_graphviz_installation():
    """Return True when the Graphviz ``dot`` executable is usable.

    First looks the binary up on PATH, then confirms it actually runs by
    invoking ``dot -V``; any subprocess failure counts as "not installed".
    """
    dot_path = shutil.which('dot')
    if dot_path is None:
        return False
    try:
        subprocess.run(['dot', '-V'], capture_output=True, check=True)
    except (subprocess.SubprocessError, OSError):
        return False
    return True
| def identify_clauses(doc): | |
| """ | |
| Identify clauses in the sentence using spaCy, correctly separating dependent and independent clauses | |
| """ | |
| clauses = [] | |
| # First identify all subordinate clauses and their spans | |
| subordinate_spans = [] | |
| for token in doc: | |
| if token.dep_ in ["ccomp", "xcomp", "advcl", "relcl"]: | |
| span = doc[token.left_edge.i:token.right_edge.i + 1] | |
| subordinate_spans.append({ | |
| "span": span, | |
| "type": { | |
| "ccomp": "Complement Clause", | |
| "xcomp": "Open Complement Clause", | |
| "advcl": "Adverbial Clause", | |
| "relcl": "Adjective Clause" | |
| }[token.dep_] | |
| }) | |
| # Find the root and construct the main clause by excluding subordinate spans | |
| root = None | |
| for token in doc: | |
| if token.dep_ == "ROOT": | |
| root = token | |
| break | |
| if root: | |
| # Get all tokens in the root's subtree | |
| main_clause_tokens = set(token for token in root.subtree) | |
| # Remove tokens that are part of subordinate clauses | |
| for sub_clause in subordinate_spans: | |
| for token in sub_clause["span"]: | |
| if token in main_clause_tokens: | |
| main_clause_tokens.remove(token) | |
| # Construct the main clause text from remaining tokens | |
| main_clause_text = " ".join(sorted([token.text for token in main_clause_tokens], | |
| key=lambda x: [t.i for t in doc if t.text == x][0])) | |
| main_clause_text = main_clause_text.strip().replace(",","").replace(".","") | |
| clauses.append({"Type": "Independent Clause", "Text": main_clause_text}) | |
| # Add the subordinate clauses | |
| for sub_clause in subordinate_spans: | |
| clauses.append({ | |
| "Type": sub_clause["type"], | |
| "Text": sub_clause["span"].text | |
| }) | |
| return clauses | |
def analyze_clause_functions(doc):
    """Describe the grammatical function of each clause-heading token.

    Scans the doc for dependency labels that introduce clauses and returns
    a list of ``{"Type": ..., "Function": ...}`` dicts in token order.
    """
    descriptions = {
        "ROOT": ("Independent Clause", "Express the primary action or state"),
        "ccomp": ("Complement Clause", "Acts as object of the main verb"),
        "xcomp": ("Open Complement Clause", "Predicate complement without its own subject"),
        "advcl": ("Adverbial Clause", "Modifies the verb like an adverb"),
        "relcl": ("Adjective Clause", "Modifies a noun like an adjective"),
    }
    functions = []
    for token in doc:
        entry = descriptions.get(token.dep_)
        if entry is not None:
            clause_type, clause_function = entry
            functions.append({"Type": clause_type, "Function": clause_function})
    return functions
def create_dependency_graph(doc):
    """Build a Graphviz digraph of the sentence's dependency tree.

    Returns ``None`` when the Graphviz binaries are missing; otherwise a
    ``graphviz.Digraph`` with one node per token (word plus POS tag) and
    one labelled edge per head -> dependent relation.
    """
    if not check_graphviz_installation():
        return None
    tree = graphviz.Digraph(comment='Dependency Tree')
    # One node per token, labelled with the word and its part of speech.
    for token in doc:
        tree.node(str(token.i), f"{token.text}\n({token.pos_})")
    # One edge per dependency relation, labelled with the relation name.
    for token in doc:
        if token.head is token:
            continue  # the root token has no incoming edge
        tree.edge(str(token.head.i), str(token.i), token.dep_)
    return tree
def get_graph_download_link(dot):
    """Return an HTML anchor that downloads the graph as a PDF.

    The rendered PDF bytes are embedded directly in the link as a base64
    data URI, so nothing is written to disk. On any failure an error
    message string is returned instead of a link.
    """
    try:
        # Render the graph to PDF bytes in memory and base64-encode them.
        pdf_bytes = dot.pipe(format='pdf')
        encoded = base64.b64encode(pdf_bytes).decode()
    except Exception as e:
        return f"Error generating download link: {str(e)}"
    return (
        f'<a href="data:application/pdf;base64,{encoded}" '
        f'download="syntax_tree.pdf">Download Syntax Tree (PDF)</a>'
    )
def _styled_table(df):
    """Render a DataFrame as a Streamlit table with the app's dark styling."""
    st.table(df.style.set_properties(**{
        'background-color': 'rgba(0,0,0,0.1)',
        'color': 'white'
    }))


def main():
    """Streamlit entry point: analyze an English sentence's clauses.

    Lays out a text input plus an Analyze button; on click shows the clause
    breakdown and clause functions (left column), the dependency syntax tree
    with a PDF download link (right column), and a part-of-speech table.
    """
    # Set page to wide mode for better visualization.
    st.set_page_config(layout="wide")
    st.markdown("<h1 style='text-align: center; color: white;'>English Clause Analyzer</h1>", unsafe_allow_html=True)
    st.write("Enter an English sentence to analyze its clauses, their functions, and syntax tree.")
    # Input text.
    text = st.text_area("Enter your sentence:", "When I arrived at the station, the train had already left.", height=100)
    if st.button("Analyze") and text:
        # Process the text.
        doc = nlp(text)
        # Create two columns for layout.
        col1, col2 = st.columns(2)
        with col1:
            # Clause breakdown, presented as a table.
            clauses = identify_clauses(doc)
            st.subheader("Clauses Analysis")  # plain string: was an f-string with no placeholders
            _styled_table(pd.DataFrame(clauses))
            # Clause functions table.
            functions = analyze_clause_functions(doc)
            st.subheader("Clause Functions")
            _styled_table(pd.DataFrame(functions))
        with col2:
            # Dependency visualization (requires the Graphviz binaries).
            st.subheader("Syntax Tree Visualization")
            if not check_graphviz_installation():
                st.error("Graphviz is not installed. Please install it using:")
                st.code("sudo apt-get install graphviz")
                st.markdown("After installation, restart the application.")
            else:
                dot = create_dependency_graph(doc)
                st.graphviz_chart(dot)
                # Add download button for the graph.
                st.markdown(get_graph_download_link(dot), unsafe_allow_html=True)
        # Part-of-speech tags for every token, below the two columns.
        st.subheader("Part-of-Speech Analysis")
        pos_data = [{"Word": token.text, "Part of Speech": token.pos_,
                     "Description": spacy.explain(token.pos_)} for token in doc]
        _styled_table(pd.DataFrame(pos_data))


if __name__ == "__main__":
    main()