Spaces:

AlignAI
/

Safety_Perception

Sleeping

Safety_Perception / safetyapp.py

GitsSaikat

Update safetyapp.py

5748b83 unverified about 1 year ago

7.69 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier
	from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
	from sklearn.preprocessing import LabelEncoder
	import shap

	# Read the survey responses from the CSV that ships next to this script.
	data_path = 'Survey Final.csv'
	df = pd.read_csv(data_path)

	# Swap the target labels for integer codes. The fitted encoder `le` is kept
	# at module level so the codes can be mapped back to labels later on.
	target_column = 'Percieved Safety'
	le = LabelEncoder()
	df[target_column] = le.fit_transform(df[target_column])

	# Separate target and features, then make one fixed-seed hold-out split
	# that every section of the app shares.
	test_size = 0.2  # fraction of rows held out for testing
	y = df[target_column]
	X = df.drop(columns=[target_column])
	X_train, X_test, y_train, y_test = train_test_split(
	    X,
	    y,
	    test_size=test_size,
	    random_state=42,
	)

	# Page-level configuration and main title.
	st.set_page_config(
	    page_title="Evaluating Safety Perception on Commuting App",
	    layout='wide',
	)
	st.title("Evaluating Safety Perception on Commuting ")

	# Pages offered by the navigation selector; the page sections below compare
	# `selected` against these labels.
	nav_options = [
	    "📊 Data Overview",
	    "🔍 Exploratory Data Analysis",
	    "🤖 Model Training, Evaluation & Explanations",
	    "🔮 Predict Perceived Safety",
	]

	# Sidebar: logo, divider, then the page selector.
	# NOTE(review): the file's indentation was lost; all three widgets are
	# assumed to belong to the sidebar — confirm against the deployed app.
	with st.sidebar:
	    st.image("logo.png", caption="Safety Perception", use_container_width=True)
	    st.markdown("---")
	    selected = st.selectbox("Navigation", nav_options)

	# Data Overview page: optional preview of the first rows, then the frame's
	# shape and per-column dtypes.
	# NOTE(review): the file's indentation was lost; only the preview is assumed
	# to be gated by the checkbox — confirm against the deployed app.
	if selected == "📊 Data Overview":
	    st.header("📊 Data Overview")

	    show_preview = st.checkbox("Show Dataset")
	    if show_preview:
	        st.write(df.head())

	    st.write(f"Dataset Shape: {df.shape}")
	    st.write("Data Types:")
	    st.write(df.dtypes)

	# Exploratory Data Analysis page: each checkbox toggles one plot of `df`.
	# NOTE(review): the file's indentation was lost; every plot is assumed to be
	# nested under its own checkbox — confirm against the deployed app.
	if selected == "🔍 Exploratory Data Analysis":
	    st.header("🔍 Exploratory Data Analysis")

	    # Computed once and shared by the histogram and boxplot selectors.
	    # "number" matches every numeric dtype instead of only int64/float64.
	    numeric_columns = df.select_dtypes(include="number").columns.tolist()

	    if st.checkbox("Correlation Heatmap"):
	        st.write("Correlation Heatmap")
	        fig, ax = plt.subplots(figsize=(10, 6))
	        # numeric_only=True keeps a stray non-numeric column from raising
	        # inside corr() on pandas 2.x.
	        sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm', ax=ax)
	        st.pyplot(fig)
	        # The script is re-executed on every interaction; close the figure
	        # so pyplot's registry does not keep growing.
	        plt.close(fig)

	    if st.checkbox("Histogram"):
	        st.write("Histograms of Numeric Columns")
	        selected_column = st.selectbox("Select Column for Histogram", numeric_columns)
	        fig, ax = plt.subplots()
	        sns.histplot(df[selected_column], kde=True, ax=ax)
	        st.pyplot(fig)
	        plt.close(fig)

	    if st.checkbox("Boxplot for Numeric Columns"):
	        st.write("Boxplot of Numeric Columns")
	        selected_column = st.selectbox("Select Column for Boxplot", numeric_columns)
	        fig, ax = plt.subplots()
	        sns.boxplot(data=df, x=selected_column, ax=ax)
	        st.pyplot(fig)
	        plt.close(fig)

	    if st.checkbox("Pairplot of Dataset"):
	        st.write("Pairplot of the Dataset")
	        # pairplot returns a PairGrid; hand Streamlit the underlying
	        # matplotlib Figure, then release it like the other plots.
	        pair_grid = sns.pairplot(df)
	        st.pyplot(pair_grid.figure)
	        plt.close(pair_grid.figure)

	# Model Training, Evaluation & Explanations page.
	# Everything below the main checkbox uses the models fitted inside it, so
	# the feature-importance and SHAP sections are nested under that checkbox.
	if selected == "🤖 Model Training, Evaluation & Explanations":
	    st.header("🤖 Model Training, Evaluation & Explanations")
	    if st.checkbox("Train, Evaluate, and Explain Models"):
	        # Model Training: fit each ensemble on the shared training split and
	        # score it on the shared hold-out split.
	        st.write("Training Tree-Based Models")
	        models = {
	            "Random Forest": RandomForestClassifier(random_state=42),
	            "Gradient Boosting": GradientBoostingClassifier(random_state=42),
	            "Extra Trees": ExtraTreesClassifier(random_state=42),
	            "Histogram Gradient Boosting": HistGradientBoostingClassifier(random_state=42),
	        }

	        model_preds = {}
	        model_accuracies = {}
	        for model_name, model in models.items():
	            model.fit(X_train, y_train)
	            preds = model.predict(X_test)
	            accuracy = accuracy_score(y_test, preds)
	            model_preds[model_name] = preds
	            model_accuracies[model_name] = accuracy
	            st.write(f"{model_name} Accuracy: {accuracy:.2f}")

	        # Model Evaluation: detailed metrics for one chosen model.
	        selected_model = st.selectbox("Select Model for Detailed Evaluation", list(models.keys()))
	        selected_model_instance = models[selected_model]
	        selected_preds = model_preds[selected_model]
	        st.write("Classification Report:")
	        st.text(classification_report(y_test, selected_preds))
	        st.write("Confusion Matrix:")
	        st.write(confusion_matrix(y_test, selected_preds))

	        # Feature Importance: only for estimators that expose
	        # `feature_importances_`; the rest get the message below.
	        if st.checkbox("Show Feature Importance"):
	            st.write(f"Feature Importance from {selected_model} Model")
	            if hasattr(selected_model_instance, 'feature_importances_'):
	                feature_importances = selected_model_instance.feature_importances_
	                importance_df = pd.DataFrame({"Feature": X_train.columns, "Importance": feature_importances})
	                importance_df = importance_df.sort_values(by="Importance", ascending=False)
	                st.bar_chart(importance_df.set_index("Feature"))
	            else:
	                st.write("The selected model does not support feature importances.")

	        # SHAP Explanations
	        if st.checkbox("Explain Predictions with SHAP"):
	            st.write(f"SHAP Explanation for {selected_model} Model")
	            explainer = shap.TreeExplainer(selected_model_instance)
	            shap_values = explainer.shap_values(X_test)
	            # Draw on an explicit, fresh figure and pass that figure to
	            # Streamlit instead of relying on the global pyplot state.
	            # show=False stops SHAP from calling plt.show() itself.
	            shap_fig = plt.figure()
	            shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
	            st.pyplot(shap_fig)
	            # Release the figure so reruns do not accumulate open figures.
	            plt.close(shap_fig)

	# Predict Perceived Safety page.
	# The comparison string must match the navigation option exactly
	# ("Perceived"); the previous misspelled check ("Percieved") could never be
	# true, so this page was unreachable.
	if selected == "🔮 Predict Perceived Safety":
	    st.header("🔮 Predict Percieved Safety")
	    st.write("Please provide the following information to predict Percieved Safety for transport:")

	    # User Input for Prediction: eight ratings, each on a 0-4 scale.
	    overcrowding = st.selectbox("How overcrowded do you think the transport is on a scale from 0 (Not overcrowded) to 4 (Very overcrowded)?", [0, 1, 2, 3, 4])
	    preference = st.selectbox("How much do you prefer this mode of transport on a scale from 0 (Not preferred) to 4 (Highly preferred)?", [0, 1, 2, 3, 4])
	    daytime_safety = st.selectbox("How safe do you feel using this transport during the daytime on a scale from 0 (Not safe) to 4 (Very safe)?", [0, 1, 2, 3, 4])
	    nighttime_safety = st.selectbox("How safe do you feel using this transport during the nighttime on a scale from 0 (Not safe) to 4 (Very safe)?", [0, 1, 2, 3, 4])
	    taxi_dsafety = st.selectbox("How safe do you feel using a taxi during the day on a scale from 0 (Not safe) to 4 (Very safe)?", [0, 1, 2, 3, 4])
	    taxi_nsafety = st.selectbox("How safe do you feel using a taxi during the night on a scale from 0 (Not safe) to 4 (Very safe)?", [0, 1, 2, 3, 4])
	    reporting = st.selectbox("How comfortable are you with reporting incidents related to this transport on a scale from 0 (Not comfortable) to 4 (Very comfortable)?", [0, 1, 2, 3, 4])
	    background_check = st.selectbox("How effective do you think background checks are for transport personnel on a scale from 0 (Not effective) to 4 (Very effective)?", [0, 1, 2, 3, 4])

	    # One row holding the answers in the order they are asked above.
	    user_data = np.array([[
	        overcrowding, preference, daytime_safety, nighttime_safety,
	        taxi_dsafety, taxi_nsafety, reporting, background_check,
	    ]])

	    if st.button("Predict Percieved Safety"):
	        # Train the Model (Again) and Predict
	        model = RandomForestClassifier(random_state=42)
	        model.fit(X_train, y_train)
	        # The model is fitted on a DataFrame, so predict on one too; this
	        # avoids sklearn's "X does not have valid feature names" warning.
	        # NOTE(review): assumes the CSV has exactly these eight feature
	        # columns in this order — confirm against 'Survey Final.csv'.
	        user_frame = pd.DataFrame(user_data, columns=X_train.columns)
	        prediction = model.predict(user_frame)
	        # Map the integer class code back to the original label.
	        predicted_class = le.inverse_transform(prediction)

	        st.write(f"Predicted Percieved Safety Class: {predicted_class[0]}")