# Spaces: Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier | |
| from sklearn.metrics import classification_report, accuracy_score, confusion_matrix | |
| from sklearn.preprocessing import LabelEncoder | |
| import shap | |
# Load Dataset
# The survey responses are read from a CSV path relative to the working
# directory.
data_path = 'Survey Final.csv'
df = pd.read_csv(data_path)

# Encode Target Column
# The target labels are replaced in place by integer codes. `le` stays at
# module level so that predicted codes can be mapped back to the original
# labels with `le.inverse_transform`.
# ('Percieved Safety' is the column name used throughout this script.)
le = LabelEncoder()
le.fit(df['Percieved Safety'])
df['Percieved Safety'] = le.transform(df['Percieved Safety'])

# Data Splitting (Global for Use in All Sections)
# One fixed-seed hold-out split is shared by the training and prediction
# sections below.
test_size = 0.2  # fraction of rows held out for evaluation
y = df['Percieved Safety']
X = df.drop('Percieved Safety', axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=test_size,
)
# Streamlit App
# Page-level configuration followed by the main title.
st.set_page_config(
    layout='wide',
    page_title="Evaluating Safety Perception on Commuting App",
)
st.title("Evaluating Safety Perception on Commuting ")

# Sidebar section
# The sidebar shows the logo, a divider and the navigation selectbox.
# `selected` holds the chosen label; each section below compares against it
# to decide whether to render.
nav_options = [
    "๐ Data Overview",
    "๐ Exploratory Data Analysis",
    "๐ค Model Training, Evaluation & Explanations",
    "๐ฎ Predict Perceived Safety",
]
st.sidebar.image("logo.png", use_container_width=True, caption="Safety Perception")
st.sidebar.markdown("---")
selected = st.sidebar.selectbox("Navigation", nav_options)
# Data Overview
# Renders a preview of the dataset plus its shape and column dtypes.
if selected == "๐ Data Overview":
    st.header("๐ Data Overview")

    # NOTE(review): the original indentation was lost; the checkbox is assumed
    # to gate only the row preview, with shape and dtypes always shown --
    # confirm against the deployed app.
    show_preview = st.checkbox("Show Dataset")
    if show_preview:
        preview_rows = df.head()
        st.write(preview_rows)

    shape_text = f"Dataset Shape: {df.shape}"
    st.write(shape_text)

    st.write("Data Types:")
    column_types = df.dtypes
    st.write(column_types)
# Exploratory Data Analysis
# Each plot is gated behind its own checkbox. Every figure is closed after it
# has been rendered, because Streamlit re-runs the whole script on each
# interaction and unclosed Matplotlib figures would otherwise accumulate.
if selected == "๐ Exploratory Data Analysis":
    st.header("๐ Exploratory Data Analysis")

    # Select every numeric dtype rather than only int64/float64, so columns
    # stored with another width (e.g. int32) are still offered for plotting.
    numeric_columns = df.select_dtypes(include="number").columns.tolist()

    if st.checkbox("Correlation Heatmap"):
        st.write("Correlation Heatmap")
        # Restrict the correlation to numeric columns: since pandas 2.0,
        # DataFrame.corr() raises when the frame holds non-numeric columns.
        corr_matrix = df.corr(numeric_only=True)
        if corr_matrix.empty:
            st.info("No numeric columns available for a correlation heatmap.")
        else:
            fig, ax = plt.subplots(figsize=(10, 6))
            sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
            st.pyplot(fig)
            plt.close(fig)

    if st.checkbox("Histogram"):
        st.write("Histograms of Numeric Columns")
        if numeric_columns:
            selected_column = st.selectbox("Select Column for Histogram", numeric_columns)
            fig, ax = plt.subplots()
            sns.histplot(df[selected_column], kde=True, ax=ax)
            st.pyplot(fig)
            plt.close(fig)
        else:
            # An empty selectbox returns None, which would fail on df[None].
            st.info("No numeric columns available for a histogram.")

    if st.checkbox("Boxplot for Numeric Columns"):
        st.write("Boxplot of Numeric Columns")
        if numeric_columns:
            selected_column = st.selectbox("Select Column for Boxplot", numeric_columns)
            fig, ax = plt.subplots()
            sns.boxplot(data=df, x=selected_column, ax=ax)
            st.pyplot(fig)
            plt.close(fig)
        else:
            st.info("No numeric columns available for a boxplot.")

    if st.checkbox("Pairplot of Dataset"):
        st.write("Pairplot of the Dataset")
        # pairplot returns a PairGrid; hand Streamlit the underlying
        # Matplotlib figure so it can be rendered and then closed explicitly.
        pair_grid = sns.pairplot(df)
        st.pyplot(pair_grid.figure)
        plt.close(pair_grid.figure)
# Model Training, Evaluation & Explanations
# Fits four tree-based classifiers on the shared train split, reports their
# accuracy on the shared test split, and offers a detailed report, feature
# importances and a SHAP summary for one selected model.
if selected == "๐ค Model Training, Evaluation & Explanations":
    st.header("๐ค Model Training, Evaluation & Explanations")

    if st.checkbox("Train, Evaluate, and Explain Models"):
        # Model Training
        # NOTE: this runs on every script re-run while the checkbox is ticked.
        st.write("Training Tree-Based Models")
        models = {
            "Random Forest": RandomForestClassifier(random_state=42),
            "Gradient Boosting": GradientBoostingClassifier(random_state=42),
            "Extra Trees": ExtraTreesClassifier(random_state=42),
            "Histogram Gradient Boosting": HistGradientBoostingClassifier(random_state=42),
        }
        model_preds = {}
        model_accuracies = {}
        for model_name, model in models.items():
            model.fit(X_train, y_train)
            preds = model.predict(X_test)
            accuracy = accuracy_score(y_test, preds)
            model_preds[model_name] = preds
            model_accuracies[model_name] = accuracy
            st.write(f"{model_name} Accuracy: {accuracy:.2f}")

        # Model Evaluation
        selected_model = st.selectbox("Select Model for Detailed Evaluation", list(models.keys()))
        selected_model_instance = models[selected_model]
        selected_preds = model_preds[selected_model]
        st.write("Classification Report:")
        st.text(classification_report(y_test, selected_preds))
        st.write("Confusion Matrix:")
        st.write(confusion_matrix(y_test, selected_preds))

        # Feature Importance
        if st.checkbox("Show Feature Importance"):
            st.write(f"Feature Importance from {selected_model} Model")
            # Not every estimator exposes feature_importances_, hence the
            # attribute check before reading it.
            if hasattr(selected_model_instance, 'feature_importances_'):
                feature_importances = selected_model_instance.feature_importances_
                importance_df = pd.DataFrame(
                    {"Feature": X_train.columns, "Importance": feature_importances}
                )
                importance_df = importance_df.sort_values(by="Importance", ascending=False)
                st.bar_chart(importance_df.set_index("Feature"))
            else:
                st.write("The selected model does not support feature importances.")

        # SHAP Explanations
        if st.checkbox("Explain Predictions with SHAP"):
            st.write(f"SHAP Explanation for {selected_model} Model")
            explainer = shap.TreeExplainer(selected_model_instance)
            shap_values = explainer.shap_values(X_test)
            # Draw into a dedicated figure and pass it to Streamlit explicitly:
            # calling st.pyplot() with no argument relies on Matplotlib's
            # global figure, which Streamlit has deprecated. show=False stops
            # SHAP from calling plt.show() before the figure is handed over.
            shap_fig = plt.figure()
            shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
            st.pyplot(shap_fig)
            plt.close(shap_fig)
# Predict Perceived Safety
# Collects eight 0-4 ratings from the user and predicts the safety class with
# a freshly trained random forest.
#
# The label compared against `selected` must match the sidebar option exactly.
# It previously read "Percieved" while the sidebar offers "Perceived", so this
# section could never be shown.
if selected == "๐ฎ Predict Perceived Safety":
    st.header("๐ฎ Predict Perceived Safety")
    st.write("Please provide the following information to predict Percieved Safety for transport:")

    # User Input for Prediction
    rating_scale = [0, 1, 2, 3, 4]
    overcrowding = st.selectbox("How overcrowded do you think the transport is on a scale from 0 (Not overcrowded) to 4 (Very overcrowded)?", rating_scale)
    preference = st.selectbox("How much do you prefer this mode of transport on a scale from 0 (Not preferred) to 4 (Highly preferred)?", rating_scale)
    daytime_safety = st.selectbox("How safe do you feel using this transport during the daytime on a scale from 0 (Not safe) to 4 (Very safe)?", rating_scale)
    nighttime_safety = st.selectbox("How safe do you feel using this transport during the nighttime on a scale from 0 (Not safe) to 4 (Very safe)?", rating_scale)
    taxi_dsafety = st.selectbox("How safe do you feel using a taxi during the day on a scale from 0 (Not safe) to 4 (Very safe)?", rating_scale)
    taxi_nsafety = st.selectbox("How safe do you feel using a taxi during the night on a scale from 0 (Not safe) to 4 (Very safe)?", rating_scale)
    reporting = st.selectbox("How comfortable are you with reporting incidents related to this transport on a scale from 0 (Not comfortable) to 4 (Very comfortable)?", rating_scale)
    background_check = st.selectbox("How effective do you think background checks are for transport personnel on a scale from 0 (Not effective) to 4 (Very effective)?", rating_scale)

    # NOTE(review): assumes the training columns appear in this same order --
    # confirm against the CSV header.
    user_data = np.array([[
        overcrowding, preference, daytime_safety, nighttime_safety,
        taxi_dsafety, taxi_nsafety, reporting, background_check
    ]])

    if st.button("Predict Percieved Safety"):
        if user_data.shape[1] != X_train.shape[1]:
            # Fail with a readable message instead of a scikit-learn traceback
            # when the dataset does not have one column per question.
            st.error(
                f"The model expects {X_train.shape[1]} features but "
                f"{user_data.shape[1]} answers were collected."
            )
        else:
            # Train the Model (Again) and Predict
            model = RandomForestClassifier(random_state=42)
            model.fit(X_train, y_train)
            # Reuse the training column names so the input matches what the
            # model was fitted on (avoids scikit-learn's feature-name warning).
            features = pd.DataFrame(user_data, columns=X_train.columns)
            prediction = model.predict(features)
            # Map the integer code back to the original label.
            predicted_class = le.inverse_transform(prediction)
            st.write(f"Predicted Percieved Safety Class: {predicted_class[0]}")