Update app.py
Browse files
app.py
CHANGED
|
@@ -2,8 +2,6 @@ import streamlit as st
|
|
| 2 |
import pandas as pd
|
| 3 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 4 |
from sklearn.neighbors import NearestNeighbors
|
| 5 |
-
from sklearn.decomposition import TruncatedSVD
|
| 6 |
-
import numpy as np
|
| 7 |
|
| 8 |
# Page config
|
| 9 |
st.set_page_config(
|
|
@@ -108,13 +106,13 @@ def run_imps(df):
|
|
| 108 |
|
| 109 |
# Collaborative Filtering
|
| 110 |
user_song_matrix = df.pivot_table(index='user', columns='song', values='play_count', fill_value=0)
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
song_factors = svd.components_.T
|
| 114 |
|
| 115 |
-
return df, tfidf, tfidf_matrix, nn, user_song_matrix,
|
| 116 |
|
| 117 |
-
df
|
|
|
|
| 118 |
|
| 119 |
# Content-based recommendation function
|
| 120 |
def content_based_recommend(song_title, top_n=5):
|
|
@@ -126,17 +124,28 @@ def content_based_recommend(song_title, top_n=5):
|
|
| 126 |
except IndexError:
|
| 127 |
return pd.DataFrame(columns=['title', 'artist_name', 'release'])
|
| 128 |
|
|
|
|
| 129 |
def collaborative_recommend(user_id, top_n=5):
|
| 130 |
if user_id not in user_song_matrix.index:
|
| 131 |
return pd.DataFrame(columns=['title', 'artist_name', 'release'])
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
| 136 |
listened_songs = user_song_matrix.loc[user_id][user_song_matrix.loc[user_id] > 0].index
|
| 137 |
-
scores = {song: score for song, score in zip(user_song_matrix.columns, scores) if song not in listened_songs}
|
| 138 |
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
recommended_song_ids = [song for song, _ in recommended_songs]
|
| 141 |
return df[df['song'].isin(recommended_song_ids)][['title', 'artist_name', 'release']].drop_duplicates()
|
| 142 |
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 4 |
from sklearn.neighbors import NearestNeighbors
|
|
|
|
|
|
|
| 5 |
|
| 6 |
# Page config
|
| 7 |
st.set_page_config(
|
|
|
|
| 106 |
|
| 107 |
# Collaborative Filtering
|
| 108 |
user_song_matrix = df.pivot_table(index='user', columns='song', values='play_count', fill_value=0)
|
| 109 |
+
knn_cf = NearestNeighbors(n_neighbors=10, metric='cosine', algorithm='auto')
|
| 110 |
+
knn_cf.fit(user_song_matrix)
|
|
|
|
| 111 |
|
| 112 |
+
return df, tfidf, tfidf_matrix, nn, user_song_matrix, knn_cf
|
| 113 |
|
| 114 |
+
# Load the dataset at import time; load_data() is defined outside this excerpt.
df = load_data()
|
| 115 |
+
# Rebind df and unpack the objects returned by run_imps(). collaborative_recommend()
# reads user_song_matrix, knn_cf and df from module scope, so these names must
# exist before that function is called.
df, tfidf, tfidf_matrix, nn, user_song_matrix, knn_cf = run_imps(df)
|
| 116 |
|
| 117 |
# Content-based recommendation function
|
| 118 |
def content_based_recommend(song_title, top_n=5):
|
|
|
|
| 124 |
except IndexError:
|
| 125 |
return pd.DataFrame(columns=['title', 'artist_name', 'release'])
|
| 126 |
|
| 127 |
+
# Collaborative recommendation function using KNN
|
| 128 |
def collaborative_recommend(user_id, top_n=5):
    """Recommend songs for ``user_id`` based on what its nearest-neighbour users played.

    The user's row of the module-level ``user_song_matrix`` is used to query
    the module-level ``knn_cf`` model. Play counts of songs the neighbours
    played (and the user has not) are summed per song, and the ``top_n``
    highest-scoring songs are looked up in the module-level ``df``.

    Args:
        user_id: Label of the user in ``user_song_matrix.index``.
        top_n: Maximum number of neighbours to consult and of songs to rank.

    Returns:
        A DataFrame with columns ``title``, ``artist_name`` and ``release``,
        ordered from highest to lowest aggregated score. It is empty when the
        user is unknown, ``top_n`` is not positive, or no neighbour has an
        unheard song.
    """
    if user_id not in user_song_matrix.index or top_n <= 0:
        return pd.DataFrame(columns=['title', 'artist_name', 'release'])

    user_index = user_song_matrix.index.get_loc(user_id)
    user_row = user_song_matrix.iloc[user_index]

    # Ask for one extra neighbour because the query row is itself part of the
    # fitted data, but never request more rows than the model was fitted on:
    # kneighbors() raises ValueError in that case.
    n_query = min(top_n + 1, user_song_matrix.shape[0])
    # Query with a one-row DataFrame slice (not a bare ndarray) so the input
    # has the same column labels as the matrix the model was fitted on.
    _, indices = knn_cf.kneighbors(user_song_matrix.iloc[[user_index]], n_neighbors=n_query)

    # Drop the user by position instead of assuming it is always the first
    # hit; with tied distances (identical rows) it may appear elsewhere.
    neighbors = [idx for idx in indices.flatten() if idx != user_index][:top_n]
    listened_songs = set(user_row[user_row > 0].index)

    # Sum neighbours' play counts for songs the user has not listened to.
    recommendations = {}
    for neighbor in neighbors:
        neighbor_songs = user_song_matrix.iloc[neighbor]
        for song, play_count in neighbor_songs[neighbor_songs > 0].items():
            if song not in listened_songs:
                recommendations[song] = recommendations.get(song, 0) + play_count

    # Sort songs by aggregated scores (stable sort keeps first-seen order on ties).
    recommended_songs = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)[:top_n]
    rank = {song: position for position, (song, _) in enumerate(recommended_songs)}

    # Keep the score ranking in the output; a plain isin() filter would
    # return the rows in df order and discard the sort above.
    matches = df[df['song'].isin(list(rank))]
    order = matches['song'].map(rank).to_numpy().argsort(kind='stable')
    return matches.iloc[order][['title', 'artist_name', 'release']].drop_duplicates()
|
| 151 |
|