| | |
| | |
| |
|
| | import pandas as pd |
| | import numpy as np |
| | from supabase import create_client, Client |
| |
|
| | |
# Connection settings for the Supabase project that stores the book catalogue.
# The values can be overridden through the SUPABASE_URL / SUPABASE_KEY
# environment variables so a deployment does not have to edit this file; the
# literals below are kept only as backward-compatible defaults.
# NOTE(review): a credential committed to source control should be rotated and
# removed from the defaults once every deployment sets the environment variables.
import os

URL = os.environ.get("SUPABASE_URL", "https://oflclzbsbgkadqiagxqk.supabase.co")
KEY = os.environ.get(
    "SUPABASE_KEY",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im9mbGNsemJzYmdrYWRxaWFneHFrIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MDY0OTY3OTIsImV4cCI6MjAyMjA3Mjc5Mn0.2IGuSFqHbNp75vs-LskGjK0fw3ypqbiHJ9MKAAaYE8s",
)

# Shared client used by every query in this module.
supabase: Client = create_client(URL, KEY)
| |
|
def convert_table_to_pandas_dataframe(supabase, table_name):
    """Fetch every column of *table_name* and return the rows as a DataFrame.

    Args:
        supabase: Client object exposing ``table(name).select(cols).execute()``.
        table_name: Name of the table to read.

    Returns:
        A ``pandas.DataFrame`` built from the ``data`` attribute of the
        query response.
    """
    # NOTE(review): a single select may be capped by the server's row limit;
    # confirm the table fits in one response or add pagination.
    query = supabase.table(table_name).select("*")
    response = query.execute()
    return pd.DataFrame(response.data)
| |
|
# Load the whole "books" table into memory; everything below works on this frame.
books_df = convert_table_to_pandas_dataframe(supabase, "books")

# Display settings for interactive inspection: truncate long cells, show all columns.
pd.set_option('display.max_colwidth', 50)
pd.set_option('display.max_columns', None)

# Preview of the first rows (only visible when run interactively).
books_df.head(5)

# Text used for similarity: description, title and author joined by spaces.
# Missing values are replaced with '' first: with plain string concatenation a
# single null in any of the three columns turns the whole combined value into
# NaN, which the vectoriser cannot process.
books_df['combined'] = (
    books_df['description'].fillna('').astype(str)
    + ' '
    + books_df['title'].fillna('').astype(str)
    + ' '
    + books_df['author_name'].fillna('').astype(str)
)
| |
|
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | from sklearn.feature_extraction.text import TfidfVectorizer |
| |
|
| | |
# TF-IDF vectoriser that ignores English stop words.
tfidf = TfidfVectorizer(stop_words='english')

# One row per book, one column per vocabulary term.
tfidf_matrix = tfidf.fit_transform(books_df['combined'])

# Vocabulary terms in column order. ``get_feature_names`` was removed in
# scikit-learn 1.2; prefer its replacement and fall back only on old versions.
feature_names = (
    tfidf.get_feature_names_out()
    if hasattr(tfidf, "get_feature_names_out")
    else tfidf.get_feature_names()
)

# (number of books, vocabulary size) -- only displayed when run interactively.
tfidf_matrix.shape

# Sample slice of the vocabulary for manual inspection.
feature_names[2000:2500]

# TF-IDF row of the book at position 10.
document_tfidf_vector = tfidf_matrix[10]

# NOTE: this is the sum of the TF-IDF weights in that row, not a count of
# terms; the name and the printed label are kept as they were.
total_terms_in_document = document_tfidf_vector.sum()

print("Document vector: ", tfidf_matrix[10])
print("Total terms in document {}: {}".format(10, total_terms_in_document))

# Shows the vectoriser's configuration when run interactively.
tfidf

print(tfidf_matrix[0].shape)
| |
|
| | |
| | |
| | from sklearn.metrics.pairwise import linear_kernel |
| |
|
| | |
# Pairwise similarity between all books: entry [i, j] is the dot product of the
# TF-IDF rows i and j. With the vectoriser's default L2 row normalisation this
# dot product is the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Map each title to its row label in books_df. Duplicates must be removed on
# the *index* (the titles): ``Series.drop_duplicates()`` compares the values,
# which are row labels, so it never removed a repeated title and
# ``indices[title]`` returned a Series for such titles. The first occurrence of
# each title is kept.
# NOTE(review): cosine_sim is addressed by position, so this assumes the row
# labels of books_df equal the row positions -- confirm.
indices = pd.Series(books_df.index, index=books_df['title']).loc[
    lambda s: ~s.index.duplicated(keep='first')
]
| |
|
def get_original_book_id(title):
    """Return the ``id`` of the first row in ``books_df`` whose title equals *title*.

    Raises:
        IndexError: If no row has that title.
    """
    title_matches = books_df['title'] == title
    matching_ids = books_df.loc[title_matches, 'id']
    return matching_ids.values[0]
| |
|
| | |
def get_top_five_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the ids of the books most similar to the book called *title*.

    Despite the function name, ten ids are returned by default; that count is
    kept for backward compatibility and can be changed through *top_n*.

    Args:
        title: Title of the book to find neighbours for; must be a key of
            the module-level ``indices`` lookup.
        cosine_sim: Square similarity matrix whose row ``i`` holds the
            similarity of book ``i`` to every other book.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of values from the ``id`` column of ``books_df``, most similar
        first. The query book itself is never included.

    Raises:
        KeyError: If *title* is not present in ``indices``.
    """
    idx = indices[title]
    # A repeated title makes the lookup return several rows; use the first one.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Rank every book by its similarity to the query book, highest first.
    # ``sorted`` is stable, so tied scores keep their original row order.
    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Drop the query book explicitly rather than assuming it is ranked first:
    # an identical or empty text can tie with it or outrank it.
    book_indices = [row for row, _ in ranked if row != idx][:top_n]

    # Read the ids by row position. Looking them up again by title would return
    # the wrong id for duplicate titles and scan the frame once per result.
    return list(books_df['id'].iloc[book_indices].values)
| |
|
# Smoke check of the recommender on a known title; the result is only shown
# when run interactively.
get_top_five_recommendations('Walls of Ash')

# URLs of the books recommended for the same title.
books_df.loc[
    books_df['id'].isin(get_top_five_recommendations('Walls of Ash')),
    'url',
]
| |
|
| |
|
| | from flask import Flask, jsonify, request |
| | from flask_ngrok import run_with_ngrok |
| |
|
# Flask application that exposes the recommender over HTTP.
app = Flask(import_name=__name__)

# Register the app with flask_ngrok.
# NOTE(review): the app is later served through waitress rather than
# ``app.run``; confirm the ngrok tunnel is still started in that setup.
run_with_ngrok(app)
| |
|
| | import json |
@app.route('/predict/<int:id>', methods=['GET'])
def predict(id):
    """Return the recommended book ids for the book with the given *id* as JSON.

    Args:
        id: Value of the ``id`` column of the book to base recommendations
            on, taken from the URL. The name shadows the builtin but must
            match the route variable.

    Returns:
        A JSON array of integer book ids, or a JSON error object with HTTP
        status 404 when no book has that id.
    """
    matching_titles = books_df.loc[books_df['id'] == id, 'title']
    # An unknown id previously raised IndexError and surfaced as HTTP 500.
    if matching_titles.empty:
        return jsonify({"error": "no book with id {}".format(id)}), 404

    title = matching_titles.iloc[0]
    print(title)
    # Convert to plain ints so the ids are JSON-serialisable.
    prediction_result = [int(x) for x in get_top_five_recommendations(title)]
    # jsonify sets the application/json content type; returning a bare string
    # would be sent with Flask's default HTML content type.
    return jsonify(prediction_result)
| |
|
| | from waitress import serve |
| |
|
# Start the waitress WSGI server only when this file is run as a script.
if __name__ == "__main__":
    # Listen on all interfaces, port 8080.
    serve(app, port=8080, host="0.0.0.0")
| |
|
| |
|