Skip to content
This repository was archived by the owner on Nov 23, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
88 commits
Select commit Hold shift + click to select a range
9a28e85
Update edsa_recommender.py
Sandile98 Jul 7, 2023
cb4764b
genre insight and unfinished movie insight page
ThabisoNdlovu Jul 13, 2023
6bcf992
added csv with movie details for movie insight page.
ThabisoNdlovu Jul 14, 2023
6c6eb9e
movie insight updated
ThabisoNdlovu Jul 14, 2023
291a89d
insight pages functionality
ThabisoNdlovu Jul 15, 2023
8c797a7
removed pie chart numbers
ThabisoNdlovu Jul 15, 2023
293e8a3
Updated the recommender app and added pics
Sandile98 Jul 15, 2023
e736122
Merge pull request #1 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 15, 2023
397e693
Update edsa_recommender.py
Sandile98 Jul 15, 2023
7e3923a
Merge pull request #2 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 15, 2023
0a3bdda
Update edsa_recommender.py
Sandile98 Jul 16, 2023
58e6aa5
Merge pull request #3 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 16, 2023
59a6fd5
Update edsa_recommender.py
Sandile98 Jul 16, 2023
db0f30f
Merge pull request #4 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 16, 2023
88258f7
Update edsa_recommender.py
Sandile98 Jul 16, 2023
1ac3478
Merge pull request #5 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 16, 2023
8b16937
Updated notebook and logo
Sandile98 Jul 18, 2023
8565527
Merge pull request #6 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 18, 2023
1cd3c57
Update edsa_recommender.py
Sandile98 Jul 19, 2023
b663200
Merge pull request #7 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 19, 2023
fd985ac
Update edsa_recommender.py
Sandile98 Jul 19, 2023
3a8bb48
Merge pull request #8 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 19, 2023
99f9da0
Update edsa_recommender.py
Sandile98 Jul 19, 2023
8059a84
Merge pull request #9 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 19, 2023
392dbef
Update edsa_recommender.py
Sandile98 Jul 19, 2023
20aa26b
Merge pull request #10 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 19, 2023
3d4297f
Update edsa_recommender.py
Sandile98 Jul 19, 2023
e2a3b9d
Merge pull request #11 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 19, 2023
c1d5db9
Update edsa_recommender.py
Sandile98 Jul 19, 2023
f722bcf
Update edsa_recommender.py
Sandile98 Jul 19, 2023
5472c91
Update edsa_recommender.py
Sandile98 Jul 19, 2023
b429bf8
Update edsa_recommender.py
Sandile98 Jul 19, 2023
3bef356
Merge pull request #12 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 19, 2023
dae3b55
updates
Sandile98 Jul 20, 2023
16b5284
Merge pull request #13 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
e6a5866
Update edsa_recommender.py
Sandile98 Jul 20, 2023
af9ff37
Merge pull request #14 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
2f7de45
Update edsa_recommender.py
Sandile98 Jul 20, 2023
b39420b
Merge pull request #15 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
e28c066
Update edsa_recommender.py
Sandile98 Jul 20, 2023
c1d54fb
Merge pull request #16 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
2cbb462
Update edsa_recommender.py
Sandile98 Jul 20, 2023
666f515
Merge pull request #17 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
d518aab
Update collaborative_based.py
Sandile98 Jul 20, 2023
a4e5227
Merge pull request #18 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
50909ca
Update edsa_recommender.py
Sandile98 Jul 20, 2023
adc48a3
Merge pull request #19 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
155b04f
Update edsa_recommender.py
Sandile98 Jul 20, 2023
7212362
Merge pull request #20 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
65c907b
Update collaborative_based.py
Sandile98 Jul 20, 2023
3d56abd
Merge pull request #21 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
9e283ef
Update collaborative_based.py
Sandile98 Jul 20, 2023
498eb6d
Merge pull request #22 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
d38b035
Update collaborative_based.py
Sandile98 Jul 20, 2023
4e7cc69
Merge pull request #23 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
0c594ff
Update ratings_2.csv
Sandile98 Jul 20, 2023
ee3fa55
Merge pull request #24 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
8f7fe03
Update collaborative_based.py
Sandile98 Jul 20, 2023
29f1bc5
Merge pull request #25 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
f8025c5
Update edsa_recommender.py
Sandile98 Jul 20, 2023
c70bdec
Merge pull request #26 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
4b2547b
Update ratings_2.csv
Sandile98 Jul 20, 2023
c8758a4
Merge pull request #27 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
8f66f07
Updates on models and collab
Sandile98 Jul 20, 2023
93c102d
Merge pull request #28 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
515746f
Update edsa_recommender.py
Sandile98 Jul 20, 2023
c8896b9
Merge pull request #29 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 20, 2023
d89e76f
Updates on app and data added
Sandile98 Jul 21, 2023
b02cada
Merge pull request #30 from GM2-UNSUPERVISED-LEARNING/Sandile
Sandile98 Jul 21, 2023
a376a7e
movie insight update
ThabisoNdlovu Jul 22, 2023
a4539ad
Merge pull request #31 from GM2-UNSUPERVISED-LEARNING/Thabiso
ThabisoNdlovu Jul 22, 2023
b6dd7e8
Added the second bar chat.
Sandile98 Jul 22, 2023
bf09f8a
Merge branch 'master' into Sandile
Sandile98 Jul 22, 2023
13248fa
Update edsa_recommender.py
Sandile98 Jul 22, 2023
b1c27b5
Update collaborative_based.py
Sandile98 Jul 22, 2023
58489b5
Update edsa_recommender.py
Sandile98 Jul 22, 2023
749bfe8
Update edsa_recommender.py
Sandile98 Jul 23, 2023
dc7fd0d
Update collaborative_based.py
Sandile98 Jul 23, 2023
895bf9d
Update collaborative_based.py
Sandile98 Jul 23, 2023
aadff64
Update edsa_recommender.py
Sandile98 Jul 23, 2023
ce5e5b3
Update edsa_recommender.py
Sandile98 Jul 23, 2023
a2fcd0d
Update collaborative_based.py
Sandile98 Jul 23, 2023
b691e05
Update edsa_recommender.py
Sandile98 Jul 23, 2023
3b88dd1
Update edsa_recommender.py
Sandile98 Jul 24, 2023
5069d64
Update edsa_recommender.py
Sandile98 Jul 24, 2023
1571b78
Update edsa_recommender.py
Sandile98 Jul 24, 2023
afa0c3b
Update edsa_recommender.py
Sandile98 Jul 24, 2023
18fdf4b
Updates on content-base and recommender
Sandile98 Jul 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
430 changes: 424 additions & 6 deletions edsa_recommender.py

Large diffs are not rendered by default.

122 changes: 90 additions & 32 deletions recommenders/collaborative_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@

# Importing data
movies_df = pd.read_csv('resources/data/movies.csv',sep = ',')
ratings_df = pd.read_csv('resources/data/ratings.csv')
ratings_df = pd.read_csv('~/unsupervised_data/edsa-movie-recommendation-predict/train.csv')
movie_df = pd.read_csv('resources/data/movies.csv')
ratings_df.drop(['timestamp'], axis=1,inplace=True)

# We make use of an SVD model trained on a subset of the MovieLens 10k dataset.
model=pickle.load(open('resources/models/SVD.pkl', 'rb'))
model=pickle.load(open('resources/models/svd_model_5.pkl', 'rb'))

def prediction_item(item_id):
"""Map a given favourite movie to users within the
Expand Down Expand Up @@ -97,18 +98,25 @@ def pred_movies(movie_list):
id_store.append(pred.uid)
# Return a list of user id's
return id_store
#----------------------------------------------------------------------------------#
# Cache of cosine-similarity matrices, keyed by the tuple of ids returned by pred_movies()
similarity_cache = {}

# !! DO NOT CHANGE THIS FUNCTION SIGNATURE !!
# You are, however, encouraged to change its content.
def collab_model(movie_list,top_n=10):
def compute_movie_similarity(movie_ids, ratings_df):
    """Return the pairwise cosine similarity of the selected rating rows.

    Parameters
    ----------
    movie_ids : iterable
        Ids used to select rows. Despite the name, each value is compared
        against the ``userId`` column of ``ratings_df``.
    ratings_df : pandas.DataFrame
        Ratings table; must contain a ``userId`` column. Every column of the
        selected rows is handed to ``cosine_similarity`` unchanged.

    Returns
    -------
    The value returned by ``cosine_similarity`` when the selected rows are
    compared against themselves (presumably a square array with one row and
    column per selected rating row -- confirm against scikit-learn docs).

    """
    # One sub-frame per requested id, kept in the order the ids were given;
    # an id that appears twice contributes its rows twice.
    per_id_frames = []
    for wanted_id in movie_ids:
        id_mask = ratings_df['userId'] == wanted_id
        per_id_frames.append(ratings_df[id_mask])

    stacked_rows = pd.concat(per_id_frames)
    return cosine_similarity(stacked_rows, stacked_rows)

def collab_model(movie_list, top_n=10):
"""Performs Collaborative filtering based upon a list of movies supplied
by the app user.

Parameters
----------
movie_list : list (str)
Favorite movies chosen by the app user.
top_n : type
top_n : int, optional
Number of top recommendations to return to the user.

Returns
Expand All @@ -117,32 +125,82 @@ def collab_model(movie_list,top_n=10):
Titles of the top-n movie recommendations to the user.

"""
global similarity_cache

indices = pd.Series(movies_df['title'])
indices = pd.Series(movie_df['title'])
movie_ids = pred_movies(movie_list)
df_init_users = ratings_df[ratings_df['userId']==movie_ids[0]]
for i in movie_ids :
df_init_users=df_init_users.append(ratings_df[ratings_df['userId']==i])
# Getting the cosine similarity matrix
cosine_sim = cosine_similarity(np.array(df_init_users), np.array(df_init_users))
idx_1 = indices[indices == movie_list[0]].index[0]
idx_2 = indices[indices == movie_list[1]].index[0]
idx_3 = indices[indices == movie_list[2]].index[0]
# Creating a Series with the similarity scores in descending order
rank_1 = cosine_sim[idx_1]
rank_2 = cosine_sim[idx_2]
rank_3 = cosine_sim[idx_3]
# Calculating the scores
score_series_1 = pd.Series(rank_1).sort_values(ascending = False)
score_series_2 = pd.Series(rank_2).sort_values(ascending = False)
score_series_3 = pd.Series(rank_3).sort_values(ascending = False)
# Appending the names of movies
listings = score_series_1.append(score_series_1).append(score_series_3).sort_values(ascending = False)
recommended_movies = []
# Choose top 50
top_50_indexes = list(listings.iloc[1:50].index)
# Removing chosen movies
top_indexes = np.setdiff1d(top_50_indexes,[idx_1,idx_2,idx_3])
for i in top_indexes[:top_n]:
recommended_movies.append(list(movies_df['title'])[i])

# Check if similarity matrix already computed and cached
cache_key = tuple(movie_ids)
if cache_key in similarity_cache:
cosine_sim = similarity_cache[cache_key]
else:
cosine_sim = compute_movie_similarity(movie_ids, ratings_df)
# Cache the similarity matrix for future use
similarity_cache[cache_key] = cosine_sim

# Get the indices of the selected movies in the movie DataFrame
movie_indexes = [indices[indices == movie].index[0] for movie in movie_list]

# Compute the similarity scores for each selected movie
score_series = pd.Series(cosine_sim[movie_indexes].sum(axis=0) / len(movie_indexes))

# Sort the similarity scores in descending order
score_series = score_series.sort_values(ascending=False)

# Choose top_n movies as recommendations
top_recommendations = score_series.iloc[1:top_n + 1]
recommended_movies = list(indices[top_recommendations.index])

return recommended_movies

#----------------------------------------------------------------------------------#

# !! DO NOT CHANGE THIS FUNCTION SIGNATURE !!
# You are, however, encouraged to change its content.
# def collab_model(movie_list,top_n=10):
# """Performs Collaborative filtering based upon a list of movies supplied
# by the app user.

# Parameters
# ----------
# movie_list : list (str)
# Favorite movies chosen by the app user.
# top_n : type
# Number of top recommendations to return to the user.

# Returns
# -------
# list (str)
# Titles of the top-n movie recommendations to the user.

# """

# indices = pd.Series(movies_df['title'])
# movie_ids = pred_movies(movie_list)
# df_init_users = ratings_df[ratings_df['userId']==movie_ids[0]]
# for i in movie_ids :
# df_init_users=df_init_users.append(ratings_df[ratings_df['userId']==i])
# # Getting the cosine similarity matrix
# cosine_sim = cosine_similarity(np.array(df_init_users), np.array(df_init_users))
# idx_1 = indices[indices == movie_list[0]].index[0]
# idx_2 = indices[indices == movie_list[1]].index[0]
# idx_3 = indices[indices == movie_list[2]].index[0]
# # Creating a Series with the similarity scores in descending order
# rank_1 = cosine_sim[idx_1]
# rank_2 = cosine_sim[idx_2]
# rank_3 = cosine_sim[idx_3]
# # Calculating the scores
# score_series_1 = pd.Series(rank_1).sort_values(ascending = False)
# score_series_2 = pd.Series(rank_2).sort_values(ascending = False)
# score_series_3 = pd.Series(rank_3).sort_values(ascending = False)
# # Appending the names of movies
# listings = score_series_1.append(score_series_1).append(score_series_3).sort_values(ascending = False)
# recommended_movies = []
# # Choose top 50
# top_50_indexes = list(listings.iloc[1:50].index)
# # Removing chosen movies
# top_indexes = np.setdiff1d(top_50_indexes,[idx_1,idx_2,idx_3])
# for i in top_indexes[:top_n]:
# recommended_movies.append(list(movies_df['title'])[i])
# return recommended_movies
2 changes: 1 addition & 1 deletion recommenders/content_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

# Importing data
movies = pd.read_csv('resources/data/movies.csv', sep = ',')
ratings = pd.read_csv('resources/data/ratings.csv')
ratings = pd.read_csv('~/unsupervised_data/edsa-movie-recommendation-predict/train.csv')
movies.dropna(inplace=True)

def data_preprocessing(subset_size):
Expand Down
Loading