diff --git a/edsa_recommender.py b/edsa_recommender.py index f1192112..17bbe323 100644 --- a/edsa_recommender.py +++ b/edsa_recommender.py @@ -2,7 +2,7 @@ Streamlit webserver-based Recommender Engine. - Author: Explore Data Science Academy. + Author: EDSA2301_JM3. Note: --------------------------------------------------------------------- @@ -27,6 +27,13 @@ """ # Streamlit dependencies import streamlit as st +import streamlit as st +#import streamlit_option_menu +import streamlit_option_menu +import joblib,os +import csv +from PIL import Image, ImageDraw + # Data handling dependencies import pandas as pd @@ -45,7 +52,7 @@ def main(): # DO NOT REMOVE the 'Recommender System' option below, however, # you are welcome to add more options to enrich your app. - page_options = ["Recommender System","Solution Overview"] + page_options = ["Recommender System","Solution Overview","Register Here","Who We Are","Contact Us","Feedback","Privacy and Security"] # ------------------------------------------------------------------- # ----------- !! THIS CODE MUST NOT BE ALTERED !! ------------------- @@ -102,8 +109,96 @@ def main(): # ------------- SAFE FOR ALTERING/EXTENSION ------------------- if page_selection == "Solution Overview": st.title("Solution Overview") - st.write("Describe your winning approach on this page") - + + st.write("Prime Solutions is designed to revolutionize the way you discover and enjoy movies. We understand that choosing the perfect movie can be a daunting task, given the overwhelming number of options available. That's why we have crafted a cutting-edge solution that takes the guesswork out of movie selection and provides you with personalized recommendations that cater to your individual taste") + + tab1, tab2, = st.tabs (["Features", "How It Works"]) + with tab1: + st.markdown("Personalized Movie Recommendations: Discover new movies that align perfectly with your taste, based on your top three movie choices") + + + st.markdown("Extensive Movie Database: Our app has an extensive library of movies, ranging from timeless classics to the latest blockbusters, catering to every genre and taste.") + + st.markdown("User-Friendly Interface: Enjoy a seamless and intuitive user experience, making it effortless to find your ideal movie suggestions.") + + st.markdown("Rate and Review: Share your thoughts and feelings about the movies you've watched and help other users in the community find their next favorite film.") + + with tab2: + st.markdown(" How To Use The App") + image1 = Image.open("resources/imgs/Picture2.png") + st.image(image1) + + if page_selection == "Register Here": + st.title("Register Here") + with st.form("form1", clear_on_submit= True): + name= st.text_input("Enter Full Name") + name= st.text_input("Username") + name= st.text_input("Enter Email") + name= st.text_input("Enter Your Password") + name= st.text_input("Country Of Origin") + name= st.text_input("Favourite Movie Genre") + + submit= st.form_submit_button("Register") + + if submit: + st.write("Your details have been submitted successfully") + + if page_selection == "Who We Are": + st.title("Who We Are") + + tab1, tab2, tab3, = st.tabs(["About Us", "Meet The Team", "Contact details"]) + with tab1: + st.markdown("Prime Solutions is a cutting-edge tech company dedicated to revolutionizing the movie-watching experience through personalized recommendations.") + st.markdown("Mission:") + st.markdown("To empower movie enthusiasts worldwide by providing a seamless and personalized movie recommendation platform. By leveraging cutting-edge technology and an extensive movie database, we aim to simplify the process of discovering new films that resonate with each individual's unique tastes.") + image2 = Image.open("resources/imgs/logo.jpg") + st.image(image2) + with tab2: + st.markdown("Prime Solutions Team:") + + st.markdown("Isaac Sihlangu: CEO") + st.markdown("Kobus Leach: COO") + st.markdown("David Molefe: IMPLEMANTATION SPECIALIST") + st.markdown("Masindi Phionah: DATA SCIENTIST") + st.markdown("Seshwene Makhura: DATA ANALYST") + st.markdown("Xichavo Ngobeni: DATA ENGINEER") + st.markdown("Nthabiseng Madiba: SOFTWARE DEVELOPER") + + with tab3: + st.write("We value your feedback and suggestions. If you have any questions, suggestions or ideas on how we can enhance your movie-watching experience, feel free to get in touch with us. We would love to hear from you.") + st.markdown("Email Address: prime@solutions.org") + st.markdown("Telephone: 011 345 0000") + st.markdown("Website: www.primesolutions.com") + + if page_selection == "Contact Us": + st.title("Get In Touch With Us") + + with st.form("form1", clear_on_submit= True): + name= st.text_input("Enter Full Name") + name= st.text_input("Enter Email") + name= st.text_area("Your message") + + submit= st.form_submit_button("Submit Form") + + if submit: + st.write("Your form has been submitted, We will be in touch with you") + + if page_selection == "Feedback": + st.title("Rate and Review") + movie_title = st.text_input("Did You Like The Movies We Recommended?") + rating = st.radio("Rate The App:", options=[1, 2, 3, 4, 5], format_func=lambda x: "⭐ "*x) + feedback = st.text_area("Provide feedback on the app:") + if st.button("Submit"): + # Save the user's rating and feedback to a database or file + st.success("Rating and feedback submitted!") + + if page_selection == "Privacy and Security": + st.title("Privacy and Security") + st.write("At Prime Solutions, we take your privacy and security seriously. Rest assured that your movie preferences and personal data are protected and will never be shared with any third parties without your explicit consent.") + + image3 = Image.open("resources/imgs/privacy.jpg") + st.image(image3) + # You may want to add more sections here for aspects such as an EDA, # or to provide your business pitch. diff --git a/recommenders/collaborative_based.py b/recommenders/collaborative_based.py index 861b5d8f..2c13be39 100644 --- a/recommenders/collaborative_based.py +++ b/recommenders/collaborative_based.py @@ -30,17 +30,19 @@ # Script dependencies import pandas as pd import numpy as np +import scipy as sp import pickle import copy -from surprise import Reader, Dataset -from surprise import SVD, NormalPredictor, BaselineOnly, KNNBasic, NMF +from surprise import Reader, Dataset, SVD from sklearn.metrics.pairwise import cosine_similarity from sklearn.feature_extraction.text import CountVectorizer # Importing data -movies_df = pd.read_csv('resources/data/movies.csv',sep = ',') +#movies_df = pd.read_csv('/home/explore-student/unsupervised_data/unsupervised_movie_data/movies.csv',sep = ',',delimiter=',') +#ratings_df = pd.read_csv('/home/explore-student/unsupervised_data/unsupervised_movie_data/train.csv') +movies_df = pd.read_csv('resources/data/movies.csv') ratings_df = pd.read_csv('resources/data/ratings.csv') -ratings_df.drop(['timestamp'], axis=1,inplace=True) +ratings_df.drop(['timestamp'], axis=1, inplace=True) # We make use of an SVD model trained on a subset of the MovieLens 10k dataset. model=pickle.load(open('resources/models/SVD.pkl', 'rb')) @@ -99,7 +101,7 @@ def pred_movies(movie_list): return id_store # !! DO NOT CHANGE THIS FUNCTION SIGNATURE !! -# You are, however, encouraged to change its content. +# You are, however, encouraged to change its content. def collab_model(movie_list,top_n=10): """Performs Collaborative filtering based upon a list of movies supplied by the app user. @@ -117,32 +119,60 @@ def collab_model(movie_list,top_n=10): Titles of the top-n movie recommendations to the user. """ - - indices = pd.Series(movies_df['title']) - movie_ids = pred_movies(movie_list) - df_init_users = ratings_df[ratings_df['userId']==movie_ids[0]] - for i in movie_ids : - df_init_users=df_init_users.append(ratings_df[ratings_df['userId']==i]) - # Getting the cosine similarity matrix - cosine_sim = cosine_similarity(np.array(df_init_users), np.array(df_init_users)) + names = movies_df.copy() + names.set_index('movieId',inplace=True) + indices = pd.Series(names['title']) + users_ids = pred_movies(movie_list) + # Get movie IDs and ratings for top users + df_init_users = ratings_df[ratings_df['userId']==users_ids[0]] + for i in users_ids[1:]: + df_init_users = df_init_users.append(ratings_df[ratings_df['userId']==i]) + # Include predictions for chosen movies + for j in movie_list: + a = pd.DataFrame(prediction_item(j)) + for i in set(df_init_users['userId']): + mid = indices[indices == j].index[0] + est = a['est'][a['uid']==i].values[0] + df_init_users = df_init_users.append(pd.Series([int(i),int(mid),est], index=['userId','movieId','rating']), ignore_index=True) + # Remove duplicate entries + df_init_users.drop_duplicates(inplace=True) + #Create pivot table + util_matrix = df_init_users.pivot_table(index=['userId'], columns=['movieId'], values='rating') + # Fill Nan values with 0's and save the utility matrix in scipy's sparse matrix format + util_matrix.fillna(0, inplace=True) + util_matrix_sparse = sp.sparse.csr_matrix(util_matrix.values) + # Compute the similarity matrix using the cosine similarity metric + user_similarity = cosine_similarity(util_matrix_sparse.T) + # Save the matrix as a dataframe to allow for easier indexing + user_sim_df = pd.DataFrame(user_similarity, index = util_matrix.columns, columns = util_matrix.columns) + user_similarity = cosine_similarity(np.array(df_init_users), np.array(df_init_users)) + user_sim_df = pd.DataFrame(user_similarity, index = df_init_users['movieId'].values.astype(int), columns = df_init_users['movieId'].values.astype(int)) + # Remove duplicate rows from matrix + user_sim_df = user_sim_df.loc[~user_sim_df.index.duplicated(keep='first')] + # Transpose matrix + user_sim_df = user_sim_df.T + # Find IDs of chosen load_movie_titles idx_1 = indices[indices == movie_list[0]].index[0] idx_2 = indices[indices == movie_list[1]].index[0] idx_3 = indices[indices == movie_list[2]].index[0] # Creating a Series with the similarity scores in descending order - rank_1 = cosine_sim[idx_1] - rank_2 = cosine_sim[idx_2] - rank_3 = cosine_sim[idx_3] + rank_1 = user_sim_df[idx_1] + rank_2 = user_sim_df[idx_2] + rank_3 = user_sim_df[idx_3] # Calculating the scores score_series_1 = pd.Series(rank_1).sort_values(ascending = False) score_series_2 = pd.Series(rank_2).sort_values(ascending = False) score_series_3 = pd.Series(rank_3).sort_values(ascending = False) - # Appending the names of movies - listings = score_series_1.append(score_series_1).append(score_series_3).sort_values(ascending = False) - recommended_movies = [] + # Appending the names of movies + listings = score_series_1.append(score_series_2).append(score_series_3).sort_values(ascending = False) # Choose top 50 top_50_indexes = list(listings.iloc[1:50].index) # Removing chosen movies top_indexes = np.setdiff1d(top_50_indexes,[idx_1,idx_2,idx_3]) + # Get titles of recommended movies + recommended_movies = [] for i in top_indexes[:top_n]: - recommended_movies.append(list(movies_df['title'])[i]) + recommended_movies.append(list(movies_df[movies_df['movieId']==i]['title'])) + # Return list of movies + recommended_movies = [val for sublist in recommended_movies for val in sublist] return recommended_movies diff --git a/resources/imgs/EDSA_logo.png b/resources/imgs/EDSA_logo.png deleted file mode 100644 index 72141516..00000000 Binary files a/resources/imgs/EDSA_logo.png and /dev/null differ diff --git a/resources/imgs/Picture2.png b/resources/imgs/Picture2.png new file mode 100644 index 00000000..c30831cf Binary files /dev/null and b/resources/imgs/Picture2.png differ diff --git a/resources/imgs/What_is_a_recommender_system.png b/resources/imgs/What_is_a_recommender_system.png deleted file mode 100644 index e0c987b8..00000000 Binary files a/resources/imgs/What_is_a_recommender_system.png and /dev/null differ diff --git a/resources/imgs/logo.jpg b/resources/imgs/logo.jpg new file mode 100644 index 00000000..83bb0b98 Binary files /dev/null and b/resources/imgs/logo.jpg differ diff --git a/resources/imgs/machine.jpg b/resources/imgs/machine.jpg new file mode 100644 index 00000000..58d32a52 Binary files /dev/null and b/resources/imgs/machine.jpg differ diff --git a/resources/imgs/privacy.jpg b/resources/imgs/privacy.jpg new file mode 100644 index 00000000..b088da66 Binary files /dev/null and b/resources/imgs/privacy.jpg differ