diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
new file mode 100644
index 00000000..57940bdb
--- /dev/null
+++ b/.github/workflows/python-package-conda.yml
@@ -0,0 +1,34 @@
+name: Python Package using Conda
+
+on: [push]
+
+jobs:
+ build-linux:
+ runs-on: ubuntu-latest
+ strategy:
+ max-parallel: 5
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python 3.10
+ uses: actions/setup-python@v3
+ with:
+        python-version: '3.10'
+ - name: Add conda to system path
+ run: |
+ # $CONDA is an environment variable pointing to the root of the miniconda directory
+ echo $CONDA/bin >> $GITHUB_PATH
+ - name: Install dependencies
+ run: |
+ conda env update --file environment.yml --name base
+ - name: Lint with flake8
+ run: |
+ conda install flake8
+ # stop the build if there are Python syntax errors or undefined names
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+ # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+ - name: Test with pytest
+ run: |
+ conda install pytest
+ pytest
diff --git a/edsa_recommender.py b/edsa_recommender.py
index f1192112..68f1e56d 100644
--- a/edsa_recommender.py
+++ b/edsa_recommender.py
@@ -1,112 +1,152 @@
"""
-
Streamlit webserver-based Recommender Engine.
-
Author: Explore Data Science Academy.
-
Note:
---------------------------------------------------------------------
Please follow the instructions provided within the README.md file
located within the root of this repository for guidance on how to use
this script correctly.
-
NB: !! Do not remove/modify the code delimited by dashes !!
-
This application is intended to be partly marked in an automated manner.
Altering delimited code may result in a mark of 0.
---------------------------------------------------------------------
-
Description: This file is used to launch a minimal streamlit web
application. You are expected to extend certain aspects of this script
and its dependencies as part of your predict project.
-
For further help with the Streamlit framework, see:
-
https://docs.streamlit.io/en/latest/
-
"""
# Streamlit dependencies
import streamlit as st
+import hydralit_components as hc
# Data handling dependencies
import pandas as pd
import numpy as np
+from sympy import im
# Custom Libraries
from utils.data_loader import load_movie_titles
from recommenders.collaborative_based import collab_model
from recommenders.content_based import content_model
-
+import functions.youtube_scrapper as top_trailers
+import menu.trailers as t
+import menu.data_professionals as dreamers
+import menu.statistics as stat
+import menu.helper as h
+import menu.About as a
+import time
# Data Loading
-title_list = load_movie_titles('resources/data/movies.csv')
+title_list = load_movie_titles('https://raw.githubusercontent.com/Dream-Team-Unsupervised/Data/main/movies.csv')
+
+st.set_page_config(page_icon='resources/imgs/MovieXplorer.png', page_title= 'Movie Xplorer', layout='wide', initial_sidebar_state='auto')
+
+over_theme = {'txc_inactive': '#FFFFFF'}
+
+# specify the primary menu definition
+menu_data = [
+ {'icon': "far fa-copy", 'label':'About'},
+ {'id':'Trailers','icon':'fas fa-film','label':'Trailers'},
+ {'icon': 'far fa-chart-bar', 'label':'Statistics'}, #no tooltip message
+ {'id':'Contact Us','icon': 'fas fa-laptop', 'label':'Contact Us'},
+ {'id':'Help', 'icon': 'fas fa-question', 'label':'Help'}
+]
# App declaration
-def main():
+def main():
+ # define hydralit navbar
+ menu_id = hc.nav_bar(
+ menu_definition=menu_data,
+ override_theme=over_theme,
+
+ home_name='Home',
+ # login_name='Logout',
+ hide_streamlit_markers=False, #will show the st hamburger as well as the navbar now!
+ sticky_nav=True, #at the top or not
+ sticky_mode='pinned', #jumpy or not-jumpy, but sticky or pinned
+)
+ page_selection = f'{menu_id}'
# DO NOT REMOVE the 'Recommender System' option below, however,
# you are welcome to add more options to enrich your app.
- page_options = ["Recommender System","Solution Overview"]
+ # page_options = ["Recommender System", "About", "Trailers", "Statistics", "The Dream Team", "Help Page"]
# -------------------------------------------------------------------
# ----------- !! THIS CODE MUST NOT BE ALTERED !! -------------------
# -------------------------------------------------------------------
- page_selection = st.sidebar.selectbox("Choose Option", page_options)
- if page_selection == "Recommender System":
+ # page_selection = st.sidebar.selectbox("Choose Option", page_options)
+
+ if page_selection == 'Home':
# Header contents
- st.write('# Movie Recommender Engine')
- st.write('### EXPLORE Data Science Academy Unsupervised Predict')
- st.image('resources/imgs/Image_header.png',use_column_width=True)
+ st.write('# Movie Xplorer')
+ # st.write('### EXPLORE Data Science Academy Unsupervised Predict')
+ st.image('resources/imgs/Header2L.gif',use_column_width=True)
# Recommender System algorithm selection
- sys = st.radio("Select an algorithm",
- ('Content Based Filtering',
- 'Collaborative Based Filtering'))
+ st.write('', unsafe_allow_html=True)
+ st.write('', unsafe_allow_html=True)
+ sys = st.radio("", ('Content Based Filtering', 'Collaborative Based Filtering'))
# User-based preferences
- st.write('### Enter Your Three Favorite Movies')
- movie_1 = st.selectbox('Fisrt Option',title_list[14930:15200])
- movie_2 = st.selectbox('Second Option',title_list[25055:25255])
- movie_3 = st.selectbox('Third Option',title_list[21100:21200])
+ st.write('### Select Your Three Favorite Movies')
+ movie_1 = st.selectbox('1ˢᵗ Movie',title_list[14930:15200])
+ movie_2 = st.selectbox('2ⁿᵈ Movie',title_list[25055:25255])
+ movie_3 = st.selectbox('3ʳᵈ Movie',title_list[21100:21200])
fav_movies = [movie_1,movie_2,movie_3]
# Perform top-10 movie recommendation generation
if sys == 'Content Based Filtering':
- if st.button("Recommend"):
+ if st.button('Recommend'):
try:
- with st.spinner('Crunching the numbers...'):
- top_recommendations = content_model(movie_list=fav_movies,
- top_n=10)
- st.title("We think you'll like:")
+                    # initialize hydralit loaders
+ with hc.HyLoader('We\'re getting movies only you will love...\n',hc.Loaders.standard_loaders,index=[5,0,3]):
+ # get top 10 recommended movies using the content_model algorithm
+ top_recommendations = content_model(movie_list=fav_movies, top_n=10)
+ time.sleep(5)
+ st.title('Only you will love these movies...')
for i,j in enumerate(top_recommendations):
st.subheader(str(i+1)+'. '+j)
+ # get trailer from youtube
+ top_trailers.youtubeScrapper(top_recommendations[i])
except:
st.error("Oops! Looks like this algorithm does't work.\
We'll need to fix it!")
-
if sys == 'Collaborative Based Filtering':
- if st.button("Recommend"):
+ if st.button('Recommend'):
try:
- with st.spinner('Crunching the numbers...'):
- top_recommendations = collab_model(movie_list=fav_movies,
- top_n=10)
- st.title("We think you'll like:")
+                    # initialize hydralit loaders
+ with hc.HyLoader('We\'re getting movies only you will love...\n',hc.Loaders.standard_loaders,index=[5,0,3]):
+ # get top 10 recommended movies using the collab_model algorithm
+ top_recommendations = collab_model(movie_list=fav_movies, top_n=10)
+ time.sleep(5)
+ st.title('Only you will love these movies...')
for i,j in enumerate(top_recommendations):
st.subheader(str(i+1)+'. '+j)
+ # get trailer from youtube
+ top_trailers.youtubeScrapper(top_recommendations[i])
except:
st.error("Oops! Looks like this algorithm does't work.\
We'll need to fix it!")
-
-
# -------------------------------------------------------------------
- # ------------- SAFE FOR ALTERING/EXTENSION -------------------
- if page_selection == "Solution Overview":
- st.title("Solution Overview")
- st.write("Describe your winning approach on this page")
-
+ # ------------- SAFE FOR ALTERING/EXTENSION -------------------------
+ elif page_selection == 'About':
+ # navigate to the About page
+ a.about()
+ elif page_selection == 'Trailers':
+ # navigate to the Trailers page
+ t.vids()
+ elif page_selection == 'Contact Us':
+ # navigate to the Contact Us page
+ dreamers.data_professionals()
+ elif page_selection == 'Statistics':
+ # navigate to the Statistics page
+ stat.visuals()
+ elif page_selection == 'Help':
+ # navigate to the Help page
+ h.helppage()
# You may want to add more sections here for aspects such as an EDA,
# or to provide your business pitch.
-
if __name__ == '__main__':
main()
diff --git a/functions/CRUD/CREATE.py b/functions/CRUD/CREATE.py
new file mode 100644
index 00000000..3c5403ad
--- /dev/null
+++ b/functions/CRUD/CREATE.py
@@ -0,0 +1,7 @@
+import csv
+
+def insert(post):
+ with open('./resources/data/comments.csv', 'a', newline='', encoding='UTF8') as c:
+ writer = csv.writer(c)
+ # write the data
+ writer.writerow(post)
\ No newline at end of file
diff --git a/functions/CRUD/READ.py b/functions/CRUD/READ.py
new file mode 100644
index 00000000..132e4e4d
--- /dev/null
+++ b/functions/CRUD/READ.py
@@ -0,0 +1,16 @@
+import csv
+import streamlit as st
+
+COMMENT_TEMPLATE_MD = """{} - {}
+> {}"""
+
+def select():
+ # opening the CSV file
+ with open('./resources/data/comments.csv', mode='r') as c:
+ # reading the CSV file
+ csvFile = csv.reader(c)
+ # displaying the contents of the CSV file
+ next(csvFile)
+ for lines in csvFile:
+ st.markdown(COMMENT_TEMPLATE_MD.format(lines[0], lines[2], lines[1]))
+
\ No newline at end of file
diff --git a/functions/CRUD/UPDATE.py b/functions/CRUD/UPDATE.py
new file mode 100644
index 00000000..3c5403ad
--- /dev/null
+++ b/functions/CRUD/UPDATE.py
@@ -0,0 +1,7 @@
+import csv
+
+def insert(post):
+ with open('./resources/data/comments.csv', 'a', newline='', encoding='UTF8') as c:
+ writer = csv.writer(c)
+ # write the data
+ writer.writerow(post)
\ No newline at end of file
diff --git a/functions/comments.py b/functions/comments.py
new file mode 100644
index 00000000..8448aa9e
--- /dev/null
+++ b/functions/comments.py
@@ -0,0 +1,21 @@
+from datetime import datetime
+import streamlit as st
+import functions.CRUD.CREATE as create
+import functions.CRUD.READ as read
+
+def commenter(yr):
+
+ read.select()
+
+ form = st.form(yr + 'comments')
+ name = form.text_input('Name')
+ comment = form.text_area('Comment')
+ submit = form.form_submit_button('Share comment')
+
+ if submit:
+ date = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+ par = [name, comment, date]
+ create.insert(par)
+ if "just_posted" not in st.session_state:
+ st.session_state["just_posted"] = True
+ st.experimental_rerun()
\ No newline at end of file
diff --git a/functions/youtube_scrapper.py b/functions/youtube_scrapper.py
new file mode 100644
index 00000000..556e932f
--- /dev/null
+++ b/functions/youtube_scrapper.py
@@ -0,0 +1,26 @@
+import urllib.request
+import requests
+import unicodedata
+import re
+from streamlit_player import st_player
+# import os
+# import streamlit as st
+# import googleapiclient.discovery
+
+def youtubeScrapper(top_10):
+ search_string = unicodedata.normalize('NFKD', top_10).encode('ascii', 'ignore').decode()
+ youtube_str = re.sub("[ ]", "+", search_string)
+ html = urllib.request.urlopen('https://www.youtube.com/results?search_query=' + youtube_str + '+trailer')
+ vid_id = re.findall(r'watch\?v=(\S{11})', html.read().decode())
+
+ # Below we verify that the video contains nudity or not
+ # os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
+ # api_service_name = "youtube"
+ # api_version = "v3"
+ # youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey = 'AIzaSyCLfeDKMIRoTZ9JsDGALP3gbNnTb-3DLTQ')
+ # request = youtube.videos().getRating(id=str(vid_id[0]))
+ # response = request.execute()
+ # st.write(str(response))
+
+ trailer_res = 'https://www.youtube.com/watch?v=' + vid_id[0]
+ st_player(trailer_res)
\ No newline at end of file
diff --git a/menu/About.py b/menu/About.py
new file mode 100644
index 00000000..0681bafd
--- /dev/null
+++ b/menu/About.py
@@ -0,0 +1,26 @@
+import streamlit as st
+
+def about():
+ #about page title
+ st.title("Trends Analytics")
+ # company behind the app infomation
+ about_movieXplorer, movieXplorer_logo, = st.columns([2, 3])
+
+ with about_movieXplorer:
+ st.write("**About Trends Analytics**.")
+ st.info("Founded in 2022, Trends Analytics is a data driven company which provides data and business solutions. " +
+ "We use Artificial Intelligence (AI) and Advanced Analytics to enable more personalised " +
+ "solutions for your business needs. We are passionate about Artificial Intelligence and its impact in the " +
+ "future of our society. We help optimise operations, superior customer experiences and innovative business models. " +
+ "We have a team of certified Data Professionals, which consists of Data Scientists, Engineers, Analysts and Project Managers.")
+ st.markdown("")
+ #app information
+ st.write("**About Movie Xplorer**.")
+ st.info("Movie Xplorer is a movie recommendation application which uses collaborative and content-based algorithms to recommend " +
+ "movies to the user. The dataset is obtained from MovieLens and is maintained by the GroupLens research group in the " +
+ "Department of Computer Science and Engineering at the University of Minnesota. Additional movie content data was legally " +
+ "scraped from IMDB. The movie recommendations are catered to each user\'s needs and the user can comment about the movies they" +
+ " have watched. The video content is legally scraped from YouTube.")
+ st.markdown("")
+ with movieXplorer_logo:
+ st.image('resources/imgs/MovieXplorer.png', caption="© The Dream Team")
\ No newline at end of file
diff --git a/menu/data_professionals.py b/menu/data_professionals.py
new file mode 100644
index 00000000..014b539b
--- /dev/null
+++ b/menu/data_professionals.py
@@ -0,0 +1,92 @@
+# streamlit dependencies
+import streamlit as st
+# data dependencies
+import pandas as pd
+import numpy as np
+import base64
+from PIL import Image
+
+def data_professionals():
+ st.info('Explained, Gathered, Analyzed & Unsupervised by The Dream Team')
+
+ contact_form = """
+
+ """
+
+ # define Github links for each team member
+ link1 = "https://github.com/ThulaniNyama"
+ link2 = "https://github.com/alnaschutte"
+ link3 = "https://github.com/SiyandaMadlopha"
+ link4 = "https://github.com/Shoki2"
+ link5 = "https://github.com/ElelwaniTshikovhi"
+ link6 = "https://github.com/SoulR95"
+ # dream_team = Image.open('./resources/imgs/dream_works.gif')
+ dream_works = open('./resources/imgs/dream_works.gif', 'rb')
+ dream_team = dream_works.read()
+ data_url = base64.b64encode(dream_team).decode("utf-8")
+ dream_works.close()
+
+ # define Pandas data frame with team members that developed the models, and the app
+ df = pd.DataFrame(
+ {
+ "The Dream Team": [
+ f'Thulani Nyama',
+ f'Alna Scutte',
+ f'Siyanda Mandlopha',
+ f'Reshoketswe Makgamatha',
+ f'Elelwani Tshikovhi',
+ f'Riaan James-Verwey'
+ ],
+ "Profession": ["Data Scientist", "Data Analyst", "Data Scientist", "Data Analyst", "Data Scientist", "Data Engineer"]
+ }
+
+ )
+
+ team, members, contact, = st.columns([2, 1.5, 1.5])
+
+ with contact:
+ st.header(":mailbox: Get in touch with us!")
+ st.markdown(contact_form, unsafe_allow_html=True)
+ local_css("./utils/style.css")
+ with team:
+ st.write("")
+ st.write("")
+ st.write("")
+ st.write(df.to_html(escape=False, index=False), unsafe_allow_html=True)
+ with members:
+ st.markdown(f'
',unsafe_allow_html=True)
+
+ st.write("")
+ with st.expander("We are based at"):
+ address, dream_map = st.columns([2, 3.5])
+ with address:
+ st.write('', unsafe_allow_html=True)
+ st.write("")
+ st.write("")
+ st.write("")
+ st.write("")
+ st.write("")
+ st.write("")
+ st.write("")
+ st.write("")
+ st.write("")
+ st.markdown("2ⁿᵈ floor")
+ st.markdown("420 Milton Street")
+ st.markdown("Marshaltown")
+ st.markdown("Johannesburg 2000")
+ st.markdown("Tel no: 011 668 4397(moviexp)")
+ with dream_map:
+ df = pd.DataFrame(
+ np.random.randn(1, 2) / [50, 50] + [-26.204103, 28.047305],
+ columns=['lat', 'lon'])
+ st.map(df)
+
+def local_css(file_name):
+ with open(file_name) as f:
+ st.markdown(f"", unsafe_allow_html=True)
\ No newline at end of file
diff --git a/menu/helper.py b/menu/helper.py
new file mode 100644
index 00000000..2ec6df3f
--- /dev/null
+++ b/menu/helper.py
@@ -0,0 +1,122 @@
+#Help page info will come here
+import streamlit as st
+
+def helppage():
+ #Create the title and intro
+ st.title("Need Help?")
+ st.write("**Welcome** Xplorer, Not sure what to do or where to be? We have the support you need.")
+
+ with st.expander("Home"):
+
+
+ #Create the help section for home page.
+ st.title("Home")
+
+ #Add the recoomend video.
+ st.video('https://youtu.be/hebG9vo5D0E')
+
+
+ #create the Step by step guide for the Home page.
+ st.title("Steps")
+ st.write("- Choose Between Content based or Collaborative based filtering.")
+
+ st.info(
+ """
+ - Content based is where we see what a user may like based on keywords/movies.
+
+ - Collaborative based filtering is where we see what a user may like based on other users likes.
+
+ """
+ )
+
+ st.write(
+ """
+ - Select 1st, 2nd & 3rd favourite movie.
+ - Press the Recommend button.
+ - Enjoy the selection of recommended films and their trailers.
+ """
+ )
+
+ #create the About help section.
+ with st.expander("About"):
+ st.title("About")
+
+ #Add About help video
+ st.video('https://youtu.be/5q-vSzaKFik')
+
+ #create the Step by step guide for the About Page.
+ st.title("Steps")
+ st.write(
+ """
+ - Find out about the Company Trends Analytics.
+ - Find out more about the updates for the app.
+ - More info coming soon.
+ """
+ )
+
+ #create help section for trailers page.
+ with st.expander("Trailers"):
+ st.title("Trailers")
+
+ #Add trailer help video.
+ st.video('https://youtu.be/AYgxUezGG1A')
+
+ #create the Step by step guide for the Trailers Page.
+ st.title("Steps")
+ st.write(
+ """
+ - Move the slider to select years.
+ - Choose which years drop down to select and click it.
+ - Play the vid.
+ - Feel free to read the comments or leave a comment.
+ - To leave comment type name in the name box.
+ - Leave your comment in comment section.
+ - Press the share comment button to upload your comment.
+ """
+ )
+
+ #create the About help section.
+ with st.expander("Statistics"):
+ st.title("Statistics")
+
+ #Add Stats help video
+ st.video('https://youtu.be/Z8bAAdsZMOo')
+
+ #create the Step by step guide for the About Page.
+ st.title("Steps")
+ st.write(
+ """
+ - Click the drop down.
+ - Select your options/metrics.
+ - After selecting your 2nd metric your chart or plot should pop up.
+ - To select a different chart clear one of the other metrics.
+ - Feel free to check out the valuable data insights we have for the movie industry.
+ """
+ )
+
+ #create the Contact Us help section.
+ with st.expander("Contact Us"):
+ st.title("Contact Us")
+
+ #Add Contact help video
+ st.video('https://youtu.be/aKhkhLJyxCM')
+
+ #create the Step by step guide for the Contact Us Page.
+ st.title("Steps")
+ st.write(
+ """
+ As you can see Contact Us is pretty simple, but should you require any assistance feel free to reach out.
+
+ - Add your name.
+ - Add your email.
+ - Leave your message or any concerns.
+ - Or if you feel like reaching out telephonically use the drop down.
+ - To find where we are based use the drop down as well.
+            - Trends Analytics strives to make the world a better place, one Data insight at a time.
+
+ """
+ )
+
+
+
+
diff --git a/menu/statistics.py b/menu/statistics.py
new file mode 100644
index 00000000..f558e4d4
--- /dev/null
+++ b/menu/statistics.py
@@ -0,0 +1,86 @@
+# visuals will come here
+from sqlalchemy import true
+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import base64
+
+# load data
+movies = pd.read_csv('https://raw.githubusercontent.com/Dream-Team-Unsupervised/Data/main/movies.csv')
+imdb = pd.read_csv('https://raw.githubusercontent.com/Dream-Team-Unsupervised/Data/main/imdb_data.csv')
+imdb_data_budget = imdb['runtime'].to_list()
+ratings = pd.read_csv('resources/data/ratings.csv')
+movies = movies.dropna()
+@st.cache(allow_output_mutation=True)
+
+def get_base64_of_bin_file(bin_file):
+ with open(bin_file, 'rb') as f:
+ data = f.read()
+ return base64.b64encode(data).decode()
+
+def set_png_as_page_bg(png_file):
+ bin_str = get_base64_of_bin_file(png_file)
+ page_bg_img = '''
+
+ ''' % bin_str
+ st.markdown(page_bg_img, unsafe_allow_html=True)
+ return
+
+def visuals():
+
+ st.subheader('Xplore the Statistics')
+ st.write("**Hi Xplorer**, you can explore some interesting stats about the movies.")
+
+ with st.expander("Statistics Xplorer"):
+        st.write("### Movie KPIs 1900-2022")
+ kpi2, kpi1, kpi3 = st.columns(3)
+ my_dynamic_value = 3716.5
+
+ new_val = 222
+
+ final_val = my_dynamic_value / new_val
+
+ kpi1.metric(label = "Avg released movies",
+ value = 3716.5,
+ delta = 1411)
+
+ kpi3.metric(label = "Avg rating",
+ value = 3.54 )
+ st.info('Select 2 or more metrics to plot 1 or more movie statistics')
+ options = st.multiselect('', ['Ratings', 'Movies', 'Ratings Frequency','Distribution', 'Genres', 'Top Rated'])
+ if 'Movies' in options and 'Distribution' in options:
+
+ movies['year'] = [x[-1].strip('()') for x in movies.title.str.split(" ")]
+
+ num_pattern = r'^$|[a-zA-Z]|Τσιτσάνης|101次求婚|2006–2007|выбывание|پدر|Начальник|Джа|Девочки|первого'
+ movies["year"] = movies["year"].replace(to_replace = num_pattern, value = np.nan, regex = True)
+ year = [int(x) for x in movies["year"].dropna()]
+ fig = plt.figure(figsize=(9,3))
+ sns.histplot(year, kde = True,color = '#FF4B4B')
+ plt.xlabel('Year')
+ plt.xlim(left=1900, right = 2022)
+ plt.title('Movie Release Year Distribution', fontweight = 'bold')
+ st.pyplot(fig)
+            st.info(f'Our algorithms recommend from a few 90s movie classics and a significant number of 21st century movies')
+ if 'Ratings' in options and 'Distribution' in options:
+ # plot movie ratings distribution
+ fig = plt.figure(figsize=(9,3))
+ sns.boxplot(x = "rating", data=ratings, color = '#FF4B4B')
+ plt.title('Movie Ratings Distribution', fontweight = 'bold')
+ plt.show()
+ st.pyplot(fig)
+ st.info(f'Average rating distribution is {round(np.mean(ratings["rating"]),2)} with 75% of the ratings greater than 3.')
+ if 'Distribution' in options and 'Ratings Frequency' in options:
+ st.image('./resources/imgs/Ratings.png')
+ st.info("We can see the most frequent movie rating is at 4.0 with the least at 1.5")
+ if 'Genres' in options and 'Top Rated' in options:
+ st.image('./resources/imgs/genres.png')
+ st.info("We can see the most popular movie genres rated and drama is dramatically the most popular with 16.2%")
+
\ No newline at end of file
diff --git a/menu/trailers.py b/menu/trailers.py
new file mode 100644
index 00000000..3939c35a
--- /dev/null
+++ b/menu/trailers.py
@@ -0,0 +1,35 @@
+import streamlit as st
+from streamlit_player import st_player
+import streamlit as st
+import functions.comments as coms
+
+def vids():
+ year = st.slider('Select Release Year Period', 2022, 2018, 2022)
+ st.write('You selected movies released between', str(2018), 'and', str(year))
+ # embed a youtube video
+ if year == 2022:
+ with st.expander('Top 10 Best Movies 2022'):
+ st_player('https://youtu.be/ZTv5lBU6qQ0')
+ st.write('**Which movies did you like?**')
+ coms.commenter('Top 10 Best Movies 2022')
+ if year >= 2021:
+ with st.expander('Top 10 Best Movies 2021'):
+ # 2021
+ st.write('**Which movies did you like?**')
+ st_player('https://youtu.be/QKN-YwYwI_I')
+ coms.commenter('Top 10 Best Movies 2021')
+ if year >= 2020:
+ with st.expander('Top 10 Best Movies 2020'):
+ # 2020
+ st_player('https://youtu.be/rGfbhugP1NI')
+ coms.commenter('Top 10 Best Movies 2020')
+ if year >= 2019:
+ with st.expander('Top 10 Best Movies 2019'):
+ # 2019
+ st_player('https://youtu.be/48NL3N6KMFo?t=9')
+ coms.commenter('Top 10 Best Movies 2019')
+ if year >= 2018:
+ with st.expander('Top 10 Best Movies 2018'):
+            # 2018
+ st_player('https://youtu.be/FkUtWUy77fQ?t=9')
+ coms.commenter('Top 10 Best Movies 2018')
\ No newline at end of file
diff --git a/recommenders/collaborative_based.py b/recommenders/collaborative_based.py
index 861b5d8f..0efe643a 100644
--- a/recommenders/collaborative_based.py
+++ b/recommenders/collaborative_based.py
@@ -1,5 +1,4 @@
"""
-
Collaborative-based filtering for item recommendation.
Author: Explore Data Science Academy.
@@ -24,125 +23,92 @@
Description: Provided within this file is a baseline collaborative
filtering algorithm for rating predictions on Movie data.
-
"""
-
-# Script dependencies
+ # Script dependencies
import pandas as pd
import numpy as np
-import pickle
-import copy
-from surprise import Reader, Dataset
-from surprise import SVD, NormalPredictor, BaselineOnly, KNNBasic, NMF
-from sklearn.metrics.pairwise import cosine_similarity
-from sklearn.feature_extraction.text import CountVectorizer
+import streamlit as st
+from sklearn.neighbors import NearestNeighbors
+from scipy.sparse import csr_matrix
+
+# Suppress cell warnings for a cleaner notebook
+import warnings
+warnings.filterwarnings('ignore')
# Importing data
movies_df = pd.read_csv('resources/data/movies.csv',sep = ',')
ratings_df = pd.read_csv('resources/data/ratings.csv')
ratings_df.drop(['timestamp'], axis=1,inplace=True)
-# We make use of an SVD model trained on a subset of the MovieLens 10k dataset.
-model=pickle.load(open('resources/models/SVD.pkl', 'rb'))
-
-def prediction_item(item_id):
- """Map a given favourite movie to users within the
- MovieLens dataset with the same preference.
-
- Parameters
- ----------
- item_id : int
- A MovieLens Movie ID.
-
- Returns
- -------
- list
- User IDs of users with similar high ratings for the given movie.
-
- """
- # Data preprosessing
- reader = Reader(rating_scale=(0, 5))
- load_df = Dataset.load_from_df(ratings_df,reader)
- a_train = load_df.build_full_trainset()
+ # Below function creates a pivot table
- predictions = []
- for ui in a_train.all_users():
- predictions.append(model.predict(iid=item_id,uid=ui, verbose = False))
- return predictions
+def movie_data(movie):
+ # New pivot where each column would represent each unique userId and each row represents each unique movieId
+ movie_pivot = movie.pivot(index = 'movieId', columns = 'userId', values = 'rating')
+ # Convert NAN to zero value
+ movie_pivot.fillna(0, inplace = True)
-def pred_movies(movie_list):
- """Maps the given favourite movies selected within the app to corresponding
- users within the MovieLens dataset.
-
- Parameters
- ----------
- movie_list : list
- Three favourite movies selected by the app user.
-
- Returns
- -------
- list
- User-ID's of users with similar high ratings for each movie.
-
- """
- # Store the id of users
- id_store=[]
- # For each movie selected by a user of the app,
- # predict a corresponding user within the dataset with the highest rating
- for i in movie_list:
- predictions = prediction_item(item_id = i)
- predictions.sort(key=lambda x: x.est, reverse=True)
- # Take the top 10 user id's from each movie with highest rankings
- for pred in predictions[:10]:
- id_store.append(pred.uid)
- # Return a list of user id's
- return id_store
+ return movie_pivot
+ # Below function finds nearest neighbors and returns recommended movie list using cosine similarity between movies
+# @st.cache(show_spinner=False, suppress_st_warning=True)
# !! DO NOT CHANGE THIS FUNCTION SIGNATURE !!
-# You are, however, encouraged to change its content.
+# You are, however, encouraged to change its content.
def collab_model(movie_list,top_n=10):
- """Performs Collaborative filtering based upon a list of movies supplied
- by the app user.
-
- Parameters
- ----------
- movie_list : list (str)
- Favorite movies chosen by the app user.
- top_n : type
- Number of top recommendations to return to the user.
-
- Returns
- -------
- list (str)
- Titles of the top-n movie recommendations to the user.
-
- """
-
- indices = pd.Series(movies_df['title'])
- movie_ids = pred_movies(movie_list)
- df_init_users = ratings_df[ratings_df['userId']==movie_ids[0]]
- for i in movie_ids :
- df_init_users=df_init_users.append(ratings_df[ratings_df['userId']==i])
- # Getting the cosine similarity matrix
- cosine_sim = cosine_similarity(np.array(df_init_users), np.array(df_init_users))
- idx_1 = indices[indices == movie_list[0]].index[0]
- idx_2 = indices[indices == movie_list[1]].index[0]
- idx_3 = indices[indices == movie_list[2]].index[0]
- # Creating a Series with the similarity scores in descending order
- rank_1 = cosine_sim[idx_1]
- rank_2 = cosine_sim[idx_2]
- rank_3 = cosine_sim[idx_3]
- # Calculating the scores
- score_series_1 = pd.Series(rank_1).sort_values(ascending = False)
- score_series_2 = pd.Series(rank_2).sort_values(ascending = False)
- score_series_3 = pd.Series(rank_3).sort_values(ascending = False)
- # Appending the names of movies
- listings = score_series_1.append(score_series_1).append(score_series_3).sort_values(ascending = False)
- recommended_movies = []
- # Choose top 50
- top_50_indexes = list(listings.iloc[1:50].index)
- # Removing chosen movies
- top_indexes = np.setdiff1d(top_50_indexes,[idx_1,idx_2,idx_3])
- for i in top_indexes[:top_n]:
- recommended_movies.append(list(movies_df['title'])[i])
- return recommended_movies
+    # Use function to merge dataframes and select subset based on highest count of movie ratings
+ movie = movies_df.merge(ratings_df, how = 'left', on='movieId')
+ # Convert df to a pivot table and replace NAN value with zero
+ movie_pivot = movie_data(movie)
+ # Reduce sparsity to assist with computation time on large dataset
+ csr_item = csr_matrix(movie_pivot.values)
+ movie_pivot.reset_index(inplace=True)
+ # Initiate KNN model using NearestNeighbors and Cosine similarity
+ knn_item = NearestNeighbors(metric = 'cosine', algorithm = 'brute', n_neighbors = 20, n_jobs = -1)
+ knn_item.fit(csr_item)
+ #movie_list2 = [x[:-7] for x in movie_list]
+ # Empty list to store recommended movieID's
+ full_list = []
+    # Check if selected movie is in the movie dataframe
+ movie_list_1 = movies_df.loc[movies_df['title'] ==movie_list[0]]
+ movie_list_2 = movies_df.loc[movies_df['title'] ==movie_list[1]]
+ movie_list_3 = movies_df.loc[movies_df['title'] ==movie_list[2]]
+
+ if len(movie_list_1):
+ movie_index_1a = movie_list_1.iloc[0]['movieId'] # finds movieId of selected movie
+ movie_index_1 = movie_pivot[movie_pivot['movieId'] == movie_index_1a].index[0] # finds movie index in pivot table
+ distances_1 , indices_1 = knn_item.kneighbors(csr_item[movie_index_1],n_neighbors=top_n+1) # find 10 most similar movies with KNN model (index of movie and distance)
+ # index of recommended movies with distance in sorted list - most similar first
+ recommend_movie_indices_1 = sorted(list(zip(indices_1.squeeze().tolist(),distances_1.squeeze().tolist())),key=lambda x: x[1])[:0:-1] # excluding the selected movie
+ recommend_movie_indices_1 = recommend_movie_indices_1[0:4]
+
+ # Calculate the same for movie 2 and 3 as per movie 1 from movie list:
+
+ if len(movie_list_2):
+ movie_index_2a = movie_list_2.iloc[0]['movieId']
+ movie_index_2 = movie_pivot[movie_pivot['movieId'] == movie_index_2a].index[0]
+ distances_2 , indices_2 = knn_item.kneighbors(csr_item[movie_index_2],n_neighbors=top_n+1)
+ recommend_movie_indices_2 = sorted(list(zip(indices_2.squeeze().tolist(),distances_2.squeeze().tolist())),key=lambda x: x[1])[:0:-1]
+ recommend_movie_indices_2 = recommend_movie_indices_2[4:7]
+
+ if len(movie_list_3):
+ movie_index_3a = movie_list_3.iloc[0]['movieId']
+ movie_index_3 = movie_pivot[movie_pivot['movieId'] == movie_index_3a].index[0]
+ distances_3 , indices_3 = knn_item.kneighbors(csr_item[movie_index_3],n_neighbors=top_n+1)
+ recommend_movie_indices_3 = sorted(list(zip(indices_3.squeeze().tolist(),distances_3.squeeze().tolist())),key=lambda x: x[1])[:0:-1]
+ recommend_movie_indices_3 = recommend_movie_indices_3[7:10]
+
+
+ # Combine above three lists and sort from closest to lowest distance
+ full_list = recommend_movie_indices_1 + recommend_movie_indices_2 + recommend_movie_indices_3
+ full_list = sorted(full_list, key = lambda x:x[1], reverse = False)
+
+ recommend_list = [] # list for recommended movies
+ for item in full_list: # loop through recommended movies to find title of movies
+ movie_index = movie_pivot.iloc[item[0]]['movieId']
+ idx = movies_df[movies_df['movieId'] == movie_index].index
+ recommend_list.append({'Title':movies_df['title'].iloc[idx].values[0],'Distance':item[1]}) # extract title of movie
+ df_recommend = pd.DataFrame(recommend_list) # convert to dataframe
+
+ top_recommendations = df_recommend['Title'][:10].tolist()
+
+ return top_recommendations
diff --git a/recommenders/content_based.py b/recommenders/content_based.py
index ed7df363..df51cb08 100644
--- a/recommenders/content_based.py
+++ b/recommenders/content_based.py
@@ -26,18 +26,19 @@
filtering algorithm for rating predictions on Movie data.
"""
-
# Script dependencies
import os
import pandas as pd
import numpy as np
+import streamlit as st
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
+from mlxtend.preprocessing import TransactionEncoder
# Importing data
-movies = pd.read_csv('resources/data/movies.csv', sep = ',')
+
+movies_df = pd.read_csv('resources/data/movies.csv', sep = ',')
ratings = pd.read_csv('resources/data/ratings.csv')
-movies.dropna(inplace=True)
def data_preprocessing(subset_size):
"""Prepare data for use within Content filtering algorithm.
@@ -53,12 +54,22 @@ def data_preprocessing(subset_size):
Subset of movies selected for content-based filtering.
"""
- # Split genre data into individual words.
- movies['keyWords'] = movies['genres'].str.replace('|', ' ')
- # Subset of the data
- movies_subset = movies[:subset_size]
- return movies_subset
-
+ # Split genres column into individual words
+ genre = movies_df['genres'].str.split('|')
+ # Initiate TransactionEncoder
+ te = TransactionEncoder()
+ genre = te.fit_transform(genre) # Fit genre values
+ genre = pd.DataFrame(genre, columns = te.columns_) # Convert to DataFrame
+ # Convert boolean values to integers
+ genre = genre.astype('int')
+ # Insert the movie title
+ genre.insert(0, 'movie_title', movies_df['title'])
+ # Set movie title as index
+ genre = genre.set_index('movie_title')
+ # Transpose dataframe - movie title as column and genre as row
+ genre = genre.transpose()
+ return genre
+@st.cache(show_spinner=False, suppress_st_warning=True)
# !! DO NOT CHANGE THIS FUNCTION SIGNATURE !!
# You are, however, encouraged to change its content.
def content_model(movie_list,top_n=10):
@@ -78,35 +89,41 @@ def content_model(movie_list,top_n=10):
Titles of the top-n movie recommendations to the user.
"""
- # Initializing the empty list of recommended movies
- recommended_movies = []
- data = data_preprocessing(27000)
- # Instantiating and generating the count matrix
- count_vec = CountVectorizer()
- count_matrix = count_vec.fit_transform(data['keyWords'])
- indices = pd.Series(data['title'])
- cosine_sim = cosine_similarity(count_matrix, count_matrix)
- # Getting the index of the movie that matches the title
- idx_1 = indices[indices == movie_list[0]].index[0]
- idx_2 = indices[indices == movie_list[1]].index[0]
- idx_3 = indices[indices == movie_list[2]].index[0]
- # Creating a Series with the similarity scores in descending order
- rank_1 = cosine_sim[idx_1]
- rank_2 = cosine_sim[idx_2]
- rank_3 = cosine_sim[idx_3]
- # Calculating the scores
- score_series_1 = pd.Series(rank_1).sort_values(ascending = False)
- score_series_2 = pd.Series(rank_2).sort_values(ascending = False)
- score_series_3 = pd.Series(rank_3).sort_values(ascending = False)
- # Getting the indexes of the 10 most similar movies
- listings = score_series_1.append(score_series_1).append(score_series_3).sort_values(ascending = False)
-
- # Store movie names
- recommended_movies = []
- # Appending the names of movies
- top_50_indexes = list(listings.iloc[1:50].index)
- # Removing chosen movies
- top_indexes = np.setdiff1d(top_50_indexes,[idx_1,idx_2,idx_3])
- for i in top_indexes[:top_n]:
- recommended_movies.append(list(movies['title'])[i])
- return recommended_movies
+ genre = data_preprocessing(movies_df)
+ indices = pd.Series(movies_df['title']) # List of all index for movies
+ # Names of the three selected movies
+ movie_1 = genre[movie_list[0]]
+ movie_2 = genre[movie_list[1]]
+ movie_3 = genre[movie_list[2]]
+ # Calculate similar movies for selected movie one
+ similar_movie_1 = genre.corrwith(movie_1)
+ similar_movie_1 = similar_movie_1.sort_values(ascending = False) # Sort by correlation score
+ similar_movie_1 = similar_movie_1.iloc[1:] # Drop movie selected by user
+ # Remove correlation value and only keep movie title
+ movie_one = pd.DataFrame(similar_movie_1)
+ movie_one.reset_index(inplace = True)
+ movie_one = movie_one.drop(movie_one.columns[1], axis =1)
+ movie_one_list = movie_one['movie_title'].tolist() # Convert to a list
+ movie_one_list = movie_one_list[0:3] # Keep top three
+
+ # Same process will be followed for movie two and three
+ similar_movie_2 = genre.corrwith(movie_2)
+ similar_movie_2 = similar_movie_2.sort_values(ascending = False)
+ similar_movie_2 = similar_movie_2.iloc[1:]
+ movie_two = pd.DataFrame(similar_movie_2)
+ movie_two.reset_index(inplace = True)
+ movie_two = movie_two.drop(movie_two.columns[1], axis =1)
+ movie_two_list = movie_two['movie_title'].tolist() # Convert to a list
+ movie_two_list = movie_two_list[0:4] # Keep top four
+
+ similar_movie_3 = genre.corrwith(movie_3)
+ similar_movie_3 = similar_movie_3.sort_values(ascending = False)
+ similar_movie_3 = similar_movie_3.iloc[1:]
+ movie_three = pd.DataFrame(similar_movie_3)
+ movie_three.reset_index(inplace = True)
+ movie_three_list = movie_three['movie_title'].tolist() # Convert to a list
+ movie_three_list = movie_three_list[0:3] # Keep top three
+
+ recommended_movies = movie_one_list + movie_two_list + movie_three_list
+
+ return recommended_movies
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..ae718afd
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,204 @@
+absl-py==1.0.0
+ALS.Milo==0.18.1
+altair==4.2.0
+anaconda-navigator==2.2.0
+anyascii==0.3.1
+appdirs==1.4.4
+argh==0.26.2
+astunparse==1.6.3
+bkcharts==0.2
+black==19.10b0
+blinker==1.4
+blis==0.7.8
+boto==2.49.0
+brotlipy==0.7.0
+bs4==0.0.1
+catalogue==2.0.7
+certifi==2022.6.15
+click==8.0.3
+clyent==1.2.2
+comet-ml==3.31.3
+commonmark==0.9.1
+compress-pickle==2.1.0
+comtypes==1.1.10
+conda==4.13.0
+conda-build==3.21.9
+conda-verify==3.4.2
+configobj==5.0.6
+contractions==0.1.72
+cycler==0.10.0
+cymem==2.0.6
+cytoolz==0.11.0
+daal4py==2021.3.0
+dask==2021.10.0
+dulwich==0.20.42
+emoji==1.7.0
+entrypoints==0.3
+enum34==1.1.10
+et-xmlfile==1.1.0
+everett==3.0.0
+findspark==2.0.1
+flatbuffers==2.0
+fonttools==4.25.0
+ftfy==6.1.1
+gast==0.5.3
+gitdb==4.0.9
+GitPython==3.1.27
+google-auth-oauthlib==0.4.6
+google-pasta==0.2.0
+graphviz==0.20
+grpcio==1.46.1
+htmlmin==0.1.12
+hydralit==1.0.13
+hydralit-components==1.0.10
+ImageHash==4.2.1
+imbalanced-learn==0.9.1
+inflection==0.5.1
+install-jdk==0.3.0
+ipython-sql==0.4.0
+jupyter-tabnine==1.2.3
+keras==2.8.0
+Keras-Preprocessing==1.1.2
+langcodes==3.3.0
+lazy_loader==0.1rc2
+libclang==14.0.1
+lightgbm==3.3.2
+littleutils==0.2.2
+llvmlite==0.38.0
+locket==0.2.1
+Markdown==3.3.7
+MarkupSafe==2.1.1
+mccabe==0.6.1
+missingno==0.5.1
+mkl-fft==1.3.1
+mkl-service==2.4.0
+mlxtend==0.20.0
+mpmath==1.2.1
+multimethod==1.8
+munkres==1.1.4
+murmurhash==1.0.7
+mypy-extensions==0.4.3
+navigator-updater==0.2.1
+nlppreprocess==1.0.2
+nltk==3.7
+numpy==1.22.4
+nvidia-ml-py3==7.352.0
+opt-einsum==3.3.0
+outdated==0.2.1
+p2j==1.3.2
+packaging==21.3
+pandas-flavor==0.3.0
+pandas-ml==0.6.1
+pandas-profiling==3.2.0
+parfit==0.220
+pathlib==1.0.1
+pathspec==0.7.0
+pathy==0.6.1
+patsy==0.5.2
+pep8==1.7.1
+phik==0.12.2
+Pillow==8.4.0
+pingouin==0.5.2
+pkginfo==1.7.1
+PkgScript==0.6.1
+plotly-express==0.4.1
+ply==3.11
+preshed==3.0.6
+prettytable==0.7.2
+protobuf==3.20.1
+py4j==0.10.9.5
+pyahocorasick==1.4.4
+pyarrow==8.0.0
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycosat==0.6.3
+pycurl==7.44.1
+pydantic==1.8.2
+pydeck==0.7.1
+pyls-spyder==0.4.0
+Pympler==1.0.1
+PyMySQL==1.0.2
+pyodbc===4.0.0-unsupported
+pyreadline==2.1
+pyspark==3.3.0
+pyspellchecker==0.6.3
+pytest==6.2.4
+python-dotenv==0.19.2
+python-jsonrpc-server==0.4.0
+python-language-server==0.36.2
+python-lsp-jsonrpc==1.0.0
+python-lsp-server==1.2.4
+python-pipeline==1.0
+pytz==2021.3
+pytz-deprecation-shim==0.1.0.post0
+pywin32==228
+PyYAML==6.0
+raimitigations==0.0.3
+requests-toolbelt==0.9.1
+rich==12.4.4
+ruamel.yaml==0.17.21
+ruamel.yaml.clib==0.2.6
+scikit-image==0.18.3
+scikit-learn-intelex==2021.20210714.120553
+scikit-surprise==1.1.1
+semantic-version==2.10.0
+semver==2.13.0
+sequential==1.0.0
+shap==0.40.0
+simplegeneric==0.8.1
+simplejson==3.17.6
+sip==4.19.13
+sklearn==0.0
+slicer==0.0.7
+smart-open==5.2.1
+smmap==5.0.0
+spacy==3.3.1
+spacy-legacy==3.0.9
+spacy-loggers==1.0.2
+Sphinx==4.2.0
+sql-magic==0.0.4
+sqlparse==0.4.2
+srsly==2.4.3
+statsmodels==0.13.2
+streamlit==1.11.0
+streamlit-aggrid==0.2.3.post2
+streamlit-lottie==0.0.3
+streamlit-player==0.1.5
+streamlit-tags==1.2.8
+tables==3.6.1
+tabulate==0.8.10
+tangled-up-in-unicode==0.2.0
+TBB==0.2
+tensorboard==2.8.0
+tensorboard-data-server==0.6.1
+tensorboard-plugin-wit==1.8.1
+tensorflow==2.8.0
+tensorflow-io-gcs-filesystem==0.25.0
+termcolor==1.1.0
+terminado==0.9.4
+textblob==0.17.1
+textsearch==0.0.21
+tf-estimator-nightly==2.8.0.dev2021122109
+thinc==8.0.17
+torch==1.10.2
+truncated-famd==0.0.1
+typer==0.4.1
+tzdata==2022.1
+tzlocal==4.2
+unicodecsv==0.14.1
+urllib3==1.26.7
+validators==0.20.0
+vega-datasets==0.9.0
+visions==0.7.4
+wasabi==0.9.1
+webencodings==0.5.1
+websocket-client==1.3.2
+win-unicode-console==0.5
+wincertstore==0.2
+wurlitzer==3.0.2
+xarray==2022.6.0
+xgboost==1.5.2
+xlwings==0.24.9
+xlwt==1.3.0
+zict==2.0.0
+zope.event==4.5.0
diff --git a/resources/data/comments.csv b/resources/data/comments.csv
new file mode 100644
index 00000000..681d9615
--- /dev/null
+++ b/resources/data/comments.csv
@@ -0,0 +1,12 @@
+name,comment,date
+Thulani Nyama,No,16/07/2022 16:39:55
+Thulani Nyama,Yes,16/07/2022 16:46:52
+Thulani Nyama,Maybe,16/07/2022 16:47:05
+Elle,Awesome movie,16/07/2022 19:17:57
+Siyanda,What a movie,16/07/2022 20:26:07
+Riaan,Red Notice is the best,17/07/2022 14:34:38
+Thulani Nyama,Wow Red Notice is the best,17/07/2022 20:30:14
+Thulani Nyama,Wow,18/07/2022 12:48:33
+Thabang,Extraction is awesome,22/07/2022 17:40:20
+Thulani Nyama,"Wow, I Red Notice",26/07/2022 20:33:45
+Thulani Nyama,Wow,28/07/2022 14:33:12
diff --git a/resources/imgs/DreamTeam.PNG b/resources/imgs/DreamTeam.PNG
new file mode 100644
index 00000000..0d8139f2
Binary files /dev/null and b/resources/imgs/DreamTeam.PNG differ
diff --git a/resources/imgs/Header.gif b/resources/imgs/Header.gif
new file mode 100644
index 00000000..14fed26f
Binary files /dev/null and b/resources/imgs/Header.gif differ
diff --git a/resources/imgs/Header2.gif b/resources/imgs/Header2.gif
new file mode 100644
index 00000000..7005010e
Binary files /dev/null and b/resources/imgs/Header2.gif differ
diff --git a/resources/imgs/Header2L.gif b/resources/imgs/Header2L.gif
new file mode 100644
index 00000000..42f96301
Binary files /dev/null and b/resources/imgs/Header2L.gif differ
diff --git a/resources/imgs/MovieXplorer.png b/resources/imgs/MovieXplorer.png
new file mode 100644
index 00000000..5e5a5603
Binary files /dev/null and b/resources/imgs/MovieXplorer.png differ
diff --git a/resources/imgs/Ratings.PNG b/resources/imgs/Ratings.PNG
new file mode 100644
index 00000000..8678aef0
Binary files /dev/null and b/resources/imgs/Ratings.PNG differ
diff --git a/resources/imgs/Rtest1.gif b/resources/imgs/Rtest1.gif
new file mode 100644
index 00000000..c5948be6
Binary files /dev/null and b/resources/imgs/Rtest1.gif differ
diff --git a/resources/imgs/budget.PNG b/resources/imgs/budget.PNG
new file mode 100644
index 00000000..6a2caec7
Binary files /dev/null and b/resources/imgs/budget.PNG differ
diff --git a/resources/imgs/download (1).png b/resources/imgs/download (1).png
new file mode 100644
index 00000000..f45152fe
Binary files /dev/null and b/resources/imgs/download (1).png differ
diff --git a/resources/imgs/dream_works.gif b/resources/imgs/dream_works.gif
new file mode 100644
index 00000000..35b737dc
Binary files /dev/null and b/resources/imgs/dream_works.gif differ
diff --git a/resources/imgs/genres.png b/resources/imgs/genres.png
new file mode 100644
index 00000000..1915517b
Binary files /dev/null and b/resources/imgs/genres.png differ
diff --git a/resources/imgs/newplot2.png b/resources/imgs/newplot2.png
new file mode 100644
index 00000000..e7d5794c
Binary files /dev/null and b/resources/imgs/newplot2.png differ
diff --git a/resources/imgs/newplot4.png b/resources/imgs/newplot4.png
new file mode 100644
index 00000000..b95887c7
Binary files /dev/null and b/resources/imgs/newplot4.png differ
diff --git a/resources/imgs/newplot5.png b/resources/imgs/newplot5.png
new file mode 100644
index 00000000..7e473656
Binary files /dev/null and b/resources/imgs/newplot5.png differ
diff --git a/resources/imgs/stats.gif b/resources/imgs/stats.gif
new file mode 100644
index 00000000..2f1511ec
Binary files /dev/null and b/resources/imgs/stats.gif differ
diff --git a/resources/vids/Recommend.mp4 b/resources/vids/Recommend.mp4
new file mode 100644
index 00000000..6bae8d17
Binary files /dev/null and b/resources/vids/Recommend.mp4 differ
diff --git a/resources/vids/trailers.mp4 b/resources/vids/trailers.mp4
new file mode 100644
index 00000000..3b5b81b8
Binary files /dev/null and b/resources/vids/trailers.mp4 differ
diff --git a/utils/db.py b/utils/db.py
new file mode 100644
index 00000000..0553384e
--- /dev/null
+++ b/utils/db.py
@@ -0,0 +1,73 @@
+# import socket
+
+# import google_auth_httplib2
+# import httplib2
+# import pandas as pd
+# import streamlit as st
+# from google.oauth2 import service_account
+# from googleapiclient.discovery import build
+# from googleapiclient.http import HttpRequest
+
+# socket.setdefaulttimeout(15 * 60)
+
+# SCOPE = "https://www.googleapis.com/auth/spreadsheets"
+# SPREADSHEET_ID = "1rkMVLvh3JrBq_tbi4Ho0qjCDAP3vYdNuWOEjYpkJLNU"
+# SHEET_NAME = "Database"
+# GSHEET_URL = f"https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}"
+
+
+# @st.experimental_singleton()
+# def connect():
+# # Create a connection object.
+# credentials = service_account.Credentials.from_service_account_info(
+# st.secrets["gcp_service_account"],
+# scopes=[SCOPE],
+# )
+
+# # Create a new Http() object for every request
+# def build_request(http, *args, **kwargs):
+# new_http = google_auth_httplib2.AuthorizedHttp(
+# credentials, http=httplib2.Http()
+# )
+# return HttpRequest(new_http, *args, **kwargs)
+
+# authorized_http = google_auth_httplib2.AuthorizedHttp(
+# credentials, http=httplib2.Http()
+# )
+# service = build(
+# "sheets",
+# "v4",
+# requestBuilder=build_request,
+# http=authorized_http,
+# )
+# gsheet_connector = service.spreadsheets()
+# return gsheet_connector
+
+
+# def collect(gsheet_connector) -> pd.DataFrame:
+# values = (
+# gsheet_connector.values()
+# .get(
+# spreadsheetId=SPREADSHEET_ID,
+# range=f"{SHEET_NAME}!A:C",
+# )
+# .execute()
+# )
+
+# df = pd.DataFrame(values["values"])
+# df.columns = df.iloc[0]
+# df = df[1:]
+# return df
+
+
+# def insert(gsheet_connector, row) -> None:
+# values = (
+# gsheet_connector.values()
+# .append(
+# spreadsheetId=SPREADSHEET_ID,
+# range=f"{SHEET_NAME}!A:C",
+# body=dict(values=row),
+# valueInputOption="USER_ENTERED",
+# )
+# .execute()
+# )
\ No newline at end of file
diff --git a/utils/style.css b/utils/style.css
new file mode 100644
index 00000000..7293b1d0
--- /dev/null
+++ b/utils/style.css
@@ -0,0 +1,28 @@
+/* Style inputs with type="text", type="email" and textareas */
+input[type=text], input[type=email], textarea {
+ width: 100%; /* Full width */
+ padding: 12px; /* Some padding */
+ border: 3px solid #FF4B4B; /* redish border */
+ border-radius: 10px; /* Rounded borders */
+ box-sizing: border-box; /* Make sure that padding and width stays in place */
+ margin-top: 6px; /* Add a top margin */
+ margin-bottom: 16px; /* Bottom margin */
+ resize: vertical /* Allow the user to vertically resize the textarea (not horizontally) */
+ }
+
+ /* Style the submit button with a specific background color etc */
+ button[type=submit] {
+ background-color: #FF4B4B;
+ color: white;
+ padding: 12px 20px;
+ border: none;
+ border-radius: 10px;
+ cursor: pointer;
+ margin-left: 6px;
+ }
+
+ /* When moving the mouse over the submit button, add a darker gray color */
+ button[type=submit]:hover {
+ background-color: white;
+ color: #FF4B4B;
+ }
\ No newline at end of file