-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
135 lines (99 loc) · 4.39 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Task 1: Reading Data
# Write a function read_ratings_data(f) that takes in a ratings file and returns a dictionary mapping each movie to the list of its ratings.
def read_ratings_data(f):
    """Read a ratings file and group the ratings by movie.

    Only lines with exactly three ``|``-separated fields are used: the first
    field is the movie key and the second is parsed as the rating. The third
    field is ignored here. Lines with any other number of fields (for
    example blank lines) are skipped.

    Args:
        f: Path of the ratings file to open.

    Returns:
        A dict mapping each movie to the list of its ratings (floats), in
        the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a rating field cannot be converted to ``float``.
    """
    movie_ratings_dict = {}
    # The context manager closes the file even if a rating fails to parse.
    with open(f) as rating_file:
        for line in rating_file.read().split('\n'):
            fields = line.split('|')
            if len(fields) != 3:
                continue
            movie, rating, _ = fields
            movie_ratings_dict.setdefault(movie, []).append(float(rating))
    return movie_ratings_dict
# Parse the sample ratings file when the module runs and print the result.
# NOTE: `ratingFile` and `rattings` (sic) are module-level names that are
# referenced again further down this file, so they must keep these spellings.
ratingFile = "movieRatingSample.txt"
rattings = read_ratings_data(ratingFile)
print(rattings)
# Write a function read_movie_genre(f) that takes in a movies file and returns a dictionary
def read_movie_genre(f):
    """Read a movies file and map each movie to its genre.

    Only lines with exactly three ``|``-separated fields are used: the third
    field becomes the key and the first field becomes the value. The middle
    field is ignored. If the same key appears more than once, the last
    occurrence wins.

    Args:
        f: Path of the movies file to open.

    Returns:
        A dict mapping the third field of each line (the movie) to the
        first field of that line (its genre).

    Raises:
        OSError: If the file cannot be opened.
    """
    movie_genre_dict = {}
    # The context manager guarantees the file handle is released.
    with open(f) as genre_file:
        for line in genre_file.read().split('\n'):
            fields = line.split('|')
            if len(fields) != 3:
                continue
            genre, _, movie = fields
            movie_genre_dict[movie] = genre
    return movie_genre_dict
# Parse the sample movies file when the module runs and print the
# movie -> genre dictionary returned by read_movie_genre.
genreFile = "genreMovieSample.txt"
movie_to_genre = read_movie_genre(genreFile)
print(movie_to_genre)
# Task 2: Processing Data
# Genre dictionary
def create_genre_dict(movie_to_genre):
    """Invert a movie -> genre mapping into a genre -> movies mapping.

    Args:
        movie_to_genre: Dict whose keys are movies and whose values are
            genres.

    Returns:
        A dict mapping each genre to the list of movies carrying it, in the
        iteration order of ``movie_to_genre``.
    """
    movies_by_genre = {}
    for title, genre_name in movie_to_genre.items():
        movies_by_genre.setdefault(genre_name, []).append(title)
    return movies_by_genre
# Build the genre -> movies dictionary from the module-level movie_to_genre
# mapping and print it.
genre_to_movie = create_genre_dict(movie_to_genre)
print(genre_to_movie)
# Average Rating
def calculate_average_rating(ratings):
    """Compute the average rating of every movie, rounded to one decimal.

    Args:
        ratings: Dict mapping each movie to a non-empty list of numeric
            ratings.

    Returns:
        A dict mapping each movie to ``round(mean, 1)`` of its ratings.

    Raises:
        ZeroDivisionError: If a movie's rating list is empty.
    """
    # Read from the ``ratings`` argument; the previous version looked the
    # scores up in a module-level global and ignored what the caller passed.
    return {
        movie: round(sum(scores) / len(scores), 1)
        for movie, scores in ratings.items()
    }
# Compute the per-movie averages from the module-level `rattings` dictionary
# and print them.
movie_to_average = calculate_average_rating(rattings)
print(movie_to_average)
# Task 3: Recommendation
# Popularity based
def get_popular_movies(movie_to_average, n=10):
    """Return the ``n`` movies with the highest average rating.

    Args:
        movie_to_average: Dict mapping each movie to its average rating.
        n: Maximum number of movies to return. Defaults to 10, which was the
            previously hard-coded cut-off.

    Returns:
        A dict of at most ``n`` movie -> average entries, ordered from the
        highest average to the lowest. Movies with equal averages keep their
        relative order from ``movie_to_average`` because the sort is stable.
    """
    ranked = sorted(
        movie_to_average.items(), key=lambda item: item[1], reverse=True
    )
    return dict(ranked[:n])
# Threshold Rating
def filter_movies(movie_to_average, thresholdRatting = 3):
    """Keep only the movies whose average rating reaches a threshold.

    Args:
        movie_to_average: Dict mapping each movie to its average rating.
        thresholdRatting: Minimum average (inclusive) a movie needs to be
            kept. Defaults to 3.

    Returns:
        A new dict with the movie -> average entries whose average is
        greater than or equal to ``thresholdRatting``, in input order.
    """
    # The leftover debug print of the whole input dict was removed so this
    # function no longer writes to stdout as a side effect.
    return {
        movie: average
        for movie, average in movie_to_average.items()
        if average >= thresholdRatting
    }
# Popularity + Genre based
def get_popular_in_genre(genre,genre_to_movie,movie_to_average,n=5):
    """Return the ``n`` best-rated movies of one genre.

    Args:
        genre: Genre to look up in ``genre_to_movie``.
        genre_to_movie: Dict mapping each genre to a list of its movies.
        movie_to_average: Dict mapping each movie to its average rating.
        n: Maximum number of movies to return. Defaults to 5.

    Returns:
        A dict of at most ``n`` movie -> average entries for movies that are
        both listed under ``genre`` and present in ``movie_to_average``,
        ordered from the highest average to the lowest.

    Raises:
        KeyError: If ``genre`` is not a key of ``genre_to_movie``.
    """
    # Look the genre up once and use a set, instead of re-scanning the
    # genre's movie list for every rated movie.
    genre_movies = set(genre_to_movie[genre])
    in_genre = [
        (movie, average)
        for movie, average in movie_to_average.items()
        if movie in genre_movies
    ]
    # list.sort is stable, so ties keep their movie_to_average order.
    in_genre.sort(key=lambda item: item[1], reverse=True)
    return dict(in_genre[:n])
# Genre Rating
def get_genre_rating(genre,genre_to_movie,movie_to_average):
    """Return the mean of the average ratings of one genre's movies.

    Args:
        genre: Genre to look up in ``genre_to_movie``.
        genre_to_movie: Dict mapping each genre to a list of its movies.
        movie_to_average: Dict mapping each movie to its average rating.

    Returns:
        The arithmetic mean of the averages of the movies that are listed
        under ``genre`` and present in ``movie_to_average``. Returns ``0.0``
        when none of the genre's movies has an average, instead of failing
        with a division by zero.

    Raises:
        KeyError: If ``genre`` is not a key of ``genre_to_movie``.
    """
    # One lookup and a set give O(1) membership tests in the filter below.
    genre_movies = set(genre_to_movie[genre])
    averages = [
        average
        for movie, average in movie_to_average.items()
        if movie in genre_movies
    ]
    if not averages:
        return 0.0
    return sum(averages) / len(averages)
# Genre Popularity
def genre_popularity(genre_to_movie,movie_to_average,n = 5):
    """Return the ``n`` genres with the highest genre rating.

    Args:
        genre_to_movie: Dict mapping each genre to a list of its movies.
        movie_to_average: Dict mapping each movie to its average rating.
        n: Maximum number of genres to return. Defaults to 5.

    Returns:
        A dict of at most ``n`` genre -> rating entries, where each rating
        comes from ``get_genre_rating``, ordered from the highest rating to
        the lowest.
    """
    genre_scores = {}
    for genre_name in list(genre_to_movie):
        genre_scores[genre_name] = get_genre_rating(
            genre_name, genre_to_movie, movie_to_average
        )
    ranked = sorted(genre_scores.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ranked[0:n])
# Task 4 (User Focused)
# read_user_ratings
def read_user_ratings(f):
    """Read a ratings file and group the ratings by user.

    Only lines with exactly three ``|``-separated fields are used: the first
    field is the movie, the second is parsed as the rating, and the third is
    the key the ratings are grouped under. Other lines are skipped.

    Args:
        f: Path of the ratings file to open.

    Returns:
        A dict mapping each third-field value (the user) to a list of
        ``(movie, rating)`` tuples in file order, with ``rating`` as a float.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a rating field cannot be converted to ``float``.
    """
    movie_ratings_dict = {}
    # The context manager closes the file even if a rating fails to parse.
    with open(f) as rating_file:
        for line in rating_file.read().split('\n'):
            fields = line.split('|')
            if len(fields) != 3:
                continue
            movie, rating, user = fields
            movie_ratings_dict.setdefault(user, []).append((movie, float(rating)))
    return movie_ratings_dict
# Re-read the same ratings file (module-level `ratingFile`), this time
# grouped by user, and print the result.
user_to_movie = read_user_ratings(ratingFile)
print(user_to_movie)
def get_user_genre(user_id,user_to_movie,movie_to_genre):
    """Return the genre a user rates highest on average.

    The user's ratings are grouped by the genre of the rated movie, each
    group is averaged and rounded to two decimals, and the genre with the
    highest rounded average wins. On a tie, the genre that appears first in
    ``movie_to_genre`` (among the user's rated movies) is returned.

    Args:
        user_id: Key of the user in ``user_to_movie``.
        user_to_movie: Dict mapping each user to a list of
            ``(movie, rating)`` tuples.
        movie_to_genre: Dict mapping each movie to its genre.

    Returns:
        The top genre, or ``None`` when none of the user's rated movies has
        an entry in ``movie_to_genre``.

    Raises:
        KeyError: If ``user_id`` is not a key of ``user_to_movie``.
    """
    user_ratings = user_to_movie[user_id]
    rated_titles = {title for title, _ in user_ratings}

    # Seed the genres in movie_to_genre order so that ties are broken the
    # same way as before (max returns the first maximal key).
    scores_by_genre = {
        genre: []
        for title, genre in movie_to_genre.items()
        if title in rated_titles
    }
    for title, score in user_ratings:
        # Rated movies without a genre entry are skipped rather than
        # aborting the whole computation with a KeyError.
        if title in movie_to_genre:
            scores_by_genre[movie_to_genre[title]].append(score)

    if not scores_by_genre:
        return None

    genre_average = {
        genre: round(sum(scores) / len(scores), 2)
        for genre, scores in scores_by_genre.items()
    }
    return max(genre_average, key=genre_average.get)
# print({k:k for k in genre})
# recommend_movies
def recommend_movies(user_id,user_to_movie,movie_to_genre,movie_to_average):
    """Recommend up to three unseen movies from a user's top genre.

    Args:
        user_id: Key of the user in ``user_to_movie``.
        user_to_movie: Dict mapping each user to a list of
            ``(movie, rating)`` tuples.
        movie_to_genre: Dict mapping each movie to its genre.
        movie_to_average: Dict mapping each movie to its average rating.

    Returns:
        A dict of at most three movie -> average entries, ordered from the
        highest average to the lowest. Only movies that belong to the genre
        returned by ``get_user_genre`` for this user, that the user has not
        rated, and that have an entry in ``movie_to_average`` are considered.

    Raises:
        KeyError: If ``user_id`` is not a key of ``user_to_movie``.
    """
    # Use the requested user; the id was previously hard-coded to '1'.
    genre = get_user_genre(user_id,user_to_movie,movie_to_genre)

    # Exclude every movie the user rated. The previous check only looked
    # inside the first (movie, rating) tuple of the user's list.
    already_rated = {title for title, _ in user_to_movie[user_id]}

    movies_of_genre = {
        title: movie_to_average[title]
        for title, movie_genre in movie_to_genre.items()
        if movie_genre == genre
        and title not in already_rated
        and title in movie_to_average  # unrated movies cannot be ranked
    }
    ranked = sorted(movies_of_genre.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked[:3])
# Print the result of recommend_movies called with user id '6' and the
# module-level dictionaries built above.
print(recommend_movies('6',user_to_movie,movie_to_genre,movie_to_average))
# for row in get_user_genre('1',user_to_movie,movie_to_genre).items():
# print(row)