-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
239 lines (193 loc) · 7.84 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
import json
import requests
import nltk
import response as response
from nltk import sent_tokenize
from nltk import word_tokenize
from nltk.probability import FreqDist
from nltk.corpus import stopwords
import main_functions
from pprint import pprint
from wordcloud import WordCloud
import streamlit as st
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import re
import ax
from PIL import Image
import datetime
# Developer Name: Ricardo Brown
# Project 1
# Streamlit Application UI
# --- Part A: page header, greeting and topic selection ----------------------
st.title("COP 4813 Web Application Programming")
st.header("Project 1")
st.subheader("Part A - The Stories API")

# Introductory blurb shown under the Part A heading.
intro_text = (
    "This app uses the Top Stories API to Display the most common words used in the top "
    "current articles based on a specific topic selected by the user. The data is displayed "
    "as a line chart and as a wordcloud image."
)
st.write(intro_text)

# Date picker pre-filled with the current date and time.
today = st.date_input("Today is", datetime.datetime.now())

st.subheader("I - Topic Selection")

# Ask for the visitor's name and greet them once something has been typed.
userName = st.text_input("Please enter your name")
if userName != "":
    st.write(f"Hi {userName}! Welcome to the program.")

# Topics offered to the visitor; the selection is used later to build the
# Top Stories request URL.
topic_choices = [
    "arts", "automobiles", "books", "fashion", "food", "health",
    "home", "insider", "magazine", "movies", "nyregion", "obituaries", "opinion",
    "politics", "realestate", "Science", "sports", "sundayreview", "technology",
    "theater", "t-magazine", "travel", "upshot", "us", "world",
]
selectTopic = st.selectbox("Select a topic of your interest", topic_choices)
# --- Sidebar: banner images and the project-rating prompt -------------------
st.sidebar.header("Welcome to Ricardo's Web Application")

image0 = Image.open('images/u2aimg2.jpg')
image2 = Image.open('images/rate1.jpg')
st.image(image0, caption="City Clip-Art", use_column_width=True)

# The rating prompt is rendered exactly once per script run. The previous
# version wrapped these two calls in a `while` loop whose counter was reset on
# every pass and which always hit `break` after the first iteration, so the
# loop never repeated; plain straight-line code does the same thing.
st.sidebar.image(image2, use_column_width=True)
optionsRate = st.sidebar.selectbox(
    " On a scale from suite man to suite man, how would you rate this project?",
    ["", "6", "5", "4", "3", "2", "1"],
)
# --- Sidebar: picture (and thank-you note) matching the chosen rating -------
# Each rating maps to one picture shown in the sidebar.
rating_images = {
    "1": 'images/IMPROVED.745.jpg',
    "2": 'images/red-white-fi1.png',
    "3": 'images/6-2-confetti.png',
    "4": 'images/-fireworks457.png',
    "5": 'images/balloon_52.png',
    "6": 'images/celebration.1.jpg',
}
thank_you = "Thank you! I learned alot in this project"

if optionsRate in rating_images:
    rating_picture = Image.open(rating_images[optionsRate])
    # Ratings "1" and "2" additionally show a thank-you banner in the main
    # area: after the picture for "1", before it for "2".
    if optionsRate == "2":
        st.success(thank_you)
    st.sidebar.image(rating_picture, use_column_width=True)
    if optionsRate == "1":
        st.success(thank_you)
# No name entered yet: play a progress animation, then remind the visitor.
if userName == "":
    st.write("Loading...")
    status_line = st.empty()
    progress_bar = st.progress(0)
    # 100 steps with a 0.1 s pause after each one.
    for step in range(1, 101):
        status_line.text(f'Iteration {step}')
        progress_bar.progress(step)
        time.sleep(0.1)
    st.warning("Please type your name in messagebox above")
# --- Fetch the Top Stories for the chosen topic and build word statistics ---
api_key_dict = main_functions.read_from_file("JSON_Files/api_key.json")
api_key = api_key_dict["my_key"]

# The topic list contains a capitalised "Science" entry while every other
# entry is lowercase; normalise the case before building the request path.
url = (
    "https://api.nytimes.com/svc/topstories/v2/"
    + selectTopic.lower()
    + ".json?api-key="
    + api_key
)

# A timeout keeps a stalled connection from hanging the app indefinitely.
# The payload gets its own name so the imported `response` name is not
# overwritten.
top_stories_response = requests.get(url, timeout=30).json()
main_functions.save_to_file(top_stories_response, "JSON_Files/response.json")
my_articles = main_functions.read_from_file("JSON_Files/response.json")

# A payload without "results" (for example an error reply) used to raise a
# KeyError; report it on the page and carry on with no articles instead.
try:
    articles = my_articles["results"]
except KeyError:
    st.error("The Top Stories API did not return any results for this topic.")
    articles = []

# Join the abstracts with a space so the last word of one abstract and the
# first word of the next are not fused into a single token.
str1 = " ".join(article["abstract"] for article in articles)

sentences = sent_tokenize(str1)
words = word_tokenize(str1)

# Frequency of every raw token in the text.
fdist = FreqDist(words)

# Keep alphabetic tokens only (drops punctuation), lowercased.
words_no_punc = [w.lower() for w in words if w.isalpha()]
fdist2 = FreqDist(words_no_punc)

# Drop English stop words. A set gives constant-time membership tests, and a
# separate name leaves the imported `stopwords` corpus reader untouched.
stop_words = set(stopwords.words("english"))
clean_words = [w for w in words_no_punc if w not in stop_words]

# Frequency distribution of the cleaned words, used by the chart below.
fdist3 = FreqDist(clean_words)
# --- II: frequency distribution chart ---------------------------------------
st.subheader("II - Frequency Distribution")
selectFrequency = st.checkbox("Click here to generate frequency distribution")

# Ten most common cleaned words with their counts; also read further down the
# script, so it is computed whether or not the checkbox is ticked.
myData = fdist3.most_common(10)
newWords = []

if selectTopic != "" and selectFrequency:
    # Persist the chart data; a later part of the script reads this file.
    main_functions.save_to_file(myData, "JSON_Files/chartData.json")

    # One row per word, indexed by the word so the chart's x axis shows the
    # words and the single "Count" series is plotted against them. (Passing
    # the data a second time as the index, as before, used the row data
    # itself as the row labels.)
    chart_dat = pd.DataFrame(myData, columns=["Words", "Count"]).set_index("Words")
    st.line_chart(chart_dat)
    st.write(chart_dat)

    # Static sample data for the second, matplotlib-based chart.
    exampleWords = ["New", "Book", "History", "Novel", "Times", "President",
                    "Players", "Car", "Pandemic", "Coronavirus"]
    exampleNumbers = [39, 15, 25, 30, 14, 9, 15, 12, 15, 23]

    # `axes` rather than `ax`, so the imported `ax` module is not overwritten.
    fig, axes = plt.subplots(figsize=(10, 6))
    axes.plot(exampleWords, exampleNumbers, marker='o', color='cyan')
    barChart = axes.set(title="The most popular words of all articles topics 2020",
                        xlabel="Words", ylabel="Count")
    plt.setp(axes.get_xticklabels(), rotation=45)

    # Hand the figure to Streamlit explicitly instead of relying on pyplot's
    # global "current figure", then close it so figures do not pile up.
    st.pyplot(fig)
    plt.close(fig)
def save_to_file(data, file_name):
    """Write *data* to *file_name* as indented JSON and report it on stdout.

    The payload is serialised before the file is opened, so data that cannot
    be encoded as JSON raises without truncating or partially overwriting a
    file that already exists.

    Args:
        data: Any JSON-serialisable object.
        file_name: Path of the file to create or overwrite.

    Raises:
        TypeError: If *data* contains an object ``json`` cannot serialise.
        ValueError: If *data* contains a circular reference.
        OSError: If the file cannot be opened for writing.
    """
    serialised = json.dumps(data, indent=2)
    with open(file_name, "w", encoding="utf-8") as write_file:
        write_file.write(serialised)
    print("You sucessfully saved to {}.".format(file_name))
def remove(list):
    """Return the given strings with every digit 0-9 stripped out.

    Args:
        list: Iterable of strings to clean. The parameter name shadows the
            builtin; it is kept so existing callers are unaffected.

    Returns:
        A new list holding each input string, in order, with its digits
        removed.
    """
    digits = re.compile('[0-9]')
    cleaned = []
    for text in list:
        cleaned.append(digits.sub('', text))
    return cleaned
# Scratch demo of remove(): prints a few sample strings with digits stripped.
# The sample list used to be bound to the name `list`, which replaced the
# builtin for the rest of the script; it now has its own name.
sample_strings = ['4geeks', '3for', '4geeks5']
print(remove(sample_strings))

data_parameters = {
    1: ["new", 9],
}

# Reads the chart data written by the frequency-distribution section.
# NOTE(review): that file is only written once the frequency checkbox has been
# ticked; what happens when it is missing depends on
# main_functions.read_from_file - confirm.
Chart_data = main_functions.read_from_file("JSON_Files/chartData.json")

# Intended to remove numbers from the most-common-words data.
# NOTE(review): myData holds the result of fdist3.most_common(10), presumably
# (word, count) pairs, so comparing its entries with these integers likely
# never matches and `res` ends up equal to myData - confirm the intent.
test_lits = [1, 3, 4, 6, 7]
remove_list = [3, 6, 1, 2, 4, 5, 7, 8, 9, 0]
res = [w for w in myData if w not in remove_list]
# --- III: word cloud built from the fetched abstracts -----------------------
st.subheader("III - Wordcloud")

# The label used to be a copy of the frequency-distribution checkbox label;
# it now names what the checkbox actually does. A descriptive string key
# replaces the integer 1, which other widgets in this script also use.
choice = "wordcloud_checkbox"
selectCloud = st.checkbox("Click here to generate wordcloud", key=choice)

if selectCloud:
    if str1.strip():
        # The image is rendered straight from the word cloud's pixel array.
        # The pyplot figure that used to be created here was never displayed
        # or closed, so it only accumulated on every rerun.
        wordcloud = WordCloud().generate(str1)
        st.image(wordcloud.to_array())
    else:
        # Avoid handing an empty text to the word cloud generator.
        st.warning("There is no article text to build a wordcloud from.")
# === Part B: most popular articles ==========================================
st.header("Part B - Most Popular Articles")
st.subheader("Select if you want to see the most shared, emailed, or viewed articles.")

# Each widget gets its own key; both selectboxes used to share the key 1.
choice2 = "popular_articles_kind"
selectPopular = st.selectbox("Select your prefered set of articles",
                             ["", "shared", "emailed", "viewed"], key=choice2)
choice3 = "popular_articles_days"
selectDays = st.selectbox("Select the period of time (last 30 days)",
                          ["", "1", "7", "30"], key=choice3)

# Build the word cloud only once both a list type and a period are chosen.
# Previously the list type was ignored and the Part A text was drawn again.
if selectPopular and selectDays:
    popular_url = (
        "https://api.nytimes.com/svc/mostpopular/v2/"
        + selectPopular + "/" + selectDays
        + ".json?api-key=" + api_key
    )
    try:
        popular_articles = requests.get(popular_url, timeout=30).json()
    except (requests.RequestException, ValueError):
        # Network failure or a reply that is not valid JSON.
        st.error("Could not load the most popular articles.")
        popular_articles = {}

    # Join the abstracts with spaces so neighbouring words do not fuse.
    popular_text = " ".join(
        article.get("abstract", "")
        for article in popular_articles.get("results", [])
    )

    if popular_text.strip():
        # Rendered directly from the pixel array; no pyplot figure is needed.
        wordcloud = WordCloud().generate(popular_text)
        st.image(wordcloud.to_array())
    else:
        st.warning("No article text was returned for this selection.")