-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathst_app.py
306 lines (243 loc) · 13.9 KB
/
st_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
import streamlit as st
import pandas as pd
import numpy as np
import requests
import time
import pydeck as pdk
from PIL import Image
#import matplotlib.pyplot as plt
# Sidebar navigation: one radio group decides which page the rest of the
# script renders on this run.
page_names = (
    "Introduction",
    "Sentiment Analysis",
    "Pandemic Analysis",
    "Air Traffic",
)
st.sidebar.title('Navigation')
st.sidebar.write('Please select a page :point_down:')
# The radio label is left empty; the line written above acts as its prompt.
add_selectbox = st.sidebar.radio("", page_names)
if add_selectbox == 'Introduction':
    # Landing page: contact links in the sidebar and a static description of
    # the project and of the three data sources it uses.

    # Four empty markdown headings act as vertical spacers above the links.
    for _ in range(4):
        st.sidebar.markdown('#')
    st.sidebar.write("Feedback/suggestion: [email protected]")
    st.sidebar.write("Github [link](https://github.com/KewalMishra/year-of-lockdown)")
    # The link target needs an explicit scheme; a bare "www." target is
    # treated by Markdown as a path relative to the app's own URL.
    st.sidebar.write("Connect with me on [Linkedin](https://www.linkedin.com/in/kewal-mishra-933502143)")

    st.title('An year in Lockdown: India :flag-in:')
    st.markdown("###")

    # Every paragraph is written with an explicit st.write call so the page
    # does not depend on Streamlit's "magic" rendering of bare string
    # statements.
    st.write(':arrow_right: On 23 March 2020, the Government of India under Prime Minister Narendra Modi ordered a nationwide lockdown for 21 days, limiting movement of the entire 1.38 billion or 138 Crore population of India as a preventive measure against the COVID-19 pandemic in India.')
    st.write(':arrow_right: On 14 April, Prime minister Narendra Modi extended the nationwide lockdown until 3 May, with a conditional relaxations after 20 April for the regions where the spread had been contained or was minimal.')
    st.write(':arrow_right: On 30 May, it was announced that lockdown restrictions were to be lifted from then onwards, while the ongoing lockdown would be further extended till 30 June for only the containment zones. Services would be resumed in a phased manner starting from 8 June. It was termed as "Unlock 1.0".')
    st.write(
        '**This project** aims at investigating the impact of the lockdown over different aspects like: Social Media Sentiment, Virus Spread '
        'data and the Air traffic data.'
    )
    st.write(
        "For the Social Media sentiment analysis, A twitter scraper was developed using Python(Selenium and Twitter's Advanced Search) and used it to scrape Historical tweets from Twitter dating from March of "
        "2020 to March of 2021. The criteria for the tweet were that it must contain *'Lockdown' AND 'India'*. Approximately 300,000 "
        "tweets were scraped from Twitter, all of these tweets were then cleaned and indexed with their polarity and sentiment value using Textblob python package. "
        "The sentiment value of each tweet can lie anywhere betweeen the range of -1 to 1, where 1 is a positive sentiment and -1 "
        "represents negative sentiment."
    )
    st.write(
        'The air traffic data is publically hosted by the Airport Authority of India. The data can be found [here](https://www.aai.aero/en/business-opportunities/aai-traffic-news). The "Traffic News Summary" file contains a summary of all air traffic, passengers and freight '
        'for both Domestic and International airports.'
    )
    st.write('The data for the Covid cases is available [here](https://api.covid19india.org/).')
if add_selectbox == 'Sentiment Analysis':
    # Sentiment page: charts derived from the scored-tweet CSV plus two
    # pre-rendered plot images.
    st.sidebar.markdown('#')
    st.sidebar.info('Tweets in India that mentioned "Lockdown" were used to assess the sentiment of social media. The scraper used for this project is available on my Github page.')
    st.title('Social Media Sentiment Analysis :speech_balloon:')

    @st.cache
    def load_sentiment_data():
        """Read the scored tweets and build the tables plotted on this page.

        Returns a 5-tuple ``(sent_df, daily_sent, day_hist_values, ax_perct,
        senti_TS)``:

        * ``sent_df`` - the CSV as read, with ``datetime`` parsed as dates.
        * ``daily_sent`` - mean of the numeric columns per calendar day.
        * ``day_hist_values`` - tweet counts per hour of day (24 bins).
        * ``ax_perct`` - percentage split by ``Sentiment`` per month, indexed
          by monthly ``Period``.
        * ``senti_TS`` - the same split, indexed by the month as a string.
        """
        sent_df = pd.read_csv('datasets/Sentiment_scored.csv', parse_dates=['datetime'])
        stamps = sent_df['datetime']

        def monthly_share(month_key):
            # Count tweets per (month, sentiment) and express each count as a
            # percentage of that month's total.
            counts = sent_df.groupby([month_key, sent_df['Sentiment']]).size()
            month_totals = counts.groupby(level=0).transform('sum')
            share = (100 * counts / month_totals).reset_index(name='percent')
            # Keyword arguments: the positional form of pivot is not accepted
            # by pandas 2.x.
            return share.pivot(index='datetime', columns='Sentiment', values='percent')

        # numeric_only makes the exclusion of text/date columns explicit
        # instead of relying on them being dropped implicitly.
        daily_sent = sent_df.groupby([stamps.dt.date]).mean(numeric_only=True)
        day_hist_values = np.histogram(stamps.dt.hour, bins=24, range=(0, 24))[0]
        months = stamps.dt.to_period('M')
        ax_perct = monthly_share(months)
        senti_TS = monthly_share(months.astype(str))
        return sent_df, daily_sent, day_hist_values, ax_perct, senti_TS

    data_load_state = st.text('Loading data...')
    sent_df, daily_sent, day_hist_values, ax_perct, senti_TS = load_sentiment_data()
    data_load_state.text('')

    st.markdown('####')
    st.header('Daily Average Sentiment')
    st.markdown('####')
    run = st.button('run animation')
    if run:
        # Start from the first row and append one row at a time so the line
        # appears to draw itself.
        daily_sent_chart = st.line_chart(daily_sent.iloc[:1])
        for i in range(1, len(daily_sent)):
            daily_sent_chart.add_rows(daily_sent.iloc[i:i + 1])
            time.sleep(0.03)
    else:
        st.line_chart(daily_sent)
    daily_avg_sent_exp = st.beta_expander('Data insight')
    daily_avg_sent_exp.write(
        'The line graph is noisy but we can observe that the average sentiment goes higher(towards positive) '
        'as the year progressed.'
    )

    st.markdown('###')
    st.header('Tweet count distribution throughout a day :clock3:')
    st.markdown('####')
    st.bar_chart(day_hist_values)
    tweet_cnt_exp = st.beta_expander('Data insight')
    tweet_cnt_exp.write(
        'Twitter users start posting as early as 4 A.M. and the count peaks between 2P.M and 3 P.M., '
        'after which there is substantial drop after 5 P.M. which stagnates at night.'
    )

    st.markdown('###')
    st.header('Scraped tweet data distribution :bar_chart:')
    st.markdown('####')
    # This plot is shipped as an image file rather than drawn at runtime.
    image1 = Image.open('datasets/sent_plot1.png')
    st.image(image1)
    tweet_dist_exp = st.beta_expander('Data insight')
    tweet_dist_exp.write('It is a no-brainer that tweets about lockdown were at an excess during the month of april and towards the end of march.')

    st.markdown('###')
    st.header('Monthly Sentiment percentage split')
    st.markdown('####')
    # Also a shipped image; ax_perct is loaded above but not drawn here.
    image2 = Image.open('datasets/sent_plot2.png')
    st.image(image2)
    perct_splt_exp = st.beta_expander('Data insight')
    perct_splt_exp.write('December 2020 was the most positive month, September 2020 saw the highest weightage of negative sentiment')

    st.markdown('###')
    st.header('How did each Sentiment vary over time :question:')
    st.markdown('####')
    senti = st.selectbox('Select', senti_TS.columns.tolist())
    st.area_chart(senti_TS[senti])
if add_selectbox == 'Pandemic Analysis':
    # Pandemic page: national time series, per-state comparison and a column
    # map, all built from CSV files downloaded on every run.
    st.sidebar.markdown('#')
    st.sidebar.info('The data used for this page is dynamic and gets updated every day!')
    st.title('Pandemic Analysis :mask:')

    # The URLs keep their own names so they are not rebound to the frames
    # read from them.
    case_time_series_url = "https://api.covid19india.org/csv/latest/case_time_series.csv"
    states_url = "https://api.covid19india.org/csv/latest/states.csv"
    state_wise_url = "https://api.covid19india.org/csv/latest/state_wise.csv"
    try:
        case_time_series = pd.read_csv(case_time_series_url, parse_dates=['Date_YMD'])
        state_wise = pd.read_csv(state_wise_url)
        states = pd.read_csv(states_url, parse_dates=['Date'])
    except OSError as exc:
        # Network and HTTP failures derive from OSError; show a message on the
        # page and end this run instead of surfacing a traceback.
        st.error('Could not download the Covid-19 data: ' + str(exc))
        st.stop()

    st.write('Last data update:', case_time_series.Date_YMD.max().date())

    state_wise = state_wise[['State', 'Confirmed', 'Recovered', 'Deaths', 'Active']]
    # Remove the aggregate/unassigned rows, then the 'Other' column. drop()
    # returns a new frame, so nothing is mutated in place on a filtered slice.
    is_real_state = (states.State != 'India') & (states.State != 'State Unassigned')
    states = states[is_real_state].drop(columns='Other')

    daily_df = case_time_series[['Date_YMD', 'Daily Confirmed', 'Daily Recovered', 'Daily Deceased']] \
        .rename(columns={'Date_YMD': 'index'}).set_index('index')
    total_df = case_time_series[['Date_YMD', 'Total Confirmed', 'Total Recovered', 'Total Deceased']] \
        .rename(columns={'Date_YMD': 'index'}).set_index('index')

    def _draw_trend(chart_fn, frame, animate):
        # Draw `frame` with `chart_fn`; when `animate` is set, start from the
        # first row and append the remaining rows one at a time.
        if not animate:
            chart_fn(frame)
            return
        chart = chart_fn(frame.iloc[:1])
        for pos in range(1, len(frame)):
            chart.add_rows(frame.iloc[pos:pos + 1])
            time.sleep(0.03)

    st.markdown('##')
    st.header('Time Series plot of Covid-19 cases (nation-level) :chart_with_upwards_trend:')
    st.markdown('####')
    st.write('Select plot aggregation :arrow_heading_down:')
    trend_plot = st.selectbox('', ['Daily Trend', 'Cumulative Trend'])
    run = st.button('run animation')
    if trend_plot == 'Daily Trend':
        _draw_trend(st.line_chart, daily_df, run)
    elif trend_plot == 'Cumulative Trend':
        _draw_trend(st.area_chart, total_df, run)
    tim_ser_exp = st.beta_expander('Data insight')
    tim_ser_exp.write(
        'The graph shows that the daily count of new cases is rising again since March. Click on the expand icon on top right of graph to view it '
        'in larger mode.'
    )

    st.markdown('###')
    st.header('State level analysis')
    st.markdown('####')
    left_column, right_column = st.beta_columns(2)
    # The last four columns of the filtered frame are offered as metrics.
    plot_data = right_column.radio('Select data', states.columns.tolist()[-4:])
    state_select = left_column.multiselect("Select states", states.State.unique().tolist(), ["Maharashtra", "Kerala"])
    if not state_select:
        st.error("Please select at least one state.")
    else:
        # Start with an empty chart and add one series per selected state,
        # each named after its state.
        state_level_chart = st.area_chart()
        for stt in state_select:
            state_series = states[states.State == stt][['Date', plot_data]] \
                .rename(columns={'Date': 'index', plot_data: stt}).set_index('index')
            state_level_chart.add_rows(state_series)
    state_lvl_exp = st.beta_expander('Data insight')
    state_lvl_exp.write('You can select multiple states to analyse their and compare their trend!')

    st.markdown('###')
    st.header('Map Visualtization :earth_asia:')
    st.markdown('###')
    state_wise = state_wise[(state_wise.State != 'Total') & (state_wise.State != 'State Unassigned')]
    coord_data = pd.read_csv('datasets/states_coords.csv')[['State', 'lon', 'lat']]
    state_wise = pd.merge(state_wise, coord_data, on='State', how='left')
    # Column heights are the confirmed counts divided by 500. Rows that found
    # no match in the coordinates file have no position and are left off the
    # map; they still appear in the table below.
    state_wise_mod = state_wise.assign(Confirmed_scaled=state_wise.Confirmed / 500)
    state_wise_mod = state_wise_mod[['lon', 'lat', 'Confirmed_scaled']] \
        .dropna(subset=['lon', 'lat']).reset_index(drop=True)

    # Initial camera position, tilt and zoom.
    view = pdk.ViewState(latitude=23.8343419, longitude=77.5640719, pitch=40, zoom=4)
    column_layer = pdk.Layer('ColumnLayer',
                             data=state_wise_mod,
                             get_position=['lon', 'lat'],
                             get_elevation='Confirmed_scaled',
                             elevation_scale=100,
                             radius=10000,
                             get_fill_color=[255, 165, 0, 80],
                             pickable=True,
                             auto_highlight=True)
    # No map_style is given, so the default style is used.
    st.pydeck_chart(pdk.Deck(layers=[column_layer],
                             initial_view_state=view))
    map_exp = st.beta_expander('Data insight')
    map_exp.write('The height of column is proportional to the *Total Confirmed Cases* count of that location.')
    show_data = st.checkbox('show data')
    if show_data:
        st.table(state_wise)
if add_selectbox == 'Air Traffic':
    # Air traffic page: one area chart over the domestic, international or
    # summed monthly tables, for a metric chosen by the user.
    st.sidebar.markdown('#')
    st.sidebar.info(
        'Airport Authority of India publishes "Traffic news Summary" that contains great amount of information about their operations. '
        'It is a no-brainer that the transport sector took one of the biggest hits due to the lockdown.'
    )
    st.title('Air Traffic Analysis :airplane:')

    @st.cache
    def load_air_traffic_data():
        """Return the (domestic, international) tables, each indexed by 'Month'."""
        def read_monthly(csv_path):
            table = pd.read_csv(csv_path, parse_dates=['Month'])
            return table.set_index('Month')

        domestic = read_monthly('datasets/dom_air_traffic.csv')
        international = read_monthly('datasets/intn_air_traffic.csv')
        return domestic, international

    data_load_state = st.text('Loading data...')
    dom, intn = load_air_traffic_data()
    data_load_state.text('')

    left_column, right_column = st.beta_columns(2)
    agg_lvl = left_column.radio('Select data aggregation', ['All', 'Domestic', 'International'])
    # The metric choices are the column names of the domestic table.
    data_type = right_column.radio('Select Data', dom.columns.tolist())

    st.subheader('Data Metrics:')
    for metric_note in (
        '1. Aircraft Movement : in Thousands ',
        '2. Passengers : in Millions',
        '3. Freight : in Tonnes',
    ):
        st.text(metric_note)

    # Choose the table that matches the selected aggregation level.
    if agg_lvl == 'Domestic':
        chart_data = dom
    elif agg_lvl == 'International':
        chart_data = intn
    elif agg_lvl == 'All':
        chart_data = dom + intn
    st.area_chart(chart_data[data_type])

    AT_exp = st.beta_expander('Data insight')
    AT_exp.write(
        'As is evident from the graph, Passenger traffic has taken the biggest hit, and only Freight transport has been able to reinstate '
        'itself to "pre-lockdown" condition.'
    )
    st.markdown('###')
    data_btn = st.checkbox('Show data')
    if data_btn:
        st.dataframe(chart_data)