-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDFR Travel Map Code.py
237 lines (202 loc) · 9.03 KB
/
DFR Travel Map Code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import plotly.express as px
import numpy as np
import pandas as pd
from urllib.request import urlopen
import json
import plotly.io as pio
# Created by the Vermont Department of Financial Regulation
# Run in Atom's Hydrogen package
# %% Defines functions used later on in the map
green = '<400 Active Cases per Million<br>(No Quarantine Required)'
yellow = '<br>400-799 Active Cases per Million<br>(Quarantine Required'\
' in<br>Vermont or Home State)'
red = '<br>800+ Active Cases per Million<br>(Quarantine Required in'\
'<br>Vermont or Home State)<br>'
green_vt = '<400 Active Cases per Million (VT)'
yellow_vt = '400-799 Active Cases per Million (VT)'
red_vt = '800+ Active Cases per Million (VT)'
factor = 2.4
def to_len(x, i):
# Converts any string to desired length by adding initial
# zeros or deleting final characters
if len(x) < i:
diff = i-len(x)
return diff*'0' + x
if len(x) > i:
return x[:i]
else:
return x
def to_status(x, y):
# Determines the status of a county based on active cases per million
if y == 'Vermont':
if x < 400:
return green_vt
elif x < 800:
return yellow_vt
else:
return red_vt
elif x < 400:
return green
elif x < 800:
return yellow
else:
return red
# %% Gets county outlines from plotly
with urlopen('https://raw.githubusercontent.com/plotly/datasets/'
'master/geojson-counties-fips.json') as response:
counties = json.load(response)
# %% Takes most recent data from JHU GitHub repo
hopkinsurl = 'https://raw.githubusercontent.com/CSSEGISandData/'\
'COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'\
'time_series_covid19_confirmed_US.csv'
hopkins = pd.io.parsers.read_csv(hopkinsurl)
hop_ne = hopkins[(hopkins['Province_State'] == 'Vermont') |
(hopkins['Province_State'] == 'New Hampshire') |
(hopkins['Province_State'] == 'Maine') |
(hopkins['Province_State'] == 'Rhode Island') |
(hopkins['Province_State'] == 'Massachusetts') |
(hopkins['Province_State'] == 'Connecticut') |
(hopkins['Province_State'] == 'New Jersey') |
(hopkins['Province_State'] == 'Pennsylvania') |
(hopkins['Province_State'] == 'Ohio') |
(hopkins['Province_State'] == 'Maryland') |
(hopkins['Province_State'] == 'District of Columbia') |
(hopkins['Province_State'] == 'Delaware') |
(hopkins['Province_State'] == 'Virginia') |
(hopkins['Province_State'] == 'West Virginia') |
(hopkins['Province_State'] == 'New York')]
# %%
# Values obtained from our gamma distribution equal to
# 1 - CDF for the first 30 integer values of x
prop = [1, 0.9486632702, 0.864870849, 0.7727784056,
0.6814356899, 0.5951939501, 0.5161053777,
0.4449605251, 0.3818322198, 0.3263880516,
0.2780783636, 0.2362516399, 0.2002254171,
0.1693291345, 0.1429289957, 0.1204412361,
0.1013379529, 0.0851482333, 0.0714563987,
0.0598985719, 0.0501583661, 0.0419622164,
0.0350746871, 0.0292939607, 0.0244476271,
0.0203888345, 0.0169928249, 0.0141538527,
0.0117824696, 0.0098031505]
gamma = pd.DataFrame({'prop': prop})
# These values are used to calculate active cases via the methodology explained
# in the DFR whitepaper
new = hop_ne.iloc[:, -31:].transpose().diff().dropna()
temp2 = []
for j in np.arange(new.shape[1]):
temp = []
for i in np.arange(len(new)):
var = new.iloc[:, j][29 - i] * gamma.iloc[i]
temp.append(var)
var2 = np.sum(temp)
if var2 < 0:
var2 = 0
temp2.append(var2)
hop_ne['Active Cases'] = temp2
hop_ne['Active Cases'] = hop_ne['Active Cases'] * factor
# Gets rid of some columns we don't need
hop_ne_trim = hop_ne[['UID',
'FIPS', 'Admin2', 'Province_State', 'Country_Region',
'Lat', 'Long_', 'Combined_Key', 'Active Cases']]
hop_ne_trim = hop_ne_trim.dropna()
# %% JHU doesn't report NCY boroughs individually, so we break them out manually
# The file begreen is obtained from https://health.data.ny.gov/Health
# /New-York-State-Statewide-COVID-19-Testing/xdss-u53e/data
# File path will vary by user, so change accordingly if you're trying to
# replicate this code
boro = pd.read_csv('/Users/ryantaggard/Desktop/COVID-19/Current County'
' Status Map/Interactive/NYC_Boro.csv')
boro = boro[boro['County'].isin(['Bronx', 'Queens', 'Kings',
'Richmond', 'New York'])]
boro = boro[['Test Date', 'County', 'New Positives']]
boro['Test Date'] = pd.to_datetime(boro['Test Date'])
boro = boro.pivot_table(index='Test Date', columns='County',
values='New Positives')
boro = boro.iloc[-30:, :].reset_index()
boro['prop'] = gamma.reindex(index=gamma.index[::-1])\
.reset_index().drop('index', axis=1)
nycfips = {'Bronx': 36005,
'Kings': 36047,
'New York': 36061,
'Queens': 36081,
'Richmond': 36085}
hop_ne_trim = hop_ne_trim.set_index('FIPS')
# Active case counts are determined for the NYC boroughs
for i in ['Bronx', 'Kings', 'New York', 'Queens', 'Richmond']:
hop_ne_trim['Active Cases'][nycfips[i]] =\
boro[[i, 'prop']].prod(axis=1).sum()*factor
hop_ne_trim = hop_ne_trim.reset_index()
# %%
# Population estimates taken from the US census available at
# https://www2.census.gov/programs-surveys/popest/datasets/
# 2010-2019/counties/totals/
# Filepath will vary by user
pops_path = '/Users/ryantaggard/Desktop/COVID-19/Current County Status Map/'\
'co-est2019-alldata.csv'
pops = pd.read_csv(pops_path, encoding='Latin-1')
pops = pops[['STATE', 'COUNTY', 'STNAME', 'CTYNAME', 'POPESTIMATE2019']]
pops = pops[pops['COUNTY'] != 0]
pops['STATE'] = pops['STATE'].astype(str)
pops['COUNTY'] = pops['COUNTY'].astype(str)
# %% Get FIPS codes from state and county IDs
pops.STATE = pops.STATE.apply(to_len, args=(2,))
pops.COUNTY = pops.COUNTY.apply(to_len, args=(3,))
pops['FIPS'] = pops['STATE'] + pops['COUNTY']
# %% Add initial zeros to fips codes without them
hop_ne_trim
hop_ne_trim.FIPS = hop_ne_trim.FIPS.astype(str)
hop_ne_trim.FIPS = hop_ne_trim.FIPS.str.replace(r'.', '')
hop_ne_trim.FIPS = hop_ne_trim.FIPS.map(lambda x: str(x)[:-1])
hop_ne_trim.FIPS = hop_ne_trim.FIPS.apply(to_len, args=(5,))
# %% Merge census and active case data
hop_ne_trim = hop_ne_trim.set_index('FIPS')
pops = pops.set_index('FIPS')
merged = hop_ne_trim.join(pops)
merged = merged.dropna()
# %% Calculate active cases per million
merged['per_mm'] = merged['Active Cases']/merged['POPESTIMATE2019']*1000000
merged['Status'] = merged[['per_mm', 'Province_State']]\
.apply(lambda x: to_status(*x), axis=1)
# Rename columns and round case numbers to clean up data
merged = merged.rename(columns={'Admin2': 'County',
'Province_State': 'State',
'POPESTIMATE2019': 'Population',
'per_mm': 'Active Cases per Million'})
merged = merged[['State', 'County', 'Active Cases',
'Population', 'Active Cases per Million',
'Status']].dropna()
merged['Active Cases per Million'] =\
np.round(merged['Active Cases per Million'])
merged['Active Cases per Million'] = merged['Active Cases per Million'].astype(int)
# %% Get full name to display in map
merged['Full Name'] = merged['County'] + ', ' + merged['State']
# %% Map colors
palette = ['#00bbb8', '#ffbc35', '#ec001f', '#0073e7', '#005dbb', '#064789']
# %% Adding dummy values makes the folgreening labels show even when VT has
# no counties at these levels
save = merged
save.loc[12, 'Status'] = '800+ Active Cases per Million (VT)'
save.loc[13, 'Status'] = '400-799 Active Cases per Million (VT)'
# %% Creates the map
warning = 'Note: Vermont counties are provided for<br>informational purposes '\
'only as Vermont<br>is not subject to this policy. Instead it is<br>'\
'monitored with separate metrics.'
save = merged.rename(columns={'Status': warning}).reset_index()
order = {warning: [green, yellow, red,
green_vt, yellow_vt, red_vt]}
fig = px.choropleth_mapbox(save, geojson=counties, locations='FIPS',
color=warning,
center={"lat": 41.0458, "lon": -75.2479},
color_discrete_sequence=palette,
opacity=1,
title='VT Neighbors',
category_orders=order,
mapbox_style="carto-positron", zoom=4,
hover_name='Full Name',
hover_data={'Active Cases per Million': True,
'FIPS': False,
warning: False})
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.update_layout(legend=dict(y=0.9))
pio.write_html(fig, file='index.html', auto_open=True)
save = save.rename(columns={warning: 'Status'})