This repository has been archived by the owner on Feb 22, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathvisualize.py
258 lines (202 loc) · 9.05 KB
/
visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# Z0096
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from math import ceil
#################### Visualize telco_churn Data ####################
def big_heat(df):
    '''
    Use seaborn to create heatmap with coefficient annotations to
    visualize correlation between all chosen variables

    df is the DataFrame whose pairwise column correlations (df.corr())
    are plotted. Only the cells below the diagonal are drawn; the diagonal
    and the upper triangle are masked out. The figure is displayed with
    plt.show() and nothing is returned.
    '''
    n_vars = len(df.columns)
    # Set up large figure size for easy legibility
    plt.figure(figsize=(n_vars + 5, n_vars + 1))
    # assign pd.corr() output to variable and create a mask to remove
    # redundancy from graphic
    corr = df.corr()
    # build the mask from cell positions, not from the correlation values:
    # a mask made of the coefficients themselves is only truthy where the
    # coefficient is non-zero, so an upper-triangle correlation of exactly
    # 0 would be left unmasked and drawn twice
    mask = np.triu(np.ones(corr.shape, dtype=bool), k=0)
    # define custom cmap for heatmap where the darker the reds the more
    # positive and vice versa for blues
    cmap = sns.diverging_palette(h_neg=220, h_pos=13, sep=25, as_cmap=True)
    # create graphic with zero centered cmap and annotations set to one
    # significant figure
    sns.heatmap(
        corr,
        cmap=cmap,
        center=0,
        annot=True,
        fmt=".1g",
        square=True,
        mask=mask,
        cbar_kws={
            'shrink': 0.5,
            'aspect': 50,
            'use_gridspec': False,
            'anchor': (-0.75, 0.75),
        },
    )
    # format xticks for improved legibility and clarity
    plt.xticks(ha='right', va='top', rotation=35, rotation_mode='anchor')
    plt.title('Correlation Heatmap')
    plt.show()
def heater(df):
    '''
    Draws a one-row heatmap, annotated with coefficients, of how each
    DataFrame variable correlates with the target variable 'churn'
    Darker Reds indicate stronger positive
    Darker Blues indicate stronger negative
    '''
    # correlations against churn with the final entry dropped
    # (presumably churn itself, assuming it is the last column -- verify)
    churn_corr = df.corr()['churn'].iloc[:-1]
    # wide and short canvas to fit a single row of cells
    plt.subplots(figsize=(30, 1))
    # red/blue diverging palette shared with the full correlation heatmap
    palette = sns.diverging_palette(h_neg=220, h_pos=13, sep=25, as_cmap=True)
    # the Series becomes a one-row DataFrame so it can be drawn as a matrix
    churn_row = churn_corr.to_frame().T
    sns.heatmap(
        churn_row,
        cmap=palette,
        center=0,
        annot=True,
        fmt=".1g",
        cbar=False,
        square=True,
    )
    # slanted xticks for readability, no ytick for the lone churn row
    plt.xticks(ha='right', va='top', rotation=35, rotation_mode='anchor')
    plt.yticks(ticks=[])
    # title the graphic and display it
    plt.title('Correlation to Churn\n')
    plt.show()
def hist_vars(df):
    '''
    Builds one figure holding a seaborn histplot, with a hue for churn, for
    every DataFrame column except the last one
    The grid is four plots wide; the number of rows and the figure size
    follow from how many columns are plotted
    '''
    # every column but the last gets its own subplot
    features = df.columns[:-1]
    n_cols = 4
    n_rows = ceil(len(features) / n_cols)
    # canvas grows with the grid: 7 wide and 6 tall per plot
    plt.figure(figsize=((n_cols * 7), (n_rows * 6)))
    for n_plot, col in enumerate(features, start=1):
        plt.subplot(n_rows, n_cols, n_plot)
        sns.histplot(data=df, x=df[col], hue=df.churn)
        # columns holding exactly two distinct values get False/True ticks
        if len(df[col].value_counts()) == 2:
            plt.xticks(ticks=[0,1], labels=[False, True])
        # the column name goes in the title, so the x label is blanked
        plt.xlabel(' ')
        plt.title(col)
    # relabel the legend of the last subplot drawn
    plt.legend(['Churn', ' Retain'], bbox_to_anchor=(.7,1))
    plt.suptitle('Distribution of Variables')
    plt.show()
def internet_violin(df):
    '''
    Draws a two-panel violinplot figure of monthly charges per internet
    service type with a hue for churn: first year customers (tenure of 12
    or less) on top, post year customers underneath
    Only customers with internet service are plotted, while the counts in
    the panel titles cover every customer in the tenure group
    '''
    def _violin(panel, customers):
        # select the panel, then draw fiber vs monthly charges split by churn
        plt.subplot(2, 1, panel)
        sns.violinplot(
            x=customers['fiber'],
            y=customers['monthly_charges'],
            hue=customers['churn'],
            linewidth=5,
        )

    # split by tenure, then keep the internet customers of each group
    early = df[df.tenure <= 12]
    late = df[df.tenure > 12]
    early_total = early.shape[0]
    late_total = late.shape[0]
    early_net = early[early.has_internet == True]
    late_net = late[late.has_internet == True]

    charges_label = '\nMonthly Charges $(USD)$\n'
    plt.figure(figsize=(28, 14))

    # top panel: first year, x axis left bare
    _violin(1, early_net)
    plt.xticks(ticks=[])
    plt.xlabel('')
    plt.ylabel(charges_label)
    plt.ylim((0,125))
    plt.title(f'First Year Customers : {early_total}\n')

    # bottom panel: post year, carries the service type labels and has
    # its legend emptied
    _violin(2, late_net)
    plt.xticks(ticks=[0,1], labels=['DSL', 'Fiber'])
    plt.xlabel('Internet Service Type')
    plt.ylabel(charges_label)
    plt.title(f'Post Year Customers : {late_total}\n')
    plt.legend([])
    plt.ylim((0,125))

    plt.suptitle(' Monthly Charges by Internet Service Type\n')
    plt.tight_layout()
    plt.show()
def internet_breakdown(df):
    '''
    Prints report of fiber customers separated into first year and post year,
    then shows the total number for each who churned along with the percentage

    df must provide the columns tenure, has_internet, fiber and churn.
    First year means tenure <= 12, post year means tenure > 12, and only
    customers with internet service are counted. Nothing is returned; the
    report is written to stdout. A churn percentage is printed as N/A when
    its tenure group contains no fiber customers.
    '''
    def _percent(part, whole):
        # a rate over an empty group is undefined, so report N/A instead of
        # raising ZeroDivisionError
        return f'{part / whole:.2%}' if whole else 'N/A'

    # fiber customers with internet service; the explicit comparisons work
    # whether the flags are stored as booleans or as 0/1
    fiber = df[(df.has_internet == True) & (df.fiber == 1)]
    first_year_fiber = fiber[fiber.tenure <= 12]
    post_year_fiber = fiber[fiber.tenure > 12]

    year_fiber = first_year_fiber.shape[0]
    year_fiber_churn = first_year_fiber[first_year_fiber.churn == 1].shape[0]
    post_fiber = post_year_fiber.shape[0]
    post_fiber_churn = post_year_fiber[post_year_fiber.churn == 1].shape[0]

    year_percent = _percent(year_fiber_churn, year_fiber)
    post_percent = _percent(post_fiber_churn, post_fiber)

    border = '+ ------------------------------------------ +'
    blank = '| |'
    lines = (
        f'Total Fiber Customers: {year_fiber + post_fiber}',
        border,
        blank,
        f'| First Year Fiber Customers: {year_fiber:<13}|',
        blank,
        f'| Churned: {year_fiber_churn:<13}|',
        f'| Percent: {year_percent:<13}|',
        blank,
        f'| Post Year Fiber Customers: {post_fiber:<13}|',
        blank,
        f'| Churned: {post_fiber_churn:<13}|',
        f'| Percent: {post_percent:<13}|',
        blank,
        border,
    )
    # the report is framed by one empty line above and below
    print('\n' + '\n'.join(lines) + '\n')
def internet_contract_compare(df):
    '''
    Prints report of fiber customers broken down by first year and post year,
    then outputs a table with their counts, mean charges, and the percentage of
    those customers under service contract terms

    df must provide the columns fiber, tenure, churn, no_contract and
    monthly_charges. First year means tenure <= 12, post year means
    tenure > 12. Nothing is returned; the report is written to stdout.
    A group without any no_contract == 1 customers is reported as 100%
    under contract, and a percentage over an empty group is printed as N/A.
    '''
    def _group_lines(cohort, churn_flag, heading, contract_label):
        # count, mean charges and contract share for one churn outcome
        group = cohort[cohort.churn == churn_flag]
        # only customers with a known contract status are counted
        known = int(group.no_contract.count())
        # counted directly rather than via value_counts()[1], which raises
        # KeyError when nobody in the group has no_contract == 1
        without_contract = int((group.no_contract == 1).sum())
        if known:
            under_contract = f'{1 - without_contract / known:.2%}'
        else:
            under_contract = 'N/A'
        return [
            f'| {heading}: {known:<13}|',
            f'| Mean Charges: ${group.monthly_charges.mean():<12.2f}|',
            f'| {contract_label}: {under_contract:<13}|',
        ]

    total_fiber = df[df.fiber == 1]
    first_year = df[(df.fiber == 1) & (df.tenure <= 12)]
    post_year = df[(df.fiber == 1) & (df.tenure > 12)]

    # share of all fiber customers whose no_contract flag is 0
    fiber_count = total_fiber.shape[0]
    contract_count = total_fiber[total_fiber.no_contract == 0].shape[0]
    if fiber_count:
        contract_percent = f'{contract_count / fiber_count:.2%}'
    else:
        contract_percent = 'N/A'

    border = '+----------------------------------------+'
    blank = '| |'
    lines = ['Fiber Customer Comparisons', border, blank]
    lines += [f'| First Year Customers: {first_year.shape[0]:<13}|', blank]
    lines += _group_lines(first_year, 1, 'Churned', 'Under Contract')
    lines += [blank]
    lines += _group_lines(first_year, 0, 'Retained', 'Under Contracted')
    lines += [blank]
    lines += [f'| Post Year Customers: {post_year.shape[0]:<13}|', blank]
    lines += _group_lines(post_year, 1, 'Churned', 'Under Contract')
    lines += [blank]
    lines += _group_lines(post_year, 0, 'Retained', 'Under Contracted')
    lines += [blank]
    lines += [f'| Total Under Contract: {contract_percent:<13}|', blank, border]
    # the report is framed by one empty line above and below
    print('\n' + '\n'.join(lines) + '\n')