This repository has been archived by the owner on Feb 22, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathacquire.py
61 lines (43 loc) · 1.56 KB
/
acquire.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Z0096
import pandas as pd
from os.path import isfile
from env import db_connect
#################### Acquire telco_churn Data ####################
def new_data():
    '''
    Query the telco_churn database and return the result as a DataFrame.

    Selects every column from the customers table joined to
    contract_types, internet_service_types and payment_types on their
    respective *_type_id columns.

    The connection argument handed to pandas is whatever
    db_connect('telco_churn') (imported from env.py) returns; credentials
    are presumably handled inside env.py -- confirm there.

    Intended to be called by get_data in acquire.py, which caches the
    result as a CSV file.
    '''
    # Full query text, assembled line by line; the joins use USING so the
    # key columns are matched by name
    sql = (
        "\nSELECT *\n"
        "FROM customers\n"
        "JOIN contract_types USING(contract_type_id)\n"
        "JOIN internet_service_types USING(internet_service_type_id)\n"
        "JOIN payment_types USING(payment_type_id);"
    )
    # Connection for the telco_churn database, built by env.db_connect
    connection = db_connect('telco_churn')
    # Let pandas run the query and build the DataFrame
    return pd.read_sql(sql, connection)
def get_data(cache=False):
    '''
    Return the telco_churn data as a DataFrame, using a local CSV cache.

    When cache is truthy, or when telco_churn.csv does not exist in the
    current working directory, the data is fetched with new_data() and
    written to telco_churn.csv. Otherwise the data is read back from
    that file. In both cases fully duplicated rows are removed from the
    returned DataFrame.

    Used in conjunction with db_connect function defined in env.py
    and new_data function defined in acquire.py

    Parameters
    ----------
    cache : bool, default False
        Set to True to force a fresh query and overwrite the CSV file.

    Returns
    -------
    pandas.DataFrame
        The telco_churn data with duplicate rows dropped.

    Notes
    -----
    The CSV is written without the row index so that a cached read
    yields the same columns as a fresh query. A CSV produced by an
    earlier version of this function still contains an extra unnamed
    index column; call get_data(cache=True) once to regenerate it.
    '''
    csv_path = 'telco_churn.csv'
    # check if forced cache is requested or no cached CSV file exists yet
    if cache or not isfile(csv_path):
        # read in new data into DataFrame and output to CSV file
        df = new_data()
        # index=False keeps the row index out of the file; otherwise
        # read_csv would bring it back as an 'Unnamed: 0' column
        df.to_csv(csv_path, index=False)
    else:
        df = pd.read_csv(csv_path)
    # drop_duplicates returns a new DataFrame rather than modifying df in
    # place, so its result must be the value that is returned
    return df.drop_duplicates()