-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathsimple.py
65 lines (43 loc) · 1.62 KB
/
simple.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import numpy as np
import dask.dataframe as dd
# Share of each state's total tax revenue that comes from property taxes,
# computed lazily with dask and materialized once at the end.

# The input file is tab-separated despite its .csv extension.
taxes = dd.read_csv("FY2016-STC-Category-Table.csv", sep="\t")

# Amounts are read as text: drop the thousands separators and map the "X"
# placeholder to NaN so the column can be cast to float.
taxes["Amount"] = (
    taxes["Amount"].str.replace(",", "").replace("X", np.nan).astype(float)
)

# NOTE(review): dask's pivot_table expects the `columns` field to be a
# categorical with known categories, hence as_known() -- confirm against the
# dask version in use.
taxes["Tax_Type"] = taxes["Tax_Type"].astype("category").cat.as_known()

pivot = taxes.pivot_table(index="Geo_Name", columns="Tax_Type", values="Amount")

# Keep only the rows that report a property-tax amount.  A boolean mask is
# used here (rather than a lazy index of labels) because dask's .loc accepts
# boolean dask Series but not label-valued dask collections.
has_property_info = ~pivot["Property Taxes"].isna()
pivot_clean = pivot.loc[has_property_info]

frac_property = pivot_clean["Property Taxes"] / pivot_clean["Total Taxes"]
print(frac_property)  # still lazy: prints the dask structure, not the values

# Write the task graph to disk, laid out left-to-right.
frac_property.visualize(filename="10-property.svg", rankdir="LR")

# Execute the graph once and sort the concrete pandas result, instead of
# sorting the lazy series and then computing a second time.
frac_property_result = frac_property.compute()
frac_property_sorted = frac_property_result.sort_values()

# Attach the same ratio to the cleaned pivot table as an extra column.
pivot_clean["frac_property"] = frac_property
# Exploratory look at the raw (long-format) table: which states lack amounts,
# and two alternative ways of filtering the table down.

# `Amount` has already been cast to float earlier in this script, with the
# "X" placeholder mapped to NaN, so missing amounts have to be detected with
# isna(); comparing the float column to the string "X" never matches.
amount_missing = taxes["Amount"].isna()
no_property_states = taxes[amount_missing].Geo_Name

# One-column frame of the states that do report a property-tax amount.
has_property_info = taxes[
    (taxes["Tax_Type"] == "Property Taxes") & ~amount_missing
][["Geo_Name"]]

# Variant 1: inner join on the state name keeps only rows of states listed in
# has_property_info.
taxes_clean = taxes.join(
    has_property_info.set_index("Geo_Name"),
    on="Geo_Name",
    how="inner",
)
taxes_clean
no_property_states

# Materialize the dask frame as pandas before using DataFrame.pivot; `tc`
# must exist before anything is called on it.
tc = taxes.compute()
pivot = tc.pivot(index="Geo_Name", columns="Tax_Type", values="Amount")
pivot.loc["Texas"]

# Quick inspection of the lazy frame versus its computed counterpart.
taxes.head()
type(taxes)
type(tc)
taxes
taxes.iloc[:, 0].compute()
taxes_clean.visualize(filename="x.svg")

# Compute the state names once into a plain list: isin() needs concrete
# values, not a lazy dask Series.
no_property_state_names = list(no_property_states.compute())
no_property_state_names
taxes_clean.Geo_Name.unique().compute()

# Variant 2: drop every row belonging to a state that has a missing amount.
# This replaces the join-based `taxes_clean` above.
taxes_clean = taxes[~taxes.Geo_Name.isin(no_property_state_names)]
pivot