-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathtraversing.py
65 lines (48 loc) · 1.55 KB
/
traversing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import pandas as pd
import numpy as np
# Load the trip records from a CSV file (the .gz suffix lets pandas
# decompress it on the fly). The path is relative to the working directory.
df = pd.read_csv("../sec1-intro/yellow_tripdata_2020-01.csv.gz")
# Bare expression: its value is discarded when this file runs as a script
# (presumably left over from interactive / notebook inspection).
df.columns
# Convert the pickup and drop-off timestamp columns to pandas datetimes.
df["tpep_pickup_datetime"] = pd.to_datetime(df["tpep_pickup_datetime"])
df["tpep_dropoff_datetime"] = pd.to_datetime(df["tpep_dropoff_datetime"])
# Two more discarded inspection expressions: the frequency of each tip
# value, and the first cell of the first row.
df.tip_amount.value_counts()
df.iloc[0].iat[0]
# Drop rows whose total_amount is zero: the per-row tip fractions computed
# later in this file divide by total_amount.
df = df[(df.total_amount != 0)]
# Random subsamples used for timing the slower traversal functions:
# df_10 holds 10% of the rows, df_100 holds 1% (frac=0.01, i.e. 1/100th).
# No random_state is passed, so the rows drawn are not reproducible unless
# NumPy's global seed is set elsewhere.
df_10 = df.sample(frac=0.1)
df_100 = df.sample(frac=0.01)
def get_tip_mean_explicit(df):
    """Return total tips divided by total fares, walking rows by position.

    Each row is fetched with ``df.iloc[position]`` inside an index loop, and
    its ``tip_amount`` and ``total_amount`` values are added to running sums.
    The result is ``sum(tip_amount) / sum(total_amount)`` -- a ratio of sums,
    not the mean of per-row ratios.

    Author's recorded timing: about 10 s on ``df_100``.
    """
    tip_sum = 0
    total_sum = 0
    row_count = len(df)
    for position in range(row_count):
        record = df.iloc[position]
        tip_sum = tip_sum + record["tip_amount"]
        total_sum = total_sum + record["total_amount"]
    return tip_sum / total_sum
def get_tip_mean_iterrows(df):
    """Return total tips divided by total fares, using ``DataFrame.iterrows``.

    Every ``(index, row)`` pair yielded by ``iterrows`` contributes its
    ``tip_amount`` and ``total_amount`` to running sums; the index label is
    ignored. The result is ``sum(tip_amount) / sum(total_amount)`` -- a ratio
    of sums, not the mean of per-row ratios.

    Author's recorded timing: about 4 s on ``df_100``.
    """
    tip_sum = 0
    total_sum = 0
    for _label, record in df.iterrows():
        tip_sum = tip_sum + record["tip_amount"]
        total_sum = total_sum + record["total_amount"]
    return tip_sum / total_sum
def get_tip_mean_itertuples(df):
    """Return total tips divided by total fares, using ``DataFrame.itertuples``.

    Each named tuple yielded by ``itertuples`` is read through its
    ``tip_amount`` and ``total_amount`` attributes, which are added to
    running sums. The result is ``sum(tip_amount) / sum(total_amount)`` -- a
    ratio of sums, not the mean of per-row ratios.

    Author's recorded timing: 18 s (the data set timed is not stated).
    """
    tip_sum = 0
    total_sum = 0
    for entry in df.itertuples():
        tip_sum = tip_sum + entry.tip_amount
        total_sum = total_sum + entry.total_amount
    return tip_sum / total_sum
def get_tip_mean_apply(df):
    """Return the mean per-row tip fraction, using a row-wise ``apply``.

    ``df.apply(..., axis=1)`` calls the helper once per row; the helper looks
    the two values up by key (``row["tip_amount"]``, ``row["total_amount"]``)
    and divides them. The mean of those per-row fractions is returned.

    Author's recorded timing: about 9.42 s on ``df_10``.
    """

    def _tip_fraction(row):
        # Key-based lookup on the row.
        return row["tip_amount"] / row["total_amount"]

    per_row_fraction = df.apply(_tip_fraction, axis=1)
    return per_row_fraction.mean()
def get_tip_mean_apply2(df):
    """Return the mean per-row tip fraction, using ``apply`` with attribute access.

    ``df.apply(..., axis=1)`` calls the helper once per row; the helper reads
    the two values as attributes (``row.tip_amount``, ``row.total_amount``)
    and divides them. The mean of those per-row fractions is returned.

    Author's recorded timing: about 14.9 s on ``df_10``.
    """

    def _tip_fraction(row):
        # Attribute-style lookup on the row.
        return row.tip_amount / row.total_amount

    per_row_fraction = df.apply(_tip_fraction, axis=1)
    return per_row_fraction.mean()
def get_tip_mean_vector(df):
    """Return the mean per-row tip fraction, computed column-wise.

    The ``tip_amount`` column is divided element-wise by the ``total_amount``
    column in one vectorized operation, and the mean of the resulting
    fractions is returned.

    Author's recorded timing: 32 ms (the data set timed is not stated).
    """
    tips = df["tip_amount"]
    totals = df["total_amount"]
    return (tips / totals).mean()