-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
95 lines (73 loc) · 4.04 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Libraries
import pandas as pd
import datetime as dt
from lifetimes import BetaGeoFitter
from lifetimes import GammaGammaFitter
from sklearn.preprocessing import MinMaxScaler
def outlier_thresholds(dataframe, variable, *, low_quantile=0.01,
                       up_quantile=0.99, whisker=1.5):
    """Return the lower and upper outlier limits for one column.

    The limits follow the usual "quantile +/- whisker * range" rule, where
    the range is the distance between the two chosen quantiles. With the
    defaults the 1st and 99th percentiles are used (not the quartiles), so
    only very extreme values fall outside the limits.

    Args:
        dataframe: Frame holding the column to inspect. It is not modified.
        variable: Name of the numeric column.
        low_quantile: Lower quantile in [0, 1]. Defaults to 0.01.
        up_quantile: Upper quantile in [0, 1]. Defaults to 0.99.
        whisker: Multiplier applied to the inter-quantile range.
            Defaults to 1.5.

    Returns:
        A ``(low_limit, up_limit)`` tuple.
    """
    lower_value = dataframe[variable].quantile(low_quantile)
    upper_value = dataframe[variable].quantile(up_quantile)
    interquantile_range = upper_value - lower_value
    up_limit = upper_value + whisker * interquantile_range
    low_limit = lower_value - whisker * interquantile_range
    return low_limit, up_limit
def replace_with_thresholds(dataframe, variable):
    """Cap one column at its outlier limits, modifying the frame in place.

    The limits come from ``outlier_thresholds``. Values below the lower
    limit are overwritten with that limit rounded to zero decimals; values
    above the upper limit are overwritten with the rounded upper limit.

    Args:
        dataframe: Frame to modify in place.
        variable: Name of the numeric column to cap.

    Returns:
        None.
    """
    lower_bound, upper_bound = outlier_thresholds(dataframe, variable)

    # Floor the low tail first, then cap the high tail (same order as before).
    too_small = dataframe[variable] < lower_bound
    dataframe.loc[too_small, variable] = round(lower_bound, 0)

    too_large = dataframe[variable] > upper_bound
    dataframe.loc[too_large, variable] = round(upper_bound, 0)
def create_cltv_df(dataframe, analysis_date=None):
    """Build a per-customer CLTV table with BG/NBD and Gamma-Gamma estimates.

    Pipeline:
      1. Cap outliers in the online/offline order-count and spend columns.
      2. Derive total order count and total spend per customer, dropping
         customers with no orders or no spend.
      3. Parse every column whose name contains "date" as a datetime.
      4. Compute recency, customer age (T), frequency and average order
         value, keeping only customers with more than one order.
      5. Fit a BG/NBD model for expected purchases and a Gamma-Gamma model
         for expected average order value, then combine them into CLTV.
      6. Split customers into four CLTV quartile segments.

    Args:
        dataframe: Raw customer frame. Must contain ``master_id``,
            ``first_order_date``, ``last_order_date``,
            ``order_num_total_ever_online``, ``order_num_total_ever_offline``,
            ``customer_value_total_ever_online`` and
            ``customer_value_total_ever_offline``. The frame is copied and
            the caller's object is left untouched.
        analysis_date: Reference ``datetime`` used to measure customer age.
            Defaults to 2021-06-01, two days after the latest
            ``last_order_date`` (2021-05-30) noted for the data set this
            script was written for.

    Returns:
        A DataFrame with the columns ``customer_id``,
        ``recency_cltv_weekly``, ``T_weekly``, ``frequency``,
        ``monetary_cltv_avg``, ``exp_sales_3_month``, ``exp_sales_6_month``,
        ``exp_average_value``, ``cltv`` and ``cltv_segment``.
    """
    if analysis_date is None:
        analysis_date = dt.datetime(2021, 6, 1)

    # Work on a copy: the outlier capping and derived columns below would
    # otherwise be written into the caller's frame.
    dataframe = dataframe.copy()

    # Data Preparation
    columns = [
        "order_num_total_ever_online",
        "order_num_total_ever_offline",
        "customer_value_total_ever_offline",
        "customer_value_total_ever_online",
    ]
    for col in columns:
        replace_with_thresholds(dataframe, col)

    dataframe["order_num_total"] = (
        dataframe["order_num_total_ever_online"]
        + dataframe["order_num_total_ever_offline"]
    )
    dataframe["customer_value_total"] = (
        dataframe["customer_value_total_ever_offline"]
        + dataframe["customer_value_total_ever_online"]
    )

    # Drop customers with zero spend OR zero orders. The whole OR-ed
    # condition must be negated: `~` binds tighter than `|`, so negating
    # only the first comparison would keep the zero-order rows.
    no_activity = (
        (dataframe["customer_value_total"] == 0)
        | (dataframe["order_num_total"] == 0)
    )
    # .copy() makes the filtered frame independent, so the column
    # assignments below do not write into a slice of another frame.
    dataframe = dataframe.loc[~no_activity].copy()

    date_columns = dataframe.columns[dataframe.columns.str.contains("date")]
    dataframe[date_columns] = dataframe[date_columns].apply(pd.to_datetime)

    # Creating CLTV data structure
    cltv_df = pd.DataFrame()
    cltv_df["customer_id"] = dataframe["master_id"]
    # `.dt.days` replaces `.astype('timedelta64[D]')`, which pandas >= 2.0
    # rejects (only s/ms/us/ns resolutions are supported there).
    cltv_df["recency_cltv_weekly"] = (
        (dataframe["last_order_date"] - dataframe["first_order_date"]).dt.days / 7
    )
    cltv_df["T_weekly"] = (
        (analysis_date - dataframe["first_order_date"]).dt.days / 7
    )
    cltv_df["frequency"] = dataframe["order_num_total"]
    cltv_df["monetary_cltv_avg"] = (
        dataframe["customer_value_total"] / dataframe["order_num_total"]
    )
    # Only customers with more than one order are modelled.
    cltv_df = cltv_df[cltv_df["frequency"] > 1].copy()

    # BG-NBD Model
    bgf = BetaGeoFitter(penalizer_coef=0.001)
    bgf.fit(cltv_df["frequency"],
            cltv_df["recency_cltv_weekly"],
            cltv_df["T_weekly"])

    # Recency and T are in weeks, so the horizon is given in weeks too:
    # 4 * 3 weeks ~ 3 months, 4 * 6 weeks ~ 6 months.
    cltv_df["exp_sales_3_month"] = bgf.predict(4 * 3,
                                               cltv_df["frequency"],
                                               cltv_df["recency_cltv_weekly"],
                                               cltv_df["T_weekly"])
    cltv_df["exp_sales_6_month"] = bgf.predict(4 * 6,
                                               cltv_df["frequency"],
                                               cltv_df["recency_cltv_weekly"],
                                               cltv_df["T_weekly"])

    # Gamma-Gamma Model
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(cltv_df["frequency"], cltv_df["monetary_cltv_avg"])
    cltv_df["exp_average_value"] = ggf.conditional_expected_average_profit(
        cltv_df["frequency"],
        cltv_df["monetary_cltv_avg"],
    )

    # CLTV Prediction
    # NOTE(review): per the lifetimes docs `time` is in months and `freq`
    # is the unit of T ("W" = weeks) - confirm against the installed version.
    cltv = ggf.customer_lifetime_value(bgf,
                                       cltv_df["frequency"],
                                       cltv_df["recency_cltv_weekly"],
                                       cltv_df["T_weekly"],
                                       cltv_df["monetary_cltv_avg"],
                                       time=6,
                                       freq="W",
                                       discount_rate=0.01)
    cltv_df["cltv"] = cltv

    # CLTV segmentation: quartiles, labelled from lowest ("D") to highest ("A").
    cltv_df["cltv_segment"] = pd.qcut(cltv_df["cltv"], 4,
                                      labels=["D", "C", "B", "A"])
    return cltv_df
if __name__ == "__main__":
    # Script entry point: read the raw customer data, compute the CLTV
    # table, print it and save it as CSV.
    from pathlib import Path

    df = pd.read_csv('data/flo_data_20k.csv')
    cltv_df = create_cltv_df(df)
    print(cltv_df)

    # DataFrame.to_csv does not create missing parent directories, so make
    # sure the output folder exists before writing.
    output_path = Path('output/cltv_segments.csv')
    output_path.parent.mkdir(parents=True, exist_ok=True)
    cltv_df.to_csv(output_path, index=False)