forked from darinkist/medium_article_robyn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfb_robyn.exec.R
176 lines (138 loc) · 9.94 KB
/
fb_robyn.exec.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#############################################################################################
#################### Facebook MMM Open Source 'Robyn' Beta - V21.0 ######################
#################### 2021-03-03 ######################
#############################################################################################
################################################################
#### set locale for non English R
Sys.setlocale("LC_TIME", "C")
################################################################
#### load libraries
## R version 4.0.3 (2020-10-10) ## Update to R version 4.0.3 to avoid potential errors
## RStudio version 1.2.1335
rm(list=ls()); gc()
## Please make sure to install all libraries before rurnning the scripts
library(data.table)
library(stringr)
library(lubridate)
library(doFuture)
library(doRNG)
library(foreach)
library(glmnet)
library(car)
library(StanHeaders)
library(prophet)
library(rstan)
library(ggplot2)
library(gridExtra)
library(grid)
library(ggpubr)
library(see)
library(PerformanceAnalytics)
library(nloptr)
library(minpack.lm)
library(rPref)
library(reticulate)
library(rstudioapi)
## please see https://rstudio.github.io/reticulate/index.html for info on installing reticulate
# conda_create("r-reticulate") # must run this line once
# conda_install("r-reticulate", "nevergrad", pip=TRUE) # must install nevergrad in conda before running Robyn
# use_python("/Users/gufengzhou/Library/r-miniconda/envs/r-reticulate/bin/python3.6") # in case nevergrad still can't be imported after installation, please locate your python file and run this line
use_condaenv("r-reticulate")
################################################################
#### load data & scripts
script_path <- str_sub(rstudioapi::getActiveDocumentContext()$path, start = 1, end = max(unlist(str_locate_all(rstudioapi::getActiveDocumentContext()$path, "/"))))
dt_input <- fread(paste0(script_path,'simulated_marketing_data.csv')) # input time series should be daily, weekly or monthly
dt_holidays <- fread(paste0(script_path,'holidays.csv')) # when using own holidays, please keep the header c("ds", "holiday", "country", "year")
source(paste(script_path, "fb_robyn.func.R", sep=""))
source(paste(script_path, "fb_robyn.optm.R", sep=""))
################################################################
#### set model input variables
set_country <- "DE" # only one country allowed once. Including national holidays for 59 countries, whose list can be found on our githut guide
set_dateVarName <- c("DATE") # date format must be "2020-01-01"
set_depVarName <- c("revenue") # there should be only one dependent variable
set_depVarType <- "revenue" # "revenue" or "conversion" are allowed
activate_prophet <- T # Turn on or off the Prophet feature
set_prophet <- c("trend", "season", "holiday") # "trend","season", "weekday", "holiday" are provided and case-sensitive. Recommended to at least keep Trend & Holidays
set_prophetVarSign <- c("default","default", "default") # c("default", "positive", and "negative"). Recommend as default. Must be same length as set_prophet
activate_baseline <- T
set_baseVarName <- c("competitor_sales") # typically competitors, price & promotion, temperature, unemployment rate etc
set_baseVarSign <- c("negative") # c("default", "positive", and "negative"), control the signs of coefficients for baseline variables
set_mediaVarName <- c("tv_S" ,"radio_S", "paid_search_S") # we recommend to use media exposure metrics like impressions, GRP etc for the model. If not applicable, use spend instead
set_mediaVarSign <- c("positive", "positive", "positive") # c("default", "positive", and "negative"), control the signs of coefficients for media variables
set_mediaSpendName <- c("tv_S" ,"radio_S", "paid_search_S") # spends must have same order and same length as set_mediaVarName
set_factorVarName <- c() # please specify which variable above should be factor, otherwise leave empty c()
################################################################
#### set global model parameters
## set cores for parallel computing
registerDoSEQ(); detectCores()
set_cores <- 6 # I am using 6 cores from 8 on my local machine. Use detectCores() to find out cores
## set rolling window start (only works for whole dataset for now)
set_trainStartDate <- unlist(dt_input[, lapply(.SD, function(x) as.character(min(x))), .SDcols= set_dateVarName])
## set model core features
adstock <- "geometric" # geometric or weibull. weibull is more flexible, yet has one more parameter and thus takes longer
set_iter <- 500 # number of allowed iterations per trial. 500 is recommended
set_hyperOptimAlgo <- "DiscreteOnePlusOne" # selected algorithm for Nevergrad, the gradient-free optimisation library https://facebookresearch.github.io/nevergrad/index.html
set_trial <- 40 # number of allowed iterations per trial. 40 is recommended without calibration, 100 with calibration.
## Time estimation: with geometric adstock, 500 iterations * 40 trials and 6 cores, it takes less than 1 hour. Weibull takes at least twice as much time.
## helper plots: set plot to TRUE for transformation examples
f.plotAdstockCurves(F) # adstock transformation example plot, helping you understand geometric/theta and weibull/shape/scale transformation
f.plotResponseCurves(F) # s-curve transformation example plot, helping you understand hill/alpha/gamma transformation
################################################################
#### tune channel hyperparameters bounds
#### Guidance to set hypereparameter bounds ####
## 1. get correct hyperparameter names:
local_name <- f.getHyperNames(); local_name # names in set_hyperBoundLocal must equal names in local_name, case sensitive
## 2. get guidance for setting hyperparameter bounds:
# For geometric adstock, use theta, alpha & gamma. For weibull adstock, use shape, scale, alpha, gamma
# theta: In geometric adstock, theta is decay rate. guideline for usual media genre: TV c(0.3, 0.8), OOH/Print/Radio c(0.1, 0.4), digital c(0, 0.3)
# shape: In weibull adstock, shape controls the decay shape. Recommended c(0.0001, 2). The larger, the more S-shape. The smaller, the more L-shape
# scale: In weibull adstock, scale controls the decay inflexion point. Very conservative recommended bounce c(0, 0.1), becausee scale can increase adstocking half-life greaetly
# alpha: In s-curve transformation with hill function, alpha controls the shape between exponential and s-shape. Recommended c(0.5, 3). The larger the alpha, the more S-shape. The smaller, the more C-shape
# gamma: In s-curve transformation with hill function, gamma controls the inflexion point. Recommended bounce c(0.3, 1). The larger the gamma, the later the inflection point in the response curve
## 3. set each hyperparameter bounds. They either contains two values e.g. c(0, 0.5), or only one value (in which case you've "fixed" that hyperparameter)
set_hyperBoundLocal <- list(
radio_S_alphas = c(0.5, 3),
radio_S_gammas = c(0.3, 1),
radio_S_thetas = c(0.1, 0.4),
paid_search_S_alphas = c(0.5, 3),
paid_search_S_gammas = c(0.3, 1),
paid_search_S_thetas = c(0.1, 0.4),
tv_S_alphas = c(0.5, 3),
tv_S_gammas = c(0.3, 1),
tv_S_thetas = c(0.3, 0.8)
)
################################################################
#### define ground truth (e.g. Geo test, FB Lift test, MTA etc.)
activate_calibration <- F # Switch to TRUE to calibrate model.
# set_lift <- data.table(channel = c("facebook_I", "tv_S", "facebook_I"),
# liftStartDate = as.Date(c("2018-05-01", "2017-11-27", "2018-07-01")),
# liftEndDate = as.Date(c("2018-06-10", "2017-12-03", "2018-07-20")),
# liftAbs = c(400000, 300000, 200000))
################################################################
#### Prepare input data
dt_mod <- f.inputWrangling()
################################################################
#### Run models
model_output_collect <- f.robyn(set_hyperBoundLocal
,optimizer_name = set_hyperOptimAlgo
,set_trial = set_trial
,set_cores = set_cores
,plot_folder = paste0(script_path,'../plots')) # please set your folder path to save plots. It ends without "/".
## reload old models from csv
# dt_hyppar_fixed <- fread("/Users/gufengzhou/Documents/GitHub/plots/2021-04-07 09.12/pareto_hyperparameters.csv") # load hyperparameter csv. Provide your own path.
# model_output_collect <- f.robyn.fixed(plot_folder = "~/Documents/GitHub/plots", dt_hyppar_fixed = dt_hyppar_fixed[solID == "2_16_5"]) # solID must be included in the csv
################################################################
#### Budget Allocator - Beta
## Budget allocator result requires further validation. Please use this result with caution.
## Please don't interpret budget allocation result if there's no satisfying MMM result
model_output_collect$allSolutions
optim_result <- f.budgetAllocator(modID = "3_30_1" # input one of the model IDs in model_output_collect$allSolutions to get optimisation result
,scenario = "max_historical_response" # c(max_historical_response, max_response_expected_spend)
#,expected_spend = 100000 # specify future spend volume. only applies when scenario = "max_response_expected_spend"
#,expected_spend_days = 90 # specify period for the future spend volumne in days. only applies when scenario = "max_response_expected_spend"
,channel_constr_low = c(0.7, 0.75, 0.60) # must be between 0.01-1 and has same length and order as set_mediaVarName
,channel_constr_up = c(1.2, 1.5, 1.5) # not recommended to 'exaggerate' upper bounds. 1.5 means channel budget can increase to 150% of current level
)