Skip to content

Commit 22d7e89

Browse files
committed
Patch: Update GBR weighting scheme and add new tests.
1 parent c097f0c commit 22d7e89

13 files changed

+309
-114
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ Suggests:
1515
testthat (>= 0.10.0),
1616
knitr (>= 1.12.3)
1717
VignetteBuilder: knitr
18-
Date: 2017-03-06
18+
Date: 2017-03-22
1919
Version: 1.0.0
2020
License: Apache License 2.0 | file LICENSE
2121
Copyright: Copyright (C) 2017 Google, Inc.

R/constants.R

+5-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,11 @@ kStratum <- "stratum"
5151
kSpendChange <- ".spend"
5252
# SetIncrementalResponse<-
5353
kResponse <- ".response"
54-
54+
# DoGBRROASAnalysis, EstimateIncremental.
55+
kGbr1MinObs <- 4L
56+
kIncrCost <- "incr.cost"
57+
kWeight <- "weight"
58+
# Group ID indicating a geo to be omitted.
5559
kExcludeGeoGroup <- 0L
5660

5761
# IDs for various models.

R/dogbrroasanalysis.R

+31-6
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,36 @@ DoGBRROASAnalysis.GBRROASAnalysisData <- function(obj, ...) {
5050

5151
# Find ad spend differential for each geo.
5252
incremental.cost <- EstimateIncremental(obj, variable="cost")
53-
# Augment the original object with these two columns.
54-
obj[["incr.cost"]] <- incremental.cost
55-
obj[["lmweights"]] <- ComputeLinearModelWeights(obj[["resp.pre"]])
53+
# Augment the original object with columns kIncrCost & kWeight.
54+
obj[[kIncrCost]] <- incremental.cost
55+
#
56+
lmweights <- ComputeLinearModelWeights(obj[[kRespPre]])
57+
power <- attr(lmweights, "power")
58+
missing.weights <- structure(is.na(lmweights), names=obj[[kGeo]])
59+
if (any(missing.weights)) {
60+
warning(FormatText(missing.weights,
61+
"$N geo{|s} ($x) ha{s|ve} zero pretest response"))
62+
}
63+
obj[[kWeight]] <- lmweights
64+
data <- obj[!missing.weights, , drop=FALSE]
65+
n.data.points <- nrow(data)
66+
# Cannot fit if there are fewer than kGbr1MinObs geos available with weights.
67+
assert_that(n.data.points >= kGbr1MinObs,
68+
msg=Message(FormatText(n.data.points,
69+
"Cannot fit GBR model:",
70+
" {no|only one|only $N} data point{|s} available")))
71+
# Cannot fit if there are no test or control geos available.
72+
assert_that(sum(data[["control"]]) > 0,
73+
msg=Message(FormatText(n.data.points,
74+
"Cannot fit GBR model: no control geos to fit")))
75+
assert_that(sum(!data[["control"]]) > 0,
76+
msg=Message(FormatText(n.data.points,
77+
"Cannot fit GBR model: no treatment geos to fit")))
5678
# Fit the iROAS model.
57-
lmfit <- lm(resp.test ~ resp.pre + incr.cost, data=obj,
58-
weights=obj[["lmweights"]])
79+
model <- as.formula(sprintf("%s ~ %s + %s", kRespTest, kRespPre, kIncrCost))
80+
lmfit <- lm(model, data=data, weights=data[[kWeight]])
81+
assert_that(!anyNA(coef(lmfit)),
82+
msg=Message("GBR model fit failed: NA in coefficient"))
5983

6084
.PosteriorBeta2Tail <- function(x) {
6185
# Calculates the posterior of Pr(beta2 > x | data) with the uniform prior
@@ -88,8 +112,9 @@ DoGBRROASAnalysis.GBRROASAnalysisData <- function(obj, ...) {
88112
return(prob)
89113
}
90114
obj.result <- list(lmfit=lmfit,
91-
data=obj,
115+
data=data,
92116
iroas.post=.PosteriorBeta2Tail,
117+
power=power,
93118
model=kGBRModel1)
94119
class(obj.result) <- c(kClassName, class(obj.result))
95120
return(obj.result)

R/estimateincremental.R

+26-15
Original file line numberDiff line numberDiff line change
@@ -67,28 +67,39 @@ EstimateIncremental.GBRROASAnalysisData <- function(obj, variable=
6767
# For some experiments, ad spend in all of the control geos should
6868
# be zero in the pre and test periods. But, there may be small
6969
# amounts of ad spend in some of these geos.
70+
7071
variable <- match.arg(variable)
7172
df.control <- obj[obj[[kControl]], , drop=FALSE]
72-
pre <- switch(variable, response="resp.pre", cost="cost.pre")
73-
post <- switch(variable, response="resp.test", cost="cost.test")
73+
pre <- switch(variable, response=kRespPre, cost=kCostPre)
74+
post <- switch(variable, response=kRespTest, cost=kCostTest)
7475
lmweights <- ComputeLinearModelWeights(df.control[[pre]])
75-
counterfactual.model <- lm(post ~ pre,
76-
data = list(
77-
post=df.control[[post]],
78-
pre=df.control[[pre]]),
79-
weights=lmweights)
80-
# If all pre-test variables are constants, estimation is not possible.
81-
if (is.na(coef(counterfactual.model)["pre"])) {
82-
# In case estimation is not possible, the counterfactual is simply 'pre'.
83-
counterfactual <- obj[[pre]]
76+
df.control[[kWeight]] <- lmweights
77+
missing.weights <- is.na(lmweights)
78+
if (any(missing.weights)) {
79+
df.control <- df.control[!missing.weights, , drop=FALSE]
80+
}
81+
if (nrow(df.control) >= kGbr1MinObs - 1L) {
82+
counterfactual.model <- lm(post ~ pre,
83+
data = list(
84+
post=df.control[[post]],
85+
pre=df.control[[pre]]),
86+
weights=df.control[[kWeight]])
87+
# If all pre-test variables are constants, estimation is not possible.
88+
if (is.na(coef(counterfactual.model)["pre"])) {
89+
# In case estimation is not possible, the counterfactual is simply 'pre'.
90+
counterfactual <- obj[[pre]]
91+
} else {
92+
# Compute the counterfactual: what 'post' would have been, given 'pre'.
93+
counterfactual <- predict(counterfactual.model,
94+
newdata = list(pre=obj[[pre]]))
95+
}
8496
} else {
85-
# Compute the counterfactual: what 'post' would have been, given 'pre'.
86-
counterfactual <- predict(counterfactual.model,
87-
newdata = list(pre=obj[[pre]]))
97+
# Estimation is not possible; the counterfactual is simply 'pre'.
98+
counterfactual <- obj[[pre]]
8899
}
89100
incremental <- (obj[[post]] - counterfactual)
90101
# For Control geos, set the differential to zero.
91-
is.control <- obj[["control"]]
102+
is.control <- obj[[kControl]]
92103
incremental[is.control] <- 0
93104
return(incremental)
94105
}

R/mapgeogroups.R

+6-2
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,12 @@
8585

8686
geo.group.column <- obj[[kGeoGroup]]
8787
all.old.group.ids <- sort(na.omit(unique(geo.group.column)))
88-
n.old.groups <- max(all.old.group.ids)
89-
if (length(all.old.group.ids) == 0 || n.old.groups == 0) {
88+
if (length(all.old.group.ids) == 0) {
89+
n.old.groups <- 0L
90+
} else {
91+
n.old.groups <- max(all.old.group.ids)
92+
}
93+
if (n.old.groups == 0) {
9094
assert_that(length(group.map) == 0,
9195
msg=Message("Nothing to map: no geo group numbers in the data"))
9296
return(obj)

R/utils_analysis.R

+11-10
Original file line numberDiff line numberDiff line change
@@ -12,32 +12,33 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
# Utilities for supporting analyses.
16-
17-
ComputeLinearModelWeights <- function(response, epsilon=0.001, power=2.0) {
15+
ComputeLinearModelWeights <- function(response, power=2.0) {
1816
# Computes the weights to be used in the weighted linear model used to
1917
# estimate ROAS.
2018
#
2119
# Args:
2220
# response: a vector of the response in the pre period. Length equal to the
2321
# number of geos. Must be all nonnegative.
24-
# epsilon: (number) a small positive increment to add to
25-
# 'response' to avoid 1 / 0.
2622
# power: default power to which 'response' is raised to. Can be overridden
27-
# by setting the global option 'geoexperiments.gbr1.weight.power'.
23+
# by setting the global option 'geoexperiments.gbr1.weight.power'. Must be
24+
# nonnegative.
2825
#
2926
# Returns:
30-
# A vector of weights of the same length as 'response'.
27+
# A vector of weights of the same length as 'response'. Data points with
28+
# response == 0 have weight NA (indicating these need to be taken special
29+
# care of). There is an attribute 'power' corresponding to the exponent
30+
# used.
3131
#
3232
# Notes:
3333
# If a component of 'response' tends to infinity, the
3434
# corresponding weight tends to 0 (i.e., the corresponding data
3535
# point is ignored).
3636

3737
assert_that(is.numeric(response), !anyNA(response), all(response >= 0))
38-
assert_that(is.real.number(epsilon), epsilon > 0)
3938
power <- getOption("geoexperiments.gbr1.weight.power", default=power)
40-
assert_that(is.numeric(power), !is.na(power))
41-
weights = 1 / (epsilon + response^power)
39+
assert_that(is.numeric(power), !is.na(power), power >= 0)
40+
weights <- 1 / (response^power)
41+
weights[response == 0] <- NA_real_
42+
attr(weights, "power") <- power
4243
return(weights)
4344
}

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ in the source package).
2929
[Measuring Ad Effectiveness Using Geo Experiments](http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/38355.pdf) 2011.
3030

3131
[2] Kerman, J., Wang, P. and Vaver, J.
32-
Measuring Ad Effectiveness Using Geo Experiments in a Time-Based Regression Framework. 2017.
32+
Estimating Ad Effectiveness Using Geo Experiments in a Time-Based Regression Framework. 2017.
94 Bytes
Binary file not shown.
-213 Bytes
Binary file not shown.

man/ComputeLinearModelWeights.Rd

+6-6
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,21 @@
77
\title{Computes the weights to be used in the weighted linear model used to
88
estimate ROAS.}
99
\usage{
10-
ComputeLinearModelWeights(response, epsilon = 0.001, power = 2)
10+
ComputeLinearModelWeights(response, power = 2)
1111
}
1212
\arguments{
1313
\item{response}{a vector of the response in the pre period. Length equal to
1414
the number of geos. Must be all nonnegative.}
1515

16-
\item{epsilon}{(number) a small positive increment to add to 'response' to
17-
avoid 1 / 0.}
18-
1916
\item{power}{default power to which 'response' is raised to. Can be
2017
overridden by setting the global option
21-
'geoexperiments.gbr1.weight.power'.}
18+
'geoexperiments.gbr1.weight.power'. Must be nonnegative.}
2219
}
2320
\value{
24-
A vector of weights of the same length as 'response'.
21+
A vector of weights of the same length as 'response'. Data points
22+
with response == 0 have weight NA (indicating these need to be taken
23+
special care of). There is an attribute 'power' corresponding to the
24+
exponent used.
2525
}
2626
\description{
2727
Computes the weights to be used in the weighted linear model used to

0 commit comments

Comments
 (0)