millett-a
diff --git a/‎DESCRIPTION
+1-1 b/‎DESCRIPTION
+1-1
diff --git a/‎R/constants.R
+5-1 b/‎R/constants.R
+5-1
diff --git a/‎R/dogbrroasanalysis.R
+31-6 b/‎R/dogbrroasanalysis.R
+31-6
diff --git a/‎R/estimateincremental.R
+26-15 b/‎R/estimateincremental.R
+26-15
diff --git a/‎R/mapgeogroups.R
+6-2 b/‎R/mapgeogroups.R
+6-2
diff --git a/‎R/utils_analysis.R
+11-10 b/‎R/utils_analysis.R
+11-10
diff --git a/‎README.md
+1-1 b/‎README.md
+1-1
diff --git a/‎inst/doc/GeoexperimentsResearch-manual.pdf
94 Bytes b/‎inst/doc/GeoexperimentsResearch-manual.pdf
94 Bytes
diff --git a/‎inst/doc/GeoexperimentsResearch-vignette.pdf
-213 Bytes b/‎inst/doc/GeoexperimentsResearch-vignette.pdf
-213 Bytes
diff --git a/‎man/ComputeLinearModelWeights.Rd
+6-6 b/‎man/ComputeLinearModelWeights.Rd
+6-6
@@ -15,7 +15,7 @@ Suggests:
     testthat (>= 0.10.0),
     knitr (>= 1.12.3)
 VignetteBuilder: knitr
-Date: 2017-03-06
+Date: 2017-03-22
 Version: 1.0.0
 License: Apache License 2.0 | file LICENSE
 Copyright: Copyright (C) 2017 Google, Inc.
 
@@ -51,7 +51,11 @@ kStratum <- "stratum"
 kSpendChange <- ".spend"
 # SetIncrementalResponse<-
 kResponse <- ".response"
-
+# DoGBRROASAnalysis, EstimateIncremental.
+kGbr1MinObs <- 4L
+kIncrCost <- "incr.cost"
+kWeight <- "weight"
+# Group ID indicating a geo to be omitted.
 kExcludeGeoGroup <- 0L
 
 # IDs for various models.
 
@@ -50,12 +50,36 @@ DoGBRROASAnalysis.GBRROASAnalysisData <- function(obj, ...) {
 
   # Find ad spend differential for each geo.
   incremental.cost <- EstimateIncremental(obj, variable="cost")
-  # Augment the original object with these two columns.
-  obj[["incr.cost"]] <- incremental.cost
-  obj[["lmweights"]] <- ComputeLinearModelWeights(obj[["resp.pre"]])
+  # Augment the original object with columns kIncrCost & kWeight.
+  obj[[kIncrCost]] <- incremental.cost
+  # Compute the model weights; a geo with zero pretest response gets NA.
+  lmweights <- ComputeLinearModelWeights(obj[[kRespPre]])
+  power <- attr(lmweights, "power")
+  missing.weights <- structure(is.na(lmweights), names=obj[[kGeo]])
+  if (any(missing.weights)) {
+    warning(FormatText(missing.weights,
+                       "$N geo{|s} ($x) ha{s|ve} zero pretest response"))
+  }
+  obj[[kWeight]] <- lmweights
+  data <- obj[!missing.weights, , drop=FALSE]
+  n.data.points <- nrow(data)
+  # Cannot fit if there are fewer than kGbr1MinObs geos available with weights.
+  assert_that(n.data.points >= kGbr1MinObs,
+              msg=Message(FormatText(n.data.points,
+                  "Cannot fit GBR model:",
+                  " {no|only one|only $N} data point{|s} available")))
+  # Cannot fit unless at least one control and one treatment geo remain.
+  assert_that(sum(data[["control"]]) > 0,
+              msg=Message(FormatText(n.data.points,
+                  "Cannot fit GBR model: no control geos to fit")))
+  assert_that(sum(!data[["control"]]) > 0,
+              msg=Message(FormatText(n.data.points,
+                  "Cannot fit GBR model: no treatment geos to fit")))
   # Fit the iROAS model.
-  lmfit <- lm(resp.test ~ resp.pre + incr.cost, data=obj,
-              weights=obj[["lmweights"]])
+  model <- as.formula(sprintf("%s ~ %s + %s", kRespTest, kRespPre, kIncrCost))
+  lmfit <- lm(model, data=data, weights=data[[kWeight]])
+  assert_that(!anyNA(coef(lmfit)),
+              msg=Message("GBR model fit failed: NA in coefficient"))
 
   .PosteriorBeta2Tail <- function(x) {
     # Calculates the posterior of Pr(beta2 > x | data) with the uniform prior
@@ -88,8 +112,9 @@ DoGBRROASAnalysis.GBRROASAnalysisData <- function(obj, ...) {
     return(prob)
   }
   obj.result <- list(lmfit=lmfit,
-                     data=obj,
+                     data=data,
                      iroas.post=.PosteriorBeta2Tail,
+                     power=power,
                      model=kGBRModel1)
   class(obj.result) <- c(kClassName, class(obj.result))
   return(obj.result)
 
@@ -67,28 +67,39 @@ EstimateIncremental.GBRROASAnalysisData <- function(obj, variable=
   # For some experiments, ad spend in all of the control geos should
   # be zero in the pre and test periods. But, there may be small
   # amounts of ad spend in some of these geos.
+
   variable <- match.arg(variable)
   df.control <- obj[obj[[kControl]], , drop=FALSE]
-  pre <- switch(variable, response="resp.pre", cost="cost.pre")
-  post <- switch(variable, response="resp.test", cost="cost.test")
+  pre <- switch(variable, response=kRespPre, cost=kCostPre)
+  post <- switch(variable, response=kRespTest, cost=kCostTest)
   lmweights <- ComputeLinearModelWeights(df.control[[pre]])
-  counterfactual.model <- lm(post ~ pre,
-                             data = list(
-                                 post=df.control[[post]],
-                                 pre=df.control[[pre]]),
-                             weights=lmweights)
-  # If all pre-test variables are constants, estimation is not possible.
-  if (is.na(coef(counterfactual.model)["pre"])) {
-    # In case estimation is not possible, the counterfactual is simply 'pre'.
-    counterfactual <- obj[[pre]]
+  df.control[[kWeight]] <- lmweights
+  missing.weights <- is.na(lmweights)
+  if (any(missing.weights)) {
+    df.control <- df.control[!missing.weights, , drop=FALSE]
+  }
+  if (nrow(df.control) >= kGbr1MinObs - 1L) {
+    counterfactual.model <- lm(post ~ pre,
+                               data = list(
+                                   post=df.control[[post]],
+                                   pre=df.control[[pre]]),
+                               weights=df.control[[kWeight]])
+    # If all pre-test variables are constants, estimation is not possible.
+    if (is.na(coef(counterfactual.model)["pre"])) {
+      # In case estimation is not possible, the counterfactual is simply 'pre'.
+      counterfactual <- obj[[pre]]
+    } else {
+      # Compute the counterfactual: what 'post' would have been, given 'pre'.
+      counterfactual <- predict(counterfactual.model,
+                                newdata = list(pre=obj[[pre]]))
+    }
   } else {
-    # Compute the counterfactual: what 'post' would have been, given 'pre'.
-    counterfactual <- predict(counterfactual.model,
-                              newdata = list(pre=obj[[pre]]))
+    # Estimation is not possible; the counterfactual is simply 'pre'.
+    counterfactual <- obj[[pre]]
   }
   incremental <- (obj[[post]] - counterfactual)
   # For Control geos, set the differential to zero.
-  is.control <- obj[["control"]]
+  is.control <- obj[[kControl]]
   incremental[is.control] <- 0
   return(incremental)
 }
@@ -85,8 +85,12 @@
 
   geo.group.column <- obj[[kGeoGroup]]
   all.old.group.ids <- sort(na.omit(unique(geo.group.column)))
-  n.old.groups <- max(all.old.group.ids)
-  if (length(all.old.group.ids) == 0 || n.old.groups == 0) {
+  if (length(all.old.group.ids) == 0) {
+    n.old.groups <- 0L
+  } else {
+    n.old.groups <- max(all.old.group.ids)
+  }
+  if (n.old.groups == 0) {
     assert_that(length(group.map) == 0,
                 msg=Message("Nothing to map: no geo group numbers in the data"))
     return(obj)
 
@@ -12,32 +12,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Utilities for supporting analyses.
-
-ComputeLinearModelWeights <- function(response, epsilon=0.001, power=2.0) {
+ComputeLinearModelWeights <- function(response, power=2.0) {
   # Computes the weights to be used in the weighted linear model used to
   # estimate ROAS.
   #
   # Args:
   #   response: a vector of the response in the pre period. Length equal to the
   #     number of geos. Must be all nonnegative.
-  #   epsilon: (number) a small positive increment to add to
-  #     'response' to avoid 1 / 0.
   #   power: default power to which 'response' is raised to. Can be overridden
-  #     by setting the global option 'geoexperiments.gbr1.weight.power'.
+  #     by setting the global option 'geoexperiments.gbr1.weight.power'. Must be
+  #     nonnegative.
   #
   # Returns:
-  #   A vector of weights of the same length as 'response'.
+  #   A vector of weights of the same length as 'response'. Data points with
+  #   response == 0 have weight NA, indicating that the caller must handle
+  #   them separately. The attribute 'power' holds the exponent that was
+  #   used.
   #
   # Notes:
   #    If a component of 'response' tends to infinity, the
   #    corresponding weight tends to 0 (i.e., the corresponding data
   #    point is ignored).
 
   assert_that(is.numeric(response), !anyNA(response), all(response >= 0))
-  assert_that(is.real.number(epsilon), epsilon > 0)
   power <- getOption("geoexperiments.gbr1.weight.power", default=power)
-  assert_that(is.numeric(power), !is.na(power))
-  weights = 1 / (epsilon + response^power)
+  assert_that(is.numeric(power), !is.na(power), power >= 0)
+  weights <- 1 / (response^power)
+  weights[response == 0] <- NA_real_
+  attr(weights, "power") <- power
   return(weights)
 }
@@ -29,4 +29,4 @@ in the source package).
     [Measuring Ad Effectiveness Using Geo Experiments](http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/38355.pdf) 2011.
 
 [2] Kerman, J., Wang, P. and Vaver, J.
-    Measuring Ad Effectiveness Using Geo Experiments in a Time-Based Regression Framework. 2017.
+    Estimating Ad Effectiveness Using Geo Experiments in a Time-Based Regression Framework. 2017.
@@ -7,21 +7,21 @@
 \title{Computes the weights to be used in the weighted linear model used to
 estimate ROAS.}
 \usage{
-ComputeLinearModelWeights(response, epsilon = 0.001, power = 2)
+ComputeLinearModelWeights(response, power = 2)
 }
 \arguments{
 \item{response}{a vector of the response in the pre period. Length equal to
 the number of geos. Must be all nonnegative.}
 
-\item{epsilon}{(number) a small positive increment to add to 'response' to
-avoid 1 / 0.}
-
 \item{power}{default power to which 'response' is raised to. Can be
 overridden by setting the global option
-'geoexperiments.gbr1.weight.power'.}
+'geoexperiments.gbr1.weight.power'. Must be nonnegative.}
 }
 \value{
-A vector of weights of the same length as 'response'.
+A vector of weights of the same length as 'response'. Data points
+  with response == 0 have weight NA, indicating that the caller must
+  handle them separately. The attribute 'power' holds the exponent
+  that was used.
 }
 \description{
 Computes the weights to be used in the weighted linear model used to