diff --git a/DESCRIPTION b/DESCRIPTION
index d8342f9..5cfb4fd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: dgo
Title: Dynamic Estimation of Group-Level Opinion
-Version: 0.2.11
-Date: 2017-10-26
+Version: 0.2.12
+Date: 2017-11-13
Description: Fit dynamic group-level IRT and MRP models from individual or
aggregated item response data. This package handles common preprocessing
tasks and extends functions for inspecting results, poststratification, and
diff --git a/Makefile b/Makefile
index 752c33f..318230f 100644
--- a/Makefile
+++ b/Makefile
@@ -11,7 +11,7 @@ else
R := R
endif
-all: clean docs data readme build check install
+all: clean docs data readme build check install site
quick: clean
diff --git a/NEWS.md b/NEWS.md
index 3923f0b..9431966 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,19 @@
+## dgo 0.2.12
+
+* Allow modeling of unobserved groups using aggregated data. The previous
+ behavior was to drop rows in `aggregate_data` indicating zero trials. (They
+ don't represent item responses.) Preserving them has the effect that
+ unobserved groups, defined partially or entirely by the values of the grouping
+ variables in zero-trial rows in `aggregate_data`, can be included in a model.
+* Fix an unexpected error when 1) `aggregate_data` is used without `item_data`,
+ 2) no demographic groups are specified via `group_names`, and 3) geographic
+ `modifier_data` is used.
+* Fix the check for missing `modifier_data`. Geographic `modifier_data` must
+ cover all combinations of the geo and time variables in the item response data
+ (individual or aggregated), but because of a bug in the validation of the
+ geographic data, this requirement was not always enforced. In some cases a
+ warning would appear instead of an error.
+
## dgo 0.2.11
* Add poststratification over posterior samples (closes #21).
diff --git a/R/restrict_input_data.r b/R/restrict_input_data.r
index a509b14..c871b27 100644
--- a/R/restrict_input_data.r
+++ b/R/restrict_input_data.r
@@ -63,7 +63,7 @@ restrict_modifier <- function(modifier_data, group_grid, ctrl) {
modifier_data <- modifier_data[geo_time_grid, nomatch = 0]
# confirm that modifier data covers all modeled geo and time
- missing_geo_time <- modifier_data[!geo_time_grid]
+ missing_geo_time <- geo_time_grid[!modifier_data]
if (nrow(missing_geo_time)) {
stop("Not all pairs of time periods and geographic areas are in ",
"modifier_data. ", nrow(missing_geo_time), " missing.")
@@ -122,11 +122,6 @@ restrict_aggregates <- function(aggregate_data, ctrl) {
stop("no rows in aggregate data remaining after subsetting to items ",
"in `aggregate_item_names`")
- aggregate_data <- aggregate_data[get("n_grp") > 0]
- if (!nrow(aggregate_data))
- stop("no rows in aggregate data remaining after dropping unobserved ",
- "group-item combinations")
-
extra_colnames <- setdiff(names(aggregate_data),
c(ctrl@geo_name, ctrl@time_name, ctrl@group_names, "item", "s_grp", "n_grp"))
if (length(extra_colnames)) {
diff --git a/R/shape_hierarchical.r b/R/shape_hierarchical.r
index eb2b084..0cdb629 100644
--- a/R/shape_hierarchical.r
+++ b/R/shape_hierarchical.r
@@ -10,8 +10,10 @@ shape_hierarchical_data <- function(modifier_data, modifier_names, group_grid_t,
hierarchical <- data.table::copy(modifier_data)
hierarchical <- drop_extra_cols(hierarchical, modifier_names, ctrl)
data.table::setkeyv(hierarchical, c(ctrl@geo_name, ctrl@time_name))
- unmodeled <- zero_unmodeled(hierarchical, modifier_names, group_grid_t, ctrl)
- hierarchical <- rbind(hierarchical, unmodeled)
+ if (length(ctrl@group_names)) {
+ unmodeled <- zero_unmodeled(hierarchical, modifier_names, group_grid_t, ctrl)
+ hierarchical <- rbind(hierarchical, unmodeled)
+ }
zz <- create_zz(hierarchical, modifier_names, ctrl)
return(zz)
}
@@ -40,7 +42,8 @@ zero_unmodeled <- function(hierarchical, modifier_names, group_grid_t, ctrl) {
paste0(x, unique(group_grid_t[[x]]))[-1]
}))
unmodeled_frame <- expand.grid(c(list(unmodeled_param_levels,
- ctrl@time_filter), rep(list(0L), length(modifier_names))))
+ ctrl@time_filter), rep(list(0L), length(modifier_names))),
+ stringsAsFactors = FALSE)
unmodeled_frame <- setNames(unmodeled_frame, c(ctrl@geo_name, ctrl@time_name,
modifier_names))
data.table::setDT(unmodeled_frame, key = c(ctrl@geo_name, ctrl@time_name))
diff --git a/README.Rmd b/README.Rmd
index 88dc94c..2d084a2 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -1,4 +1,5 @@
---
+title: 'dgo: Dynamic Estimation of Group-Level Opinion'
output: github_document
---
[![Build Status](https://travis-ci.org/jamesdunham/dgo.svg?branch=master)](https://travis-ci.org/jamesdunham/dgo)
@@ -7,29 +8,29 @@ output: github_document
# Introduction
-dgo is an R package for the dynamic estimation of group-level opinion. The
-package can be used to estimate subpopulation groups' average latent
-conservatism (or other latent trait) from individuals' responses to dichotomous
-questions using a Bayesian group-level IRT approach developed by [Caughey and
-Warshaw
-2015](http://pan.oxfordjournals.org/content/early/2015/02/04/pan.mpu021.full.pdf+html)
-that models latent traits at the level of demographic and/or geographic groups
-rather than individuals. This approach uses a hierarchical model to borrow
-strength cross-sectionally and dynamic linear models to do so across time. The
-group-level estimates can be weighted to generate estimates for geographic
-units, such as states.
-
-dgo can also be used to estimate smoothed estimates of subpopulation groups'
-average responses on individual survey questions using a dynamic multi-level
-regression and poststratification (MRP) model ([Park, Gelman, and Bafumi
+dgo is an R package for the dynamic estimation of group-level public opinion.
+You can use the package to estimate latent trait means in subpopulations from
+survey data. For example, dgo can estimate the average policy liberalism in each
+American state over time among Democrats, Independents, and Republicans, given
+their answers to survey questions about policy proposals.
+
+dgo accomplishes this using a Bayesian group-level IRT approach developed by
+[Caughey and Warshaw
+2015](http://pan.oxfordjournals.org/content/early/2015/02/04/pan.mpu021.full.pdf+html).
+It models latent traits at the level of demographic and geographic groups rather
+than individuals. It uses a hierarchical model to borrow strength
+cross-sectionally and dynamic linear models to do so across time.
+
+The package can also be used to produce smoothed estimates of subpopulations'
+average responses to single survey items, using a dynamic multi-level regression
+and poststratification (MRP) model ([Park, Gelman, and Bafumi
2004](http://stat.columbia.edu/~gelman/research/published/StateOpinionsNationalPolls.050712.dkp.pdf)).
-For instance, it could be used to estimate public opinion in each state on
+For instance, you can use dgo to estimate public opinion in each state on
same-sex marriage or the Affordable Care Act.
This model opens up new areas of research on historical public opinion in the
-United States at the subnational level. It also enables scholars of comparative
-politics to estimate dynamic models of public opinion opinion at the country or
-subnational level.
+United States at the subnational level. It also allows scholars of comparative
+politics to estimate dynamic cross-national models of public opinion.
```{r, knitr-options, echo = FALSE}
# rmarkdown::render("README.Rmd")
@@ -67,7 +68,7 @@ If you don't have already have RStan, follow its
Load the package and set RStan's recommended options for a local, multicore
machine with excess RAM:
-```{r, result = 'hide'}
+```{r, results = 'hide', message = FALSE}
library(dgo)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
diff --git a/README.md b/README.md
index aee9baf..29b5041 100644
--- a/README.md
+++ b/README.md
@@ -1,48 +1,73 @@
-
-[![Build Status](https://travis-ci.org/jamesdunham/dgo.svg?branch=master)](https://travis-ci.org/jamesdunham/dgo) [![Build status](https://ci.appveyor.com/api/projects/status/1ta36kmoqen98k87?svg=true)](https://ci.appveyor.com/project/jamesdunham/dgo) [![codecov](https://codecov.io/gh/jamesdunham/dgo/branch/master/graph/badge.svg)](https://codecov.io/gh/jamesdunham/dgo)
-
-Introduction
-============
-
-dgo is an R package for the dynamic estimation of group-level opinion. The package can be used to estimate subpopulation groups' average latent conservatism (or other latent trait) from individuals' responses to dichotomous questions using a Bayesian group-level IRT approach developed by [Caughey and Warshaw 2015](http://pan.oxfordjournals.org/content/early/2015/02/04/pan.mpu021.full.pdf+html) that models latent traits at the level of demographic and/or geographic groups rather than individuals. This approach uses a hierarchical model to borrow strength cross-sectionally and dynamic linear models to do so across time. The group-level estimates can be weighted to generate estimates for geographic units, such as states.
-
-dgo can also be used to estimate smoothed estimates of subpopulation groups' average responses on individual survey questions using a dynamic multi-level regression and poststratification (MRP) model ([Park, Gelman, and Bafumi 2004](http://stat.columbia.edu/~gelman/research/published/StateOpinionsNationalPolls.050712.dkp.pdf)). For instance, it could be used to estimate public opinion in each state on same-sex marriage or the Affordable Care Act.
-
-This model opens up new areas of research on historical public opinion in the United States at the subnational level. It also enables scholars of comparative politics to estimate dynamic models of public opinion opinion at the country or subnational level.
-
-Installation
-============
-
-dgo can be installed from [CRAN](https://CRAN.R-project.org/package=dgo):
+dgo: Dynamic Estimation of Group-Level Opinion
+================
+
+[![Build
+Status](https://travis-ci.org/jamesdunham/dgo.svg?branch=master)](https://travis-ci.org/jamesdunham/dgo)
+[![Build
+status](https://ci.appveyor.com/api/projects/status/1ta36kmoqen98k87?svg=true)](https://ci.appveyor.com/project/jamesdunham/dgo)
+[![codecov](https://codecov.io/gh/jamesdunham/dgo/branch/master/graph/badge.svg)](https://codecov.io/gh/jamesdunham/dgo)
+
+# Introduction
+
+dgo is an R package for the dynamic estimation of group-level public
+opinion. You can use the package to estimate latent trait means in
+subpopulations from survey data. For example, dgo can estimate the
+average policy liberalism in each American state over time among
+Democrats, Independents, and Republicans, given their answers to survey
+questions about policy proposals.
+
+dgo accomplishes this using a Bayesian group-level IRT approach
+developed by [Caughey and Warshaw
+2015](http://pan.oxfordjournals.org/content/early/2015/02/04/pan.mpu021.full.pdf+html).
+It models latent traits at the level of demographic and geographic
+groups rather than individuals. It uses a hierarchical model to borrow
+strength cross-sectionally and dynamic linear models to do so across
+time.
+
+The package can also be used to produce smoothed estimates of
+subpopulations’ average responses to single survey items, using a
+dynamic multi-level regression and poststratification (MRP) model
+([Park, Gelman, and Bafumi
+2004](http://stat.columbia.edu/~gelman/research/published/StateOpinionsNationalPolls.050712.dkp.pdf)).
+For instance, you can use dgo to estimate public opinion in each state
+on same-sex marriage or the Affordable Care Act.
+
+This model opens up new areas of research on historical public opinion
+in the United States at the subnational level. It also allows scholars
+of comparative politics to estimate dynamic cross-national models of
+public opinion.
+
+# Installation
+
+dgo can be installed from
+[CRAN](https://CRAN.R-project.org/package=dgo):
``` r
install.packages("dgo")
```
-Or get the latest version from [GitHub](https://github.com/jamesdunham/dgo) using [devtools](https://github.com/hadley/devtools/):
+Or get the latest version from
+[GitHub](https://github.com/jamesdunham/dgo) using
+[devtools](https://github.com/hadley/devtools/):
``` r
if (!require(devtools, quietly = TRUE)) install.packages("devtools")
devtools::install_github("jamesdunham/dgo")
```
-dgo requires a working installation of [RStan](http://mc-stan.org/interfaces/rstan.html). If you don't have already have RStan, follow its "[Getting Started](https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started)" guide.
+dgo requires a working installation of
+[RStan](http://mc-stan.org/interfaces/rstan.html). If you don’t
+already have RStan, follow its “[Getting
+Started](https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started)”
+guide.
-Usage
-=====
+# Usage
-Load the package and set RStan's recommended options for a local, multicore machine with excess RAM:
+Load the package and set RStan’s recommended options for a local,
+multicore machine with excess RAM:
``` r
library(dgo)
-#> Loading required package: dgodata
-#> Loading required package: rstan
-#> Loading required package: ggplot2
-#> Loading required package: StanHeaders
-#> rstan (Version 2.16.2, packaged: 2017-07-03 09:24:58 UTC, GitRev: 2e1f913d3ca3)
-#> For execution on a local, multicore CPU with excess RAM we recommend calling
-#> rstan_options(auto_write = TRUE)
-#> options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
```
@@ -50,24 +75,44 @@ options(mc.cores = parallel::detectCores())
The minimal workflow from raw data to estimation is:
1. shape input data using the `shape()` function; and
-2. pass the result to the `dgirt()` function to estimate a latent trait (e.g., conservatism) or `dgmrp()` function to estimate opinion on a single survey question.
-
-Troubleshooting
-===============
-
-Please [report issues](https://github.com/jamesdunham/dgo/issues) that you encounter.
-
-- OS X only: RStan creates temporary files during estimation in a location given by `tempdir()`, typically an arbitrary location in `/var/folders`. If a model runs for days, these files can be cleaned up while still needed, which induces an error. A good solution is to set a safer path for temporary files, using an environment variable checked at session startup. For help setting environment variables, see the Stack Overflow question [here](https://stackoverflow.com/questions/17107206/change-temporary-directory). Confirm the new path before starting your model run by restarting R and checking the output from `tempdir()`.
-
-- Models fitted before October 2016 (specifically < [\#8e6a2cf](https://github.com/jamesdunham/dgo/commit/8e6a2cfbe00b2cd4a908b3067241e06124d143cd)) using dgirt are not fully compatible with dgo. Their contents can be extracted without using dgo, however, with the `$` indexing operator. For example: `as.data.frame(dgirtfit_object$stan.cmb)`.
-
-- Calling `dgirt()` or `dgmrp()` can generate [warnings](http://mc-stan.org/misc/warnings#compiler-warnings) during model compilation. These are safe to ignore, or can be suppressed by following the linked instructions.
-
-Contributing and citing
-=======================
-
-dgo is under development and we welcome [suggestions](https://github.com/jamesdunham/dgo/issues).
+2. pass the result to the `dgirt()` function to estimate a latent trait
+ (e.g., conservatism) or `dgmrp()` function to estimate opinion on a
+ single survey question.
+
+# Troubleshooting
+
+Please [report issues](https://github.com/jamesdunham/dgo/issues) that
+you encounter.
+
+ - OS X only: RStan creates temporary files during estimation in a
+ location given by `tempdir()`, typically an arbitrary location in
+ `/var/folders`. If a model runs for days, these files can be cleaned
+ up while still needed, which induces an error. A good solution is to
+ set a safer path for temporary files, using an environment variable
+ checked at session startup. For help setting environment variables,
+ see the Stack Overflow question
+ [here](https://stackoverflow.com/questions/17107206/change-temporary-directory).
+ Confirm the new path before starting your model run by restarting R
+ and checking the output from `tempdir()`.
+
+ - Models fitted before October 2016 (specifically \<
+ [\#8e6a2cf](https://github.com/jamesdunham/dgo/commit/8e6a2cfbe00b2cd4a908b3067241e06124d143cd))
+ using dgirt are not fully compatible with dgo. Their contents can be
+ extracted without using dgo, however, with the `$` indexing
+ operator. For example: `as.data.frame(dgirtfit_object$stan.cmb)`.
+
+ - Calling `dgirt()` or `dgmrp()` can generate
+ [warnings](http://mc-stan.org/misc/warnings#compiler-warnings)
+ during model compilation. These are safe to ignore, or can be
+ suppressed by following the linked instructions.
+
+# Contributing and citing
+
+dgo is under development and we welcome
+[suggestions](https://github.com/jamesdunham/dgo/issues).
The package citation is:
-Dunham, James, Devin Caughey, and Christopher Warshaw. 2017. dgo: Dynamic Estimation of Group-level Opinion. R package.
This vignette demonstrates estimation of public attitudes toward abortion from responses to a single survey item, using the dynamic multi-level regression and post-stratification (MRP) model implemented in dgmrp()
.
shape()
prepares input data for use with the modeling functions dgirt()
and dgmrp()
. Here we use the included opinion
dataset.
dgirt_in_abortion <- shape(opinion, item_names = "abortion", time_name = "year",
- geo_name = "state", group_names = "race3", geo_filter = c("CA", "GA", "LA",
- "MA"), id_vars = "source")
-#> Applying restrictions, pass 1...
-#> Dropped 5 rows for missingness in covariates
-#> Dropped 633 rows for lacking item responses
-#> Applying restrictions, pass 2...
-#> No changes
+ geo_name = "state", group_names = "race3", geo_filter = c("CA", "GA", "LA",
+ "MA"), id_vars = "source")
+#> Applying restrictions, pass 1...
+#> Dropped 5 rows for missingness in covariates
+#> Dropped 633 rows for lacking item responses
+#> Applying restrictions, pass 2...
+#> No changes
In this call to shape()
we specified:
abortion
);Notice that we named only one of these variables defining respondent groups using the group_names
argument. The geo_name
argument always takes the variable giving respondents’ local geographic area; it will be modeled differently.
Using the argument geo_filter
, we subset the input data to the given values of the geo_name
variable. And with the id_vars
argument, we named an identifier that we’d like to keep in the processed data. (Other unused variables will be dropped.)
summary()
gives a high-level description of the result.
summary(dgirt_in_abortion)
-#> Items:
-#> [1] "abortion"
-#> Respondents:
-#> 23,007 in `item_data`
-#> Grouping variables:
-#> [1] "year" "state" "race3"
-#> Time periods:
-#> [1] 2006 2007 2008 2009 2010
-#> Local geographic areas:
-#> [1] "CA" "GA" "LA" "MA"
-#> Hierarchical parameters:
-#> [1] "GA" "LA" "MA" "race3other" "race3white"
-#> Modifiers of hierarchical parameters:
-#> NULL
-#> Constants:
-#> Q T P N G H D
-#> 1 5 5 60 12 1 1
+summary(dgirt_in_abortion)
+#> Items:
+#> [1] "abortion"
+#> Respondents:
+#> 23,007 in `item_data`
+#> Grouping variables:
+#> [1] "year" "state" "race3"
+#> Time periods:
+#> [1] 2006 2007 2008 2009 2010
+#> Local geographic areas:
+#> [1] "CA" "GA" "LA" "MA"
+#> Hierarchical parameters:
+#> [1] "GA" "LA" "MA" "race3other" "race3white"
+#> Modifiers of hierarchical parameters:
+#> NULL
+#> Constants:
+#> Q T P N G H D
+#> 1 5 5 60 12 1 1
get_n()
and get_item_n()
give response counts.
get_n(dgirt_in_abortion, by = "state")
-#> state n
-#> 1: CA 14248
-#> 2: GA 4547
-#> 3: LA 1658
-#> 4: MA 2554
-get_item_n(dgirt_in_abortion, by = "year")
-#> year abortion
-#> 1: 2006 5275
-#> 2: 2007 1690
-#> 3: 2008 4697
-#> 4: 2009 2141
-#> 5: 2010 9204
+get_n(dgirt_in_abortion, by = "state")
+#> state n
+#> 1: CA 14248
+#> 2: GA 4547
+#> 3: LA 1658
+#> 4: MA 2554
+
+get_item_n(dgirt_in_abortion, by = "year")
+#> year abortion
+#> 1: 2006 5275
+#> 2: 2007 1690
+#> 3: 2008 4697
+#> 4: 2009 2141
+#> 5: 2010 9204
+dgmrp()
fits a dynamic multi-level regression and post-stratification (MRP) model to data processed by shape()
. Here, we’ll use it to estimate public attitudes toward abortion over time, for the groups defined by state
and race3
. (Specifically, by their Cartesian product.)
Under the hood, dgmrp()
uses RStan for MCMC sampling, and arguments can be passed to RStan’s stan()
via the ...
argument of dgmrp()
. This is almost always desirable. Here, we specify the number of sampler iterations, chains, and cores.
dgmrp_out_abortion <- dgmrp(dgirt_in_abortion, iter = 1500, chains = 4, cores =
- 4, seed = 42)
The model results are held in a dgmrp_fit
object. Methods from RStan like extract()
are available if needed because dgmrp_fit
is a subclass of stanfit
. But dgo provides its own methods for typical post-estimation tasks.
dgmrp_out_abortion <- dgmrp(dgirt_in_abortion, iter = 1500, chains = 4, cores =
-
-
-Work with results
+ 4, seed = 42)
+The model results are held in a dgmrp_fit
object. Methods from RStan like extract()
are available if needed because dgmrp_fit
is a subclass of stanfit
. But dgo provides its own methods for typical post-estimation tasks.
+Work with results
For a high-level summary of the result, use summary()
.
-summary(dgmrp_out_abortion)
-#> dgirt samples from 4 chains of 1500 iterations, 750 warmup, thinned every 1
-#> Drawn Mon May 29 23:27:34 2017
-#> Package version 0.2.10
-#> Model version 2017_01_04_singleissue
-#> 117 parameters; 60 theta_bars (year state race3)
-#> 5 periods 2006 to 2010
-#>
-#> n_eff
-#> Min. 1st Qu. Median Mean 3rd Qu. Max.
-#> 95.68 242.50 451.87 685.85 927.30 3000.00
-#>
-#> Rhat
-#> Min. 1st Qu. Median Mean 3rd Qu. Max.
-#> 0.9993 1.0028 1.0068 1.0081 1.0126 1.0406
-#>
-#> Elapsed time
-#> chain warmup sample total
-#> 1: 1 15S 16S 31S
-#> 2: 2 15S 11S 26S
-#> 3: 3 15S 19S 34S
-#> 4: 4 16S 10S 26S
+
+summary(dgmrp_out_abortion)
+#> dgirt samples from 4 chains of 1500 iterations, 750 warmup, thinned every 1
+#> Drawn Mon May 29 23:27:34 2017
+#> Package version 0.2.10
+#> Model version 2017_01_04_singleissue
+#> 117 parameters; 60 theta_bars (year state race3)
+#> 5 periods 2006 to 2010
+#>
+#> n_eff
+#> Min. 1st Qu. Median Mean 3rd Qu. Max.
+#> 95.68 242.50 451.87 685.85 927.30 3000.00
+#>
+#> Rhat
+#> Min. 1st Qu. Median Mean 3rd Qu. Max.
+#> 0.9993 1.0028 1.0068 1.0081 1.0126 1.0406
+#>
+#> Elapsed time
+#> chain warmup sample total
+#> 1: 1 15S 16S 31S
+#> 2: 2 15S 11S 26S
+#> 3: 3 15S 19S 34S
+#> 4: 4 16S 10S 26S
To apply scalar functions to posterior samples, use summarize()
. The default output gives summary statistics for the model’s theta_bar
parameters, which represent group means. These are indexed by time (year
) and group, where groups are again defined by local geographic area (state
) and any other respondent characteristics (race3
).
-head(summarize(dgmrp_out_abortion))
-#> param state race3 year mean sd median q_025
-#> 1: theta_bar CA black 2006 0.7739283 0.02098019 0.7749083 0.7307567
-#> 2: theta_bar CA black 2007 0.7980027 0.02771553 0.7979378 0.7439328
-#> 3: theta_bar CA black 2008 0.7232980 0.02362116 0.7231930 0.6786121
-#> 4: theta_bar CA black 2009 0.6863666 0.02128237 0.6863458 0.6463628
-#> 5: theta_bar CA black 2010 0.7407779 0.01682742 0.7414667 0.7058706
-#> 6: theta_bar CA other 2006 0.7347199 0.02322850 0.7354365 0.6872140
-#> q_975
-#> 1: 0.8144084
-#> 2: 0.8517811
-#> 3: 0.7693334
-#> 4: 0.7279652
-#> 5: 0.7717651
-#> 6: 0.7790182
+
+head(summarize(dgmrp_out_abortion))
+#> param state race3 year mean sd median q_025
+#> 1: theta_bar CA black 2006 0.7739283 0.02098019 0.7749083 0.7307567
+#> 2: theta_bar CA black 2007 0.7980027 0.02771553 0.7979378 0.7439328
+#> 3: theta_bar CA black 2008 0.7232980 0.02362116 0.7231930 0.6786121
+#> 4: theta_bar CA black 2009 0.6863666 0.02128237 0.6863458 0.6463628
+#> 5: theta_bar CA black 2010 0.7407779 0.01682742 0.7414667 0.7058706
+#> 6: theta_bar CA other 2006 0.7347199 0.02322850 0.7354365 0.6872140
+#> q_975
+#> 1: 0.8144084
+#> 2: 0.8517811
+#> 3: 0.7693334
+#> 4: 0.7279652
+#> 5: 0.7717651
+#> 6: 0.7790182
Alternatively, summarize()
can apply arbitrary functions to posterior samples for whatever parameter is given by its pars
argument.
-summarize(dgmrp_out_abortion, pars = "xi", funs = "var")
-#> param year var
-#> 1: xi 2006 0.01814362
-#> 2: xi 2007 0.05026942
-#> 3: xi 2008 0.05606188
-#> 4: xi 2009 0.04857038
-#> 5: xi 2010 0.04149793
+
+#> param year var
+#> 1: xi 2006 0.01814362
+#> 2: xi 2007 0.05026942
+#> 3: xi 2008 0.05606188
+#> 4: xi 2009 0.04857038
+#> 5: xi 2010 0.04149793
To access posterior samples in tabular form use as.data.frame()
. By default, this method returns post-warmup samples for the theta_bar
parameters, but like other methods takes a pars
argument.
-head(as.data.frame(dgmrp_out_abortion))
-#> param state race3 year iteration value
-#> 1: theta_bar CA black 2006 1 0.7661626
-#> 2: theta_bar CA black 2006 2 0.7690362
-#> 3: theta_bar CA black 2006 3 0.7656257
-#> 4: theta_bar CA black 2006 4 0.7935372
-#> 5: theta_bar CA black 2006 5 0.7544080
-#> 6: theta_bar CA black 2006 6 0.7819740
+
+head(as.data.frame(dgmrp_out_abortion))
+#> param state race3 year iteration value
+#> 1: theta_bar CA black 2006 1 0.7661626
+#> 2: theta_bar CA black 2006 2 0.7690362
+#> 3: theta_bar CA black 2006 3 0.7656257
+#> 4: theta_bar CA black 2006 4 0.7935372
+#> 5: theta_bar CA black 2006 5 0.7544080
+#> 6: theta_bar CA black 2006 6 0.7819740
To poststratify the results use poststratify()
. Here, we use the group population proportions bundled as annual_state_race_targets
to reweight and aggregate estimates to strata defined by state-years.
-poststratify(dgmrp_out_abortion, annual_state_race_targets, strata_names =
- c("state", "year"), aggregated_names = "race3")
-#> state year value
-#> 1: CA 2006 0.7187353
-#> 2: CA 2007 0.7469064
-#> 3: CA 2008 0.6562966
-#> 4: CA 2009 0.6272075
-#> 5: CA 2010 0.6754691
-#> 6: GA 2006 0.6339750
-#> 7: GA 2007 0.6225482
-#> 8: GA 2008 0.5232615
-#> 9: GA 2009 0.5095145
-#> 10: GA 2010 0.5705449
-#> 11: LA 2006 0.5266416
-#> 12: LA 2007 0.4769044
-#> 13: LA 2008 0.4142786
-#> 14: LA 2009 0.3985367
-#> 15: LA 2010 0.4229707
-#> 16: MA 2006 0.7629194
-#> 17: MA 2007 0.8099707
-#> 18: MA 2008 0.7058450
-#> 19: MA 2009 0.6624888
-#> 20: MA 2010 0.7078342
+
+poststratify(dgmrp_out_abortion, annual_state_race_targets, strata_names =
+
+ c("state", "year"), aggregated_names = "race3")
+#> state year value
+#> 1: CA 2006 0.7187353
+#> 2: CA 2007 0.7469064
+#> 3: CA 2008 0.6562966
+#> 4: CA 2009 0.6272075
+#> 5: CA 2010 0.6754691
+#> 6: GA 2006 0.6339750
+#> 7: GA 2007 0.6225482
+#> 8: GA 2008 0.5232615
+#> 9: GA 2009 0.5095145
+#> 10: GA 2010 0.5705449
+#> 11: LA 2006 0.5266416
+#> 12: LA 2007 0.4769044
+#> 13: LA 2008 0.4142786
+#> 14: LA 2009 0.3985367
+#> 15: LA 2010 0.4229707
+#> 16: MA 2006 0.7629194
+#> 17: MA 2007 0.8099707
+#> 18: MA 2008 0.7058450
+#> 19: MA 2009 0.6624888
+#> 20: MA 2010 0.7078342
To plot the results use dgirt_plot()
. This method plots summaries of posterior samples by time period. By default, it shows a 95% credible interval around posterior medians for the theta_bar
parameters, for each local geographic area. Here we omit the CIs.
-dgirt_plot(dgmrp_out_abortion, y_min = NULL, y_max = NULL)
+
dgirt_plot()
can also plot the data.frame
output from poststratify()
, given arguments that identify the relevant variables. Below, we aggregate over the demographic grouping variable race3
, resulting in a data.frame
of estimates by state-year.
-ps <- poststratify(dgmrp_out_abortion, annual_state_race_targets, strata_names =
- c("state", "year"), aggregated_names = "race3")
-head(ps)
-#> state year value
-#> 1: CA 2006 0.7187353
-#> 2: CA 2007 0.7469064
-#> 3: CA 2008 0.6562966
-#> 4: CA 2009 0.6272075
-#> 5: CA 2010 0.6754691
-#> 6: GA 2006 0.6339750
-dgirt_plot(ps, group_names = NULL, time_name = "year", geo_name = "state")
+
+ c("state", "year"), aggregated_names = "race3")
+
+head(ps)
+#> state year value
+#> 1: CA 2006 0.7187353
+#> 2: CA 2007 0.7469064
+#> 3: CA 2008 0.6562966
+#> 4: CA 2009 0.6272075
+#> 5: CA 2010 0.6754691
+#> 6: GA 2006 0.6339750
+
In the call to dgirt_plot()
, we passed the names of the state
and year
variables. The group_names
argument was then NULL
, because there were no grouping variables left after we aggregated over race3
.
-
+
This vignette demonstrates estimation of latent policy liberalism from individuals’ responses to five survey items, using the Bayesian group-level IRT model implemented in dgirt()
.
shape()
prepares input data for use with the modeling functions dgirt()
and dgmrp()
. Here we use the included opinion
dataset.
dgirt_in_liberalism <- shape(opinion, item_names = c("abortion",
- "affirmative_action","stemcell_research" , "gaymarriage_amendment",
- "partialbirth_abortion") , time_name = "year", geo_name = "state",
- group_names = "race3", geo_filter = c("CA", "GA", "LA", "MA"))
-#> Applying restrictions, pass 1...
-#> Dropped 5 rows for missingness in covariates
-#> Dropped 8 rows for lacking item responses
-#> Applying restrictions, pass 2...
-#> No changes
+ "affirmative_action","stemcell_research" , "gaymarriage_amendment",
+ "partialbirth_abortion") , time_name = "year", geo_name = "state",
+ group_names = "race3", geo_filter = c("CA", "GA", "LA", "MA"))
+#> Applying restrictions, pass 1...
+#> Dropped 5 rows for missingness in covariates
+#> Dropped 8 rows for lacking item responses
+#> Applying restrictions, pass 2...
+#> No changes
In this call to shape()
we specified:
item_names
;Notice that we named only one of these variables defining respondent groups using the group_names
argument. The geo_name
argument always takes the variable giving respondents’ local geographic area; it will be modeled differently.
Using the argument geo_filter
, we subset the input data to the given values of the geo_name
variable. And with the id_vars
argument, we named an identifier that we’d like to keep in the processed data. (Other unused variables will be dropped.)
Important: the dgirt()
model assumes consistent coding of the polarity of item responses for identification. This is already true for the opinion
data. Typically it requires manual recoding.
summary()
gives a high-level description of the result.
summary(dgirt_in_liberalism)
-#> Items:
-#> [1] "abortion" "affirmative_action" "gaymarriage_amendment"
-#> [4] "partialbirth_abortion" "stemcell_research"
-#> Respondents:
-#> 23,632 in `item_data`
-#> Grouping variables:
-#> [1] "year" "state" "race3"
-#> Time periods:
-#> [1] 2006 2007 2008 2009 2010
-#> Local geographic areas:
-#> [1] "CA" "GA" "LA" "MA"
-#> Hierarchical parameters:
-#> [1] "GA" "LA" "MA" "race3other" "race3white"
-#> Modifiers of hierarchical parameters:
-#> NULL
-#> Constants:
-#> Q T P N G H D
-#> 5 5 5 300 12 1 1
+summary(dgirt_in_liberalism)
+#> Items:
+#> [1] "abortion" "affirmative_action" "gaymarriage_amendment"
+#> [4] "partialbirth_abortion" "stemcell_research"
+#> Respondents:
+#> 23,632 in `item_data`
+#> Grouping variables:
+#> [1] "year" "state" "race3"
+#> Time periods:
+#> [1] 2006 2007 2008 2009 2010
+#> Local geographic areas:
+#> [1] "CA" "GA" "LA" "MA"
+#> Hierarchical parameters:
+#> [1] "GA" "LA" "MA" "race3other" "race3white"
+#> Modifiers of hierarchical parameters:
+#> NULL
+#> Constants:
+#> Q T P N G H D
+#> 5 5 5 300 12 1 1
get_n()
and get_item_n()
give response counts.
get_n(dgirt_in_liberalism, by = "state")
-#> state n
-#> 1: CA 14655
-#> 2: GA 4667
-#> 3: LA 1693
-#> 4: MA 2617
-get_item_n(dgirt_in_liberalism, by = "year")
-#> year abortion affirmative_action stemcell_research
-#> 1: 2006 5275 4750 2483
-#> 2: 2007 1690 1557 1705
-#> 3: 2008 4697 4704 4002
-#> 4: 2009 2141 2147 0
-#> 5: 2010 9204 9241 9146
-#> gaymarriage_amendment partialbirth_abortion
-#> 1: 2642 5064
-#> 2: 1163 1684
-#> 3: 4265 0
-#> 4: 0 0
-#> 5: 9226 0
+get_n(dgirt_in_liberalism, by = "state")
+#> state n
+#> 1: CA 14655
+#> 2: GA 4667
+#> 3: LA 1693
+#> 4: MA 2617
+
+get_item_n(dgirt_in_liberalism, by = "year")
+#> year abortion affirmative_action stemcell_research
+#> 1: 2006 5275 4750 2483
+#> 2: 2007 1690 1557 1705
+#> 3: 2008 4697 4704 4002
+#> 4: 2009 2141 2147 0
+#> 5: 2010 9204 9241 9146
+#> gaymarriage_amendment partialbirth_abortion
+#> 1: 2642 5064
+#> 2: 1163 1684
+#> 3: 4265 0
+#> 4: 0 0
+#> 5: 9226 0
+dgirt()
estimates a latent variable based on responses to multiple survey questions. Here, we’ll use it to estimate latent policy liberalism over time, for the groups defined by state
and race3
. (Specifically, by their Cartesian product.)
Under the hood, dgirt()
uses RStan for MCMC sampling, and arguments can be passed to RStan’s stan()
via the ...
argument of dgirt()
. This is almost always desirable. Here, we specify the number of sampler iterations, chains, and cores.
dgirt_out_liberalism <- dgirt(dgirt_in_liberalism, iter = 3000, chains = 4,
- cores = 4, seed = 42)
+ cores = 4, seed = 42)
The model results are held in a dgirt_fit
object. Methods from RStan like extract()
are available if needed because dgirt_fit
is a subclass of stanfit
. But dgo provides its own methods for typical post-estimation tasks.
For a high-level summary of the result, use summary()
.
summary(dgirt_out_liberalism)
-#> dgirt samples from 4 chains of 3000 iterations, 1500 warmup, thinned every 1
-#> Drawn Mon May 29 23:34:20 2017
-#> Package version 0.2.10
-#> Model version 2017_01_04
-#> 137 parameters; 60 theta_bars (year state race3)
-#> 5 periods 2006 to 2010
-#>
-#> n_eff
-#> Min. 1st Qu. Median Mean 3rd Qu. Max.
-#> 65.07 388.19 585.84 1083.11 1245.79 6000.00
-#>
-#> Rhat
-#> Min. 1st Qu. Median Mean 3rd Qu. Max.
-#> 0.9997 1.0020 1.0059 1.0084 1.0119 1.0553
-#>
-#> Elapsed time
-#> chain warmup sample total
-#> 1: 1 2M 4S 1M 52S 3M 56S
-#> 2: 2 1M 54S 3M 9S 4M 63S
-#> 3: 3 2M 19S 3M 7S 5M 26S
-#> 4: 4 2M 18S 3M 14S 5M 32S
+summary(dgirt_out_liberalism)
+#> dgirt samples from 4 chains of 3000 iterations, 1500 warmup, thinned every 1
+#> Drawn Mon May 29 23:34:20 2017
+#> Package version 0.2.10
+#> Model version 2017_01_04
+#> 137 parameters; 60 theta_bars (year state race3)
+#> 5 periods 2006 to 2010
+#>
+#> n_eff
+#> Min. 1st Qu. Median Mean 3rd Qu. Max.
+#> 65.07 388.19 585.84 1083.11 1245.79 6000.00
+#>
+#> Rhat
+#> Min. 1st Qu. Median Mean 3rd Qu. Max.
+#> 0.9997 1.0020 1.0059 1.0084 1.0119 1.0553
+#>
+#> Elapsed time
+#> chain warmup sample total
+#> 1: 1 2M 4S 1M 52S 3M 56S
+#> 2: 2 1M 54S 3M 9S 4M 63S
+#> 3: 3 2M 19S 3M 7S 5M 26S
+#> 4: 4 2M 18S 3M 14S 5M 32S
To apply scalar functions to posterior samples, use summarize()
. The default output gives summary statistics for the model’s theta_bar
parameters, which represent group means. These are indexed by time (year
) and group, where groups are again defined by local geographic area (state
) and any other respondent characteristics (race3
).
head(summarize(dgirt_out_liberalism))
-#> param state race3 year mean sd median q_025
-#> 1: theta_bar CA black 2006 0.5247367 0.06827003 0.5194593 0.40423566
-#> 2: theta_bar CA black 2007 0.5944296 0.08813494 0.5853362 0.44776239
-#> 3: theta_bar CA black 2008 0.5823158 0.10139495 0.5667726 0.43013247
-#> 4: theta_bar CA black 2009 0.5132426 0.08060875 0.5021405 0.38781331
-#> 5: theta_bar CA black 2010 0.4977903 0.05987256 0.4922105 0.39645056
-#> 6: theta_bar CA other 2006 0.1593904 0.05938098 0.1664194 0.02339847
-#> q_975
-#> 1: 0.6754660
-#> 2: 0.7932721
-#> 3: 0.8190219
-#> 4: 0.7062299
-#> 5: 0.6332728
-#> 6: 0.2583196
+head(summarize(dgirt_out_liberalism))
+#> param state race3 year mean sd median q_025
+#> 1: theta_bar CA black 2006 0.5247367 0.06827003 0.5194593 0.40423566
+#> 2: theta_bar CA black 2007 0.5944296 0.08813494 0.5853362 0.44776239
+#> 3: theta_bar CA black 2008 0.5823158 0.10139495 0.5667726 0.43013247
+#> 4: theta_bar CA black 2009 0.5132426 0.08060875 0.5021405 0.38781331
+#> 5: theta_bar CA black 2010 0.4977903 0.05987256 0.4922105 0.39645056
+#> 6: theta_bar CA other 2006 0.1593904 0.05938098 0.1664194 0.02339847
+#> q_975
+#> 1: 0.6754660
+#> 2: 0.7932721
+#> 3: 0.8190219
+#> 4: 0.7062299
+#> 5: 0.6332728
+#> 6: 0.2583196
Alternatively, summarize()
can apply arbitrary functions to posterior samples for whatever parameter is given by its pars
argument.
summarize(dgirt_out_liberalism, pars = "xi", funs = "var")
-#> param year var
-#> 1: xi 2006 0.013076032
-#> 2: xi 2007 0.008053516
-#> 3: xi 2008 0.006789127
-#> 4: xi 2009 0.006144990
-#> 5: xi 2010 0.005945176
+#> param year var
+#> 1: xi 2006 0.013076032
+#> 2: xi 2007 0.008053516
+#> 3: xi 2008 0.006789127
+#> 4: xi 2009 0.006144990
+#> 5: xi 2010 0.005945176
To access posterior samples in tabular form use as.data.frame()
. By default, this method returns post-warmup samples for the theta_bar
parameters, but like other methods takes a pars
argument.
head(as.data.frame(dgirt_out_liberalism))
-#> param state race3 year iteration value
-#> 1: theta_bar CA black 2006 1 0.6107959
-#> 2: theta_bar CA black 2006 2 0.4745799
-#> 3: theta_bar CA black 2006 3 0.4980549
-#> 4: theta_bar CA black 2006 4 0.4898826
-#> 5: theta_bar CA black 2006 5 0.4939210
-#> 6: theta_bar CA black 2006 6 0.4746524
+head(as.data.frame(dgirt_out_liberalism))
+#> param state race3 year iteration value
+#> 1: theta_bar CA black 2006 1 0.6107959
+#> 2: theta_bar CA black 2006 2 0.4745799
+#> 3: theta_bar CA black 2006 3 0.4980549
+#> 4: theta_bar CA black 2006 4 0.4898826
+#> 5: theta_bar CA black 2006 5 0.4939210
+#> 6: theta_bar CA black 2006 6 0.4746524
To poststratify the results use poststratify()
. Here, we use the group population proportions bundled as annual_state_race_targets
to reweight and aggregate estimates to strata defined by state-years.
poststratify(dgirt_out_liberalism, annual_state_race_targets, strata_names =
- c("state", "year"), aggregated_names = "race3")
-#> state year value
-#> 1: CA 2006 0.143321712
-#> 2: CA 2007 0.188969603
-#> 3: CA 2008 0.112907172
-#> 4: CA 2009 0.058219329
-#> 5: CA 2010 0.092557709
-#> 6: GA 2006 0.103355439
-#> 7: GA 2007 0.084458691
-#> 8: GA 2008 -0.011351441
-#> 9: GA 2009 -0.015584764
-#> 10: GA 2010 0.010578655
-#> 11: LA 2006 0.021248643
-#> 12: LA 2007 -0.003170117
-#> 13: LA 2008 -0.095756506
-#> 14: LA 2009 -0.124279123
-#> 15: LA 2010 -0.088613763
-#> 16: MA 2006 0.147235550
-#> 17: MA 2007 0.269984992
-#> 18: MA 2008 0.159194876
-#> 19: MA 2009 0.082495757
-#> 20: MA 2010 0.122864118
+poststratify(dgirt_out_liberalism, annual_state_race_targets, strata_names =
+
+ c("state", "year"), aggregated_names = "race3")
+#> state year value
+#> 1: CA 2006 0.143321712
+#> 2: CA 2007 0.188969603
+#> 3: CA 2008 0.112907172
+#> 4: CA 2009 0.058219329
+#> 5: CA 2010 0.092557709
+#> 6: GA 2006 0.103355439
+#> 7: GA 2007 0.084458691
+#> 8: GA 2008 -0.011351441
+#> 9: GA 2009 -0.015584764
+#> 10: GA 2010 0.010578655
+#> 11: LA 2006 0.021248643
+#> 12: LA 2007 -0.003170117
+#> 13: LA 2008 -0.095756506
+#> 14: LA 2009 -0.124279123
+#> 15: LA 2010 -0.088613763
+#> 16: MA 2006 0.147235550
+#> 17: MA 2007 0.269984992
+#> 18: MA 2008 0.159194876
+#> 19: MA 2009 0.082495757
+#> 20: MA 2010 0.122864118
To plot the results use dgirt_plot()
. This method plots summaries of posterior samples by time period. By default, it shows a 95% credible interval around posterior medians for the theta_bar
parameters, for each local geographic area. Here we omit the CIs.
dgirt_plot(dgirt_out_liberalism, y_min = NULL, y_max = NULL)
dgirt_plot()
can also plot the data.frame
output from poststratify()
, given arguments that identify the relevant variables. Below, we aggregate over the demographic grouping variable race3
, resulting in a data.frame
of estimates by state-year.
ps <- poststratify(dgirt_out_liberalism, annual_state_race_targets, strata_names
- = c("state", "year"), aggregated_names = "race3")
-head(ps)
-#> state year value
-#> 1: CA 2006 0.14332171
-#> 2: CA 2007 0.18896960
-#> 3: CA 2008 0.11290717
-#> 4: CA 2009 0.05821933
-#> 5: CA 2010 0.09255771
-#> 6: GA 2006 0.10335544
-dgirt_plot(ps, group_names = NULL, time_name = "year", geo_name = "state")
ps <- poststratify(dgirt_out_liberalism, annual_state_race_targets, strata_names
+ = c("state", "year"), aggregated_names = "race3")
+
+head(ps)
+#> state year value
+#> 1: CA 2006 0.14332171
+#> 2: CA 2007 0.18896960
+#> 3: CA 2008 0.11290717
+#> 4: CA 2009 0.05821933
+#> 5: CA 2010 0.09255771
+#> 6: GA 2006 0.10335544
+
In the call to dgirt_plot()
, we passed the names of the state
and year
variables. The group_names
argument was then NULL
, because there were no grouping variables left after we aggregated over race3
.
dgo is an R package for the dynamic estimation of group-level opinion. The package can be used to estimate subpopulation groups’ average latent conservatism (or other latent trait) from individuals’ responses to dichotomous questions using a Bayesian group-level IRT approach developed by Caughey and Warshaw 2015 that models latent traits at the level of demographic and/or geographic groups rather than individuals. This approach uses a hierarchical model to borrow strength cross-sectionally and dynamic linear models to do so across time. The group-level estimates can be weighted to generate estimates for geographic units, such as states.
-dgo can also be used to estimate smoothed estimates of subpopulation groups’ average responses on individual survey questions using a dynamic multi-level regression and poststratification (MRP) model (Park, Gelman, and Bafumi 2004). For instance, it could be used to estimate public opinion in each state on same-sex marriage or the Affordable Care Act.
-This model opens up new areas of research on historical public opinion in the United States at the subnational level. It also enables scholars of comparative politics to estimate dynamic models of public opinion opinion at the country or subnational level.
-dgo is an R package for the dynamic estimation of group-level public opinion. You can use the package to estimate latent trait means in subpopulations from survey data. For example, dgo can estimate the average policy liberalism in each American state over time among Democrats, Independents, and Republicans, given their answers to survey questions about policy proposals.
+dgo accomplishes this using a Bayesian group-level IRT approach developed by Caughey and Warshaw 2015. It models latent traits at the level of demographic and geographic groups rather than individuals. It uses a hierarchical model to borrow strength cross-sectionally and dynamic linear models to do so across time.
+The package can also be used to produce smoothed estimates of subpopulations’ average responses to single survey items, using a dynamic multi-level regression and poststratification (MRP) model (Park, Gelman, and Bafumi 2004). For instance, you can use dgo to estimate public opinion in each state on same-sex marriage or the Affordable Care Act.
+This model opens up new areas of research on historical public opinion in the United States at the subnational level. It also allows scholars of comparative politics to estimate dynamic cross-national models of public opinion.
+dgo can be installed from CRAN:
-install.packages("dgo")
+install.packages("dgo")
Or get the latest version from GitHub using devtools:
-if (!require(devtools, quietly = TRUE)) install.packages("devtools")
-devtools::install_github("jamesdunham/dgo")
+if (!require(devtools, quietly = TRUE)) install.packages("devtools")
+
dgo requires a working installation of RStan. If you don’t already have RStan, follow its “Getting Started” guide.
-Load the package and set RStan’s recommended options for a local, multicore machine with excess RAM:
-library(dgo)
-#> Loading required package: dgodata
-#> Loading required package: rstan
-#> Loading required package: ggplot2
-#> Loading required package: StanHeaders
-#> rstan (Version 2.16.2, packaged: 2017-07-03 09:24:58 UTC, GitRev: 2e1f913d3ca3)
-#> For execution on a local, multicore CPU with excess RAM we recommend calling
-#> rstan_options(auto_write = TRUE)
-#> options(mc.cores = parallel::detectCores())
-rstan_options(auto_write = TRUE)
-options(mc.cores = parallel::detectCores())
+library(dgo)
+
+rstan_options(auto_write = TRUE)
+
The minimal workflow from raw data to estimation is:
-Please report issues that you encounter.
OS X only: RStan creates temporary files during estimation in a location given by tempdir()
, typically an arbitrary location in /var/folders
. If a model runs for days, these files can be cleaned up while still needed, which induces an error. A good solution is to set a safer path for temporary files, using an environment variable checked at session startup. For help setting environment variables, see the Stack Overflow question here. Confirm the new path before starting your model run by restarting R and checking the output from tempdir()
.
Models fitted before October 2016 (specifically < #8e6a2cf) using dgirt are not fully compatible with dgo. Their contents can be extracted without using dgo, however, with the $
indexing operator. For example: as.data.frame(dgirtfit_object$stan.cmb)
.
Calling dgirt()
or dgmrp()
can generate warnings during model compilation. These are safe to ignore, or can be suppressed by following the linked instructions.
dgo is under development and we welcome suggestions.
The package citation is:
Dunham, James, Devin Caughey, and Christopher Warshaw. 2017. dgo: Dynamic Estimation of Group-level Opinion. R package. https://jdunham.io/dgo/.
-shape()
now accepts aggregated item response data unaccompanied by individual-level item response data. The item_data
and item_names
arguments are no longer required.max_raked_weight
argument to shape()
for trimming raked weights. Note that trimming occurs before raked weights are rescaled to have mean 1, and the rescaled weights can be larger than max_raked_weight
.expand_rownames()
.dichotomize()
in R.model
argument to dgirt()
and dgmrp()
taking for reuse a previously compiled Stan model, as found in the @stanmodel
slot of a dgirt_fit
- or dgmrp_fit
-class object.version
argument to dgirt()
and dgmrp()
can be used to specify arbitrary .stan
files on the disk in addition to those included with the package.by
to get_n()
and get_item_n()
methods properly accepts a vector of variable names when combined with aggregate
arguments.dgmrp()
for fitting single-issue MRP models with hierarchical covariatesdgmrp_fit
for models fitted with dgmrp()
, inheriting from a new virtual class dgo_fit
-dgirt()
now returns a dgirt_fit
-class object that also inherits from dgo_fit
classNAMESPACE
-P
, S
related to group_names
change in 0.2.5Error in .doLoadActions(where, attach)
)expand_rownames()
-group_names
is no longer required. If omitted, the geographic variable given by geo_name
will define groups.aggregate_item_names
is no longer required. It defaults to the observed values of the item
column in aggregate_data
.raking
argument to shape()
replaces strata_names
. It takes a formula or list of formulas and allows more complicated preweighting.id_vars
argument to shape()
specifies variables to be kept in item_data
.aggregate_data
may include geographic areas, demographics, or time periods that don’t appear in item_data
.dgirtfit
methods rhats()
and plot_rhats()
for model checking.dgirtfit
method get_time_elapsed
gives model run times. These also appear in summary
output.+head(as.data.frame(toy_dgirtfit, pars = 'theta_bar'))#> dgirt samples from 4 chains of 400 iterations, 200 warmup, thinned every 1 -#> Drawn Sat Oct 28 09:40:04 2017 -#> Package version 0.2.11 +#> Drawn Mon Nov 13 18:15:19 2017 +#> Package version 0.2.12 #> Model version 2017_01_04 #> 43 parameters; 12 theta_bars (year state race3) #> 2 periods 2009 to 2010 #> #> n_eff#>-#>#> +#>#> #> Rhat#>-#>#> +#>#> #> Elapsed time#>-#> -#> +#> +#> #> -#>+#># get posterior means with a convenience function -get_posterior_mean(toy_dgirtfit, pars = 'theta_bar')#> param state race3 year mean -#> 1: theta_bar SC black 2009 2.3366278 -#> 2: theta_bar SC black 2010 1.2377753 -#> 3: theta_bar SC other 2009 -1.3572482 -#> 4: theta_bar SC other 2010 0.0488835 -#> 5: theta_bar SC white 2009 -1.6299612 -#> 6: theta_bar SC white 2010 -1.3977524 -#> 7: theta_bar VA black 2009 2.0786160 -#> 8: theta_bar VA black 2010 1.4686529 -#> 9: theta_bar VA other 2009 -1.0419862 -#> 10: theta_bar VA other 2010 0.2789239 -#> 11: theta_bar VA white 2009 -0.9452116 -#> 12: theta_bar VA white 2010 -0.8000586+get_posterior_mean(toy_dgirtfit, pars = 'theta_bar')#> param state race3 year mean +#> 1: theta_bar SC black 2009 2.35662560 +#> 2: theta_bar SC black 2010 1.22256665 +#> 3: theta_bar SC other 2009 -1.36379358 +#> 4: theta_bar SC other 2010 0.04227331 +#> 5: theta_bar SC white 2009 -1.63196374 +#> 6: theta_bar SC white 2010 -1.47489402 +#> 7: theta_bar VA black 2009 1.90274077 +#> 8: theta_bar VA black 2010 1.52339960 +#> 9: theta_bar VA other 2009 -0.95920551 +#> 10: theta_bar VA other 2010 0.28518022 +#> 11: theta_bar VA white 2009 -0.92697350 +#> 12: theta_bar VA white 2010 -0.83419892# generally apply functions to posterior samples after warmup; n.b. 
# `as.array` is iterations x chains x parameters so `MARGIN = 3` applies # `FUN` over iterations and chains -apply(as.array(toy_dgirtfit, pars = 'xi'), 3, mean)#> xi[1] xi[2] -#> 1.632168 0.104962+apply(as.array(toy_dgirtfit, pars = 'xi'), 3, mean)#> xi[1] xi[2] +#> 1.35546402 0.04828843# access the posterior samples -head(as.data.frame(toy_dgirtfit, pars = 'theta_bar'))#> param state race3 year iteration value -#> 1: theta_bar SC black 2009 1 3.116552 -#> 2: theta_bar SC black 2009 2 4.538157 -#> 3: theta_bar SC black 2009 3 1.369630 -#> 4: theta_bar SC black 2009 4 2.999939 -#> 5: theta_bar SC black 2009 5 1.752752 -#> 6: theta_bar SC black 2009 6 1.553516