release 0.6.1.1 with new StanHeaders, resolve CRAN issue

ConnorDonegan · Jun 2, 2024 · b17e9b1 · b17e9b1
1 parent 351ff1f
commit b17e9b1
Show file tree

Hide file tree

Showing 73 changed files with 402 additions and 196 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: geostan
 Title: Bayesian Spatial Analysis
-Version: 0.6.1
-Date: 2024-05-08
+Version: 0.6.2
+Date: 2024-05-31
 URL: https://connordonegan.github.io/geostan/
 BugReports: https://github.com/ConnorDonegan/geostan/issues
 Authors@R: c(
@@ -10,7 +10,7 @@ Authors@R: c(
     person("Mitzi", "Morris", role = "ctb"),
     person("Amy", "Tims", role = "ctb")
   )
-Description: For Bayesian inference with spatial data, provides exploratory spatial analysis tools, multiple spatial model specifications, spatial model diagnostics, and special methods for inference with small area survey data (e.g., the America Community Survey (ACS)) and censored population health surveillance data. Models are pre-specified using the Stan programming language, a platform for Bayesian inference using Markov chain Monte Carlo (MCMC). References: Carpenter et al. (2017) <doi:10.18637/jss.v076.i01>; Donegan (2021) <doi:10.31219/osf.io/3ey65>; Donegan (2022) <doi:10.21105/joss.04716>; Donegan, Chun and Hughes (2020) <doi:10.1016/j.spasta.2020.100450>; Donegan, Chun and Griffith (2021) <doi:10.3390/ijerph18136856>; Morris et al. (2019) <doi:10.1016/j.sste.2019.100301>.
+Description: For spatial data analysis; provides exploratory spatial analysis tools, spatial regression models, disease mapping models, model diagnostics, and special methods for inference with small area survey data (e.g., the America Community Survey (ACS)) and censored population health surveillance data. Models are pre-specified using the Stan programming language, a platform for Bayesian inference using Markov chain Monte Carlo (MCMC). References: Carpenter et al. (2017) <doi:10.18637/jss.v076.i01>; Donegan (2021) <doi:10.31219/osf.io/3ey65>; Donegan (2022) <doi:10.21105/joss.04716>; Donegan, Chun and Hughes (2020) <doi:10.1016/j.spasta.2020.100450>; Donegan, Chun and Griffith (2021) <doi:10.3390/ijerph18136856>; Morris et al. (2019) <doi:10.1016/j.sste.2019.100301>.
 License: GPL (>= 3) 
 Encoding: UTF-8
 LazyData: true

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
+# geostan 0.6.2
+
+geostan was removed from CRAN for a moment due to an issue with the StanHeaders R package. This should be resolved now. This release puts geostan back on CRAN with only minimal internal changes to geostan. 
+
 # geostan 0.6.1
 
 Two updates:

diff --git a/R/convenience-functions.R b/R/convenience-functions.R
@@ -119,11 +119,11 @@ sim_sar <- function(m = 1, mu = rep(0, nrow(w)), w, rho, sigma = 1, ...) {
     check_sa_data(mu, w)
     stopifnot(all(round(Matrix::rowSums(w), 10) == 1))
     N <- nrow(w)
-    if (!inherits(rho, "numeric") || !length(rho) %in% c(1, N) || any(rho >= 1 || rho <= -1)) stop("rho must be numeric value within range (-1, 1), or a k-length numeric vector where K=nrow(W).")
+    if (!inherits(rho, "numeric") || !length(rho) %in% c(1, N)) stop("rho must be a single numeric value, or a k-length numeric vector where K=nrow(W).")
     if (!inherits(sigma, "numeric") || !length(sigma) %in% c(1, N) || !all(sigma > 0)) stop("sigma must be a positive numeric value, or k-length numeric vector, with K=nrow(W).")
     I <- diag(N)
     S <- sigma^2 * solve( (I - rho * Matrix::t(w)) %*% (I - rho * w) )
-##    S <- crossprod(solve(I - rho * w)) * sigma^2
+    # or: #  S <- crossprod(solve(I - rho * w)) * sigma^2
     x <- MASS::mvrnorm(n = m, mu = mu, Sigma = S, ...)
     return(x)
 }
@@ -154,13 +154,14 @@ sim_sar <- function(m = 1, mu = rep(0, nrow(w)), w, rho, sigma = 1, ...) {
 #' 
 #'
 #' @examples
-#' \donttest{
+#' 
 #' data(georgia)
 #' sp_diag(georgia$college, georgia)
 #'
 #' bin_fn <- function(y) mad(y, na.rm = TRUE)
 #' sp_diag(georgia$college, georgia, binwidth = bin_fn)
-#' 
+#'
+#' \donttest{
 #' fit <- stan_glm(log(rate.male) ~ log(income),
 #'                 data = georgia,
 #'                 chains = 2, iter = 800) # for speed only
@@ -368,23 +369,23 @@ sp_diag.numeric <- function(y,
 #' ## prepare data for the CAR model, using WCAR specification
 #' cars <- prep_car_data(A, style = "WCAR")
 #' ## provide list of data for the measurement error model
-#' ME <- prep_me_data(se = data.frame(ICE = georgia$ICE.se),
+#' ME <- prep_me_data(se = data.frame(college = georgia$college.se),
 #'                    car_parts = cars)
 #' ## sample from the prior probability model only, including the ME model
-#' fit <- stan_glm(log(rate.male) ~ ICE,
+#' fit <- stan_glm(log(rate.male) ~ college,
 #'                 ME = ME,
 #'                 data = georgia, 
 #'                 prior_only = TRUE,
-#'                 iter = 800, # for speed only
+#'                 iter = 1e3, # for speed only
 #'                 chains = 2, # for speed only
 #'                 refresh = 0 # silence some printing
 #'                 )
 #' 
 #' ## see ME diagnostics
-#' me_diag(fit, "ICE", georgia)
+#' me_diag(fit, "college", georgia)
 #' ## see index values for the largest (absolute) delta values
 #'  ## (differences between raw estimate and the posterior mean)
-#' me_diag(fit, "ICE", georgia, index = 3)
+#' me_diag(fit, "college", georgia, index = 3)
 #' }
 #' @export
 #' @md
@@ -655,7 +656,7 @@ shape2mat <- function(shape,
     st.style <- match.arg(st.style)
     ## temporary: handle deprecation of queen
     if (!missing(queen)) {
-        message("The 'queen' argument is deprecated. Use 'method' instead.")
+        warning("The 'queen' argument is deprecated. Use 'method' instead.")
         if (queen == TRUE) method <- 'queen'
         if (queen == FALSE) method <- 'rook'
     } else {
@@ -1036,6 +1037,8 @@ prep_icar_data <- function(C, scale_factor = NULL) {
 #' @param cmat If `cmat = TRUE`, return the full matrix C (in sparse matrix format).
 #'
 #' @param stan_fn Two computational methods are available for CAR models using \code{\link[geostan]{stan_car}}: \code{car\_normal\_lpdf} and \code{wcar\_normal\_lpdf}. For WCAR models, either method will work but \code{wcar\_normal\_lpdf} is faster. To force use \code{car\_normal\_lpdf} when `style = 'WCAR'`, provide `stan_fn = "car_normal_lpdf"`. 
+#'
+#' @param quiet Controls printing behavior. By default, `quiet = FALSE` and the range of permissible values for the spatial dependence parameter is printed to the console.
 #' 
 #' @details
 #' The CAR model is:
@@ -1073,6 +1076,7 @@ prep_icar_data <- function(C, scale_factor = NULL) {
 #' @return A list containing all of the data elements required by the CAR model in \code{\link[geostan]{stan_car}}.
 #'
 #' @examples
+#' 
 #' data(georgia)
 #'
 #' ## use a binary adjacency matrix
@@ -1090,6 +1094,7 @@ prep_icar_data <- function(C, scale_factor = NULL) {
 #' D <- sf::st_distance(sf::st_centroid(georgia))
 #' A <- D * A
 #' cp <- prep_car_data(A, "DCAR", k = 1)
+#' 
 #' @export
 #' @md
 #' @importFrom rstan extract_sparse_parts
@@ -1100,7 +1105,8 @@ prep_car_data <- function(A,
                           gamma = 0,
                           lambda = TRUE,
                           cmat = TRUE,
-                          stan_fn = ifelse(style == "WCAR", "wcar_normal_lpdf", "car_normal_lpdf")
+                          stan_fn = ifelse(style == "WCAR", "wcar_normal_lpdf", "car_normal_lpdf"),
+                          quiet = FALSE
                           ) {
     style = match.arg(style)
     if (style != "WCAR" & stan_fn == "wcar_normal_lpdf") stop("wcar_normal_lpdf only works with style = 'WCAR'.")
@@ -1162,8 +1168,13 @@ prep_car_data <- function(A,
         MCM <- Matrix::Diagonal(x = 1 / sqrt(M_diag)) %*% C %*% Matrix::Diagonal(x = sqrt(M_diag))
         stopifnot(Matrix::isSymmetric(MCM, check.attributes = FALSE))
         lambda <- sort(eigen(MCM)$values)
-        cat ("Range of permissible rho values: ", 1 / range(lambda), "\n")
+        rho_lims <- 1 / range(lambda)
+        if (!quiet) {
+            r_rho_lims <- round( rho_lims, 3)
+            message("Range of permissible rho values: ", r_rho_lims[1], ", ", r_rho_lims[2])
+        }
         car.dl$lambda <- lambda
+        car.dl$rho_lims <- rho_lims
     }
     if (cmat) car.dl$C <- C
     return (car.dl)
@@ -1175,8 +1186,10 @@ prep_car_data <- function(A,
 #' @description Given a spatial weights matrix \eqn{W}, this function prepares data for the simultaneous autoregressive (SAR) model (a.k.a spatial error model (SEM)) in Stan. This is used internally by \code{\link[geostan]{stan_sar}}, and may also be used for building custom SAR models in Stan. 
 #' 
 #' @param W Spatial weights matrix, typically row-standardized.
+#'
+#' @param quiet Controls printing behavior. By default, `quiet = FALSE` and the range of permissible values for the spatial dependence parameter is printed to the console.
 #' 
-#' @return list of data to add to a Stan data list:
+#' @return Return's a list of data required as input for geostan's SAR models, as implemented in Stan. The list contains:
 #' 
 #' \describe{
 #' \item{ImW_w}{Numeric vector containing the non-zero elements of matrix \eqn{(I - W)}.}
@@ -1191,11 +1204,11 @@ prep_car_data <- function(A,
 #' \item{rho_min}{Minimum permissible value of \eqn{\rho} (`1/min(eigenvalues_w)`).}
 #' \item{rho_max}{Maximum permissible value of \eqn{\rho} (`1/max(eigenvalues_w)`.}
 #' }
-#' The function will also print the range of permissible \eqn{\rho} values to the console.
+#' The function will also print the range of permissible \eqn{\rho} values to the console (unless `quiet = TRUE`).
 #'
 #' @details
 #'
-#' This is used internally to prepare data for \code{\link[geostan]{stan_sar}} models. It can also be helpful for fitting custom SAR models in Stan (outside of \code{geostan}). 
+#' This is used internally to prepare data for \code{\link[geostan]{stan_sar}} models. It can also be helpful for fitting custom SAR models in Stan (outside of \code{geostan}), as described in the geostan vignette on custom spatial models.
 #' 
 #' @seealso \link[geostan]{shape2mat}, \link[geostan]{stan_sar}, \link[geostan]{prep_car_data}, \link[geostan]{prep_icar_data}
 #'
@@ -1207,7 +1220,7 @@ prep_car_data <- function(A,
 #' @export
 #' @importFrom Matrix rowSums
 #' @importFrom rstan extract_sparse_parts
-prep_sar_data <- function(W) {
+prep_sar_data <- function(W, quiet = FALSE) {
     stopifnot(inherits(W, "matrix") | inherits(W, "Matrix"))
     N <- nrow(W)
     sar.dl <- rstan::extract_sparse_parts(Matrix::Diagonal(N) - W)
@@ -1219,7 +1232,10 @@ prep_sar_data <- function(W) {
     sar.dl$n <- N
     sar.dl$W <- W
     rho_lims <- 1/range(sar.dl$eigenvalues_w)
-    cat("Range of permissible rho values: ", rho_lims, "\n")
+    if (!quiet) {
+        r_rho_lims <- round(rho_lims, 3)
+        message("Range of permissible rho values: ", r_rho_lims[1], ", ", r_rho_lims[2])
+    }
     sar.dl$rho_min <- min(rho_lims)
     sar.dl$rho_max <- max(rho_lims)    
     return( sar.dl )
@@ -1236,7 +1252,7 @@ prep_sar_data <- function(W) {
 #' @return A folder in your working directory with the shapefile; filepaths are printed to the console.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' library(sf)
 #' url <- "https://www2.census.gov/geo/tiger/GENZ2019/shp/cb_2019_us_state_20m.zip"
 #' folder <- tempdir()

diff --git a/R/geary.R b/R/geary.R
@@ -72,6 +72,8 @@ lg <- function(x, w, digits = 3, scale = TRUE, na.rm = FALSE, warn = TRUE) {
 #' @param warn If `FALSE`, no warning will be printed to inform you when observations with `NA` values have been dropped, or if any observations without neighbors have been found.
 #' @param na.rm If `na.rm = TRUE`, observations with `NA` values will be dropped from both `x` and `w`.
 #'
+#' @return Returns the Geary ratio (a single numeric value).
+#' 
 #' @details
 #'
 #' The Geary Ratio is an index of spatial autocorrelation. The

diff --git a/R/geostan_fit-methods.R b/R/geostan_fit-methods.R
@@ -12,6 +12,10 @@
 #' @param fill fill color for histograms and density plots.
 #'
 #' @param ... additional arguments to `rstan::plot` or `rstan::print.stanfit`.
+#'
+#' @return
+#'
+#' The print methods writes text to the console to summarize model results. The plot method resturns a `ggplot` (from `rstan::plot` for stanfit objects).
 #' 
 #' @examples
 #' data(georgia)
@@ -126,6 +130,12 @@ plot.geostan_fit <- function(x,
 #' 
 #' @param ... Not used
 #'
+#' @return
+#'
+#' By default, these methods return a `data.frame`. The column named `mean` is what most users will be looking for. These contain the fitted values (for the `fitted` method), the residuals (fitted values minus observed values, for the `resid` method), or the spatial trend (for the `spatial` method). The `mean` column is the posterior mean of each value, and the column `sd` contains the posterior standard deviation for each value. The posterior distributions are also summarized by select quantiles (including 2.5\% and 97.5\%). 
+#'
+#' If `summary = FALSE` then the method returns an S-by-N matrix of MCMC samples, where S is the number of MCMC samples and N is the number of observations in the data.
+#' 
 #' @details
 #'
 #' When \code{rates = FALSE} and the model is Poisson or Binomial, the fitted values returned by the \code{fitted} method are the expected value of the response variable. The \code{rates} argument is used to translate count outcomes to rates by dividing by the appropriate denominator. The behavior of the `rates` argument depends on the model specification. Consider a Poisson model of disease incidence, such as the following intercept-only case:

diff --git a/R/priors.R b/R/priors.R
@@ -210,3 +210,4 @@ print.prior <- function(x, digits = 2, ...) {
     if (nm == "hs")        df <- as.data.frame(x[c('global_scale', 'slab_df', 'slab_scale')])
     print(df, digits = digits, ...)           
 }
+
diff --git a/R/raster-analysis.R b/R/raster-analysis.R
@@ -11,6 +11,10 @@
 #'
 #' The purpose is to calculate eigenvalues of the spatial weights matrix for the CAR and SAR models, enabling spatial regression with large raster data sets. This function is used internally by \code{\link[geostan]{prep_sar_data2}} and \code{\link[geostan]{prep_car_data2}}. For more details, see: \code{vignette("raster-regression", package = "geostan")}.
 #'
+#' @return
+#'
+#' Returns the eigenvalues of the row-standardized spatial weights matrix (a numeric vector length `row * col`).
+#'  
 #' @seealso
 #' \code{\link[geostan]{prep_sar_data2}}, \code{\link[geostan]{prep_car_data2}}
 #' 
@@ -40,12 +44,16 @@ eigen_grid <- function(row = 5, col = 5) {
 #'
 #' @param row Number of rows in the raster 
 #' @param col Number of columns in the raster
-#'
+#' @param quiet Controls printing behavior. By default, `quiet = FALSE` and the range of permissible values for the spatial dependence parameter is printed to the console.
+#' 
 #' @details
 #'
 #' Prepare input data for the CAR model when your dataset consists of observations on a regular (rectangular) tessellation, such as a raster layer or remotely sensed imagery. The rook criteria is used to determine adjacency. This function uses Equation 5 from Griffith (2000) to generate approximate eigenvalues for a row-standardized spatial weights matrix from a P-by-Q dimension regular tessellation.
 #'
 #' This function can accommodate very large numbers of observations for use with \code{\link[geostan]{stan_car}}; for large N data, it is also recommended to use `slim = TRUE` or the `drop` argument. For more details, see: \code{vignette("raster-regression", package = "geostan")}.
+#'
+#'
+#' @return A list containing all of the data elements required by the CAR model in \code{\link[geostan]{stan_car}}.
 #' 
 #' @source
 #'
@@ -62,7 +70,7 @@ eigen_grid <- function(row = 5, col = 5) {
 #'
 #' @export
 #' @md
-prep_car_data2 <- function(row = 100, col = 100) {
+prep_car_data2 <- function(row = 100, col = 100, quiet = FALSE) {
     stopifnot(row > 2 & col > 2)
     N <- row * col
     Idx <- 1:N
@@ -97,7 +105,14 @@ prep_car_data2 <- function(row = 100, col = 100) {
 
     # eigenvalues of the WCAR connectivity matrix
     car.dl$lambda <- eigen_grid(row = row, col = col)
-    cat ("Range of permissible rho values: ", 1 / range(car.dl$lambda), "\n")
+
+    # limits of permissible/possible rho values
+    car.dl$rho_lims <- 1 / range(car.dl$lambda)
+    if (!quiet) {
+            r_rho_lims <- round( car.dl$rho_lims, 3)
+            message("Range of permissible rho values: ", r_rho_lims[1], ", ", r_rho_lims[2])
+    }
+
     car.dl$style <- "WCAR"
     car.dl$C <- C
     return (car.dl)
@@ -109,6 +124,7 @@ prep_car_data2 <- function(row = 100, col = 100) {
 #'
 #' @param row Number of rows in the raster
 #' @param col Number of columns in the raster
+#' @param quiet Controls printing behavior. By default, `quiet = FALSE` and the range of permissible values for the spatial dependence parameter is printed to the console.
 #'
 #' @details
 #'
@@ -118,6 +134,8 @@ prep_car_data2 <- function(row = 100, col = 100) {
 #'
 #' @seealso
 #' \code{\link[geostan]{prep_car_data2}}, \code{\link[geostan]{prep_sar_data}}, \code{\link[geostan]{stan_sar}}.
+#'
+#' @return A list containing all of the data elements required by the SAR model in \code{\link[geostan]{stan_sar}}.
 #' 
 #' @source
 #'
@@ -130,7 +148,7 @@ prep_car_data2 <- function(row = 100, col = 100) {
 #' sar_dl <- prep_sar_data2(row = row, col = col)
 #'
 #' @export 
-prep_sar_data2 <- function(row, col) {
+prep_sar_data2 <- function(row, col, quiet = FALSE) {
     stopifnot(row > 2 & col > 2)
     N <- row * col
     Idx <- 1:N
@@ -170,8 +188,13 @@ prep_sar_data2 <- function(row, col) {
     sar.dl$nW <- length(sar.dl$Widx)
     sar.dl$eigenvalues_w <- eigen_grid(row = row, col = col)
     sar.dl$n <- N
+
     rho_lims <- 1/range(sar.dl$eigenvalues_w)
-    cat("Range of permissible rho values: ", rho_lims, "\n")
+    if (!quiet) {
+            r_rho_lims <- round( rho_lims, 3)
+            message("Range of permissible rho values: ", r_rho_lims[1], ", ", r_rho_lims[2])
+    }
+
     sar.dl$rho_min <- min(rho_lims)
     sar.dl$rho_max <- max(rho_lims)
     sar.dl$W <- W

diff --git a/R/stan_car.R b/R/stan_car.R
@@ -62,6 +62,8 @@
 #' @param control A named list of parameters to control the sampler's behavior. See \code{\link[rstan]{stan}} for details. 
 #' 
 #' @param ... Other arguments passed to \code{\link[rstan]{sampling}}.
+#'
+#' @param quiet Controls (most) automatic printing to the console. By default, any prior distributions that have not been assigned by the user are printed to the console. If `quiet = TRUE`, these will not be printed. Using `quiet = TRUE` will also force `refresh = 0`.
 #' 
 #' @details
 #'
@@ -242,6 +244,7 @@ stan_car <- function(formula,
                      drop = NULL,
                      pars = NULL,
                      control = NULL,
+                     quiet = FALSE,                     
                      ...
                      ) {
     stopifnot(inherits(formula, "formula"))
@@ -251,6 +254,9 @@ stan_car <- function(formula,
     stopifnot(!missing(data))
     check_car_parts(car_parts)
     stopifnot(car_parts$n == nrow(data))
+    # silence?
+    if (quiet) refresh <- 0
+    # C    
     if (!missing(C)) {
         stopifnot(inherits(C, "Matrix") | inherits(C, "matrix"))
         stopifnot(all(dim(C) == nrow(data)))
@@ -423,7 +429,7 @@ stan_car <- function(formula,
     ## PARAMETERS TO KEEP, with CAR PARAMETERS [STOP] -------------
     if (me.list$has_me) priors_made_slim$ME_model <- ME$prior    
     ## PRINT PRIORS  -------------
-    print_priors(prior, priors_made_slim)
+    if (!quiet) print_priors(prior, priors_made_slim)
     ## MCMC INITIAL VALUES -------------
     inits <- "random"
     if (censor_point > 0 | (standata$has_me == 1 && any(standata$use_logit == 1))) {
Original file line number	Diff line number	Diff line change
Expand Up		@@ -210,3 +210,4 @@ print.prior <- function(x, digits = 2, ...) {
		if (nm == "hs") df <- as.data.frame(x[c('global_scale', 'slab_df', 'slab_scale')])
		print(df, digits = digits, ...)
		}