Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export(extract_pairwise_interactions)
export(extract_pairwise_thresholds)
export(extract_posterior_inclusion_probabilities)
export(mrfSampler)
export(summary_SBM)
importFrom(Rcpp,evalCpp)
importFrom(Rdpack,reprompt)
importFrom(methods,hasArg)
Expand Down
8 changes: 6 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,15 @@ sample_bcomrf_gibbs <- function(no_states, no_variables, no_categories, interact
.Call(`_bgms_sample_bcomrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter)
}

gibbs_sampler <- function(observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save = FALSE, display_progress = FALSE, edge_selection = TRUE) {
.Call(`_bgms_gibbs_sampler`, observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save, display_progress, edge_selection)
gibbs_sampler <- function(observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, lambda, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save = FALSE, display_progress = FALSE, edge_selection = TRUE) {
.Call(`_bgms_gibbs_sampler`, observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, lambda, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save, display_progress, edge_selection)
}

compare_anova_gibbs_sampler <- function(observations, main_effect_indices, pairwise_effect_indices, projection, num_categories, num_groups, group_indices, interaction_scale, pairwise_difference_scale, main_difference_scale, pairwise_difference_prior, main_difference_prior, inclusion_probability_difference, pairwise_beta_bernoulli_alpha, pairwise_beta_bernoulli_beta, main_beta_bernoulli_alpha, main_beta_bernoulli_beta, Index, iter, burnin, num_obs_categories, sufficient_blume_capel, prior_threshold_alpha, prior_threshold_beta, na_impute, missing_data_indices, is_ordinal_variable, baseline_category, independent_thresholds, save_main = FALSE, save_pairwise = FALSE, save_indicator = FALSE, display_progress = FALSE, difference_selection = TRUE) {
.Call(`_bgms_compare_anova_gibbs_sampler`, observations, main_effect_indices, pairwise_effect_indices, projection, num_categories, num_groups, group_indices, interaction_scale, pairwise_difference_scale, main_difference_scale, pairwise_difference_prior, main_difference_prior, inclusion_probability_difference, pairwise_beta_bernoulli_alpha, pairwise_beta_bernoulli_beta, main_beta_bernoulli_alpha, main_beta_bernoulli_beta, Index, iter, burnin, num_obs_categories, sufficient_blume_capel, prior_threshold_alpha, prior_threshold_beta, na_impute, missing_data_indices, is_ordinal_variable, baseline_category, independent_thresholds, save_main, save_pairwise, save_indicator, display_progress, difference_selection)
}

compute_Vn_mfm_sbm <- function(no_variables, dirichlet_alpha, t_max, lambda) {
.Call(`_bgms_compute_Vn_mfm_sbm`, no_variables, dirichlet_alpha, t_max, lambda)
}

42 changes: 30 additions & 12 deletions R/bgm.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,10 @@
#' beta prior for the unknown inclusion probability with shape parameters
#' \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, and a Dirichlet
#' prior on the cluster assignment probabilities with a common concentration
#' parameter \code{dirichlet_alpha}. The default is
#' parameter \code{dirichlet_alpha} and a zero-truncated Poisson prior on the
#' number of clusters with a rate parameter \code{lambda}, indicating the
#' expected number of clusters.
#' The default is
#' \code{edge_prior = "Bernoulli"}.
#' @param inclusion_probability The prior edge inclusion probability for the
#' Bernoulli model. Can be a single probability, or a matrix of \code{p} rows
Expand All @@ -130,6 +133,8 @@
#' \code{beta_bernoulli_beta = 1}.
#' @param dirichlet_alpha The shape of the Dirichlet prior on the node-to-block
#' allocation probabilities for the Stochastic Block model.
#' @param lambda The rate parameter of the zero-truncated Poisson prior on the
#' number of cluster for the Stochastic Block model.
#' @param na_action How do you want the function to handle missing data? If
#' \code{na_action = "listwise"}, listwise deletion is used. If
#' \code{na_action = "impute"}, missing data are imputed iteratively during the
Expand Down Expand Up @@ -158,13 +163,20 @@
#' ``blume-capel'' variables, the first entry is the parameter for the linear
#' effect and the second entry is the parameter for the quadratic effect, which
#' models the offset to the reference category.
#' \item In the case of
#' \code{edge_prior = "Stochastic-Block"} two additional elements are returned:
#' a vector \code{allocations} with the estimated cluster assignments of the nodes and an
#' table \code{clusters} with the estimated posterior probability of the number
#' of clusters in the network. The vector of node allocations is calculated using
#' a method proposed by \insertCite{Dahl2009}{bgms} and also used by
#' \insertCite{GengEtAl_2019}{bgms}.
#' \item In the case of \code{edge_prior = "Stochastic-Block"}, two additional
#' elements are returned:
#' \itemize{
#' \item A vector \code{allocations} with the estimated cluster assignments
#' of the nodes, calculated using a method proposed by \insertCite{@Dahl2009}{bgms} and
#' also used by \insertCite{@GengEtAl_2019}{bgms}.
#' \item A matrix \code{components} with the estimated posterior probability
#' of the number of components (clusters) in the network. These probabilities
#' are calculated based on Equation 3.7 in \insertCite{@miller2018mixture}{bgms}, which computes
#' the conditional probability of the number of components given the
#' number of clusters. The number of clusters is derived from the
#' cardinality of the sampled \code{allocations} vector for each iteration of
#' the MCMC sampler (see \code{save = TRUE}).
#' }
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use insertCite for the references.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe I already fixed that

#' }
#' If \code{save = TRUE}, the result is a list of class ``bgms'' containing:
#' \itemize{
Expand All @@ -181,7 +193,7 @@
#' \code{edge_prior = "Stochastic-Block"} a matrix \code{allocations} with the
#' cluster assignments of the nodes from each iteration is returned. This matrix
#' can be used to calculate the posterior probability of the number of clusters
#' by utilizing the \code{summary_SBM(bgm_output[["allocations"]])} function.
#' by utilizing the \code{summary_SBM} function.
#' }
#' Column averages of these matrices provide the model-averaged posterior means.
#' Except for the \code{allocations} matrix, for which the \code{summary_SBM}
Expand Down Expand Up @@ -296,6 +308,7 @@ bgm = function(x,
beta_bernoulli_alpha = 1,
beta_bernoulli_beta = 1,
dirichlet_alpha = 1,
lambda = 1,
na_action = c("listwise", "impute"),
save = FALSE,
display_progress = TRUE) {
Expand All @@ -322,7 +335,8 @@ bgm = function(x,
inclusion_probability = inclusion_probability,
beta_bernoulli_alpha = beta_bernoulli_alpha,
beta_bernoulli_beta = beta_bernoulli_beta,
dirichlet_alpha = dirichlet_alpha)
dirichlet_alpha = dirichlet_alpha,
lambda = lambda)

# ----------------------------------------------------------------------------
# The vector variable_type is now coded as boolean.
Expand Down Expand Up @@ -451,6 +465,7 @@ bgm = function(x,
beta_bernoulli_alpha = beta_bernoulli_alpha ,
beta_bernoulli_beta = beta_bernoulli_beta,
dirichlet_alpha = dirichlet_alpha,
lambda = lambda,
na_action = na_action,
save = save,
version = packageVersion("bgms")
Expand All @@ -470,6 +485,7 @@ bgm = function(x,
beta_bernoulli_alpha = beta_bernoulli_alpha,
beta_bernoulli_beta = beta_bernoulli_beta,
dirichlet_alpha = dirichlet_alpha,
lambda = lambda,
Index = Index,
iter = iter,
burnin = burnin,
Expand Down Expand Up @@ -524,13 +540,15 @@ bgm = function(x,
if(edge_selection == TRUE) {
if(edge_prior == "Stochastic-Block"){

summarySbm <- summary_SBM(cluster_allocations = out$allocations)
summarySbm = summary_SBM(cluster_allocations = out$allocations,
dirichlet_alpha = dirichlet_alpha,
lambda = lambda)

output = list(indicator = indicator,
interactions = interactions,
thresholds = thresholds,
components = summarySbm$components,
allocations = summarySbm$allocations,
clusters = summarySbm$no_clusters,
arguments = arguments)
} else {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I need to check if components has names.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1dfd1393b7b4d8b23d32a7029e67c289
This is how it looks! In easybgm it is printed in a more user-friendly way (as I showed you).


Expand Down
13 changes: 7 additions & 6 deletions R/function_input_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ check_model = function(x,
inclusion_probability = 0.5,
beta_bernoulli_alpha = 1,
beta_bernoulli_beta = 1,
dirichlet_alpha = dirichlet_alpha) {
dirichlet_alpha = dirichlet_alpha,
lambda = lambda) {

#Check variable type input ---------------------------------------------------
if(length(variable_type) == 1) {
Expand Down Expand Up @@ -207,14 +208,14 @@ check_model = function(x,
}
if(edge_prior == "Stochastic-Block") {
theta = matrix(0.5, nrow = ncol(x), ncol = ncol(x))
if(beta_bernoulli_alpha <= 0 || beta_bernoulli_beta <= 0 || dirichlet_alpha <= 0)
if(beta_bernoulli_alpha <= 0 || beta_bernoulli_beta <= 0 || dirichlet_alpha <= 0 || lambda <= 0)
stop("The scale parameters of the beta and Dirichlet distribution need to be positive.")
if(!is.finite(beta_bernoulli_alpha) || !is.finite(beta_bernoulli_beta) || !is.finite(dirichlet_alpha))
stop("The scale parameters of the beta and Dirichlet distribution need to be finite.")
if(!is.finite(beta_bernoulli_alpha) || !is.finite(beta_bernoulli_beta) || !is.finite(dirichlet_alpha) || !is.finite(lambda))
stop("The scale parameters of the beta distribution, the concentration parameter of the Dirichlet distribution, and the rate parameter of the Poisson distribution need to be finite.")
if(is.na(beta_bernoulli_alpha) || is.na(beta_bernoulli_beta) ||
is.null(beta_bernoulli_alpha) || is.null(beta_bernoulli_beta) ||
is.null(dirichlet_alpha) || is.null(dirichlet_alpha))
stop("Values for both scale parameters of the beta and Dirichlet distribution need to be specified.")
is.null(dirichlet_alpha) || is.null(dirichlet_alpha) || is.null(lambda) || is.null(lambda))
stop("Values for both scale parameters of the beta distribution, the concentration parameter of the Dirichlet distribution, and the rate parameter of the Poisson distribution need to be specified.")
}
} else {
theta = matrix(0.5, nrow = 1, ncol = 1)
Expand Down
110 changes: 103 additions & 7 deletions R/posterior_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,115 @@ getDahl = function(cluster_allocations) {
}


# A function that computes the cluster posterior probabilities and allocations
# Calculate the conditional probability of the number of
# components given the cardinality of a sampled
# allocation vector based on Equation (3.7) from
# Miller & Harrison (2018). Mixture Models With a Prior on the Number of
# Components, Journal of the American Statistical Association, 113:521, 340-356,
# DOI:10.1080/01621459.2016.1255636

summary_SBM = function(cluster_allocations) {
# Compute the number of unique clusters for each iteration
compute_p_k_given_t = function(t,
log_Vn,
dirichlet_alpha,
no_variables,
lambda) {
# Define the K_values
K_values = as.numeric(1:no_variables)

# Initialize vector for probabilities
p_k_given_t = numeric(length(K_values))

# Normalization constant for t
log_vn_t = log_Vn[t]

# Normalizing factor for the truncated Poisson distribution
norm_factor = 1 - dpois(0, lambda)
truncated_poisson_pmf = dpois(K_values, lambda) / norm_factor

# Loop through each value of K
for (i in seq_along(K_values)) {
K = K_values[i]
if (K >= t) {
# Falling factorial
falling_factorial = prod(K:(K - t + 1))
# Rising factorial
rising_factorial = prod((dirichlet_alpha * K) + 0:(no_variables - 1))
# Compute log probability
log_p_k = log(falling_factorial) - log(rising_factorial) + log(truncated_poisson_pmf[i]) - log_vn_t
# Convert log probability to probability
p_k_given_t[i] = exp(log_p_k)
} else {
p_k_given_t[i] = 0
}
}
# Normalize probabilities
p_k_given_t = p_k_given_t / sum(p_k_given_t)

return(p_k_given_t)
}

#' Function for summarizing the block allocation vectors
#'
#' This function summarizes the sampled allocation vectors from each iteration
#' of the Gibbs sampler from the output of \code{bgm}
#' when \code{edge_prior = "Stochastic-Block"} and \code{save = TRUE}.
#'
#' @param cluster_allocations a matrix of \code{iter} rows and \code{no_variables}
#' columns containing the sampled cluster allocation vectors from each iteration.
#' @param dirichlet_alpha the concentration parameter of the Dirichlet prior
#' @param lambda the Poisson rate parameter
#' @return Returns a list of two elements: \code{components} and \code{allocations},
#' containing the posterior probabilties for the number of components (clusters)
#' and the estimated cluster allocation of the nodes using Dahl's method.
#' @examples
#'
#' # fit a model with the SBM prior
#' bgm_object = bgm(Wenchuan[, c(1:5)],
#' edge_prior = "Stochastic-Block",
#' save = TRUE,
#' iter = 1e3)
#'
#' s = summary_SBM(bgm_object$allocations,
#' bgm_object$arguments$dirichlet_alpha,
#' bgm_object$arguments$lambda)
#'
#' @export

summary_SBM = function(cluster_allocations,
dirichlet_alpha,
lambda) {

# precompute things
no_variables = ncol(cluster_allocations)
log_Vn = compute_Vn_mfm_sbm(no_variables,
dirichlet_alpha,
no_variables + 10,
lambda)
# Compute the number of unique clusters (t) for each iteration
# i.e., the cardinality of the partition z
clusters = apply(cluster_allocations, 1, function(row) length(unique(row)))
# Compute the conditional probabilities of the number of clusters
# for each row in clusters
p_k_given_t = matrix(NA, nrow = length(clusters), ncol = no_variables)

for (i in 1:length(clusters)) {
p_k_given_t[i, ] = compute_p_k_given_t(clusters[i],
log_Vn,
dirichlet_alpha,
no_variables,
lambda)
}
# average across all iterations
p_k_given_t = colMeans(p_k_given_t)

# Compute the posterior probabilities of the actual unique clusters
no_clusters = table(clusters) / length(clusters)
# make it as a matrix
no_components = 1:no_variables
components = cbind(no_components, p_k_given_t)
colnames(components) = c("no_components", "probability")

# Compute the allocations of the nodes based on Dahl's method
allocations = getDahl(cluster_allocations)

# Return the results
return(list(no_clusters = no_clusters,
return(list(components = components,
allocations = allocations))
}
12 changes: 12 additions & 0 deletions inst/REFERENCES.bib
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
@ARTICLE{miller2018mixture,
title={Mixture models with a prior on the number of components},
author={Miller, Jeffrey W and Harrison, Matthew T},
journal={Journal of the American Statistical Association},
volume={113},
number={521},
pages={340--356},
year={2018},
doi = {10.1080/01621459.2016.1255636},
publisher={Taylor \& Francis}
}

@article{Dahl2009,
author = {David B. Dahl},
title = {Modal clustering in a class of product partition models},
Expand Down
Loading
Loading