From 6caf7fc925b3e2062b60ba6109d4ef56f3dc0a04 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Thu, 5 Dec 2024 13:36:42 +0100 Subject: [PATCH 01/21] Update on the SBM prior: (1) now we save the sampled block allocation parameters and block matrices (2) we sample the number of clusters by first calculating the probability of a sampled cluster configuration using Equation 3.7 pf Miller & Harrison (2018) --- R/bgm.R | 39 +++++++++++++++----- src/gibbs_functions.cpp | 58 ++++++++++++++++++++++++++---- src/gibbs_functions_edge_prior.cpp | 55 ++++++++++++++++++++++++++++ src/gibbs_functions_edge_prior.h | 8 +++++ 4 files changed, 146 insertions(+), 14 deletions(-) diff --git a/R/bgm.R b/R/bgm.R index 88377f44..cc7c01c5 100644 --- a/R/bgm.R +++ b/R/bgm.R @@ -504,10 +504,22 @@ bgm = function(x, arguments$data_columnnames = data_columnnames if(edge_selection == TRUE) { - output = list(indicator = indicator, - interactions = interactions, - thresholds = thresholds, - arguments = arguments) + if(edge_prior == "Stochastic-Block"){ + output = list(indicator = indicator, + interactions = interactions, + thresholds = thresholds, + allocations = out$allocations, + cluster_edge_prob = out$cluster_edge_prob, + clusters = out$clusters, + arguments = arguments) + } else { + + output = list(indicator = indicator, + interactions = interactions, + thresholds = thresholds, + arguments = arguments) + } + } else { output = list(interactions = interactions, thresholds = thresholds, @@ -557,10 +569,21 @@ bgm = function(x, arguments$data_columnnames = data_columnnames if(edge_selection == TRUE) { - output = list(indicator = indicator, - interactions = interactions, - thresholds = thresholds, - arguments = arguments) + if(edge_prior == "Stochastic-Block"){ + output = list(indicator = indicator, + interactions = interactions, + thresholds = thresholds, + allocations = out$allocations, + cluster_edge_prob = out$cluster_edge_prob, + clusters = out$clusters, + arguments = arguments) + } else { + + output = list(indicator = indicator, + interactions = interactions, + thresholds = thresholds, + arguments = arguments) + } } else { output = list(interactions = interactions, thresholds = thresholds, diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp index 50ad6529..915da7b4 100644 --- a/src/gibbs_functions.cpp +++ b/src/gibbs_functions.cpp @@ -812,6 +812,8 @@ List gibbs_sampler(IntegerMatrix observations, int no_interactions = Index.nrow(); int no_thresholds = sum(no_categories); int max_no_categories = max(no_categories); + IntegerVector K_values; // To store sampled K values + IntegerVector v = seq(0, no_interactions - 1); IntegerVector order(no_interactions); @@ -864,7 +866,12 @@ List gibbs_sampler(IntegerMatrix observations, NumericMatrix cluster_prob(1, 1); NumericVector log_Vn(1); - if(edge_prior == "Stochastic-Block") { + // store the allocation indices for each iteration + NumericMatrix out_allocations(iter, no_variables); + List out_cluster_prob(iter); // store the cluster probabilities for each iteration + IntegerVector out_K(iter); // store the number of clusters for each iteration + + if(edge_prior == "Stochastic-Block") { // Initial Configuration of the cluster allocations cluster_allocations[0] = 0; cluster_allocations[1] = 1; for(int i = 2; i < no_variables; i++) { @@ -1029,6 +1036,14 @@ List gibbs_sampler(IntegerMatrix observations, theta(j, i) = cluster_prob(cluster_allocations[i], cluster_allocations[j]); } } + + // Sample the number of clusters (K) + int sampled_K = sample_K_mfm_sbm(cluster_allocations, + dirichlet_alpha, + log_Vn); + + // Store the sampled K value + K_values.push_back(sampled_K); } } } @@ -1038,11 +1053,19 @@ List gibbs_sampler(IntegerMatrix observations, //The post burn-in iterations ------------------------------------------------ for(int iteration = 0; iteration < iter; iteration++) { if (Progress::check_abort()) { - if(edge_selection == true) { + if(edge_selection == true && edge_prior == "Stochastic-Block") { + return List::create(Named("indicator") = out_indicator, + Named("interactions") = out_interactions, + Named("thresholds") = out_thresholds, + Named("allocations") = out_allocations, + Named("cluster_edge_prob") = out_cluster_prob, + Named("clusters") = out_K); + } if(edge_selection == true && edge_prior != "Stochastic-Block") { return List::create(Named("indicator") = out_indicator, Named("interactions") = out_interactions, Named("thresholds") = out_thresholds); - } else { + } + else { return List::create(Named("interactions") = out_interactions, Named("thresholds") = out_thresholds); } @@ -1150,12 +1173,22 @@ List gibbs_sampler(IntegerMatrix observations, beta_bernoulli_alpha, beta_bernoulli_beta); + // store cluster_prob to out_cluster_prob + out_cluster_prob[iteration] = clone(cluster_prob); + for(int i = 0; i < no_variables - 1; i++) { for(int j = i + 1; j < no_variables; j++) { theta(i, j) = cluster_prob(cluster_allocations[i], cluster_allocations[j]); theta(j, i) = cluster_prob(cluster_allocations[i], cluster_allocations[j]); } } + + // Sample the number of clusters (K) + int sampled_K = sample_K_mfm_sbm(cluster_allocations, + dirichlet_alpha, + log_Vn); + // Store the sampled K value to out_K + out_K[iteration] = sampled_K; } } @@ -1234,15 +1267,28 @@ List gibbs_sampler(IntegerMatrix observations, out_thresholds(no_variables - 1, 1) /= iteration + 1; } } + + for(int i = 0; i < no_variables; i++) { + out_allocations(iteration, i) = cluster_allocations[i] + 1; + } } if(edge_selection == true) { + if(edge_prior == "Stochastic-Block"){ return List::create(Named("indicator") = out_indicator, Named("interactions") = out_interactions, - Named("thresholds") = out_thresholds); + Named("thresholds") = out_thresholds, + Named("allocations") = out_allocations, // Include z values + Named("cluster_edge_prob") = out_cluster_prob, // Include cluster probabilities (i.e., the block matrix) + Named("clusters") = out_K); // Include the sampled number of clusters + } else { + return List::create(Named("indicator") = out_indicator, + Named("interactions") = out_interactions, + Named("thresholds") = out_thresholds); + } } else { - return List::create(Named("interactions") = out_interactions, - Named("thresholds") = out_thresholds); + return List::create(Named("interactions") = out_interactions, + Named("thresholds") = out_thresholds); } } \ No newline at end of file diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp index a476a41b..68fcc75f 100644 --- a/src/gibbs_functions_edge_prior.cpp +++ b/src/gibbs_functions_edge_prior.cpp @@ -337,8 +337,63 @@ IntegerVector block_allocations_mfm_sbm(IntegerVector cluster_assign, } } return cluster_assign; + +} + +// ----------------------------------------------------------------------------| +// Sample the number of clusters (K) based on Equation (3.7) from Miller & Harrison +// ----------------------------------------------------------------------------| +int sample_K_mfm_sbm(IntegerVector cluster_allocations, + double dirichlet_alpha, + NumericVector log_Vn, + int max_K = 12) { + // compute t = |C| (number of unique clusters i.e., the cardinality of C) + IntegerVector cluster_size = table_cpp(cluster_allocations); // Cluster sizes + int t = cluster_size.size(); // Number of non-zero clusters + + // Step 2: Compute p(k|t) using Equation 3.7 in Miller and Harrison (2018) + NumericVector log_p_K(max_K, R_NegInf); // Log-probabilities of k + for (int k = t; k < max_K; ++k) { + double log_term1 = std::lgamma(k + 1) - std::lgamma(k - t + 1); // (k choose t) + double log_term2 = k * std::log(dirichlet_alpha); // dirichlet_alpha^k + double log_term3 = -log_Vn[t - 1]; // Normalize by Vn + log_p_K[k] = log_term1 + log_term2 + log_term3; // Combine terms + } + + // Normalize probabilities using log-sum-exp + double log_sum_exp = -INFINITY; + for (int k = t; k < max_K; ++k) { + log_sum_exp = std::log1p(std::exp(log_p_K[k] - log_sum_exp)) + log_sum_exp; + } + NumericVector p_K(max_K, 0.0); + for (int k = t; k < max_K; ++k) { + p_K[k] = std::exp(log_p_K[k] - log_sum_exp); // Convert log-p to p + } + + // Step 3: Exclude zero and adjust probabilities for truncated Poisson + double p_zero = std::exp(-1.0); // P(k = 0) for Poisson(lambda = 1) + double truncated_normalizer = 1.0 - p_zero; + double cumulative_probability = 0.0; + + for (int k = t; k < max_K; ++k) { + p_K[k] /= truncated_normalizer; // Adjust probabilities to exclude zero + cumulative_probability += p_K[k]; + } + + // Step 4: Draw a uniform random number within [0, cumulative_probability] + double u = R::runif(0, cumulative_probability); + + // Step 5: sample from the truncated Poisson distribution + double lambda = 1.0; // Poisson parameter (fixed to 1) + int sampled_k = R::qpois(u * truncated_normalizer, lambda, true, false); + + // Ensure k is at least t + return std::max(t, std::min(sampled_k, max_K - 1)); } + + + // ----------------------------------------------------------------------------| // Sample the block parameters for the MFM - SBM // ----------------------------------------------------------------------------| diff --git a/src/gibbs_functions_edge_prior.h b/src/gibbs_functions_edge_prior.h index 827c4ea9..72dfd814 100644 --- a/src/gibbs_functions_edge_prior.h +++ b/src/gibbs_functions_edge_prior.h @@ -20,6 +20,14 @@ Rcpp::IntegerVector block_allocations_mfm_sbm(Rcpp::IntegerVector cluster_assign double beta_bernoulli_alpha, double beta_bernoulli_beta); +// ----------------------------------------------------------------------------| +// Sample the number of clusters (K) based on Equation (3.7) from Miller & Harrison +// ----------------------------------------------------------------------------| +int sample_K_mfm_sbm(IntegerVector cluster_assign, + double dirichlet_alpha, + NumericVector log_Vn, + int max_K = 12); + // ----------------------------------------------------------------------------| // Sample the block parameters for the MFM - SBM // ----------------------------------------------------------------------------| From 6633a70e798ab2f3da2c842fb233b39eea622ff9 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Thu, 5 Dec 2024 16:38:19 +0100 Subject: [PATCH 02/21] update the restriction on Kmax --- src/gibbs_functions.cpp | 6 ++++-- src/gibbs_functions_edge_prior.cpp | 2 +- src/gibbs_functions_edge_prior.h | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp index 915da7b4..9f0a609c 100644 --- a/src/gibbs_functions.cpp +++ b/src/gibbs_functions.cpp @@ -1040,7 +1040,8 @@ List gibbs_sampler(IntegerMatrix observations, // Sample the number of clusters (K) int sampled_K = sample_K_mfm_sbm(cluster_allocations, dirichlet_alpha, - log_Vn); + log_Vn, + no_variables); // Store the sampled K value K_values.push_back(sampled_K); @@ -1186,7 +1187,8 @@ List gibbs_sampler(IntegerMatrix observations, // Sample the number of clusters (K) int sampled_K = sample_K_mfm_sbm(cluster_allocations, dirichlet_alpha, - log_Vn); + log_Vn, + no_variables); // Store the sampled K value to out_K out_K[iteration] = sampled_K; } diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp index 68fcc75f..ef3d04f0 100644 --- a/src/gibbs_functions_edge_prior.cpp +++ b/src/gibbs_functions_edge_prior.cpp @@ -346,7 +346,7 @@ IntegerVector block_allocations_mfm_sbm(IntegerVector cluster_assign, int sample_K_mfm_sbm(IntegerVector cluster_allocations, double dirichlet_alpha, NumericVector log_Vn, - int max_K = 12) { + int max_K = 500) { // compute t = |C| (number of unique clusters i.e., the cardinality of C) IntegerVector cluster_size = table_cpp(cluster_allocations); // Cluster sizes int t = cluster_size.size(); // Number of non-zero clusters diff --git a/src/gibbs_functions_edge_prior.h b/src/gibbs_functions_edge_prior.h index 72dfd814..4588def0 100644 --- a/src/gibbs_functions_edge_prior.h +++ b/src/gibbs_functions_edge_prior.h @@ -26,7 +26,7 @@ Rcpp::IntegerVector block_allocations_mfm_sbm(Rcpp::IntegerVector cluster_assign int sample_K_mfm_sbm(IntegerVector cluster_assign, double dirichlet_alpha, NumericVector log_Vn, - int max_K = 12); + int max_K = 500); // ----------------------------------------------------------------------------| // Sample the block parameters for the MFM - SBM From f74e7230d82d8d4088723182df70182eecf7f428 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Fri, 6 Dec 2024 15:21:33 +0100 Subject: [PATCH 03/21] changes: (1) take out the option to save out_cluster_prob and (2) changes K_max = no_variables + 10 --- R/bgm.R | 2 -- src/gibbs_functions.cpp | 9 ++------- src/gibbs_functions_edge_prior.cpp | 2 +- src/gibbs_functions_edge_prior.h | 2 +- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/R/bgm.R b/R/bgm.R index cc7c01c5..096bb64d 100644 --- a/R/bgm.R +++ b/R/bgm.R @@ -509,7 +509,6 @@ bgm = function(x, interactions = interactions, thresholds = thresholds, allocations = out$allocations, - cluster_edge_prob = out$cluster_edge_prob, clusters = out$clusters, arguments = arguments) } else { @@ -574,7 +573,6 @@ bgm = function(x, interactions = interactions, thresholds = thresholds, allocations = out$allocations, - cluster_edge_prob = out$cluster_edge_prob, clusters = out$clusters, arguments = arguments) } else { diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp index 9f0a609c..6c8c04ea 100644 --- a/src/gibbs_functions.cpp +++ b/src/gibbs_functions.cpp @@ -868,7 +868,6 @@ List gibbs_sampler(IntegerMatrix observations, // store the allocation indices for each iteration NumericMatrix out_allocations(iter, no_variables); - List out_cluster_prob(iter); // store the cluster probabilities for each iteration IntegerVector out_K(iter); // store the number of clusters for each iteration if(edge_prior == "Stochastic-Block") { // Initial Configuration of the cluster allocations @@ -1041,7 +1040,7 @@ List gibbs_sampler(IntegerMatrix observations, int sampled_K = sample_K_mfm_sbm(cluster_allocations, dirichlet_alpha, log_Vn, - no_variables); + no_variables + 10); // Store the sampled K value K_values.push_back(sampled_K); @@ -1059,7 +1058,6 @@ List gibbs_sampler(IntegerMatrix observations, Named("interactions") = out_interactions, Named("thresholds") = out_thresholds, Named("allocations") = out_allocations, - Named("cluster_edge_prob") = out_cluster_prob, Named("clusters") = out_K); } if(edge_selection == true && edge_prior != "Stochastic-Block") { return List::create(Named("indicator") = out_indicator, @@ -1174,8 +1172,6 @@ List gibbs_sampler(IntegerMatrix observations, beta_bernoulli_alpha, beta_bernoulli_beta); - // store cluster_prob to out_cluster_prob - out_cluster_prob[iteration] = clone(cluster_prob); for(int i = 0; i < no_variables - 1; i++) { for(int j = i + 1; j < no_variables; j++) { @@ -1188,7 +1184,7 @@ List gibbs_sampler(IntegerMatrix observations, int sampled_K = sample_K_mfm_sbm(cluster_allocations, dirichlet_alpha, log_Vn, - no_variables); + no_variables + 10); // Store the sampled K value to out_K out_K[iteration] = sampled_K; } @@ -1282,7 +1278,6 @@ List gibbs_sampler(IntegerMatrix observations, Named("interactions") = out_interactions, Named("thresholds") = out_thresholds, Named("allocations") = out_allocations, // Include z values - Named("cluster_edge_prob") = out_cluster_prob, // Include cluster probabilities (i.e., the block matrix) Named("clusters") = out_K); // Include the sampled number of clusters } else { return List::create(Named("indicator") = out_indicator, diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp index ef3d04f0..f39bb3e1 100644 --- a/src/gibbs_functions_edge_prior.cpp +++ b/src/gibbs_functions_edge_prior.cpp @@ -346,7 +346,7 @@ IntegerVector block_allocations_mfm_sbm(IntegerVector cluster_assign, int sample_K_mfm_sbm(IntegerVector cluster_allocations, double dirichlet_alpha, NumericVector log_Vn, - int max_K = 500) { + int max_K) { // compute t = |C| (number of unique clusters i.e., the cardinality of C) IntegerVector cluster_size = table_cpp(cluster_allocations); // Cluster sizes int t = cluster_size.size(); // Number of non-zero clusters diff --git a/src/gibbs_functions_edge_prior.h b/src/gibbs_functions_edge_prior.h index 4588def0..69db07d4 100644 --- a/src/gibbs_functions_edge_prior.h +++ b/src/gibbs_functions_edge_prior.h @@ -26,7 +26,7 @@ Rcpp::IntegerVector block_allocations_mfm_sbm(Rcpp::IntegerVector cluster_assign int sample_K_mfm_sbm(IntegerVector cluster_assign, double dirichlet_alpha, NumericVector log_Vn, - int max_K = 500); + int max_K); // ----------------------------------------------------------------------------| // Sample the block parameters for the MFM - SBM From 4543486fab840cf5b0252ee0b738b803a68fceaa Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Fri, 6 Dec 2024 15:30:20 +0100 Subject: [PATCH 04/21] take out saving K within the Gibbs sampler --- R/bgm.R | 2 -- src/gibbs_functions.cpp | 15 ++------------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/R/bgm.R b/R/bgm.R index 096bb64d..432f4653 100644 --- a/R/bgm.R +++ b/R/bgm.R @@ -509,7 +509,6 @@ bgm = function(x, interactions = interactions, thresholds = thresholds, allocations = out$allocations, - clusters = out$clusters, arguments = arguments) } else { @@ -573,7 +572,6 @@ bgm = function(x, interactions = interactions, thresholds = thresholds, allocations = out$allocations, - clusters = out$clusters, arguments = arguments) } else { diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp index 6c8c04ea..897334b5 100644 --- a/src/gibbs_functions.cpp +++ b/src/gibbs_functions.cpp @@ -868,7 +868,6 @@ List gibbs_sampler(IntegerMatrix observations, // store the allocation indices for each iteration NumericMatrix out_allocations(iter, no_variables); - IntegerVector out_K(iter); // store the number of clusters for each iteration if(edge_prior == "Stochastic-Block") { // Initial Configuration of the cluster allocations cluster_allocations[0] = 0; @@ -1057,8 +1056,7 @@ List gibbs_sampler(IntegerMatrix observations, return List::create(Named("indicator") = out_indicator, Named("interactions") = out_interactions, Named("thresholds") = out_thresholds, - Named("allocations") = out_allocations, - Named("clusters") = out_K); + Named("allocations") = out_allocations); } if(edge_selection == true && edge_prior != "Stochastic-Block") { return List::create(Named("indicator") = out_indicator, Named("interactions") = out_interactions, @@ -1179,14 +1177,6 @@ List gibbs_sampler(IntegerMatrix observations, theta(j, i) = cluster_prob(cluster_allocations[i], cluster_allocations[j]); } } - - // Sample the number of clusters (K) - int sampled_K = sample_K_mfm_sbm(cluster_allocations, - dirichlet_alpha, - log_Vn, - no_variables + 10); - // Store the sampled K value to out_K - out_K[iteration] = sampled_K; } } @@ -1277,8 +1267,7 @@ List gibbs_sampler(IntegerMatrix observations, return List::create(Named("indicator") = out_indicator, Named("interactions") = out_interactions, Named("thresholds") = out_thresholds, - Named("allocations") = out_allocations, // Include z values - Named("clusters") = out_K); // Include the sampled number of clusters + Named("allocations") = out_allocations); // Include the sampled number of clusters } else { return List::create(Named("indicator") = out_indicator, Named("interactions") = out_interactions, From f25e2f0556a359b77d3aedf06ad52b8eaac60d43 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Mon, 9 Dec 2024 13:32:17 +0100 Subject: [PATCH 05/21] Include the option to calculate the number of clusters and cluster allocations --- R/bgm.R | 31 +++++++++++++---- R/utility_functions.R | 56 ++++++++++++++++++++++++++++++ man/bgm.Rd | 25 ++++++++++--- src/gibbs_functions.cpp | 11 +----- src/gibbs_functions_edge_prior.cpp | 56 +----------------------------- src/gibbs_functions_edge_prior.h | 8 ----- vignettes/refs.bib | 30 ++++++++++++---- 7 files changed, 127 insertions(+), 90 deletions(-) diff --git a/R/bgm.R b/R/bgm.R index 432f4653..799eb625 100644 --- a/R/bgm.R +++ b/R/bgm.R @@ -111,12 +111,14 @@ #' model \code{edge_prior = "Stochastic-Block"} assumes that nodes can be #' organized into blocks or clusters. In principle, the assignment of nodes to #' such clusters is unknown, and the model as implemented here considers all -#' possible options \insertCite{@i.e., specifies a Dirichlet process on the node to block -#' allocation as described by @GengEtAl_2019}{bgms}. This model is advantageous +#' possible options \insertCite{@i.e., specifies a Dirichlet prior on the probability +#' of allocations as described by @GengEtAl_2019}{bgms}. This model is advantageous #' when nodes are expected to fall into distinct clusters. The inclusion #' probabilities for the edges are defined at the level of the clusters, with a #' beta prior for the unknown inclusion probability with shape parameters -#' \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}. The default is +#' \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, and a Dirichlet +#' prior on the cluster assignment probabilities with a common concentration +#' parameter \code{dirichlet_alpha}. The default is #' \code{edge_prior = "Bernoulli"}. #' @param inclusion_probability The prior edge inclusion probability for the #' Bernoulli model. Can be a single probability, or a matrix of \code{p} rows @@ -127,7 +129,7 @@ #' positive numbers. Defaults to \code{beta_bernoulli_alpha = 1} and #' \code{beta_bernoulli_beta = 1}. #' @param dirichlet_alpha The shape of the Dirichlet prior on the node-to-block -#' allocation parameters for the Stochastic Block model. +#' allocation probabilities for the Stochastic Block model. #' @param na.action How do you want the function to handle missing data? If #' \code{na.action = "listwise"}, listwise deletion is used. If #' \code{na.action = "impute"}, missing data are imputed iteratively during the @@ -156,8 +158,14 @@ #' ``blume-capel'' variables, the first entry is the parameter for the linear #' effect and the second entry is the parameter for the quadratic effect, which #' models the offset to the reference category. +#' \item In the case of +#' \code{edge_prior = "Stochastic-Block"} two additional elements are returned: +#' a vector \code{allocations} with the estimated cluster assignments of the nodes and an +#' table \code{clusters} with the estimated posterior probability of the number +#' of clusters in the network. The vector of node allocations is calculated using +#' a method proposed by \insertCite{Dahl2009}{bgms} and also used by +#' \insertCite{GengEtAl_2019}{bgms}. #' } -#' #' If \code{save = TRUE}, the result is a list of class ``bgms'' containing: #' \itemize{ #' \item \code{indicator}: A matrix with \code{iter} rows and @@ -169,8 +177,15 @@ #' \item \code{thresholds}: A matrix with \code{iter} rows and #' \code{sum(m)} columns, containing parameter states from every iteration of #' the Gibbs sampler for the category thresholds. +#' \item In the case of +#' \code{edge_prior = "Stochastic-Block"} a matrix \code{allocations} with the +#' cluster assignments of the nodes from each iteration is returned. This matrix +#' can be used to calculate the posterior probability of the number of clusters +#' by utilizing the \code{summary_SBM(bgm_output[["allocations"]])} function. #' } #' Column averages of these matrices provide the model-averaged posterior means. +#' Except for the \code{allocations} matrix, for which the \code{summary_SBM} +#' needs to be utilized. #' #' In addition to the analysis results, the bgm output lists some of the #' arguments of its call. This is useful for post-processing the results. @@ -505,10 +520,14 @@ bgm = function(x, if(edge_selection == TRUE) { if(edge_prior == "Stochastic-Block"){ + + summarySbm <- summary_SBM(cluster_allocations = out$allocations) + output = list(indicator = indicator, interactions = interactions, thresholds = thresholds, - allocations = out$allocations, + allocations = summarySbm$allocations, + clusters = summarySbm$no_clusters, arguments = arguments) } else { diff --git a/R/utility_functions.R b/R/utility_functions.R index 3acbcb1a..4c12ba61 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -1007,3 +1007,59 @@ compare_reformat_data = function(x, missing_index_gr2 = missing_index_gr2, na_impute = na_impute)) } + +# Dahl's method to summarize the samples of the cluster_allocations +# This function was adapted from the R code accompanying the paper: +# Geng, J., Bhattacharya, A., & Pati, D. (2019). Probabilistic Community +# Detection With Unknown Number of Communities, Journal of the American +# Statistical Association, 114:526, 893-905, DOI:10.1080/01621459.2018.1458618 + +getDahl <- function(cluster_allocations) { + # Dimensions of the input matrix + niters <- nrow(cluster_allocations) # Number of iterations + n <- ncol(cluster_allocations) # Number of nodes + + # Compute membership matrices for each iteration + membershipMatrices <- apply(cluster_allocations, 1, function(clusterAssign) { + outer(clusterAssign, clusterAssign, FUN = "==") + }) + + # Reshape membershipMatrices into a list of matrices + membershipMatrices <- lapply(seq_len(niters), function(i) { + matrix(membershipMatrices[, i], n, n) + }) + + # Compute the average membership matrix + membershipAverage <- Reduce("+", membershipMatrices) / niters + + # Compute squared error for each iteration + SqError <- sapply(membershipMatrices, function(x, av) sum((x - av)^2), + av = membershipAverage) + + # Find the iteration with the minimum squared error + DahlIndex <- which.min(SqError) + + # Extract the cluster assignment corresponding to the best iteration + DahlAns <- cluster_allocations[DahlIndex, , drop = TRUE] + + return(DahlAns) +} + + +# A function that computes the cluster posterior probabilities and allocations + +summary_SBM <- function(cluster_allocations) { + # Compute the number of unique clusters for each iteration + clusters <- apply(cluster_allocations, 1, function(row) length(unique(row))) + + # Compute the posterior probabilities of the actual unique clusters + no_clusters <- table(clusters) / length(clusters) + + # Compute the allocations of the nodes based on Dahl's method + allocations <- getDahl(cluster_allocations) + + # Return the results + return(list(no_clusters = no_clusters, + allocations = allocations)) +} + diff --git a/man/bgm.Rd b/man/bgm.Rd index 6c8fe3d5..d091164b 100644 --- a/man/bgm.Rd +++ b/man/bgm.Rd @@ -101,12 +101,14 @@ complexity (number of edges) get the same prior weight. The Stochastic Block model \code{edge_prior = "Stochastic-Block"} assumes that nodes can be organized into blocks or clusters. In principle, the assignment of nodes to such clusters is unknown, and the model as implemented here considers all -possible options \insertCite{@i.e., specifies a Dirichlet process on the node to block -allocation as described by @GengEtAl_2019}{bgms}. This model is advantageous +possible options \insertCite{@i.e., specifies a Dirichlet prior on the probability +of allocations as described by @GengEtAl_2019}{bgms}. This model is advantageous when nodes are expected to fall into distinct clusters. The inclusion probabilities for the edges are defined at the level of the clusters, with a beta prior for the unknown inclusion probability with shape parameters -\code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}. The default is +\code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, and a Dirichlet +prior on the cluster assignment probabilities with a common concentration +parameter \code{dirichlet_alpha}. The default is \code{edge_prior = "Bernoulli"}.} \item{inclusion_probability}{The prior edge inclusion probability for the @@ -120,7 +122,7 @@ positive numbers. Defaults to \code{beta_bernoulli_alpha = 1} and \code{beta_bernoulli_beta = 1}.} \item{dirichlet_alpha}{The shape of the Dirichlet prior on the node-to-block -allocation parameters for the Stochastic Block model.} +allocation probabilities for the Stochastic Block model.} \item{na.action}{How do you want the function to handle missing data? If \code{na.action = "listwise"}, listwise deletion is used. If @@ -153,8 +155,14 @@ columns, containing model-averaged category thresholds. In the case of ``blume-capel'' variables, the first entry is the parameter for the linear effect and the second entry is the parameter for the quadratic effect, which models the offset to the reference category. +\item In the case of +\code{edge_prior = "Stochastic-Block"} two additional elements are returned: +a vector \code{allocations} with the estimated cluster assignments of the nodes and an +table \code{clusters} with the estimated posterior probability of the number +of clusters in the network. The vector of node allocations is calculated using +a method proposed by \insertCite{Dahl2009}{bgms} and also used by +\insertCite{GengEtAl_2019}{bgms}. } - If \code{save = TRUE}, the result is a list of class ``bgms'' containing: \itemize{ \item \code{indicator}: A matrix with \code{iter} rows and @@ -166,8 +174,15 @@ iteration of the Gibbs sampler for the pairwise associations. \item \code{thresholds}: A matrix with \code{iter} rows and \code{sum(m)} columns, containing parameter states from every iteration of the Gibbs sampler for the category thresholds. +\item In the case of +\code{edge_prior = "Stochastic-Block"} a matrix \code{allocations} with the +cluster assignments of the nodes from each iteration is returned. This matrix +can be used to calculate the posterior probability of the number of clusters +by utilizing the \code{summary_SBM(bgm_output[["allocations"]])} function. } Column averages of these matrices provide the model-averaged posterior means. +Except for the \code{allocations} matrix, for which the \code{summary_SBM} +needs to be utilized. In addition to the analysis results, the bgm output lists some of the arguments of its call. This is useful for post-processing the results. diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp index 897334b5..56e06236 100644 --- a/src/gibbs_functions.cpp +++ b/src/gibbs_functions.cpp @@ -1034,15 +1034,6 @@ List gibbs_sampler(IntegerMatrix observations, theta(j, i) = cluster_prob(cluster_allocations[i], cluster_allocations[j]); } } - - // Sample the number of clusters (K) - int sampled_K = sample_K_mfm_sbm(cluster_allocations, - dirichlet_alpha, - log_Vn, - no_variables + 10); - - // Store the sampled K value - K_values.push_back(sampled_K); } } } @@ -1267,7 +1258,7 @@ List gibbs_sampler(IntegerMatrix observations, return List::create(Named("indicator") = out_indicator, Named("interactions") = out_interactions, Named("thresholds") = out_thresholds, - Named("allocations") = out_allocations); // Include the sampled number of clusters + Named("allocations") = out_allocations); } else { return List::create(Named("indicator") = out_indicator, Named("interactions") = out_interactions, diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp index f39bb3e1..b1837ebb 100644 --- a/src/gibbs_functions_edge_prior.cpp +++ b/src/gibbs_functions_edge_prior.cpp @@ -340,60 +340,6 @@ IntegerVector block_allocations_mfm_sbm(IntegerVector cluster_assign, } -// ----------------------------------------------------------------------------| -// Sample the number of clusters (K) based on Equation (3.7) from Miller & Harrison -// ----------------------------------------------------------------------------| -int sample_K_mfm_sbm(IntegerVector cluster_allocations, - double dirichlet_alpha, - NumericVector log_Vn, - int max_K) { - // compute t = |C| (number of unique clusters i.e., the cardinality of C) - IntegerVector cluster_size = table_cpp(cluster_allocations); // Cluster sizes - int t = cluster_size.size(); // Number of non-zero clusters - - // Step 2: Compute p(k|t) using Equation 3.7 in Miller and Harrison (2018) - NumericVector log_p_K(max_K, R_NegInf); // Log-probabilities of k - for (int k = t; k < max_K; ++k) { - double log_term1 = std::lgamma(k + 1) - std::lgamma(k - t + 1); // (k choose t) - double log_term2 = k * std::log(dirichlet_alpha); // dirichlet_alpha^k - double log_term3 = -log_Vn[t - 1]; // Normalize by Vn - log_p_K[k] = log_term1 + log_term2 + log_term3; // Combine terms - } - - // Normalize probabilities using log-sum-exp - double log_sum_exp = -INFINITY; - for (int k = t; k < max_K; ++k) { - log_sum_exp = std::log1p(std::exp(log_p_K[k] - log_sum_exp)) + log_sum_exp; - } - NumericVector p_K(max_K, 0.0); - for (int k = t; k < max_K; ++k) { - p_K[k] = std::exp(log_p_K[k] - log_sum_exp); // Convert log-p to p - } - - // Step 3: Exclude zero and adjust probabilities for truncated Poisson - double p_zero = std::exp(-1.0); // P(k = 0) for Poisson(lambda = 1) - double truncated_normalizer = 1.0 - p_zero; - double cumulative_probability = 0.0; - - for (int k = t; k < max_K; ++k) { - p_K[k] /= truncated_normalizer; // Adjust probabilities to exclude zero - cumulative_probability += p_K[k]; - } - - // Step 4: Draw a uniform random number within [0, cumulative_probability] - double u = R::runif(0, cumulative_probability); - - // Step 5: sample from the truncated Poisson distribution - double lambda = 1.0; // Poisson parameter (fixed to 1) - int sampled_k = R::qpois(u * truncated_normalizer, lambda, true, false); - - // Ensure k is at least t - return std::max(t, std::min(sampled_k, max_K - 1)); -} - - - - // ----------------------------------------------------------------------------| // Sample the block parameters for the MFM - SBM // ----------------------------------------------------------------------------| @@ -431,4 +377,4 @@ NumericMatrix block_probs_mfm_sbm(IntegerVector cluster_assign, } return block_probs; -} \ No newline at end of file +} diff --git a/src/gibbs_functions_edge_prior.h b/src/gibbs_functions_edge_prior.h index 69db07d4..827c4ea9 100644 --- a/src/gibbs_functions_edge_prior.h +++ b/src/gibbs_functions_edge_prior.h @@ -20,14 +20,6 @@ Rcpp::IntegerVector block_allocations_mfm_sbm(Rcpp::IntegerVector cluster_assign double beta_bernoulli_alpha, double beta_bernoulli_beta); -// ----------------------------------------------------------------------------| -// Sample the number of clusters (K) based on Equation (3.7) from Miller & Harrison -// ----------------------------------------------------------------------------| -int sample_K_mfm_sbm(IntegerVector cluster_assign, - double dirichlet_alpha, - NumericVector log_Vn, - int max_K); - // ----------------------------------------------------------------------------| // Sample the block parameters for the MFM - SBM // ----------------------------------------------------------------------------| diff --git a/vignettes/refs.bib b/vignettes/refs.bib index 44c43c84..0a152448 100644 --- a/vignettes/refs.bib +++ b/vignettes/refs.bib @@ -1,11 +1,29 @@ -@article{SekulovskiEtAl_2023, - title={Testing conditional independence in psychometric networks: An analysis of three bayesian methods}, - author={Sekulovski, N. and Keetelaar, S. and Huth, K. and Wagenmakers, {E.-J.} and van Bork, R. and van den Bergh, D. and Marsman, M.}, - journal={Multivariate Behavioral Research}, - doi = {10.1080/00273171.2024.2345915}, - year={in press} + +@article{Dahl2009, + author = {David B. Dahl}, + title = {Modal clustering in a class of product partition models}, + journal = {Bayesian Analysis}, + volume = {4}, + number = {2}, + pages = {243--264}, + year = {2009}, + month = {June}, + doi = {10.1214/09-BA409}, + url = {https://doi.org/10.1214/09-BA409} } +@article{SekulovskiEtAl_2023, + author = {Sekulovski, N. and Keetelaar, S. and Huth, K. B. S. and Wagenmakers, Eric-Jan and {van }Bork, R. and {van den }Bergh, D. and Marsman, M.}, + date-added = {2023-06-12 16:18:37 +0200}, + date-modified = {2024-12-07 10:05:32 +0100}, + doi = {10.1080/00273171.2024.2345915}, + journal = {Multivariate Behavioral Research}, + pages = {913--933}, + title = {Testing conditional independence in psychometric networks: {A}n analysis of three {B}ayesian methods}, + volume = {59}, + year = {2024}, + bdsk-url-1 = {https://doi.org/10.1080/00273171.2024.2345915}} + @article{GengEtAl_2019, title={Probabilistic community detection with unknown number of communities}, author={Geng, J. and Bhattacharya, A. and Pati, D.}, From 02be0e9b4eb7dfb1d4c79fb9a4f7285078ba0186 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Mon, 9 Dec 2024 14:30:20 +0100 Subject: [PATCH 06/21] update the references --- inst/REFERENCES.bib | 27 +++++++++++++++++++++------ vignettes/refs.bib | 15 --------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib index 1d63bbfe..18afffc3 100644 --- a/inst/REFERENCES.bib +++ b/inst/REFERENCES.bib @@ -1,3 +1,16 @@ +@article{Dahl2009, + author = {David B. Dahl}, + title = {Modal clustering in a class of product partition models}, + journal = {Bayesian Analysis}, + volume = {4}, + number = {2}, + pages = {243--264}, + year = {2009}, + month = {June}, + doi = {10.1214/09-BA409}, + url = {https://doi.org/10.1214/09-BA409} +} + @article{GengEtAl_2019, title={Probabilistic community detection with unknown number of communities}, author={Geng, J. and Bhattacharya, A. and Pati, D.}, @@ -8,12 +21,14 @@ @article{GengEtAl_2019 year={2019}} @article{SekulovskiEtAl_2023, - title={Testing conditional independence in psychometric networks: An analysis of three bayesian methods}, - author={Sekulovski, N. and Keetelaar, S. and Huth, K. and Wagenmakers, {E.-J.} and van Bork, R. and van den Bergh, D. and Marsman, M.}, - journal={Multivariate Behavioral Research}, - doi = {10.1080/00273171.2024.2345915}, - year={in press} -} + author = {Sekulovski, N. and Keetelaar, S. and Huth, K. B. S. and Wagenmakers, Eric-Jan and {van }Bork, R. and {van den }Bergh, D. and Marsman, M.}, + doi = {10.1080/00273171.2024.2345915}, + journal = {Multivariate Behavioral Research}, + pages = {913--933}, + title = {Testing conditional independence in psychometric networks: {A}n analysis of three {B}ayesian methods}, + volume = {59}, + year = {2024}, + bdsk-url-1 = {https://doi.org/10.1080/00273171.2024.2345915}} @article{Dienes_2014, author = {Dienes, Z.}, diff --git a/vignettes/refs.bib b/vignettes/refs.bib index 0a152448..1e76345a 100644 --- a/vignettes/refs.bib +++ b/vignettes/refs.bib @@ -1,21 +1,6 @@ -@article{Dahl2009, - author = {David B. Dahl}, - title = {Modal clustering in a class of product partition models}, - journal = {Bayesian Analysis}, - volume = {4}, - number = {2}, - pages = {243--264}, - year = {2009}, - month = {June}, - doi = {10.1214/09-BA409}, - url = {https://doi.org/10.1214/09-BA409} -} - @article{SekulovskiEtAl_2023, author = {Sekulovski, N. and Keetelaar, S. and Huth, K. B. S. and Wagenmakers, Eric-Jan and {van }Bork, R. and {van den }Bergh, D. and Marsman, M.}, - date-added = {2023-06-12 16:18:37 +0200}, - date-modified = {2024-12-07 10:05:32 +0100}, doi = {10.1080/00273171.2024.2345915}, journal = {Multivariate Behavioral Research}, pages = {913--933}, From 73d42b8b177f19bdd9adf0387014a1acedc8acfd Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Thu, 19 Dec 2024 13:43:14 +0100 Subject: [PATCH 07/21] Update: Include the Rao-Rlackwellized estimates for the probabilities of the number of components --- R/RcppExports.R | 4 ++ R/bgm.R | 4 +- R/utility_functions.R | 77 +++++++++++++++++++++++++++--- src/RcppExports.cpp | 14 ++++++ src/gibbs_functions_edge_prior.cpp | 1 + 5 files changed, 93 insertions(+), 7 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index e1dcd9d6..c5a186f9 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -17,3 +17,7 @@ compare_gibbs_sampler <- function(observations_gr1, observations_gr2, no_categor .Call(`_bgms_compare_gibbs_sampler`, observations_gr1, observations_gr2, no_categories_gr1, no_categories_gr2, interaction_scale, pairwise_difference_scale, main_difference_scale, pairwise_difference_prior, main_difference_prior, inclusion_probability_difference, pairwise_beta_bernoulli_alpha, pairwise_beta_bernoulli_beta, main_beta_bernoulli_alpha, main_beta_bernoulli_beta, Index, iter, burnin, n_cat_obs_gr1, n_cat_obs_gr2, sufficient_blume_capel_gr1, sufficient_blume_capel_gr2, threshold_alpha, threshold_beta, na_impute, missing_index_gr1, missing_index_gr2, ordinal_variable, reference_category, independent_thresholds, save, display_progress, difference_selection) } +compute_Vn_mfm_sbm <- function(no_variables, dirichlet_alpha, t_max) { + .Call(`_bgms_compute_Vn_mfm_sbm`, no_variables, dirichlet_alpha, t_max) +} + diff --git a/R/bgm.R b/R/bgm.R index 799eb625..ce2ef829 100644 --- a/R/bgm.R +++ b/R/bgm.R @@ -521,13 +521,15 @@ bgm = function(x, if(edge_selection == TRUE) { if(edge_prior == "Stochastic-Block"){ - summarySbm <- summary_SBM(cluster_allocations = out$allocations) + summarySbm <- summary_SBM(cluster_allocations = out$allocations, + dirichlet_alpha = dirichlet_alpha) output = list(indicator = indicator, interactions = interactions, thresholds = thresholds, allocations = summarySbm$allocations, clusters = summarySbm$no_clusters, + r_b_cluster_probability = summarySbm$r_b_cluster_prob, arguments = arguments) } else { diff --git a/R/utility_functions.R b/R/utility_functions.R index 4c12ba61..56476564 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -1013,7 +1013,6 @@ compare_reformat_data = function(x, # Geng, J., Bhattacharya, A., & Pati, D. (2019). Probabilistic Community # Detection With Unknown Number of Communities, Journal of the American # Statistical Association, 114:526, 893-905, DOI:10.1080/01621459.2018.1458618 - getDahl <- function(cluster_allocations) { # Dimensions of the input matrix niters <- nrow(cluster_allocations) # Number of iterations @@ -1045,21 +1044,87 @@ getDahl <- function(cluster_allocations) { return(DahlAns) } +# Calculate the conditional probability of the number of +# components given the cardinality of a sampled +# allocation vector based on Equation (3.7) from +# Miller & Harrison (2018). Mixture Models With a Prior on the Number of +# Components, Journal of the American Statistical Association, 113:521, 340-356, +# DOI:10.1080/01621459.2016.1255636 + +compute_p_k_given_t <- function(t, log_Vn, dirichlet_alpha, no_variables) { + # Define the K_values + K_values <- as.numeric(1:no_variables) + # Initialize vector for probabilities + p_k_given_t <- numeric(length(K_values)) + # The rate hyperparameter of the Poisson is fixed to 1 + lambda <- 1 + # Normalization constant for t + log_vn_t <- log_Vn[t] + + # Loop through each value of K + for (i in seq_along(K_values)) { + K <- K_values[i] + + if (K >= t) { + # Falling factorial k_(t) = prod(k - i) for i = 0:(t-1) + falling_factorial <- prod(K:(K - t + 1)) + + # Rising factorial (dirichlet_alpha k)^(n) = prod(dirichlet_alpha k + i) for i = 0:(no_variables-1) + rising_factorial <- prod((dirichlet_alpha * K) + 0:(no_variables - 1)) + + # Poisson probability p_K(k) = (lambda^k * exp(-lambda)) / k! + poisson_pmf <- dpois(K, lambda) + + # Compute log probability + log_p_k <- log(falling_factorial) - log(rising_factorial) + log(poisson_pmf) - log_vn_t + + # Convert log probability to probability + p_k_given_t[i] <- exp(log_p_k) + } else { + # Set probability to zero if K < t + p_k_given_t[i] <- 0 + } + } + # Normalize probabilities + p_k_given_t <- p_k_given_t / sum(p_k_given_t) -# A function that computes the cluster posterior probabilities and allocations + # Return a vector of length no_variables with the probabilities + return(p_k_given_t) +} -summary_SBM <- function(cluster_allocations) { - # Compute the number of unique clusters for each iteration - clusters <- apply(cluster_allocations, 1, function(row) length(unique(row))) +# A function that computes the posterior probabilities of the +# number of sampled clusters, the Rao-Blackwellized probabilities +# and the allocations of the nodes based on Dahl's method +summary_SBM <- function(cluster_allocations, dirichlet_alpha) { + + # precompute things + no_variables <- ncol(cluster_allocations) + log_Vn <- compute_Vn_mfm_sbm(no_variables, dirichlet_alpha, no_variables) + # Compute the number of unique clusters (t) for each iteration + # i.e., the cardinality of the partition z + clusters <- apply(cluster_allocations, 1, function(row) length(unique(row))) # Compute the posterior probabilities of the actual unique clusters no_clusters <- table(clusters) / length(clusters) + # Compute the conditional probabilities of the number of clusters + # for each row in clusters + p_k_given_t <- matrix(NA, nrow = length(clusters), ncol = no_variables) + + for (i in 1:length(clusters)) { + p_k_given_t[i, ] <- compute_p_k_given_t(clusters[i], + log_Vn, + dirichlet_alpha, + no_variables) + } + # average across all iterations + p_k_given_t <- colMeans(p_k_given_t) + # Compute the allocations of the nodes based on Dahl's method allocations <- getDahl(cluster_allocations) # Return the results return(list(no_clusters = no_clusters, + r_b_cluster_prob = p_k_given_t, # the Rao-Blackwellized prob allocations = allocations)) } - diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index d1858a8f..20931308 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -123,12 +123,26 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// compute_Vn_mfm_sbm +NumericVector compute_Vn_mfm_sbm(int no_variables, double dirichlet_alpha, int t_max); +RcppExport SEXP _bgms_compute_Vn_mfm_sbm(SEXP no_variablesSEXP, SEXP dirichlet_alphaSEXP, SEXP t_maxSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< int >::type no_variables(no_variablesSEXP); + Rcpp::traits::input_parameter< double >::type dirichlet_alpha(dirichlet_alphaSEXP); + Rcpp::traits::input_parameter< int >::type t_max(t_maxSEXP); + rcpp_result_gen = Rcpp::wrap(compute_Vn_mfm_sbm(no_variables, dirichlet_alpha, t_max)); + return rcpp_result_gen; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { {"_bgms_sample_omrf_gibbs", (DL_FUNC) &_bgms_sample_omrf_gibbs, 6}, {"_bgms_sample_bcomrf_gibbs", (DL_FUNC) &_bgms_sample_bcomrf_gibbs, 8}, {"_bgms_gibbs_sampler", (DL_FUNC) &_bgms_gibbs_sampler, 27}, {"_bgms_compare_gibbs_sampler", (DL_FUNC) &_bgms_compare_gibbs_sampler, 32}, + {"_bgms_compute_Vn_mfm_sbm", (DL_FUNC) &_bgms_compute_Vn_mfm_sbm, 3}, {NULL, NULL, 0} }; diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp index 0cdbc535..be7ed6eb 100644 --- a/src/gibbs_functions_edge_prior.cpp +++ b/src/gibbs_functions_edge_prior.cpp @@ -82,6 +82,7 @@ NumericMatrix add_row_col_block_prob_matrix(NumericMatrix X, // ----------------------------------------------------------------------------| // Compute partition coefficient for the MFM - SBM // ----------------------------------------------------------------------------| +// [[Rcpp::export]] NumericVector compute_Vn_mfm_sbm(int no_variables, double dirichlet_alpha, int t_max) { From 28394fe12f391ac9c7b1ec82cd81f68ec5c788a4 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Fri, 20 Dec 2024 16:42:36 +0100 Subject: [PATCH 08/21] Update: Include the proper calculation of the probability of the number of components. Also, some fixes in the roxygen code. --- R/bgm.R | 33 ++++++++++++++++++++------------- R/utility_functions.R | 13 +++++++------ inst/REFERENCES.bib | 12 ++++++++++++ man/bgm.Rd | 30 +++++++++++++++++++----------- 4 files changed, 58 insertions(+), 30 deletions(-) diff --git a/R/bgm.R b/R/bgm.R index ce2ef829..07e4d566 100644 --- a/R/bgm.R +++ b/R/bgm.R @@ -147,7 +147,7 @@ #' The default is \code{TRUE}. #' #' @return If \code{save = FALSE} (the default), the result is a list of class -#' ``bgms'' containing the following matrices with model-averaged quantities: +#' \code{"bgms"} containing the following matrices with model-averaged quantities: #' \itemize{ #' \item \code{indicator}: A matrix with \code{p} rows and \code{p} columns, #' containing the posterior inclusion probabilities of individual edges. @@ -155,16 +155,23 @@ #' containing model-averaged posterior means of the pairwise associations. #' \item \code{thresholds}: A matrix with \code{p} rows and \code{max(m)} #' columns, containing model-averaged category thresholds. In the case of -#' ``blume-capel'' variables, the first entry is the parameter for the linear +#' \code{"blume-capel"} variables, the first entry is the parameter for the linear #' effect and the second entry is the parameter for the quadratic effect, which #' models the offset to the reference category. -#' \item In the case of -#' \code{edge_prior = "Stochastic-Block"} two additional elements are returned: -#' a vector \code{allocations} with the estimated cluster assignments of the nodes and an -#' table \code{clusters} with the estimated posterior probability of the number -#' of clusters in the network. The vector of node allocations is calculated using -#' a method proposed by \insertCite{Dahl2009}{bgms} and also used by -#' \insertCite{GengEtAl_2019}{bgms}. +#' \item In the case of \code{edge_prior = "Stochastic-Block"}, two additional +#' elements are returned: +#' \itemize{ +#' \item A vector \code{allocations} with the estimated cluster assignments +#' of the nodes, calculated using a method proposed by \code{Dahl (2009)} and +#' also used by \code{Geng et al. (2019)}. +#' \item A matrix \code{components} with the estimated posterior probability +#' of the number of components (clusters) in the network. These probabilities +#' are calculated based on Equation 3.7 in \code{Miller (2018)}, which computes +#' the conditional probability of the number of components given the +#' number of clusters. The number of clusters is derived from the +#' cardinality of the sampled \code{allocations} vector for each iteration of +#' the MCMC sampler (see \code{save = TRUE}). +#' } #' } #' If \code{save = TRUE}, the result is a list of class ``bgms'' containing: #' \itemize{ @@ -180,8 +187,9 @@ #' \item In the case of #' \code{edge_prior = "Stochastic-Block"} a matrix \code{allocations} with the #' cluster assignments of the nodes from each iteration is returned. This matrix -#' can be used to calculate the posterior probability of the number of clusters -#' by utilizing the \code{summary_SBM(bgm_output[["allocations"]])} function. +#' can be used to calculate the posterior probability of the number of components +#' and estimated cluster assignments of the nodes, by and utilizing the +#' \code{summary_SBM()} function. #' } #' Column averages of these matrices provide the model-averaged posterior means. #' Except for the \code{allocations} matrix, for which the \code{summary_SBM} @@ -527,9 +535,8 @@ bgm = function(x, output = list(indicator = indicator, interactions = interactions, thresholds = thresholds, + components = summarySbm$components, allocations = summarySbm$allocations, - clusters = summarySbm$no_clusters, - r_b_cluster_probability = summarySbm$r_b_cluster_prob, arguments = arguments) } else { diff --git a/R/utility_functions.R b/R/utility_functions.R index 56476564..cef1a076 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -1094,7 +1094,7 @@ compute_p_k_given_t <- function(t, log_Vn, dirichlet_alpha, no_variables) { # A function that computes the posterior probabilities of the -# number of sampled clusters, the Rao-Blackwellized probabilities +# number of components K given the cardinality of the partition t # and the allocations of the nodes based on Dahl's method summary_SBM <- function(cluster_allocations, dirichlet_alpha) { @@ -1104,9 +1104,6 @@ summary_SBM <- function(cluster_allocations, dirichlet_alpha) { # Compute the number of unique clusters (t) for each iteration # i.e., the cardinality of the partition z clusters <- apply(cluster_allocations, 1, function(row) length(unique(row))) - # Compute the posterior probabilities of the actual unique clusters - no_clusters <- table(clusters) / length(clusters) - # Compute the conditional probabilities of the number of clusters # for each row in clusters p_k_given_t <- matrix(NA, nrow = length(clusters), ncol = no_variables) @@ -1120,11 +1117,15 @@ summary_SBM <- function(cluster_allocations, dirichlet_alpha) { # average across all iterations p_k_given_t <- colMeans(p_k_given_t) + # make it as a matrix + no_components <- 1:no_variables + components <- cbind(no_components, p_k_given_t) + colnames(components) <- c("no_components", "probability") + # Compute the allocations of the nodes based on Dahl's method allocations <- getDahl(cluster_allocations) # Return the results - return(list(no_clusters = no_clusters, - r_b_cluster_prob = p_k_given_t, # the Rao-Blackwellized prob + return(list(components = components, allocations = allocations)) } diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib index 18afffc3..ee85e281 100644 --- a/inst/REFERENCES.bib +++ b/inst/REFERENCES.bib @@ -1,3 +1,15 @@ +@ARTICLE{miller2018mixture, + title={Mixture models with a prior on the number of components}, + author={Miller, Jeffrey W and Harrison, Matthew T}, + journal={Journal of the American Statistical Association}, + volume={113}, + number={521}, + pages={340--356}, + year={2018}, + doi = {10.1080/01621459.2016.1255636}, + publisher={Taylor \& Francis} +} + @article{Dahl2009, author = {David B. Dahl}, title = {Modal clustering in a class of product partition models}, diff --git a/man/bgm.Rd b/man/bgm.Rd index d091164b..0b573a11 100644 --- a/man/bgm.Rd +++ b/man/bgm.Rd @@ -144,7 +144,7 @@ The default is \code{TRUE}.} } \value{ If \code{save = FALSE} (the default), the result is a list of class -``bgms'' containing the following matrices with model-averaged quantities: +\code{"bgms"} containing the following matrices with model-averaged quantities: \itemize{ \item \code{indicator}: A matrix with \code{p} rows and \code{p} columns, containing the posterior inclusion probabilities of individual edges. @@ -152,16 +152,23 @@ containing the posterior inclusion probabilities of individual edges. containing model-averaged posterior means of the pairwise associations. \item \code{thresholds}: A matrix with \code{p} rows and \code{max(m)} columns, containing model-averaged category thresholds. In the case of -``blume-capel'' variables, the first entry is the parameter for the linear +\code{"blume-capel"} variables, the first entry is the parameter for the linear effect and the second entry is the parameter for the quadratic effect, which models the offset to the reference category. -\item In the case of -\code{edge_prior = "Stochastic-Block"} two additional elements are returned: -a vector \code{allocations} with the estimated cluster assignments of the nodes and an -table \code{clusters} with the estimated posterior probability of the number -of clusters in the network. The vector of node allocations is calculated using -a method proposed by \insertCite{Dahl2009}{bgms} and also used by -\insertCite{GengEtAl_2019}{bgms}. +\item In the case of \code{edge_prior = "Stochastic-Block"}, two additional +elements are returned: + \itemize{ + \item A vector \code{allocations} with the estimated cluster assignments + of the nodes, calculated using a method proposed by \code{Dahl (2009)} and + also used by \code{Geng et al. (2019)}. + \item A table \code{components} with the estimated posterior probability + of the number of components (clusters) in the network. These probabilities + are calculated based on Equation 3.7 in \code{Miller (2018)}, which computes + the conditional probability of the number of components given the + number of clusters. The number of clusters is derived from the + cardinality of the sampled \code{allocations} vector for each iteration of + the MCMC sampler (see \code{save = TRUE}). + } } If \code{save = TRUE}, the result is a list of class ``bgms'' containing: \itemize{ @@ -177,8 +184,9 @@ the Gibbs sampler for the category thresholds. \item In the case of \code{edge_prior = "Stochastic-Block"} a matrix \code{allocations} with the cluster assignments of the nodes from each iteration is returned. This matrix -can be used to calculate the posterior probability of the number of clusters -by utilizing the \code{summary_SBM(bgm_output[["allocations"]])} function. +can be used to calculate the posterior probability of the number of components +and estimated cluster assignments of the nodes, by and utilizing the +\code{summary_SBM()} function. } Column averages of these matrices provide the model-averaged posterior means. Except for the \code{allocations} matrix, for which the \code{summary_SBM} From b8f3de58200e2d31a37d2dd8e61ffbdce1f4841d Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Mon, 23 Dec 2024 13:50:57 +0100 Subject: [PATCH 09/21] update: initial configuration of the cluster memberships. --- src/gibbs_functions.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp index 56e06236..df82d3c1 100644 --- a/src/gibbs_functions.cpp +++ b/src/gibbs_functions.cpp @@ -870,15 +870,14 @@ List gibbs_sampler(IntegerMatrix observations, NumericMatrix out_allocations(iter, no_variables); if(edge_prior == "Stochastic-Block") { // Initial Configuration of the cluster allocations - cluster_allocations[0] = 0; - cluster_allocations[1] = 1; - for(int i = 2; i < no_variables; i++) { + // Randomly choose the number of clusters (k) between 1 and no_variables + int k = static_cast(R::unif_rand() * no_variables) + 1; + + // Assign each variable to a random cluster + for (int i = 0; i < no_variables; i++) { double U = R::unif_rand(); - if(U > 0.5){ - cluster_allocations[i] = 1; - } else { - cluster_allocations[i] = 0; - } + int cluster = static_cast(U * k); // Randomly select a cluster (0 to k-1) + cluster_allocations[i] = cluster; } cluster_prob = block_probs_mfm_sbm(cluster_allocations, @@ -896,7 +895,7 @@ List gibbs_sampler(IntegerMatrix observations, log_Vn = compute_Vn_mfm_sbm(no_variables, dirichlet_alpha, - no_variables + 10); + no_variables); } //The Gibbs sampler ---------------------------------------------------------- From 7149ae1b5f34f72e53dabd75428418900fff4e1f Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Thu, 2 Jan 2025 14:29:49 +0100 Subject: [PATCH 10/21] delete unnecessary object --- src/gibbs_functions.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp index df82d3c1..b88ab003 100644 --- a/src/gibbs_functions.cpp +++ b/src/gibbs_functions.cpp @@ -812,7 +812,6 @@ List gibbs_sampler(IntegerMatrix observations, int no_interactions = Index.nrow(); int no_thresholds = sum(no_categories); int max_no_categories = max(no_categories); - IntegerVector K_values; // To store sampled K values IntegerVector v = seq(0, no_interactions - 1); From 39e3ab0f4e2e45c69144783ccc85588fa017d574 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Fri, 3 Jan 2025 13:35:47 +0100 Subject: [PATCH 11/21] correction to the truncated Poisson distribution --- R/utility_functions.R | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/R/utility_functions.R b/R/utility_functions.R index cef1a076..b9d4302e 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -1061,34 +1061,28 @@ compute_p_k_given_t <- function(t, log_Vn, dirichlet_alpha, no_variables) { # Normalization constant for t log_vn_t <- log_Vn[t] + # Normalizing factor for the truncated Poisson distribution + norm_factor <- 1 - dpois(0, lambda) + truncated_poisson_pmf <- dpois(K_values, lambda) / norm_factor + # Loop through each value of K for (i in seq_along(K_values)) { K <- K_values[i] - if (K >= t) { - # Falling factorial k_(t) = prod(k - i) for i = 0:(t-1) + # Falling factorial falling_factorial <- prod(K:(K - t + 1)) - - # Rising factorial (dirichlet_alpha k)^(n) = prod(dirichlet_alpha k + i) for i = 0:(no_variables-1) + # Rising factorial rising_factorial <- prod((dirichlet_alpha * K) + 0:(no_variables - 1)) - - # Poisson probability p_K(k) = (lambda^k * exp(-lambda)) / k! - poisson_pmf <- dpois(K, lambda) - # Compute log probability - log_p_k <- log(falling_factorial) - log(rising_factorial) + log(poisson_pmf) - log_vn_t - + log_p_k <- log(falling_factorial) - log(rising_factorial) + log(truncated_poisson_pmf[i]) - log_vn_t # Convert log probability to probability p_k_given_t[i] <- exp(log_p_k) } else { - # Set probability to zero if K < t p_k_given_t[i] <- 0 } } # Normalize probabilities p_k_given_t <- p_k_given_t / sum(p_k_given_t) - - # Return a vector of length no_variables with the probabilities return(p_k_given_t) } From 6a8ed38bb4dc20fea1210f170b938d98a500c875 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Fri, 10 Jan 2025 14:10:28 +0100 Subject: [PATCH 12/21] update: randomize the order of the variables in the for loopp in the block_allocations_mfm_sbm function --- src/gibbs_functions_edge_prior.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp index be7ed6eb..6da4100a 100644 --- a/src/gibbs_functions_edge_prior.cpp +++ b/src/gibbs_functions_edge_prior.cpp @@ -228,7 +228,11 @@ IntegerVector block_allocations_mfm_sbm(IntegerVector cluster_assign, double loglike; double logmarg; - for(int node = 0; node < no_variables; node++) { + // Generate a randomized order using Rcpp's sample function + IntegerVector indices = sample(no_variables, no_variables); + + for (int idx = 0; idx < no_variables; ++idx) { + int node = indices[idx] - 1; // Convert to zero-based index old = cluster_assign[node]; IntegerVector cluster_size = table_cpp(cluster_assign); From 662d1c69efb6aa9e6750bc90836451da32b3f0d2 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Mon, 13 Jan 2025 15:01:38 +0100 Subject: [PATCH 13/21] update: include a user-specified lambda --- R/RcppExports.R | 8 ++++---- R/bgm.R | 6 +++++- R/utility_functions.R | 22 ++++++++++++++++------ src/RcppExports.cpp | 18 ++++++++++-------- src/gibbs_functions.cpp | 4 +++- src/gibbs_functions_edge_prior.cpp | 7 ++++++- src/gibbs_functions_edge_prior.h | 3 ++- 7 files changed, 46 insertions(+), 22 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index c5a186f9..ff281bfa 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -9,15 +9,15 @@ sample_bcomrf_gibbs <- function(no_states, no_variables, no_categories, interact .Call(`_bgms_sample_bcomrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter) } -gibbs_sampler <- function(observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save = FALSE, display_progress = FALSE, edge_selection = TRUE) { - .Call(`_bgms_gibbs_sampler`, observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save, display_progress, edge_selection) +gibbs_sampler <- function(observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, lambda, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save = FALSE, display_progress = FALSE, edge_selection = TRUE) { + .Call(`_bgms_gibbs_sampler`, observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, lambda, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save, display_progress, edge_selection) } compare_gibbs_sampler <- function(observations_gr1, observations_gr2, no_categories_gr1, no_categories_gr2, interaction_scale, pairwise_difference_scale, main_difference_scale, pairwise_difference_prior, main_difference_prior, inclusion_probability_difference, pairwise_beta_bernoulli_alpha, pairwise_beta_bernoulli_beta, main_beta_bernoulli_alpha, main_beta_bernoulli_beta, Index, iter, burnin, n_cat_obs_gr1, n_cat_obs_gr2, sufficient_blume_capel_gr1, sufficient_blume_capel_gr2, threshold_alpha, threshold_beta, na_impute, missing_index_gr1, missing_index_gr2, ordinal_variable, reference_category, independent_thresholds, save = FALSE, display_progress = FALSE, difference_selection = TRUE) { .Call(`_bgms_compare_gibbs_sampler`, observations_gr1, observations_gr2, no_categories_gr1, no_categories_gr2, interaction_scale, pairwise_difference_scale, main_difference_scale, pairwise_difference_prior, main_difference_prior, inclusion_probability_difference, pairwise_beta_bernoulli_alpha, pairwise_beta_bernoulli_beta, main_beta_bernoulli_alpha, main_beta_bernoulli_beta, Index, iter, burnin, n_cat_obs_gr1, n_cat_obs_gr2, sufficient_blume_capel_gr1, sufficient_blume_capel_gr2, threshold_alpha, threshold_beta, na_impute, missing_index_gr1, missing_index_gr2, ordinal_variable, reference_category, independent_thresholds, save, display_progress, difference_selection) } -compute_Vn_mfm_sbm <- function(no_variables, dirichlet_alpha, t_max) { - .Call(`_bgms_compute_Vn_mfm_sbm`, no_variables, dirichlet_alpha, t_max) +compute_Vn_mfm_sbm <- function(no_variables, dirichlet_alpha, t_max, lambda) { + .Call(`_bgms_compute_Vn_mfm_sbm`, no_variables, dirichlet_alpha, t_max, lambda) } diff --git a/R/bgm.R b/R/bgm.R index 07e4d566..3ead1e00 100644 --- a/R/bgm.R +++ b/R/bgm.R @@ -300,6 +300,7 @@ bgm = function(x, beta_bernoulli_alpha = 1, beta_bernoulli_beta = 1, dirichlet_alpha = 1, + lambda = 1, na.action = c("listwise", "impute"), save = FALSE, display_progress = TRUE) { @@ -452,6 +453,7 @@ bgm = function(x, beta_bernoulli_alpha = beta_bernoulli_alpha, beta_bernoulli_beta = beta_bernoulli_beta, dirichlet_alpha = dirichlet_alpha, + lambda = lambda, Index = Index, iter = iter, burnin = burnin, @@ -485,6 +487,7 @@ bgm = function(x, beta_bernoulli_alpha = beta_bernoulli_alpha , beta_bernoulli_beta = beta_bernoulli_beta, dirichlet_alpha = dirichlet_alpha, + lambda = lambda, na.action = na.action, save = save, version = packageVersion("bgms") @@ -530,7 +533,8 @@ bgm = function(x, if(edge_prior == "Stochastic-Block"){ summarySbm <- summary_SBM(cluster_allocations = out$allocations, - dirichlet_alpha = dirichlet_alpha) + dirichlet_alpha = dirichlet_alpha, + lambda = lambda) output = list(indicator = indicator, interactions = interactions, diff --git a/R/utility_functions.R b/R/utility_functions.R index b9d4302e..2829959a 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -1051,13 +1051,17 @@ getDahl <- function(cluster_allocations) { # Components, Journal of the American Statistical Association, 113:521, 340-356, # DOI:10.1080/01621459.2016.1255636 -compute_p_k_given_t <- function(t, log_Vn, dirichlet_alpha, no_variables) { +compute_p_k_given_t <- function(t, + log_Vn, + dirichlet_alpha, + no_variables, + lambda) { # Define the K_values K_values <- as.numeric(1:no_variables) + # Initialize vector for probabilities p_k_given_t <- numeric(length(K_values)) - # The rate hyperparameter of the Poisson is fixed to 1 - lambda <- 1 + # Normalization constant for t log_vn_t <- log_Vn[t] @@ -1090,11 +1094,16 @@ compute_p_k_given_t <- function(t, log_Vn, dirichlet_alpha, no_variables) { # A function that computes the posterior probabilities of the # number of components K given the cardinality of the partition t # and the allocations of the nodes based on Dahl's method -summary_SBM <- function(cluster_allocations, dirichlet_alpha) { +summary_SBM <- function(cluster_allocations, + dirichlet_alpha, + lambda) { # precompute things no_variables <- ncol(cluster_allocations) - log_Vn <- compute_Vn_mfm_sbm(no_variables, dirichlet_alpha, no_variables) + log_Vn <- compute_Vn_mfm_sbm(no_variables, + dirichlet_alpha, + no_variables, + lambda) # Compute the number of unique clusters (t) for each iteration # i.e., the cardinality of the partition z clusters <- apply(cluster_allocations, 1, function(row) length(unique(row))) @@ -1106,7 +1115,8 @@ summary_SBM <- function(cluster_allocations, dirichlet_alpha) { p_k_given_t[i, ] <- compute_p_k_given_t(clusters[i], log_Vn, dirichlet_alpha, - no_variables) + no_variables, + lambda) } # average across all iterations p_k_given_t <- colMeans(p_k_given_t) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 20931308..e777dff1 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -45,8 +45,8 @@ BEGIN_RCPP END_RCPP } // gibbs_sampler -List gibbs_sampler(IntegerMatrix observations, IntegerMatrix indicator, NumericMatrix interactions, NumericMatrix thresholds, IntegerVector no_categories, double interaction_scale, NumericMatrix proposal_sd, NumericMatrix proposal_sd_blumecapel, String edge_prior, NumericMatrix theta, double beta_bernoulli_alpha, double beta_bernoulli_beta, double dirichlet_alpha, IntegerMatrix Index, int iter, int burnin, IntegerMatrix n_cat_obs, IntegerMatrix sufficient_blume_capel, double threshold_alpha, double threshold_beta, bool na_impute, IntegerMatrix missing_index, LogicalVector variable_bool, IntegerVector reference_category, bool save, bool display_progress, bool edge_selection); -RcppExport SEXP _bgms_gibbs_sampler(SEXP observationsSEXP, SEXP indicatorSEXP, SEXP interactionsSEXP, SEXP thresholdsSEXP, SEXP no_categoriesSEXP, SEXP interaction_scaleSEXP, SEXP proposal_sdSEXP, SEXP proposal_sd_blumecapelSEXP, SEXP edge_priorSEXP, SEXP thetaSEXP, SEXP beta_bernoulli_alphaSEXP, SEXP beta_bernoulli_betaSEXP, SEXP dirichlet_alphaSEXP, SEXP IndexSEXP, SEXP iterSEXP, SEXP burninSEXP, SEXP n_cat_obsSEXP, SEXP sufficient_blume_capelSEXP, SEXP threshold_alphaSEXP, SEXP threshold_betaSEXP, SEXP na_imputeSEXP, SEXP missing_indexSEXP, SEXP variable_boolSEXP, SEXP reference_categorySEXP, SEXP saveSEXP, SEXP display_progressSEXP, SEXP edge_selectionSEXP) { +List gibbs_sampler(IntegerMatrix observations, IntegerMatrix indicator, NumericMatrix interactions, NumericMatrix thresholds, IntegerVector no_categories, double interaction_scale, NumericMatrix proposal_sd, NumericMatrix proposal_sd_blumecapel, String edge_prior, NumericMatrix theta, double beta_bernoulli_alpha, double beta_bernoulli_beta, double dirichlet_alpha, double lambda, IntegerMatrix Index, int iter, int burnin, IntegerMatrix n_cat_obs, IntegerMatrix sufficient_blume_capel, double threshold_alpha, double threshold_beta, bool na_impute, IntegerMatrix missing_index, LogicalVector variable_bool, IntegerVector reference_category, bool save, bool display_progress, bool edge_selection); +RcppExport SEXP _bgms_gibbs_sampler(SEXP observationsSEXP, SEXP indicatorSEXP, SEXP interactionsSEXP, SEXP thresholdsSEXP, SEXP no_categoriesSEXP, SEXP interaction_scaleSEXP, SEXP proposal_sdSEXP, SEXP proposal_sd_blumecapelSEXP, SEXP edge_priorSEXP, SEXP thetaSEXP, SEXP beta_bernoulli_alphaSEXP, SEXP beta_bernoulli_betaSEXP, SEXP dirichlet_alphaSEXP, SEXP lambdaSEXP, SEXP IndexSEXP, SEXP iterSEXP, SEXP burninSEXP, SEXP n_cat_obsSEXP, SEXP sufficient_blume_capelSEXP, SEXP threshold_alphaSEXP, SEXP threshold_betaSEXP, SEXP na_imputeSEXP, SEXP missing_indexSEXP, SEXP variable_boolSEXP, SEXP reference_categorySEXP, SEXP saveSEXP, SEXP display_progressSEXP, SEXP edge_selectionSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -63,6 +63,7 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type beta_bernoulli_alpha(beta_bernoulli_alphaSEXP); Rcpp::traits::input_parameter< double >::type beta_bernoulli_beta(beta_bernoulli_betaSEXP); Rcpp::traits::input_parameter< double >::type dirichlet_alpha(dirichlet_alphaSEXP); + Rcpp::traits::input_parameter< double >::type lambda(lambdaSEXP); Rcpp::traits::input_parameter< IntegerMatrix >::type Index(IndexSEXP); Rcpp::traits::input_parameter< int >::type iter(iterSEXP); Rcpp::traits::input_parameter< int >::type burnin(burninSEXP); @@ -77,7 +78,7 @@ BEGIN_RCPP Rcpp::traits::input_parameter< bool >::type save(saveSEXP); Rcpp::traits::input_parameter< bool >::type display_progress(display_progressSEXP); Rcpp::traits::input_parameter< bool >::type edge_selection(edge_selectionSEXP); - rcpp_result_gen = Rcpp::wrap(gibbs_sampler(observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save, display_progress, edge_selection)); + rcpp_result_gen = Rcpp::wrap(gibbs_sampler(observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, lambda, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save, display_progress, edge_selection)); return rcpp_result_gen; END_RCPP } @@ -124,15 +125,16 @@ BEGIN_RCPP END_RCPP } // compute_Vn_mfm_sbm -NumericVector compute_Vn_mfm_sbm(int no_variables, double dirichlet_alpha, int t_max); -RcppExport SEXP _bgms_compute_Vn_mfm_sbm(SEXP no_variablesSEXP, SEXP dirichlet_alphaSEXP, SEXP t_maxSEXP) { +NumericVector compute_Vn_mfm_sbm(int no_variables, double dirichlet_alpha, int t_max, double lambda); +RcppExport SEXP _bgms_compute_Vn_mfm_sbm(SEXP no_variablesSEXP, SEXP dirichlet_alphaSEXP, SEXP t_maxSEXP, SEXP lambdaSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< int >::type no_variables(no_variablesSEXP); Rcpp::traits::input_parameter< double >::type dirichlet_alpha(dirichlet_alphaSEXP); Rcpp::traits::input_parameter< int >::type t_max(t_maxSEXP); - rcpp_result_gen = Rcpp::wrap(compute_Vn_mfm_sbm(no_variables, dirichlet_alpha, t_max)); + Rcpp::traits::input_parameter< double >::type lambda(lambdaSEXP); + rcpp_result_gen = Rcpp::wrap(compute_Vn_mfm_sbm(no_variables, dirichlet_alpha, t_max, lambda)); return rcpp_result_gen; END_RCPP } @@ -140,9 +142,9 @@ END_RCPP static const R_CallMethodDef CallEntries[] = { {"_bgms_sample_omrf_gibbs", (DL_FUNC) &_bgms_sample_omrf_gibbs, 6}, {"_bgms_sample_bcomrf_gibbs", (DL_FUNC) &_bgms_sample_bcomrf_gibbs, 8}, - {"_bgms_gibbs_sampler", (DL_FUNC) &_bgms_gibbs_sampler, 27}, + {"_bgms_gibbs_sampler", (DL_FUNC) &_bgms_gibbs_sampler, 28}, {"_bgms_compare_gibbs_sampler", (DL_FUNC) &_bgms_compare_gibbs_sampler, 32}, - {"_bgms_compute_Vn_mfm_sbm", (DL_FUNC) &_bgms_compute_Vn_mfm_sbm, 3}, + {"_bgms_compute_Vn_mfm_sbm", (DL_FUNC) &_bgms_compute_Vn_mfm_sbm, 4}, {NULL, NULL, 0} }; diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp index b88ab003..6a8512e6 100644 --- a/src/gibbs_functions.cpp +++ b/src/gibbs_functions.cpp @@ -792,6 +792,7 @@ List gibbs_sampler(IntegerMatrix observations, double beta_bernoulli_alpha, double beta_bernoulli_beta, double dirichlet_alpha, + double lambda, IntegerMatrix Index, int iter, int burnin, @@ -894,7 +895,8 @@ List gibbs_sampler(IntegerMatrix observations, log_Vn = compute_Vn_mfm_sbm(no_variables, dirichlet_alpha, - no_variables); + no_variables, + lambda); } //The Gibbs sampler ---------------------------------------------------------- diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp index 6da4100a..fef05628 100644 --- a/src/gibbs_functions_edge_prior.cpp +++ b/src/gibbs_functions_edge_prior.cpp @@ -85,7 +85,8 @@ NumericMatrix add_row_col_block_prob_matrix(NumericMatrix X, // [[Rcpp::export]] NumericVector compute_Vn_mfm_sbm(int no_variables, double dirichlet_alpha, - int t_max) { + int t_max, + double lambda) { NumericVector log_Vn(t_max); double r; double tmp; @@ -102,6 +103,10 @@ NumericVector compute_Vn_mfm_sbm(int no_variables, } tmp -= std::lgamma(k + 1); + // Add the poisson term + double log_norm_factor = log(1.0 - exp(R::dpois(0, lambda, true))); + tmp += R::dpois(k-1, lambda, true) - log_norm_factor; + // Compute the maximum between r and tmp if (tmp > r) { r = std::log(std::exp(r - tmp) + 1) + tmp; diff --git a/src/gibbs_functions_edge_prior.h b/src/gibbs_functions_edge_prior.h index 827c4ea9..6818dbb8 100644 --- a/src/gibbs_functions_edge_prior.h +++ b/src/gibbs_functions_edge_prior.h @@ -6,7 +6,8 @@ using namespace Rcpp; // ----------------------------------------------------------------------------| Rcpp::NumericVector compute_Vn_mfm_sbm(int no_variables, double dirichlet_alpha, - int t_max); + int t_max, + double lambda); // ----------------------------------------------------------------------------| // Sample the block allocations for the MFM - SBM From 12fd23bcde73311867633a6312c19805bcda068e Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Tue, 21 Jan 2025 18:50:17 +0100 Subject: [PATCH 14/21] update SBM --- R/bgm.R | 10 +++++++--- R/utility_functions.R | 15 ++++++++------- bgms.Rproj | 1 + man/bgm.Rd | 10 +++++++--- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/R/bgm.R b/R/bgm.R index 3ead1e00..82426849 100644 --- a/R/bgm.R +++ b/R/bgm.R @@ -116,9 +116,12 @@ #' when nodes are expected to fall into distinct clusters. The inclusion #' probabilities for the edges are defined at the level of the clusters, with a #' beta prior for the unknown inclusion probability with shape parameters -#' \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, and a Dirichlet +#' \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, a Dirichlet #' prior on the cluster assignment probabilities with a common concentration -#' parameter \code{dirichlet_alpha}. The default is +#' parameter \code{dirichlet_alpha} and a zero-truncated Poisson prior on the +#' number of clusters with a rate parameter \code{lambda}, indicating the +#' expected number of clusters. +#' The default is #' \code{edge_prior = "Bernoulli"}. #' @param inclusion_probability The prior edge inclusion probability for the #' Bernoulli model. Can be a single probability, or a matrix of \code{p} rows @@ -327,7 +330,8 @@ bgm = function(x, inclusion_probability = inclusion_probability, beta_bernoulli_alpha = beta_bernoulli_alpha, beta_bernoulli_beta = beta_bernoulli_beta, - dirichlet_alpha = dirichlet_alpha) + dirichlet_alpha = dirichlet_alpha, + lambda = lambda) # ---------------------------------------------------------------------------- # The vector variable_type is now coded as boolean. diff --git a/R/utility_functions.R b/R/utility_functions.R index 2829959a..27f9a811 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -13,7 +13,8 @@ check_model = function(x, inclusion_probability = 0.5, beta_bernoulli_alpha = 1, beta_bernoulli_beta = 1, - dirichlet_alpha = dirichlet_alpha) { + dirichlet_alpha = dirichlet_alpha, + lambda = lambda) { #Check variable type input --------------------------------------------------- if(length(variable_type) == 1) { @@ -187,14 +188,14 @@ check_model = function(x, } if(edge_prior == "Stochastic-Block") { theta = matrix(0.5, nrow = ncol(x), ncol = ncol(x)) - if(beta_bernoulli_alpha <= 0 || beta_bernoulli_beta <= 0 || dirichlet_alpha <= 0) - stop("The scale parameters of the beta and Dirichlet distribution need to be positive.") - if(!is.finite(beta_bernoulli_alpha) || !is.finite(beta_bernoulli_beta) || !is.finite(dirichlet_alpha)) - stop("The scale parameters of the beta and Dirichlet distribution need to be finite.") + if(beta_bernoulli_alpha <= 0 || beta_bernoulli_beta <= 0 || dirichlet_alpha <= 0 || lambda <= 0) + stop("The scale parameters the beta distrubution, the concentration parameter of the Dirichlet distribution and the rate parameter of the Poisson distribution need to be positive.") + if(!is.finite(beta_bernoulli_alpha) || !is.finite(beta_bernoulli_beta) || !is.finite(dirichlet_alpha) || !is.finite(lambda)) + stop("The scale parameters of the beta distrubution, the concentration parameter of the Dirichlet distribution and the rate parameter of the Poisson distribution need to be finite.") if(is.na(beta_bernoulli_alpha) || is.na(beta_bernoulli_beta) || is.null(beta_bernoulli_alpha) || is.null(beta_bernoulli_beta) || - is.null(dirichlet_alpha) || is.null(dirichlet_alpha)) - stop("Values for both scale parameters of the beta and Dirichlet distribution need to be specified.") + is.null(dirichlet_alpha) || is.null(dirichlet_alpha) || is.null(lambda) || is.null(lambda)) + stop("Values for both scale parameters of the betathe concentration parameter of the Dirichlet distribution and the rate parameter of the Poisson distribution need to be specified.") } } else { theta = matrix(0.5, nrow = 1, ncol = 1) diff --git a/bgms.Rproj b/bgms.Rproj index 6291372e..1c78b1f3 100644 --- a/bgms.Rproj +++ b/bgms.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: 2fb4a59f-6132-41a6-9659-f1d3297dc9d0 RestoreWorkspace: Default SaveWorkspace: Default diff --git a/man/bgm.Rd b/man/bgm.Rd index 0b573a11..635b7e1c 100644 --- a/man/bgm.Rd +++ b/man/bgm.Rd @@ -20,6 +20,7 @@ bgm( beta_bernoulli_alpha = 1, beta_bernoulli_beta = 1, dirichlet_alpha = 1, + lambda = 1, na.action = c("listwise", "impute"), save = FALSE, display_progress = TRUE @@ -106,9 +107,12 @@ of allocations as described by @GengEtAl_2019}{bgms}. This model is advantageous when nodes are expected to fall into distinct clusters. The inclusion probabilities for the edges are defined at the level of the clusters, with a beta prior for the unknown inclusion probability with shape parameters -\code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, and a Dirichlet +\code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, a Dirichlet prior on the cluster assignment probabilities with a common concentration -parameter \code{dirichlet_alpha}. The default is +parameter \code{dirichlet_alpha} and a zero-truncated Poisson prior on the +number of clusters with a rate parameter \code{lambda}, indicating the +expected number of clusters. +The default is \code{edge_prior = "Bernoulli"}.} \item{inclusion_probability}{The prior edge inclusion probability for the @@ -161,7 +165,7 @@ elements are returned: \item A vector \code{allocations} with the estimated cluster assignments of the nodes, calculated using a method proposed by \code{Dahl (2009)} and also used by \code{Geng et al. (2019)}. - \item A table \code{components} with the estimated posterior probability + \item A matrix \code{components} with the estimated posterior probability of the number of components (clusters) in the network. These probabilities are calculated based on Equation 3.7 in \code{Miller (2018)}, which computes the conditional probability of the number of components given the From 42f0e86fca78b2e1c3074ef7d1ee70a5fc2c34ae Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Tue, 21 Jan 2025 18:54:44 +0100 Subject: [PATCH 15/21] update --- R/utility_functions.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/utility_functions.R b/R/utility_functions.R index 27f9a811..3064c929 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -1088,6 +1088,7 @@ compute_p_k_given_t <- function(t, } # Normalize probabilities p_k_given_t <- p_k_given_t / sum(p_k_given_t) + return(p_k_given_t) } @@ -1095,6 +1096,7 @@ compute_p_k_given_t <- function(t, # A function that computes the posterior probabilities of the # number of components K given the cardinality of the partition t # and the allocations of the nodes based on Dahl's method + summary_SBM <- function(cluster_allocations, dirichlet_alpha, lambda) { @@ -1130,7 +1132,6 @@ summary_SBM <- function(cluster_allocations, # Compute the allocations of the nodes based on Dahl's method allocations <- getDahl(cluster_allocations) - # Return the results return(list(components = components, allocations = allocations)) } From 2d4a8ac83710464639748df21e5850a8312f3d48 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Tue, 21 Jan 2025 20:05:52 +0100 Subject: [PATCH 16/21] sloving note: "Undefined global functions or variables: dpois" --- R/utility_functions.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/utility_functions.R b/R/utility_functions.R index 3064c929..14875b93 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -1,6 +1,7 @@ #' @importFrom Rcpp evalCpp #' @importFrom Rdpack reprompt #' @importFrom methods hasArg +#' @importFrom stats dpois check_model = function(x, variable_type, From ce8871d71ae19fc96f4326ef4f80a4342ea97be3 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Tue, 21 Jan 2025 20:15:46 +0100 Subject: [PATCH 17/21] solving notes and warning messages for the checks --- NAMESPACE | 1 + R/bgm.R | 2 ++ R/utility_functions.R | 2 +- man/bgm.Rd | 3 +++ 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 077d91fe..400b5666 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,5 +26,6 @@ export(mrfSampler) importFrom(Rcpp,evalCpp) importFrom(Rdpack,reprompt) importFrom(methods,hasArg) +importFrom(stats,dpois) importFrom(utils,packageVersion) useDynLib(bgms, .registration=TRUE) diff --git a/R/bgm.R b/R/bgm.R index 82426849..4af346a9 100644 --- a/R/bgm.R +++ b/R/bgm.R @@ -133,6 +133,8 @@ #' \code{beta_bernoulli_beta = 1}. #' @param dirichlet_alpha The shape of the Dirichlet prior on the node-to-block #' allocation probabilities for the Stochastic Block model. +#' @param lambda The rate parameter of the zero-truncated Poisson prior on the +#' number of cluster for the Stochastic Block model. #' @param na.action How do you want the function to handle missing data? If #' \code{na.action = "listwise"}, listwise deletion is used. If #' \code{na.action = "impute"}, missing data are imputed iteratively during the diff --git a/R/utility_functions.R b/R/utility_functions.R index 14875b93..12b08278 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -1,7 +1,7 @@ #' @importFrom Rcpp evalCpp #' @importFrom Rdpack reprompt #' @importFrom methods hasArg -#' @importFrom stats dpois +#' importFrom("stats", "dpois") check_model = function(x, variable_type, diff --git a/man/bgm.Rd b/man/bgm.Rd index 635b7e1c..9f306879 100644 --- a/man/bgm.Rd +++ b/man/bgm.Rd @@ -128,6 +128,9 @@ positive numbers. Defaults to \code{beta_bernoulli_alpha = 1} and \item{dirichlet_alpha}{The shape of the Dirichlet prior on the node-to-block allocation probabilities for the Stochastic Block model.} +\item{lambda}{The rate parameter of the zero-truncated Poisson prior on the +number of cluster for the Stochastic Block model.} + \item{na.action}{How do you want the function to handle missing data? If \code{na.action = "listwise"}, listwise deletion is used. If \code{na.action = "impute"}, missing data are imputed iteratively during the From 1f09f8de6b71ce0da76c9e4d8390478ad9cb79c3 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Wed, 22 Jan 2025 13:04:20 +0100 Subject: [PATCH 18/21] export the summary_SBM function --- R/utility_functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utility_functions.R b/R/utility_functions.R index 12b08278..8e5490f0 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -1097,7 +1097,7 @@ compute_p_k_given_t <- function(t, # A function that computes the posterior probabilities of the # number of components K given the cardinality of the partition t # and the allocations of the nodes based on Dahl's method - +#' @export summary_SBM <- function(cluster_allocations, dirichlet_alpha, lambda) { From 75cca47a1076d29452c4b6b04c2ee04a09c89d0b Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Wed, 22 Jan 2025 13:33:06 +0100 Subject: [PATCH 19/21] export summary_SBM function (needed when save = TRUE, as well as for the easybgm implementation) --- NAMESPACE | 1 + R/utility_functions.R | 24 +++++++++++++++++++----- man/summary_SBM.Rd | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 5 deletions(-) create mode 100644 man/summary_SBM.Rd diff --git a/NAMESPACE b/NAMESPACE index 400b5666..83f1c915 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ export(extract_pairwise_interactions) export(extract_pairwise_thresholds) export(extract_posterior_inclusion_probabilities) export(mrfSampler) +export(summary_SBM) importFrom(Rcpp,evalCpp) importFrom(Rdpack,reprompt) importFrom(methods,hasArg) diff --git a/R/utility_functions.R b/R/utility_functions.R index 8e5490f0..beaa94f8 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -1,7 +1,7 @@ #' @importFrom Rcpp evalCpp #' @importFrom Rdpack reprompt #' @importFrom methods hasArg -#' importFrom("stats", "dpois") +#' @importFrom stats dpois check_model = function(x, variable_type, @@ -1093,10 +1093,24 @@ compute_p_k_given_t <- function(t, return(p_k_given_t) } - -# A function that computes the posterior probabilities of the -# number of components K given the cardinality of the partition t -# and the allocations of the nodes based on Dahl's method +#' Function for summarizing the block allocation vectors +#' +#' This function summarizes the sampled allocation vectors from each iteration +#' of the Gibbs sampler from the output of \code{bgm} +#' when \code{edge_prior = "Stochastic-Block"} and \code{save = TRUE}. +#' +#' @param cluster_allocations a matrix of \code{iter} rows and \code{no_variables} +#' columns containing the sampled cluster allocation vectors from each iteration. +#' @param dirichlet_alpha the concentration parameter of the Dirichlet prior +#' @param lambda the Poisson rate parameter +#' @return Returns a list of two elements: \code{components} and \code{allocations}, +#' containing the posterior probabilties for the number of components (clusters) +#' and the estimated cluster allocation of the nodes using Dahl's method. +#' @examples +#' x <- summary_SBM(bgm_object$allocations, +#' bgm_object$arguments$dirichlet_alpha, +#' bgm_object$arguments$dirichlet_alpha) +#' #' @export summary_SBM <- function(cluster_allocations, dirichlet_alpha, diff --git a/man/summary_SBM.Rd b/man/summary_SBM.Rd new file mode 100644 index 00000000..967910f0 --- /dev/null +++ b/man/summary_SBM.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utility_functions.R +\name{summary_SBM} +\alias{summary_SBM} +\title{Function for summarizing the block allocation vectors} +\usage{ +summary_SBM(cluster_allocations, dirichlet_alpha, lambda) +} +\arguments{ +\item{cluster_allocations}{a matrix of \code{iter} rows and \code{no_variables} +columns containing the sampled cluster allocation vectors from each iteration.} + +\item{dirichlet_alpha}{the concentration parameter of the Dirichlet prior} + +\item{lambda}{the Poisson rate parameter} +} +\value{ +Returns a list of two elements: \code{components} and \code{allocations}, +containing the posterior probabilties for the number of components (clusters) +and the estimated cluster allocation of the nodes using Dahl's method. +} +\description{ +This function summarizes the sampled allocation vectors from each iteration +of the Gibbs sampler from the output of \code{bgm} +when \code{edge_prior = "Stochastic-Block"} and \code{save = TRUE}. +} +\examples{ +x <- summary_SBM(bgm_object$allocations, + bgm_object$arguments$dirichlet_alpha, + bgm_object$arguments$dirichlet_alpha) + +} From 8564e286b1b11a4edd428e7016076bf23de74fd8 Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Wed, 22 Jan 2025 13:45:43 +0100 Subject: [PATCH 20/21] Udate in the documentation --- R/utility_functions.R | 9 ++++++++- man/summary_SBM.Rd | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/R/utility_functions.R b/R/utility_functions.R index beaa94f8..bf38f41f 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -1107,7 +1107,14 @@ compute_p_k_given_t <- function(t, #' containing the posterior probabilties for the number of components (clusters) #' and the estimated cluster allocation of the nodes using Dahl's method. #' @examples -#' x <- summary_SBM(bgm_object$allocations, +#' +#' # fit a model with the SBM prior +#' bgm_object <- bgm(Wenchuan[, c(1:5)], +#' edge_prior = "Stochastic-Block", +#' save = TRUE, +#' iter = 1e3) +#' +#' s <- summary_SBM(bgm_object$allocations, #' bgm_object$arguments$dirichlet_alpha, #' bgm_object$arguments$dirichlet_alpha) #' diff --git a/man/summary_SBM.Rd b/man/summary_SBM.Rd index 967910f0..2e74a130 100644 --- a/man/summary_SBM.Rd +++ b/man/summary_SBM.Rd @@ -25,7 +25,14 @@ of the Gibbs sampler from the output of \code{bgm} when \code{edge_prior = "Stochastic-Block"} and \code{save = TRUE}. } \examples{ -x <- summary_SBM(bgm_object$allocations, + +# fit a model with the SBM prior +bgm_object <- bgm(Wenchuan[, c(1:5)], + edge_prior = "Stochastic-Block", + save = TRUE, + iter = 1e3) + +s <- summary_SBM(bgm_object$allocations, bgm_object$arguments$dirichlet_alpha, bgm_object$arguments$dirichlet_alpha) From c7c2568aaf844b1a14770cb57c1bf1bb7319c6ff Mon Sep 17 00:00:00 2001 From: Nikola Sekulovski Date: Wed, 22 Jan 2025 13:58:22 +0100 Subject: [PATCH 21/21] final update in the documentation --- R/utility_functions.R | 2 +- man/summary_SBM.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utility_functions.R b/R/utility_functions.R index bf38f41f..11047669 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -1116,7 +1116,7 @@ compute_p_k_given_t <- function(t, #' #' s <- summary_SBM(bgm_object$allocations, #' bgm_object$arguments$dirichlet_alpha, -#' bgm_object$arguments$dirichlet_alpha) +#' bgm_object$arguments$lambda) #' #' @export summary_SBM <- function(cluster_allocations, diff --git a/man/summary_SBM.Rd b/man/summary_SBM.Rd index 2e74a130..fde02886 100644 --- a/man/summary_SBM.Rd +++ b/man/summary_SBM.Rd @@ -34,6 +34,6 @@ bgm_object <- bgm(Wenchuan[, c(1:5)], s <- summary_SBM(bgm_object$allocations, bgm_object$arguments$dirichlet_alpha, - bgm_object$arguments$dirichlet_alpha) + bgm_object$arguments$lambda) }