From 6caf7fc925b3e2062b60ba6109d4ef56f3dc0a04 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Thu, 5 Dec 2024 13:36:42 +0100
Subject: [PATCH 01/21] Update on the SBM prior: (1) now we save the sampled
 block allocation parameters and block matrices (2) we sample the number of
 clusters by first calculating the probability of a sampled cluster
 configuration using Equation 3.7 pf Miller & Harrison (2018)

---
 R/bgm.R                            | 39 +++++++++++++++-----
 src/gibbs_functions.cpp            | 58 ++++++++++++++++++++++++++----
 src/gibbs_functions_edge_prior.cpp | 55 ++++++++++++++++++++++++++++
 src/gibbs_functions_edge_prior.h   |  8 +++++
 4 files changed, 146 insertions(+), 14 deletions(-)

diff --git a/R/bgm.R b/R/bgm.R
index 88377f44..cc7c01c5 100644
--- a/R/bgm.R
+++ b/R/bgm.R
@@ -504,10 +504,22 @@ bgm = function(x,
     arguments$data_columnnames = data_columnnames
 
     if(edge_selection == TRUE) {
-      output = list(indicator = indicator,
-                    interactions = interactions,
-                    thresholds = thresholds,
-                    arguments = arguments)
+      if(edge_prior == "Stochastic-Block"){
+        output = list(indicator = indicator,
+                      interactions = interactions,
+                      thresholds = thresholds,
+                      allocations = out$allocations,
+                      cluster_edge_prob = out$cluster_edge_prob,
+                      clusters = out$clusters,
+                      arguments = arguments)
+      } else {
+
+        output = list(indicator = indicator,
+                           interactions = interactions,
+                           thresholds = thresholds,
+                           arguments = arguments)
+        }
+
     } else {
       output = list(interactions = interactions,
                     thresholds = thresholds,
@@ -557,10 +569,21 @@ bgm = function(x,
     arguments$data_columnnames = data_columnnames
 
     if(edge_selection == TRUE) {
-      output = list(indicator = indicator,
-                    interactions = interactions,
-                    thresholds = thresholds,
-                    arguments = arguments)
+      if(edge_prior == "Stochastic-Block"){
+        output = list(indicator = indicator,
+                      interactions = interactions,
+                      thresholds = thresholds,
+                      allocations = out$allocations,
+                      cluster_edge_prob = out$cluster_edge_prob,
+                      clusters = out$clusters,
+                      arguments = arguments)
+      } else {
+
+        output = list(indicator = indicator,
+                      interactions = interactions,
+                      thresholds = thresholds,
+                      arguments = arguments)
+      }
     } else {
       output = list(interactions = interactions,
                     thresholds = thresholds,
diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp
index 50ad6529..915da7b4 100644
--- a/src/gibbs_functions.cpp
+++ b/src/gibbs_functions.cpp
@@ -812,6 +812,8 @@ List gibbs_sampler(IntegerMatrix observations,
   int no_interactions = Index.nrow();
   int no_thresholds = sum(no_categories);
   int max_no_categories = max(no_categories);
+  IntegerVector K_values;  // To store sampled K values
+
 
   IntegerVector v = seq(0, no_interactions - 1);
   IntegerVector order(no_interactions);
@@ -864,7 +866,12 @@ List gibbs_sampler(IntegerMatrix observations,
   NumericMatrix cluster_prob(1, 1);
   NumericVector log_Vn(1);
 
-  if(edge_prior == "Stochastic-Block") {
+  // store the allocation indices for each iteration
+  NumericMatrix out_allocations(iter, no_variables);
+  List out_cluster_prob(iter); // store the cluster probabilities for each iteration
+  IntegerVector out_K(iter); // store the number of clusters for each iteration
+
+  if(edge_prior == "Stochastic-Block") { // Initial Configuration of the cluster allocations
     cluster_allocations[0] = 0;
     cluster_allocations[1] = 1;
     for(int i = 2; i < no_variables; i++) {
@@ -1029,6 +1036,14 @@ List gibbs_sampler(IntegerMatrix observations,
             theta(j, i) = cluster_prob(cluster_allocations[i], cluster_allocations[j]);
           }
         }
+
+        // Sample the number of clusters (K)
+        int sampled_K = sample_K_mfm_sbm(cluster_allocations,
+                                         dirichlet_alpha,
+                                         log_Vn);
+
+        // Store the sampled K value
+        K_values.push_back(sampled_K);
       }
     }
   }
@@ -1038,11 +1053,19 @@ List gibbs_sampler(IntegerMatrix observations,
   //The post burn-in iterations ------------------------------------------------
   for(int iteration = 0; iteration < iter; iteration++) {
     if (Progress::check_abort()) {
-      if(edge_selection == true) {
+      if(edge_selection == true && edge_prior == "Stochastic-Block") {
+        return List::create(Named("indicator") = out_indicator,
+                            Named("interactions") = out_interactions,
+                            Named("thresholds") = out_thresholds,
+                            Named("allocations") = out_allocations,
+                            Named("cluster_edge_prob") = out_cluster_prob,
+                            Named("clusters") = out_K);
+      } if(edge_selection == true && edge_prior != "Stochastic-Block") {
         return List::create(Named("indicator") = out_indicator,
                             Named("interactions") = out_interactions,
                             Named("thresholds") = out_thresholds);
-      } else {
+      }
+      else {
         return List::create(Named("interactions") = out_interactions,
                             Named("thresholds") = out_thresholds);
       }
@@ -1150,12 +1173,22 @@ List gibbs_sampler(IntegerMatrix observations,
                                            beta_bernoulli_alpha,
                                            beta_bernoulli_beta);
 
+        // store cluster_prob to out_cluster_prob
+        out_cluster_prob[iteration] = clone(cluster_prob);
+
         for(int i = 0; i < no_variables - 1; i++) {
           for(int j = i + 1; j < no_variables; j++) {
             theta(i, j) = cluster_prob(cluster_allocations[i], cluster_allocations[j]);
             theta(j, i) = cluster_prob(cluster_allocations[i], cluster_allocations[j]);
           }
         }
+
+        // Sample the number of clusters (K)
+        int sampled_K = sample_K_mfm_sbm(cluster_allocations,
+                                         dirichlet_alpha,
+                                         log_Vn);
+        // Store the sampled K value to out_K
+        out_K[iteration] = sampled_K;
       }
     }
 
@@ -1234,15 +1267,28 @@ List gibbs_sampler(IntegerMatrix observations,
         out_thresholds(no_variables - 1, 1) /= iteration + 1;
       }
     }
+
+    for(int i = 0; i < no_variables; i++) {
+      out_allocations(iteration, i) = cluster_allocations[i] + 1;
+    }
   }
 
 
   if(edge_selection == true) {
+    if(edge_prior == "Stochastic-Block"){
     return List::create(Named("indicator") = out_indicator,
                         Named("interactions") = out_interactions,
-                        Named("thresholds") = out_thresholds);
+                        Named("thresholds") = out_thresholds,
+                        Named("allocations") = out_allocations,  // Include z values
+                        Named("cluster_edge_prob") = out_cluster_prob, // Include cluster probabilities (i.e., the block matrix)
+                        Named("clusters") = out_K);  // Include the sampled number of clusters
+    } else {
+      return List::create(Named("indicator") = out_indicator,
+                          Named("interactions") = out_interactions,
+                          Named("thresholds") = out_thresholds);
+    }
   } else {
-    return List::create(Named("interactions") = out_interactions,
-                        Named("thresholds") = out_thresholds);
+      return List::create(Named("interactions") = out_interactions,
+                          Named("thresholds") = out_thresholds);
   }
 }
\ No newline at end of file
diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp
index a476a41b..68fcc75f 100644
--- a/src/gibbs_functions_edge_prior.cpp
+++ b/src/gibbs_functions_edge_prior.cpp
@@ -337,8 +337,63 @@ IntegerVector block_allocations_mfm_sbm(IntegerVector cluster_assign,
     }
   }
   return cluster_assign;
+
+}
+
+// ----------------------------------------------------------------------------|
+// Sample the number of clusters (K) based on Equation (3.7) from Miller & Harrison
+// ----------------------------------------------------------------------------|
+int sample_K_mfm_sbm(IntegerVector cluster_allocations,
+                     double dirichlet_alpha,
+                     NumericVector log_Vn,
+                     int max_K = 12) {
+  // compute t = |C| (number of unique clusters i.e., the cardinality of C)
+  IntegerVector cluster_size = table_cpp(cluster_allocations); // Cluster sizes
+  int t = cluster_size.size(); // Number of non-zero clusters
+
+  // Step 2: Compute p(k|t) using Equation 3.7 in Miller and Harrison (2018)
+  NumericVector log_p_K(max_K, R_NegInf); // Log-probabilities of k
+  for (int k = t; k < max_K; ++k) {
+    double log_term1 = std::lgamma(k + 1) - std::lgamma(k - t + 1); // (k choose t)
+    double log_term2 = k * std::log(dirichlet_alpha);              // dirichlet_alpha^k
+    double log_term3 = -log_Vn[t - 1];                             // Normalize by Vn
+    log_p_K[k] = log_term1 + log_term2 + log_term3;                // Combine terms
+  }
+
+  // Normalize probabilities using log-sum-exp
+  double log_sum_exp = -INFINITY;
+  for (int k = t; k < max_K; ++k) {
+    log_sum_exp = std::log1p(std::exp(log_p_K[k] - log_sum_exp)) + log_sum_exp;
+  }
+  NumericVector p_K(max_K, 0.0);
+  for (int k = t; k < max_K; ++k) {
+    p_K[k] = std::exp(log_p_K[k] - log_sum_exp); // Convert log-p to p
+  }
+
+  // Step 3: Exclude zero and adjust probabilities for truncated Poisson
+  double p_zero = std::exp(-1.0); // P(k = 0) for Poisson(lambda = 1)
+  double truncated_normalizer = 1.0 - p_zero;
+  double cumulative_probability = 0.0;
+
+  for (int k = t; k < max_K; ++k) {
+    p_K[k] /= truncated_normalizer; // Adjust probabilities to exclude zero
+    cumulative_probability += p_K[k];
+  }
+
+  // Step 4: Draw a uniform random number within [0, cumulative_probability]
+  double u = R::runif(0, cumulative_probability);
+
+  // Step 5: sample from the truncated Poisson distribution
+  double lambda = 1.0; // Poisson parameter (fixed to 1)
+  int sampled_k = R::qpois(u * truncated_normalizer, lambda, true, false);
+
+  // Ensure k is at least t
+  return std::max(t, std::min(sampled_k, max_K - 1));
 }
 
+
+
+
 // ----------------------------------------------------------------------------|
 // Sample the block parameters for the MFM - SBM
 // ----------------------------------------------------------------------------|
diff --git a/src/gibbs_functions_edge_prior.h b/src/gibbs_functions_edge_prior.h
index 827c4ea9..72dfd814 100644
--- a/src/gibbs_functions_edge_prior.h
+++ b/src/gibbs_functions_edge_prior.h
@@ -20,6 +20,14 @@ Rcpp::IntegerVector block_allocations_mfm_sbm(Rcpp::IntegerVector cluster_assign
                                               double beta_bernoulli_alpha,
                                               double beta_bernoulli_beta);
 
+// ----------------------------------------------------------------------------|
+// Sample the number of clusters (K) based on Equation (3.7) from Miller & Harrison
+// ----------------------------------------------------------------------------|
+int sample_K_mfm_sbm(IntegerVector cluster_assign,
+                     double dirichlet_alpha,
+                     NumericVector log_Vn,
+                     int max_K = 12);
+
 // ----------------------------------------------------------------------------|
 // Sample the block parameters for the MFM - SBM
 // ----------------------------------------------------------------------------|

From 6633a70e798ab2f3da2c842fb233b39eea622ff9 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Thu, 5 Dec 2024 16:38:19 +0100
Subject: [PATCH 02/21] update the restriction on Kmax

---
 src/gibbs_functions.cpp            | 6 ++++--
 src/gibbs_functions_edge_prior.cpp | 2 +-
 src/gibbs_functions_edge_prior.h   | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp
index 915da7b4..9f0a609c 100644
--- a/src/gibbs_functions.cpp
+++ b/src/gibbs_functions.cpp
@@ -1040,7 +1040,8 @@ List gibbs_sampler(IntegerMatrix observations,
         // Sample the number of clusters (K)
         int sampled_K = sample_K_mfm_sbm(cluster_allocations,
                                          dirichlet_alpha,
-                                         log_Vn);
+                                         log_Vn,
+                                         no_variables);
 
         // Store the sampled K value
         K_values.push_back(sampled_K);
@@ -1186,7 +1187,8 @@ List gibbs_sampler(IntegerMatrix observations,
         // Sample the number of clusters (K)
         int sampled_K = sample_K_mfm_sbm(cluster_allocations,
                                          dirichlet_alpha,
-                                         log_Vn);
+                                         log_Vn,
+                                         no_variables);
         // Store the sampled K value to out_K
         out_K[iteration] = sampled_K;
       }
diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp
index 68fcc75f..ef3d04f0 100644
--- a/src/gibbs_functions_edge_prior.cpp
+++ b/src/gibbs_functions_edge_prior.cpp
@@ -346,7 +346,7 @@ IntegerVector block_allocations_mfm_sbm(IntegerVector cluster_assign,
 int sample_K_mfm_sbm(IntegerVector cluster_allocations,
                      double dirichlet_alpha,
                      NumericVector log_Vn,
-                     int max_K = 12) {
+                     int max_K = 500) {
   // compute t = |C| (number of unique clusters i.e., the cardinality of C)
   IntegerVector cluster_size = table_cpp(cluster_allocations); // Cluster sizes
   int t = cluster_size.size(); // Number of non-zero clusters
diff --git a/src/gibbs_functions_edge_prior.h b/src/gibbs_functions_edge_prior.h
index 72dfd814..4588def0 100644
--- a/src/gibbs_functions_edge_prior.h
+++ b/src/gibbs_functions_edge_prior.h
@@ -26,7 +26,7 @@ Rcpp::IntegerVector block_allocations_mfm_sbm(Rcpp::IntegerVector cluster_assign
 int sample_K_mfm_sbm(IntegerVector cluster_assign,
                      double dirichlet_alpha,
                      NumericVector log_Vn,
-                     int max_K = 12);
+                     int max_K = 500);
 
 // ----------------------------------------------------------------------------|
 // Sample the block parameters for the MFM - SBM

From f74e7230d82d8d4088723182df70182eecf7f428 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Fri, 6 Dec 2024 15:21:33 +0100
Subject: [PATCH 03/21] changes: (1) take out the option to save
 out_cluster_prob and (2) changes K_max = no_variables + 10

---
 R/bgm.R                            | 2 --
 src/gibbs_functions.cpp            | 9 ++-------
 src/gibbs_functions_edge_prior.cpp | 2 +-
 src/gibbs_functions_edge_prior.h   | 2 +-
 4 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/R/bgm.R b/R/bgm.R
index cc7c01c5..096bb64d 100644
--- a/R/bgm.R
+++ b/R/bgm.R
@@ -509,7 +509,6 @@ bgm = function(x,
                       interactions = interactions,
                       thresholds = thresholds,
                       allocations = out$allocations,
-                      cluster_edge_prob = out$cluster_edge_prob,
                       clusters = out$clusters,
                       arguments = arguments)
       } else {
@@ -574,7 +573,6 @@ bgm = function(x,
                       interactions = interactions,
                       thresholds = thresholds,
                       allocations = out$allocations,
-                      cluster_edge_prob = out$cluster_edge_prob,
                       clusters = out$clusters,
                       arguments = arguments)
       } else {
diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp
index 9f0a609c..6c8c04ea 100644
--- a/src/gibbs_functions.cpp
+++ b/src/gibbs_functions.cpp
@@ -868,7 +868,6 @@ List gibbs_sampler(IntegerMatrix observations,
 
   // store the allocation indices for each iteration
   NumericMatrix out_allocations(iter, no_variables);
-  List out_cluster_prob(iter); // store the cluster probabilities for each iteration
   IntegerVector out_K(iter); // store the number of clusters for each iteration
 
   if(edge_prior == "Stochastic-Block") { // Initial Configuration of the cluster allocations
@@ -1041,7 +1040,7 @@ List gibbs_sampler(IntegerMatrix observations,
         int sampled_K = sample_K_mfm_sbm(cluster_allocations,
                                          dirichlet_alpha,
                                          log_Vn,
-                                         no_variables);
+                                         no_variables + 10);
 
         // Store the sampled K value
         K_values.push_back(sampled_K);
@@ -1059,7 +1058,6 @@ List gibbs_sampler(IntegerMatrix observations,
                             Named("interactions") = out_interactions,
                             Named("thresholds") = out_thresholds,
                             Named("allocations") = out_allocations,
-                            Named("cluster_edge_prob") = out_cluster_prob,
                             Named("clusters") = out_K);
       } if(edge_selection == true && edge_prior != "Stochastic-Block") {
         return List::create(Named("indicator") = out_indicator,
@@ -1174,8 +1172,6 @@ List gibbs_sampler(IntegerMatrix observations,
                                            beta_bernoulli_alpha,
                                            beta_bernoulli_beta);
 
-        // store cluster_prob to out_cluster_prob
-        out_cluster_prob[iteration] = clone(cluster_prob);
 
         for(int i = 0; i < no_variables - 1; i++) {
           for(int j = i + 1; j < no_variables; j++) {
@@ -1188,7 +1184,7 @@ List gibbs_sampler(IntegerMatrix observations,
         int sampled_K = sample_K_mfm_sbm(cluster_allocations,
                                          dirichlet_alpha,
                                          log_Vn,
-                                         no_variables);
+                                         no_variables + 10);
         // Store the sampled K value to out_K
         out_K[iteration] = sampled_K;
       }
@@ -1282,7 +1278,6 @@ List gibbs_sampler(IntegerMatrix observations,
                         Named("interactions") = out_interactions,
                         Named("thresholds") = out_thresholds,
                         Named("allocations") = out_allocations,  // Include z values
-                        Named("cluster_edge_prob") = out_cluster_prob, // Include cluster probabilities (i.e., the block matrix)
                         Named("clusters") = out_K);  // Include the sampled number of clusters
     } else {
       return List::create(Named("indicator") = out_indicator,
diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp
index ef3d04f0..f39bb3e1 100644
--- a/src/gibbs_functions_edge_prior.cpp
+++ b/src/gibbs_functions_edge_prior.cpp
@@ -346,7 +346,7 @@ IntegerVector block_allocations_mfm_sbm(IntegerVector cluster_assign,
 int sample_K_mfm_sbm(IntegerVector cluster_allocations,
                      double dirichlet_alpha,
                      NumericVector log_Vn,
-                     int max_K = 500) {
+                     int max_K) {
   // compute t = |C| (number of unique clusters i.e., the cardinality of C)
   IntegerVector cluster_size = table_cpp(cluster_allocations); // Cluster sizes
   int t = cluster_size.size(); // Number of non-zero clusters
diff --git a/src/gibbs_functions_edge_prior.h b/src/gibbs_functions_edge_prior.h
index 4588def0..69db07d4 100644
--- a/src/gibbs_functions_edge_prior.h
+++ b/src/gibbs_functions_edge_prior.h
@@ -26,7 +26,7 @@ Rcpp::IntegerVector block_allocations_mfm_sbm(Rcpp::IntegerVector cluster_assign
 int sample_K_mfm_sbm(IntegerVector cluster_assign,
                      double dirichlet_alpha,
                      NumericVector log_Vn,
-                     int max_K = 500);
+                     int max_K);
 
 // ----------------------------------------------------------------------------|
 // Sample the block parameters for the MFM - SBM

From 4543486fab840cf5b0252ee0b738b803a68fceaa Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Fri, 6 Dec 2024 15:30:20 +0100
Subject: [PATCH 04/21] take out saving K within the Gibbs sampler

---
 R/bgm.R                 |  2 --
 src/gibbs_functions.cpp | 15 ++-------------
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/R/bgm.R b/R/bgm.R
index 096bb64d..432f4653 100644
--- a/R/bgm.R
+++ b/R/bgm.R
@@ -509,7 +509,6 @@ bgm = function(x,
                       interactions = interactions,
                       thresholds = thresholds,
                       allocations = out$allocations,
-                      clusters = out$clusters,
                       arguments = arguments)
       } else {
 
@@ -573,7 +572,6 @@ bgm = function(x,
                       interactions = interactions,
                       thresholds = thresholds,
                       allocations = out$allocations,
-                      clusters = out$clusters,
                       arguments = arguments)
       } else {
 
diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp
index 6c8c04ea..897334b5 100644
--- a/src/gibbs_functions.cpp
+++ b/src/gibbs_functions.cpp
@@ -868,7 +868,6 @@ List gibbs_sampler(IntegerMatrix observations,
 
   // store the allocation indices for each iteration
   NumericMatrix out_allocations(iter, no_variables);
-  IntegerVector out_K(iter); // store the number of clusters for each iteration
 
   if(edge_prior == "Stochastic-Block") { // Initial Configuration of the cluster allocations
     cluster_allocations[0] = 0;
@@ -1057,8 +1056,7 @@ List gibbs_sampler(IntegerMatrix observations,
         return List::create(Named("indicator") = out_indicator,
                             Named("interactions") = out_interactions,
                             Named("thresholds") = out_thresholds,
-                            Named("allocations") = out_allocations,
-                            Named("clusters") = out_K);
+                            Named("allocations") = out_allocations);
       } if(edge_selection == true && edge_prior != "Stochastic-Block") {
         return List::create(Named("indicator") = out_indicator,
                             Named("interactions") = out_interactions,
@@ -1179,14 +1177,6 @@ List gibbs_sampler(IntegerMatrix observations,
             theta(j, i) = cluster_prob(cluster_allocations[i], cluster_allocations[j]);
           }
         }
-
-        // Sample the number of clusters (K)
-        int sampled_K = sample_K_mfm_sbm(cluster_allocations,
-                                         dirichlet_alpha,
-                                         log_Vn,
-                                         no_variables + 10);
-        // Store the sampled K value to out_K
-        out_K[iteration] = sampled_K;
       }
     }
 
@@ -1277,8 +1267,7 @@ List gibbs_sampler(IntegerMatrix observations,
     return List::create(Named("indicator") = out_indicator,
                         Named("interactions") = out_interactions,
                         Named("thresholds") = out_thresholds,
-                        Named("allocations") = out_allocations,  // Include z values
-                        Named("clusters") = out_K);  // Include the sampled number of clusters
+                        Named("allocations") = out_allocations);  // Include the sampled number of clusters
     } else {
       return List::create(Named("indicator") = out_indicator,
                           Named("interactions") = out_interactions,

From f25e2f0556a359b77d3aedf06ad52b8eaac60d43 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Mon, 9 Dec 2024 13:32:17 +0100
Subject: [PATCH 05/21] Include the option to calculate the number of clusters
 and cluster allocations

---
 R/bgm.R                            | 31 +++++++++++++----
 R/utility_functions.R              | 56 ++++++++++++++++++++++++++++++
 man/bgm.Rd                         | 25 ++++++++++---
 src/gibbs_functions.cpp            | 11 +-----
 src/gibbs_functions_edge_prior.cpp | 56 +-----------------------------
 src/gibbs_functions_edge_prior.h   |  8 -----
 vignettes/refs.bib                 | 30 ++++++++++++----
 7 files changed, 127 insertions(+), 90 deletions(-)

diff --git a/R/bgm.R b/R/bgm.R
index 432f4653..799eb625 100644
--- a/R/bgm.R
+++ b/R/bgm.R
@@ -111,12 +111,14 @@
 #' model \code{edge_prior = "Stochastic-Block"} assumes that nodes can be
 #' organized into blocks or clusters. In principle, the assignment of nodes to
 #' such clusters is unknown, and the model as implemented here considers all
-#' possible options \insertCite{@i.e., specifies a Dirichlet process on the node to block
-#' allocation as described by @GengEtAl_2019}{bgms}. This model is advantageous
+#' possible options \insertCite{@i.e., specifies a Dirichlet prior on the probability
+#' of allocations as described by @GengEtAl_2019}{bgms}. This model is advantageous
 #' when nodes are expected to fall into distinct clusters. The inclusion
 #' probabilities for the edges are defined at the level of the clusters, with a
 #' beta prior for the unknown inclusion probability with shape parameters
-#' \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}. The default is
+#' \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, and a Dirichlet
+#' prior on the cluster assignment probabilities with a common concentration
+#' parameter \code{dirichlet_alpha}. The default is
 #' \code{edge_prior = "Bernoulli"}.
 #' @param inclusion_probability The prior edge inclusion probability for the
 #' Bernoulli model. Can be a single probability, or a matrix of \code{p} rows
@@ -127,7 +129,7 @@
 #' positive numbers. Defaults to \code{beta_bernoulli_alpha = 1} and
 #' \code{beta_bernoulli_beta = 1}.
 #' @param dirichlet_alpha The shape of the Dirichlet prior on the node-to-block
-#' allocation parameters for the Stochastic Block model.
+#' allocation probabilities for the Stochastic Block model.
 #' @param na.action How do you want the function to handle missing data? If
 #' \code{na.action = "listwise"}, listwise deletion is used. If
 #' \code{na.action = "impute"}, missing data are imputed iteratively during the
@@ -156,8 +158,14 @@
 #' ``blume-capel'' variables, the first entry is the parameter for the linear
 #' effect and the second entry is the parameter for the quadratic effect, which
 #' models the offset to the reference category.
+#' \item In the case of
+#' \code{edge_prior = "Stochastic-Block"} two additional elements are returned:
+#' a vector \code{allocations} with the estimated cluster assignments of the nodes and an
+#' table \code{clusters} with the estimated posterior probability of the number
+#' of clusters in the network. The vector of node allocations is calculated using
+#' a method proposed by \insertCite{Dahl2009}{bgms} and also used by
+#' \insertCite{GengEtAl_2019}{bgms}.
 #' }
-#'
 #' If \code{save = TRUE}, the result is a list of class ``bgms'' containing:
 #' \itemize{
 #' \item \code{indicator}: A matrix with \code{iter} rows and
@@ -169,8 +177,15 @@
 #' \item \code{thresholds}: A matrix with \code{iter} rows and
 #' \code{sum(m)} columns, containing parameter states from every iteration of
 #' the Gibbs sampler for the category thresholds.
+#' \item In the case of
+#' \code{edge_prior = "Stochastic-Block"} a matrix \code{allocations} with the
+#' cluster assignments of the nodes from each iteration is returned. This matrix
+#' can be used to calculate the posterior probability of the number of clusters
+#' by utilizing the \code{summary_SBM(bgm_output[["allocations"]])} function.
 #' }
 #' Column averages of these matrices provide the model-averaged posterior means.
+#' Except for the \code{allocations} matrix, for which the \code{summary_SBM}
+#' needs to be utilized.
 #'
 #' In addition to the analysis results, the bgm output lists some of the
 #' arguments of its call. This is useful for post-processing the results.
@@ -505,10 +520,14 @@ bgm = function(x,
 
     if(edge_selection == TRUE) {
       if(edge_prior == "Stochastic-Block"){
+
+        summarySbm <- summary_SBM(cluster_allocations = out$allocations)
+
         output = list(indicator = indicator,
                       interactions = interactions,
                       thresholds = thresholds,
-                      allocations = out$allocations,
+                      allocations = summarySbm$allocations,
+                      clusters = summarySbm$no_clusters,
                       arguments = arguments)
       } else {
 
diff --git a/R/utility_functions.R b/R/utility_functions.R
index 3acbcb1a..4c12ba61 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -1007,3 +1007,59 @@ compare_reformat_data = function(x,
               missing_index_gr2 = missing_index_gr2,
               na_impute = na_impute))
 }
+
+# Dahl's method to summarize the samples of the cluster_allocations
+#  This function was adapted from the R code accompanying the paper:
+#  Geng, J., Bhattacharya, A., & Pati, D. (2019). Probabilistic Community
+#  Detection With Unknown Number of Communities, Journal of the American
+#  Statistical Association, 114:526, 893-905, DOI:10.1080/01621459.2018.1458618
+
+getDahl <- function(cluster_allocations) {
+  # Dimensions of the input matrix
+  niters <- nrow(cluster_allocations)  # Number of iterations
+  n <- ncol(cluster_allocations)       # Number of nodes
+
+  # Compute membership matrices for each iteration
+  membershipMatrices <- apply(cluster_allocations, 1, function(clusterAssign) {
+    outer(clusterAssign, clusterAssign, FUN = "==")
+  })
+
+  # Reshape membershipMatrices into a list of matrices
+  membershipMatrices <- lapply(seq_len(niters), function(i) {
+    matrix(membershipMatrices[, i], n, n)
+  })
+
+  # Compute the average membership matrix
+  membershipAverage <- Reduce("+", membershipMatrices) / niters
+
+  # Compute squared error for each iteration
+  SqError <- sapply(membershipMatrices, function(x, av) sum((x - av)^2),
+                    av = membershipAverage)
+
+  # Find the iteration with the minimum squared error
+  DahlIndex <- which.min(SqError)
+
+  # Extract the cluster assignment corresponding to the best iteration
+  DahlAns <- cluster_allocations[DahlIndex, , drop = TRUE]
+
+  return(DahlAns)
+}
+
+
+# A function that computes the cluster posterior probabilities and allocations
+
+summary_SBM <- function(cluster_allocations) {
+  # Compute the number of unique clusters for each iteration
+  clusters <- apply(cluster_allocations, 1, function(row) length(unique(row)))
+
+  # Compute the posterior probabilities of the actual unique clusters
+  no_clusters <- table(clusters) / length(clusters)
+
+  # Compute the allocations of the nodes based on Dahl's method
+  allocations <- getDahl(cluster_allocations)
+
+  # Return the results
+  return(list(no_clusters = no_clusters,
+              allocations = allocations))
+}
+
diff --git a/man/bgm.Rd b/man/bgm.Rd
index 6c8fe3d5..d091164b 100644
--- a/man/bgm.Rd
+++ b/man/bgm.Rd
@@ -101,12 +101,14 @@ complexity (number of edges) get the same prior weight. The Stochastic Block
 model \code{edge_prior = "Stochastic-Block"} assumes that nodes can be
 organized into blocks or clusters. In principle, the assignment of nodes to
 such clusters is unknown, and the model as implemented here considers all
-possible options \insertCite{@i.e., specifies a Dirichlet process on the node to block
-allocation as described by @GengEtAl_2019}{bgms}. This model is advantageous
+possible options \insertCite{@i.e., specifies a Dirichlet prior on the probability
+of allocations as described by @GengEtAl_2019}{bgms}. This model is advantageous
 when nodes are expected to fall into distinct clusters. The inclusion
 probabilities for the edges are defined at the level of the clusters, with a
 beta prior for the unknown inclusion probability with shape parameters
-\code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}. The default is
+\code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, and a Dirichlet
+prior on the cluster assignment probabilities with a common concentration
+parameter \code{dirichlet_alpha}. The default is
 \code{edge_prior = "Bernoulli"}.}
 
 \item{inclusion_probability}{The prior edge inclusion probability for the
@@ -120,7 +122,7 @@ positive numbers. Defaults to \code{beta_bernoulli_alpha = 1} and
 \code{beta_bernoulli_beta = 1}.}
 
 \item{dirichlet_alpha}{The shape of the Dirichlet prior on the node-to-block
-allocation parameters for the Stochastic Block model.}
+allocation probabilities for the Stochastic Block model.}
 
 \item{na.action}{How do you want the function to handle missing data? If
 \code{na.action = "listwise"}, listwise deletion is used. If
@@ -153,8 +155,14 @@ columns, containing model-averaged category thresholds. In the case of
 ``blume-capel'' variables, the first entry is the parameter for the linear
 effect and the second entry is the parameter for the quadratic effect, which
 models the offset to the reference category.
+\item In the case of
+\code{edge_prior = "Stochastic-Block"} two additional elements are returned:
+a vector \code{allocations} with the estimated cluster assignments of the nodes and an
+table \code{clusters} with the estimated posterior probability of the number
+of clusters in the network. The vector of node allocations is calculated using
+a method proposed by \insertCite{Dahl2009}{bgms} and also used by
+\insertCite{GengEtAl_2019}{bgms}.
 }
-
 If \code{save = TRUE}, the result is a list of class ``bgms'' containing:
 \itemize{
 \item \code{indicator}: A matrix with \code{iter} rows and
@@ -166,8 +174,15 @@ iteration of the Gibbs sampler for the pairwise associations.
 \item \code{thresholds}: A matrix with \code{iter} rows and
 \code{sum(m)} columns, containing parameter states from every iteration of
 the Gibbs sampler for the category thresholds.
+\item In the case of
+\code{edge_prior = "Stochastic-Block"} a matrix \code{allocations} with the
+cluster assignments of the nodes from each iteration is returned. This matrix
+can be used to calculate the posterior probability of the number of clusters
+by utilizing the \code{summary_SBM(bgm_output[["allocations"]])} function.
 }
 Column averages of these matrices provide the model-averaged posterior means.
+Except for the \code{allocations} matrix, for which the \code{summary_SBM}
+needs to be utilized.
 
 In addition to the analysis results, the bgm output lists some of the
 arguments of its call. This is useful for post-processing the results.
diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp
index 897334b5..56e06236 100644
--- a/src/gibbs_functions.cpp
+++ b/src/gibbs_functions.cpp
@@ -1034,15 +1034,6 @@ List gibbs_sampler(IntegerMatrix observations,
             theta(j, i) = cluster_prob(cluster_allocations[i], cluster_allocations[j]);
           }
         }
-
-        // Sample the number of clusters (K)
-        int sampled_K = sample_K_mfm_sbm(cluster_allocations,
-                                         dirichlet_alpha,
-                                         log_Vn,
-                                         no_variables + 10);
-
-        // Store the sampled K value
-        K_values.push_back(sampled_K);
       }
     }
   }
@@ -1267,7 +1258,7 @@ List gibbs_sampler(IntegerMatrix observations,
     return List::create(Named("indicator") = out_indicator,
                         Named("interactions") = out_interactions,
                         Named("thresholds") = out_thresholds,
-                        Named("allocations") = out_allocations);  // Include the sampled number of clusters
+                        Named("allocations") = out_allocations);
     } else {
       return List::create(Named("indicator") = out_indicator,
                           Named("interactions") = out_interactions,
diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp
index f39bb3e1..b1837ebb 100644
--- a/src/gibbs_functions_edge_prior.cpp
+++ b/src/gibbs_functions_edge_prior.cpp
@@ -340,60 +340,6 @@ IntegerVector block_allocations_mfm_sbm(IntegerVector cluster_assign,
 
 }
 
-// ----------------------------------------------------------------------------|
-// Sample the number of clusters (K) based on Equation (3.7) from Miller & Harrison
-// ----------------------------------------------------------------------------|
-int sample_K_mfm_sbm(IntegerVector cluster_allocations,
-                     double dirichlet_alpha,
-                     NumericVector log_Vn,
-                     int max_K) {
-  // compute t = |C| (number of unique clusters i.e., the cardinality of C)
-  IntegerVector cluster_size = table_cpp(cluster_allocations); // Cluster sizes
-  int t = cluster_size.size(); // Number of non-zero clusters
-
-  // Step 2: Compute p(k|t) using Equation 3.7 in Miller and Harrison (2018)
-  NumericVector log_p_K(max_K, R_NegInf); // Log-probabilities of k
-  for (int k = t; k < max_K; ++k) {
-    double log_term1 = std::lgamma(k + 1) - std::lgamma(k - t + 1); // (k choose t)
-    double log_term2 = k * std::log(dirichlet_alpha);              // dirichlet_alpha^k
-    double log_term3 = -log_Vn[t - 1];                             // Normalize by Vn
-    log_p_K[k] = log_term1 + log_term2 + log_term3;                // Combine terms
-  }
-
-  // Normalize probabilities using log-sum-exp
-  double log_sum_exp = -INFINITY;
-  for (int k = t; k < max_K; ++k) {
-    log_sum_exp = std::log1p(std::exp(log_p_K[k] - log_sum_exp)) + log_sum_exp;
-  }
-  NumericVector p_K(max_K, 0.0);
-  for (int k = t; k < max_K; ++k) {
-    p_K[k] = std::exp(log_p_K[k] - log_sum_exp); // Convert log-p to p
-  }
-
-  // Step 3: Exclude zero and adjust probabilities for truncated Poisson
-  double p_zero = std::exp(-1.0); // P(k = 0) for Poisson(lambda = 1)
-  double truncated_normalizer = 1.0 - p_zero;
-  double cumulative_probability = 0.0;
-
-  for (int k = t; k < max_K; ++k) {
-    p_K[k] /= truncated_normalizer; // Adjust probabilities to exclude zero
-    cumulative_probability += p_K[k];
-  }
-
-  // Step 4: Draw a uniform random number within [0, cumulative_probability]
-  double u = R::runif(0, cumulative_probability);
-
-  // Step 5: sample from the truncated Poisson distribution
-  double lambda = 1.0; // Poisson parameter (fixed to 1)
-  int sampled_k = R::qpois(u * truncated_normalizer, lambda, true, false);
-
-  // Ensure k is at least t
-  return std::max(t, std::min(sampled_k, max_K - 1));
-}
-
-
-
-
 // ----------------------------------------------------------------------------|
 // Sample the block parameters for the MFM - SBM
 // ----------------------------------------------------------------------------|
@@ -431,4 +377,4 @@ NumericMatrix block_probs_mfm_sbm(IntegerVector cluster_assign,
   }
 
   return block_probs;
-}
\ No newline at end of file
+}
diff --git a/src/gibbs_functions_edge_prior.h b/src/gibbs_functions_edge_prior.h
index 69db07d4..827c4ea9 100644
--- a/src/gibbs_functions_edge_prior.h
+++ b/src/gibbs_functions_edge_prior.h
@@ -20,14 +20,6 @@ Rcpp::IntegerVector block_allocations_mfm_sbm(Rcpp::IntegerVector cluster_assign
                                               double beta_bernoulli_alpha,
                                               double beta_bernoulli_beta);
 
-// ----------------------------------------------------------------------------|
-// Sample the number of clusters (K) based on Equation (3.7) from Miller & Harrison
-// ----------------------------------------------------------------------------|
-int sample_K_mfm_sbm(IntegerVector cluster_assign,
-                     double dirichlet_alpha,
-                     NumericVector log_Vn,
-                     int max_K);
-
 // ----------------------------------------------------------------------------|
 // Sample the block parameters for the MFM - SBM
 // ----------------------------------------------------------------------------|
diff --git a/vignettes/refs.bib b/vignettes/refs.bib
index 44c43c84..0a152448 100644
--- a/vignettes/refs.bib
+++ b/vignettes/refs.bib
@@ -1,11 +1,29 @@
-@article{SekulovskiEtAl_2023,
-  title={Testing conditional independence in psychometric networks: An analysis of three bayesian methods},
-  author={Sekulovski, N. and Keetelaar, S. and Huth, K. and Wagenmakers, {E.-J.} and van Bork, R. and van den Bergh, D. and Marsman, M.},
-  journal={Multivariate Behavioral Research},
-  doi = {10.1080/00273171.2024.2345915},
-  year={in press}
+
+@article{Dahl2009,
+  author       = {David B. Dahl},
+  title        = {Modal clustering in a class of product partition models},
+  journal      = {Bayesian Analysis},
+  volume       = {4},
+  number       = {2},
+  pages        = {243--264},
+  year         = {2009},
+  month        = {June},
+  doi          = {10.1214/09-BA409},
+  url          = {https://doi.org/10.1214/09-BA409}
 }
 
+@article{SekulovskiEtAl_2023,
+	author = {Sekulovski, N. and Keetelaar, S. and Huth, K. B. S. and Wagenmakers, Eric-Jan and {van }Bork, R. and {van den }Bergh, D. and Marsman, M.},
+	date-added = {2023-06-12 16:18:37 +0200},
+	date-modified = {2024-12-07 10:05:32 +0100},
+	doi = {10.1080/00273171.2024.2345915},
+	journal = {Multivariate Behavioral Research},
+	pages = {913--933},
+	title = {Testing conditional independence in psychometric networks: {A}n analysis of three {B}ayesian methods},
+	volume = {59},
+	year = {2024},
+	bdsk-url-1 = {https://doi.org/10.1080/00273171.2024.2345915}}
+
 @article{GengEtAl_2019,
   title={Probabilistic community detection with unknown number of communities},
   author={Geng, J. and Bhattacharya, A. and Pati, D.},

From 02be0e9b4eb7dfb1d4c79fb9a4f7285078ba0186 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Mon, 9 Dec 2024 14:30:20 +0100
Subject: [PATCH 06/21] update the references

---
 inst/REFERENCES.bib | 27 +++++++++++++++++++++------
 vignettes/refs.bib  | 15 ---------------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib
index 1d63bbfe..18afffc3 100644
--- a/inst/REFERENCES.bib
+++ b/inst/REFERENCES.bib
@@ -1,3 +1,16 @@
+@article{Dahl2009,
+  author       = {David B. Dahl},
+  title        = {Modal clustering in a class of product partition models},
+  journal      = {Bayesian Analysis},
+  volume       = {4},
+  number       = {2},
+  pages        = {243--264},
+  year         = {2009},
+  month        = {June},
+  doi          = {10.1214/09-BA409},
+  url          = {https://doi.org/10.1214/09-BA409}
+}
+
 @article{GengEtAl_2019,
   title={Probabilistic community detection with unknown number of communities},
   author={Geng, J. and Bhattacharya, A. and Pati, D.},
@@ -8,12 +21,14 @@ @article{GengEtAl_2019
   year={2019}}
 
 @article{SekulovskiEtAl_2023,
-  title={Testing conditional independence in psychometric networks: An analysis of three bayesian methods},
-  author={Sekulovski, N. and Keetelaar, S. and Huth, K. and Wagenmakers, {E.-J.} and van Bork, R. and van den Bergh, D. and Marsman, M.},
-  journal={Multivariate Behavioral Research},
-  doi = {10.1080/00273171.2024.2345915},
-  year={in press}
-}
+	author = {Sekulovski, N. and Keetelaar, S. and Huth, K. B. S. and Wagenmakers, Eric-Jan and {van }Bork, R. and {van den }Bergh, D. and Marsman, M.},
+	doi = {10.1080/00273171.2024.2345915},
+	journal = {Multivariate Behavioral Research},
+	pages = {913--933},
+	title = {Testing conditional independence in psychometric networks: {A}n analysis of three {B}ayesian methods},
+	volume = {59},
+	year = {2024},
+	bdsk-url-1 = {https://doi.org/10.1080/00273171.2024.2345915}}
 
 @article{Dienes_2014,
 	author = {Dienes, Z.},
diff --git a/vignettes/refs.bib b/vignettes/refs.bib
index 0a152448..1e76345a 100644
--- a/vignettes/refs.bib
+++ b/vignettes/refs.bib
@@ -1,21 +1,6 @@
 
-@article{Dahl2009,
-  author       = {David B. Dahl},
-  title        = {Modal clustering in a class of product partition models},
-  journal      = {Bayesian Analysis},
-  volume       = {4},
-  number       = {2},
-  pages        = {243--264},
-  year         = {2009},
-  month        = {June},
-  doi          = {10.1214/09-BA409},
-  url          = {https://doi.org/10.1214/09-BA409}
-}
-
 @article{SekulovskiEtAl_2023,
 	author = {Sekulovski, N. and Keetelaar, S. and Huth, K. B. S. and Wagenmakers, Eric-Jan and {van }Bork, R. and {van den }Bergh, D. and Marsman, M.},
-	date-added = {2023-06-12 16:18:37 +0200},
-	date-modified = {2024-12-07 10:05:32 +0100},
 	doi = {10.1080/00273171.2024.2345915},
 	journal = {Multivariate Behavioral Research},
 	pages = {913--933},

From 73d42b8b177f19bdd9adf0387014a1acedc8acfd Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Thu, 19 Dec 2024 13:43:14 +0100
Subject: [PATCH 07/21] Update: Include the Rao-Rlackwellized estimates for the
 probabilities of the number of components

---
 R/RcppExports.R                    |  4 ++
 R/bgm.R                            |  4 +-
 R/utility_functions.R              | 77 +++++++++++++++++++++++++++---
 src/RcppExports.cpp                | 14 ++++++
 src/gibbs_functions_edge_prior.cpp |  1 +
 5 files changed, 93 insertions(+), 7 deletions(-)

diff --git a/R/RcppExports.R b/R/RcppExports.R
index e1dcd9d6..c5a186f9 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -17,3 +17,7 @@ compare_gibbs_sampler <- function(observations_gr1, observations_gr2, no_categor
     .Call(`_bgms_compare_gibbs_sampler`, observations_gr1, observations_gr2, no_categories_gr1, no_categories_gr2, interaction_scale, pairwise_difference_scale, main_difference_scale, pairwise_difference_prior, main_difference_prior, inclusion_probability_difference, pairwise_beta_bernoulli_alpha, pairwise_beta_bernoulli_beta, main_beta_bernoulli_alpha, main_beta_bernoulli_beta, Index, iter, burnin, n_cat_obs_gr1, n_cat_obs_gr2, sufficient_blume_capel_gr1, sufficient_blume_capel_gr2, threshold_alpha, threshold_beta, na_impute, missing_index_gr1, missing_index_gr2, ordinal_variable, reference_category, independent_thresholds, save, display_progress, difference_selection)
 }
 
+compute_Vn_mfm_sbm <- function(no_variables, dirichlet_alpha, t_max) {
+    .Call(`_bgms_compute_Vn_mfm_sbm`, no_variables, dirichlet_alpha, t_max)
+}
+
diff --git a/R/bgm.R b/R/bgm.R
index 799eb625..ce2ef829 100644
--- a/R/bgm.R
+++ b/R/bgm.R
@@ -521,13 +521,15 @@ bgm = function(x,
     if(edge_selection == TRUE) {
       if(edge_prior == "Stochastic-Block"){
 
-        summarySbm <- summary_SBM(cluster_allocations = out$allocations)
+        summarySbm <- summary_SBM(cluster_allocations = out$allocations,
+                                  dirichlet_alpha = dirichlet_alpha)
 
         output = list(indicator = indicator,
                       interactions = interactions,
                       thresholds = thresholds,
                       allocations = summarySbm$allocations,
                       clusters = summarySbm$no_clusters,
+                      r_b_cluster_probability = summarySbm$r_b_cluster_prob,
                       arguments = arguments)
       } else {
 
diff --git a/R/utility_functions.R b/R/utility_functions.R
index 4c12ba61..56476564 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -1013,7 +1013,6 @@ compare_reformat_data = function(x,
 #  Geng, J., Bhattacharya, A., & Pati, D. (2019). Probabilistic Community
 #  Detection With Unknown Number of Communities, Journal of the American
 #  Statistical Association, 114:526, 893-905, DOI:10.1080/01621459.2018.1458618
-
 getDahl <- function(cluster_allocations) {
   # Dimensions of the input matrix
   niters <- nrow(cluster_allocations)  # Number of iterations
@@ -1045,21 +1044,87 @@ getDahl <- function(cluster_allocations) {
   return(DahlAns)
 }
 
+# Calculate the conditional probability of the number of
+# components given the cardinality of a sampled
+# allocation vector based on Equation (3.7) from
+# Miller & Harrison (2018). Mixture Models With a Prior on the Number of
+# Components, Journal of the American Statistical Association, 113:521, 340-356,
+# DOI:10.1080/01621459.2016.1255636
+
+compute_p_k_given_t <- function(t, log_Vn, dirichlet_alpha, no_variables) {
+  # Define the K_values
+  K_values <- as.numeric(1:no_variables)
+  # Initialize vector for probabilities
+  p_k_given_t <- numeric(length(K_values))
+  # The rate hyperparameter of the Poisson is fixed to 1
+  lambda <- 1
+  # Normalization constant for t
+  log_vn_t <- log_Vn[t]
+
+  # Loop through each value of K
+  for (i in seq_along(K_values)) {
+    K <- K_values[i]
+
+    if (K >= t) {
+      # Falling factorial k_(t) = prod(k - i) for i = 0:(t-1)
+      falling_factorial <- prod(K:(K - t + 1))
+
+      # Rising factorial (dirichlet_alpha k)^(n) = prod(dirichlet_alpha k + i) for i = 0:(no_variables-1)
+      rising_factorial <- prod((dirichlet_alpha * K) + 0:(no_variables - 1))
+
+      # Poisson probability p_K(k) = (lambda^k * exp(-lambda)) / k!
+      poisson_pmf <- dpois(K, lambda)
+
+      # Compute log probability
+      log_p_k <- log(falling_factorial) - log(rising_factorial) + log(poisson_pmf) - log_vn_t
+
+      # Convert log probability to probability
+      p_k_given_t[i] <- exp(log_p_k)
+    } else {
+      # Set probability to zero if K < t
+      p_k_given_t[i] <- 0
+    }
+  }
+  # Normalize probabilities
+  p_k_given_t <- p_k_given_t / sum(p_k_given_t)
 
-# A function that computes the cluster posterior probabilities and allocations
+  # Return a vector of length no_variables with the probabilities
+  return(p_k_given_t)
+}
 
-summary_SBM <- function(cluster_allocations) {
-  # Compute the number of unique clusters for each iteration
-  clusters <- apply(cluster_allocations, 1, function(row) length(unique(row)))
 
+# A function that computes the posterior probabilities of the
+# number of sampled clusters, the Rao-Blackwellized probabilities
+# and the allocations of the nodes based on Dahl's method
+summary_SBM <- function(cluster_allocations, dirichlet_alpha) {
+
+  # precompute things
+  no_variables <- ncol(cluster_allocations)
+  log_Vn <- compute_Vn_mfm_sbm(no_variables, dirichlet_alpha, no_variables)
+  # Compute the number of unique clusters (t) for each iteration
+  # i.e., the cardinality  of the partition z
+  clusters <- apply(cluster_allocations, 1, function(row) length(unique(row)))
   # Compute the posterior probabilities of the actual unique clusters
   no_clusters <- table(clusters) / length(clusters)
 
+  # Compute the conditional probabilities of the number of clusters
+  # for each row in clusters
+  p_k_given_t <- matrix(NA, nrow = length(clusters), ncol = no_variables)
+
+  for (i in 1:length(clusters)) {
+    p_k_given_t[i, ] <- compute_p_k_given_t(clusters[i],
+                                            log_Vn,
+                                            dirichlet_alpha,
+                                            no_variables)
+  }
+  # average across all iterations
+  p_k_given_t <- colMeans(p_k_given_t)
+
   # Compute the allocations of the nodes based on Dahl's method
   allocations <- getDahl(cluster_allocations)
 
   # Return the results
   return(list(no_clusters = no_clusters,
+              r_b_cluster_prob = p_k_given_t, # the Rao-Blackwellized prob
               allocations = allocations))
 }
-
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index d1858a8f..20931308 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -123,12 +123,26 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// compute_Vn_mfm_sbm
+NumericVector compute_Vn_mfm_sbm(int no_variables, double dirichlet_alpha, int t_max);
+RcppExport SEXP _bgms_compute_Vn_mfm_sbm(SEXP no_variablesSEXP, SEXP dirichlet_alphaSEXP, SEXP t_maxSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< int >::type no_variables(no_variablesSEXP);
+    Rcpp::traits::input_parameter< double >::type dirichlet_alpha(dirichlet_alphaSEXP);
+    Rcpp::traits::input_parameter< int >::type t_max(t_maxSEXP);
+    rcpp_result_gen = Rcpp::wrap(compute_Vn_mfm_sbm(no_variables, dirichlet_alpha, t_max));
+    return rcpp_result_gen;
+END_RCPP
+}
 
 static const R_CallMethodDef CallEntries[] = {
     {"_bgms_sample_omrf_gibbs", (DL_FUNC) &_bgms_sample_omrf_gibbs, 6},
     {"_bgms_sample_bcomrf_gibbs", (DL_FUNC) &_bgms_sample_bcomrf_gibbs, 8},
     {"_bgms_gibbs_sampler", (DL_FUNC) &_bgms_gibbs_sampler, 27},
     {"_bgms_compare_gibbs_sampler", (DL_FUNC) &_bgms_compare_gibbs_sampler, 32},
+    {"_bgms_compute_Vn_mfm_sbm", (DL_FUNC) &_bgms_compute_Vn_mfm_sbm, 3},
     {NULL, NULL, 0}
 };
 
diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp
index 0cdbc535..be7ed6eb 100644
--- a/src/gibbs_functions_edge_prior.cpp
+++ b/src/gibbs_functions_edge_prior.cpp
@@ -82,6 +82,7 @@ NumericMatrix add_row_col_block_prob_matrix(NumericMatrix X,
 // ----------------------------------------------------------------------------|
 // Compute partition coefficient for the MFM - SBM
 // ----------------------------------------------------------------------------|
+// [[Rcpp::export]]
 NumericVector compute_Vn_mfm_sbm(int no_variables,
                                  double dirichlet_alpha,
                                  int t_max) {

From 28394fe12f391ac9c7b1ec82cd81f68ec5c788a4 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Fri, 20 Dec 2024 16:42:36 +0100
Subject: [PATCH 08/21] Update: Include the proper calculation of the
 probability of the number of components. Also, some fixes in the roxygen
 code.

---
 R/bgm.R               | 33 ++++++++++++++++++++-------------
 R/utility_functions.R | 13 +++++++------
 inst/REFERENCES.bib   | 12 ++++++++++++
 man/bgm.Rd            | 30 +++++++++++++++++++-----------
 4 files changed, 58 insertions(+), 30 deletions(-)

diff --git a/R/bgm.R b/R/bgm.R
index ce2ef829..07e4d566 100644
--- a/R/bgm.R
+++ b/R/bgm.R
@@ -147,7 +147,7 @@
 #' The default is \code{TRUE}.
 #'
 #' @return If \code{save = FALSE} (the default), the result is a list of class
-#' ``bgms'' containing the following matrices with model-averaged quantities:
+#' \code{"bgms"} containing the following matrices with model-averaged quantities:
 #' \itemize{
 #' \item \code{indicator}: A matrix with \code{p} rows and \code{p} columns,
 #' containing the posterior inclusion probabilities of individual edges.
@@ -155,16 +155,23 @@
 #' containing model-averaged posterior means of the pairwise associations.
 #' \item \code{thresholds}: A matrix with \code{p} rows and \code{max(m)}
 #' columns, containing model-averaged category thresholds. In the case of
-#' ``blume-capel'' variables, the first entry is the parameter for the linear
+#' \code{"blume-capel"} variables, the first entry is the parameter for the linear
 #' effect and the second entry is the parameter for the quadratic effect, which
 #' models the offset to the reference category.
-#' \item In the case of
-#' \code{edge_prior = "Stochastic-Block"} two additional elements are returned:
-#' a vector \code{allocations} with the estimated cluster assignments of the nodes and an
-#' table \code{clusters} with the estimated posterior probability of the number
-#' of clusters in the network. The vector of node allocations is calculated using
-#' a method proposed by \insertCite{Dahl2009}{bgms} and also used by
-#' \insertCite{GengEtAl_2019}{bgms}.
+#' \item In the case of \code{edge_prior = "Stochastic-Block"}, two additional
+#' elements are returned:
+#'   \itemize{
+#'     \item A vector \code{allocations} with the estimated cluster assignments
+#'     of the nodes, calculated using a method proposed by \code{Dahl (2009)} and
+#'     also used by \code{Geng et al. (2019)}.
+#'     \item A matrix \code{components} with the estimated posterior probability
+#'     of the number of components (clusters) in the network. These probabilities
+#'     are calculated based on Equation 3.7 in \code{Miller (2018)}, which computes
+#'     the conditional probability of the number of components given the
+#'     number of clusters. The number of clusters is derived from the
+#'     cardinality of the sampled \code{allocations} vector for each iteration of
+#'     the MCMC sampler (see \code{save = TRUE}).
+#'   }
 #' }
 #' If \code{save = TRUE}, the result is a list of class ``bgms'' containing:
 #' \itemize{
@@ -180,8 +187,9 @@
 #' \item In the case of
 #' \code{edge_prior = "Stochastic-Block"} a matrix \code{allocations} with the
 #' cluster assignments of the nodes from each iteration is returned. This matrix
-#' can be used to calculate the posterior probability of the number of clusters
-#' by utilizing the \code{summary_SBM(bgm_output[["allocations"]])} function.
+#' can be used to calculate the posterior probability of the number of components
+#' and estimated cluster assignments of the nodes, by and  utilizing the
+#' \code{summary_SBM()} function.
 #' }
 #' Column averages of these matrices provide the model-averaged posterior means.
 #' Except for the \code{allocations} matrix, for which the \code{summary_SBM}
@@ -527,9 +535,8 @@ bgm = function(x,
         output = list(indicator = indicator,
                       interactions = interactions,
                       thresholds = thresholds,
+                      components = summarySbm$components,
                       allocations = summarySbm$allocations,
-                      clusters = summarySbm$no_clusters,
-                      r_b_cluster_probability = summarySbm$r_b_cluster_prob,
                       arguments = arguments)
       } else {
 
diff --git a/R/utility_functions.R b/R/utility_functions.R
index 56476564..cef1a076 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -1094,7 +1094,7 @@ compute_p_k_given_t <- function(t, log_Vn, dirichlet_alpha, no_variables) {
 
 
 # A function that computes the posterior probabilities of the
-# number of sampled clusters, the Rao-Blackwellized probabilities
+# number of components K given the cardinality of the partition t
 # and the allocations of the nodes based on Dahl's method
 summary_SBM <- function(cluster_allocations, dirichlet_alpha) {
 
@@ -1104,9 +1104,6 @@ summary_SBM <- function(cluster_allocations, dirichlet_alpha) {
   # Compute the number of unique clusters (t) for each iteration
   # i.e., the cardinality  of the partition z
   clusters <- apply(cluster_allocations, 1, function(row) length(unique(row)))
-  # Compute the posterior probabilities of the actual unique clusters
-  no_clusters <- table(clusters) / length(clusters)
-
   # Compute the conditional probabilities of the number of clusters
   # for each row in clusters
   p_k_given_t <- matrix(NA, nrow = length(clusters), ncol = no_variables)
@@ -1120,11 +1117,15 @@ summary_SBM <- function(cluster_allocations, dirichlet_alpha) {
   # average across all iterations
   p_k_given_t <- colMeans(p_k_given_t)
 
+  # make it as a matrix
+  no_components <- 1:no_variables
+  components <- cbind(no_components, p_k_given_t)
+  colnames(components) <- c("no_components", "probability")
+
   # Compute the allocations of the nodes based on Dahl's method
   allocations <- getDahl(cluster_allocations)
 
   # Return the results
-  return(list(no_clusters = no_clusters,
-              r_b_cluster_prob = p_k_given_t, # the Rao-Blackwellized prob
+  return(list(components = components,
               allocations = allocations))
 }
diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib
index 18afffc3..ee85e281 100644
--- a/inst/REFERENCES.bib
+++ b/inst/REFERENCES.bib
@@ -1,3 +1,15 @@
+@ARTICLE{miller2018mixture,
+  title={Mixture models with a prior on the number of components},
+  author={Miller, Jeffrey W and Harrison, Matthew T},
+  journal={Journal of the American Statistical Association},
+  volume={113},
+  number={521},
+  pages={340--356},
+  year={2018},
+  doi = {10.1080/01621459.2016.1255636},
+  publisher={Taylor \& Francis}
+}
+
 @article{Dahl2009,
   author       = {David B. Dahl},
   title        = {Modal clustering in a class of product partition models},
diff --git a/man/bgm.Rd b/man/bgm.Rd
index d091164b..0b573a11 100644
--- a/man/bgm.Rd
+++ b/man/bgm.Rd
@@ -144,7 +144,7 @@ The default is \code{TRUE}.}
 }
 \value{
 If \code{save = FALSE} (the default), the result is a list of class
-``bgms'' containing the following matrices with model-averaged quantities:
+\code{"bgms"} containing the following matrices with model-averaged quantities:
 \itemize{
 \item \code{indicator}: A matrix with \code{p} rows and \code{p} columns,
 containing the posterior inclusion probabilities of individual edges.
@@ -152,16 +152,23 @@ containing the posterior inclusion probabilities of individual edges.
 containing model-averaged posterior means of the pairwise associations.
 \item \code{thresholds}: A matrix with \code{p} rows and \code{max(m)}
 columns, containing model-averaged category thresholds. In the case of
-``blume-capel'' variables, the first entry is the parameter for the linear
+\code{"blume-capel"} variables, the first entry is the parameter for the linear
 effect and the second entry is the parameter for the quadratic effect, which
 models the offset to the reference category.
-\item In the case of
-\code{edge_prior = "Stochastic-Block"} two additional elements are returned:
-a vector \code{allocations} with the estimated cluster assignments of the nodes and an
-table \code{clusters} with the estimated posterior probability of the number
-of clusters in the network. The vector of node allocations is calculated using
-a method proposed by \insertCite{Dahl2009}{bgms} and also used by
-\insertCite{GengEtAl_2019}{bgms}.
+\item In the case of \code{edge_prior = "Stochastic-Block"}, two additional
+elements are returned:
+  \itemize{
+    \item A vector \code{allocations} with the estimated cluster assignments
+    of the nodes, calculated using a method proposed by \code{Dahl (2009)} and
+    also used by \code{Geng et al. (2019)}.
+    \item A table \code{components} with the estimated posterior probability
+    of the number of components (clusters) in the network. These probabilities
+    are calculated based on Equation 3.7 in \code{Miller (2018)}, which computes
+    the conditional probability of the number of components given the
+    number of clusters. The number of clusters is derived from the
+    cardinality of the sampled \code{allocations} vector for each iteration of
+    the MCMC sampler (see \code{save = TRUE}).
+  }
 }
 If \code{save = TRUE}, the result is a list of class ``bgms'' containing:
 \itemize{
@@ -177,8 +184,9 @@ the Gibbs sampler for the category thresholds.
 \item In the case of
 \code{edge_prior = "Stochastic-Block"} a matrix \code{allocations} with the
 cluster assignments of the nodes from each iteration is returned. This matrix
-can be used to calculate the posterior probability of the number of clusters
-by utilizing the \code{summary_SBM(bgm_output[["allocations"]])} function.
+can be used to calculate the posterior probability of the number of components
+and estimated cluster assignments of the nodes, by and  utilizing the
+\code{summary_SBM()} function.
 }
 Column averages of these matrices provide the model-averaged posterior means.
 Except for the \code{allocations} matrix, for which the \code{summary_SBM}

From b8f3de58200e2d31a37d2dd8e61ffbdce1f4841d Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Mon, 23 Dec 2024 13:50:57 +0100
Subject: [PATCH 09/21] update: initial configuration of the cluster
 memberships.

---
 src/gibbs_functions.cpp | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp
index 56e06236..df82d3c1 100644
--- a/src/gibbs_functions.cpp
+++ b/src/gibbs_functions.cpp
@@ -870,15 +870,14 @@ List gibbs_sampler(IntegerMatrix observations,
   NumericMatrix out_allocations(iter, no_variables);
 
   if(edge_prior == "Stochastic-Block") { // Initial Configuration of the cluster allocations
-    cluster_allocations[0] = 0;
-    cluster_allocations[1] = 1;
-    for(int i = 2; i < no_variables; i++) {
+    // Randomly choose the number of clusters (k) between 1 and no_variables
+    int k = static_cast<int>(R::unif_rand() * no_variables) + 1;
+
+    // Assign each variable to a random cluster
+    for (int i = 0; i < no_variables; i++) {
       double U = R::unif_rand();
-      if(U > 0.5){
-        cluster_allocations[i] = 1;
-      } else {
-        cluster_allocations[i] = 0;
-      }
+      int cluster = static_cast<int>(U * k); // Randomly select a cluster (0 to k-1)
+      cluster_allocations[i] = cluster;
     }
 
     cluster_prob = block_probs_mfm_sbm(cluster_allocations,
@@ -896,7 +895,7 @@ List gibbs_sampler(IntegerMatrix observations,
 
     log_Vn = compute_Vn_mfm_sbm(no_variables,
                                 dirichlet_alpha,
-                                no_variables + 10);
+                                no_variables);
   }
 
   //The Gibbs sampler ----------------------------------------------------------

From 7149ae1b5f34f72e53dabd75428418900fff4e1f Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Thu, 2 Jan 2025 14:29:49 +0100
Subject: [PATCH 10/21] delete unnecessary object

---
 src/gibbs_functions.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp
index df82d3c1..b88ab003 100644
--- a/src/gibbs_functions.cpp
+++ b/src/gibbs_functions.cpp
@@ -812,7 +812,6 @@ List gibbs_sampler(IntegerMatrix observations,
   int no_interactions = Index.nrow();
   int no_thresholds = sum(no_categories);
   int max_no_categories = max(no_categories);
-  IntegerVector K_values;  // To store sampled K values
 
 
   IntegerVector v = seq(0, no_interactions - 1);

From 39e3ab0f4e2e45c69144783ccc85588fa017d574 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Fri, 3 Jan 2025 13:35:47 +0100
Subject: [PATCH 11/21] correction to the truncated Poisson distribution

---
 R/utility_functions.R | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/R/utility_functions.R b/R/utility_functions.R
index cef1a076..b9d4302e 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -1061,34 +1061,28 @@ compute_p_k_given_t <- function(t, log_Vn, dirichlet_alpha, no_variables) {
   # Normalization constant for t
   log_vn_t <- log_Vn[t]
 
+  # Normalizing factor for the truncated Poisson distribution
+  norm_factor <- 1 - dpois(0, lambda)
+  truncated_poisson_pmf <- dpois(K_values, lambda) / norm_factor
+
   # Loop through each value of K
   for (i in seq_along(K_values)) {
     K <- K_values[i]
-
     if (K >= t) {
-      # Falling factorial k_(t) = prod(k - i) for i = 0:(t-1)
+      # Falling factorial
       falling_factorial <- prod(K:(K - t + 1))
-
-      # Rising factorial (dirichlet_alpha k)^(n) = prod(dirichlet_alpha k + i) for i = 0:(no_variables-1)
+      # Rising factorial
       rising_factorial <- prod((dirichlet_alpha * K) + 0:(no_variables - 1))
-
-      # Poisson probability p_K(k) = (lambda^k * exp(-lambda)) / k!
-      poisson_pmf <- dpois(K, lambda)
-
       # Compute log probability
-      log_p_k <- log(falling_factorial) - log(rising_factorial) + log(poisson_pmf) - log_vn_t
-
+      log_p_k <- log(falling_factorial) - log(rising_factorial) + log(truncated_poisson_pmf[i]) - log_vn_t
       # Convert log probability to probability
       p_k_given_t[i] <- exp(log_p_k)
     } else {
-      # Set probability to zero if K < t
       p_k_given_t[i] <- 0
     }
   }
   # Normalize probabilities
   p_k_given_t <- p_k_given_t / sum(p_k_given_t)
-
-  # Return a vector of length no_variables with the probabilities
   return(p_k_given_t)
 }
 

From 6a8ed38bb4dc20fea1210f170b938d98a500c875 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Fri, 10 Jan 2025 14:10:28 +0100
Subject: [PATCH 12/21] update: randomize the order of the variables in the for
 loopp in the block_allocations_mfm_sbm function

---
 src/gibbs_functions_edge_prior.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp
index be7ed6eb..6da4100a 100644
--- a/src/gibbs_functions_edge_prior.cpp
+++ b/src/gibbs_functions_edge_prior.cpp
@@ -228,7 +228,11 @@ IntegerVector block_allocations_mfm_sbm(IntegerVector cluster_assign,
   double loglike;
   double logmarg;
 
-  for(int node = 0; node < no_variables; node++) {
+  // Generate a randomized order using Rcpp's sample function
+  IntegerVector indices = sample(no_variables, no_variables);
+
+  for (int idx = 0; idx < no_variables; ++idx) {
+    int node = indices[idx] - 1; // Convert to zero-based index
     old = cluster_assign[node];
 
     IntegerVector cluster_size = table_cpp(cluster_assign);

From 662d1c69efb6aa9e6750bc90836451da32b3f0d2 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Mon, 13 Jan 2025 15:01:38 +0100
Subject: [PATCH 13/21] update: include a user-specified lambda

---
 R/RcppExports.R                    |  8 ++++----
 R/bgm.R                            |  6 +++++-
 R/utility_functions.R              | 22 ++++++++++++++++------
 src/RcppExports.cpp                | 18 ++++++++++--------
 src/gibbs_functions.cpp            |  4 +++-
 src/gibbs_functions_edge_prior.cpp |  7 ++++++-
 src/gibbs_functions_edge_prior.h   |  3 ++-
 7 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/R/RcppExports.R b/R/RcppExports.R
index c5a186f9..ff281bfa 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -9,15 +9,15 @@ sample_bcomrf_gibbs <- function(no_states, no_variables, no_categories, interact
     .Call(`_bgms_sample_bcomrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter)
 }
 
-gibbs_sampler <- function(observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save = FALSE, display_progress = FALSE, edge_selection = TRUE) {
-    .Call(`_bgms_gibbs_sampler`, observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save, display_progress, edge_selection)
+gibbs_sampler <- function(observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, lambda, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save = FALSE, display_progress = FALSE, edge_selection = TRUE) {
+    .Call(`_bgms_gibbs_sampler`, observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, lambda, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save, display_progress, edge_selection)
 }
 
 compare_gibbs_sampler <- function(observations_gr1, observations_gr2, no_categories_gr1, no_categories_gr2, interaction_scale, pairwise_difference_scale, main_difference_scale, pairwise_difference_prior, main_difference_prior, inclusion_probability_difference, pairwise_beta_bernoulli_alpha, pairwise_beta_bernoulli_beta, main_beta_bernoulli_alpha, main_beta_bernoulli_beta, Index, iter, burnin, n_cat_obs_gr1, n_cat_obs_gr2, sufficient_blume_capel_gr1, sufficient_blume_capel_gr2, threshold_alpha, threshold_beta, na_impute, missing_index_gr1, missing_index_gr2, ordinal_variable, reference_category, independent_thresholds, save = FALSE, display_progress = FALSE, difference_selection = TRUE) {
     .Call(`_bgms_compare_gibbs_sampler`, observations_gr1, observations_gr2, no_categories_gr1, no_categories_gr2, interaction_scale, pairwise_difference_scale, main_difference_scale, pairwise_difference_prior, main_difference_prior, inclusion_probability_difference, pairwise_beta_bernoulli_alpha, pairwise_beta_bernoulli_beta, main_beta_bernoulli_alpha, main_beta_bernoulli_beta, Index, iter, burnin, n_cat_obs_gr1, n_cat_obs_gr2, sufficient_blume_capel_gr1, sufficient_blume_capel_gr2, threshold_alpha, threshold_beta, na_impute, missing_index_gr1, missing_index_gr2, ordinal_variable, reference_category, independent_thresholds, save, display_progress, difference_selection)
 }
 
-compute_Vn_mfm_sbm <- function(no_variables, dirichlet_alpha, t_max) {
-    .Call(`_bgms_compute_Vn_mfm_sbm`, no_variables, dirichlet_alpha, t_max)
+compute_Vn_mfm_sbm <- function(no_variables, dirichlet_alpha, t_max, lambda) {
+    .Call(`_bgms_compute_Vn_mfm_sbm`, no_variables, dirichlet_alpha, t_max, lambda)
 }
 
diff --git a/R/bgm.R b/R/bgm.R
index 07e4d566..3ead1e00 100644
--- a/R/bgm.R
+++ b/R/bgm.R
@@ -300,6 +300,7 @@ bgm = function(x,
                beta_bernoulli_alpha = 1,
                beta_bernoulli_beta = 1,
                dirichlet_alpha = 1,
+               lambda = 1,
                na.action = c("listwise", "impute"),
                save = FALSE,
                display_progress = TRUE) {
@@ -452,6 +453,7 @@ bgm = function(x,
                       beta_bernoulli_alpha = beta_bernoulli_alpha,
                       beta_bernoulli_beta = beta_bernoulli_beta,
                       dirichlet_alpha = dirichlet_alpha,
+                      lambda = lambda,
                       Index = Index,
                       iter = iter,
                       burnin = burnin,
@@ -485,6 +487,7 @@ bgm = function(x,
     beta_bernoulli_alpha = beta_bernoulli_alpha ,
     beta_bernoulli_beta =  beta_bernoulli_beta,
     dirichlet_alpha = dirichlet_alpha,
+    lambda = lambda,
     na.action = na.action,
     save = save,
     version = packageVersion("bgms")
@@ -530,7 +533,8 @@ bgm = function(x,
       if(edge_prior == "Stochastic-Block"){
 
         summarySbm <- summary_SBM(cluster_allocations = out$allocations,
-                                  dirichlet_alpha = dirichlet_alpha)
+                                  dirichlet_alpha = dirichlet_alpha,
+                                  lambda = lambda)
 
         output = list(indicator = indicator,
                       interactions = interactions,
diff --git a/R/utility_functions.R b/R/utility_functions.R
index b9d4302e..2829959a 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -1051,13 +1051,17 @@ getDahl <- function(cluster_allocations) {
 # Components, Journal of the American Statistical Association, 113:521, 340-356,
 # DOI:10.1080/01621459.2016.1255636
 
-compute_p_k_given_t <- function(t, log_Vn, dirichlet_alpha, no_variables) {
+compute_p_k_given_t <- function(t,
+                                log_Vn,
+                                dirichlet_alpha,
+                                no_variables,
+                                lambda) {
   # Define the K_values
   K_values <- as.numeric(1:no_variables)
+
   # Initialize vector for probabilities
   p_k_given_t <- numeric(length(K_values))
-  # The rate hyperparameter of the Poisson is fixed to 1
-  lambda <- 1
+
   # Normalization constant for t
   log_vn_t <- log_Vn[t]
 
@@ -1090,11 +1094,16 @@ compute_p_k_given_t <- function(t, log_Vn, dirichlet_alpha, no_variables) {
 # A function that computes the posterior probabilities of the
 # number of components K given the cardinality of the partition t
 # and the allocations of the nodes based on Dahl's method
-summary_SBM <- function(cluster_allocations, dirichlet_alpha) {
+summary_SBM <- function(cluster_allocations,
+                        dirichlet_alpha,
+                        lambda) {
 
   # precompute things
   no_variables <- ncol(cluster_allocations)
-  log_Vn <- compute_Vn_mfm_sbm(no_variables, dirichlet_alpha, no_variables)
+  log_Vn <- compute_Vn_mfm_sbm(no_variables,
+                               dirichlet_alpha,
+                               no_variables,
+                               lambda)
   # Compute the number of unique clusters (t) for each iteration
   # i.e., the cardinality  of the partition z
   clusters <- apply(cluster_allocations, 1, function(row) length(unique(row)))
@@ -1106,7 +1115,8 @@ summary_SBM <- function(cluster_allocations, dirichlet_alpha) {
     p_k_given_t[i, ] <- compute_p_k_given_t(clusters[i],
                                             log_Vn,
                                             dirichlet_alpha,
-                                            no_variables)
+                                            no_variables,
+                                            lambda)
   }
   # average across all iterations
   p_k_given_t <- colMeans(p_k_given_t)
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 20931308..e777dff1 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -45,8 +45,8 @@ BEGIN_RCPP
 END_RCPP
 }
 // gibbs_sampler
-List gibbs_sampler(IntegerMatrix observations, IntegerMatrix indicator, NumericMatrix interactions, NumericMatrix thresholds, IntegerVector no_categories, double interaction_scale, NumericMatrix proposal_sd, NumericMatrix proposal_sd_blumecapel, String edge_prior, NumericMatrix theta, double beta_bernoulli_alpha, double beta_bernoulli_beta, double dirichlet_alpha, IntegerMatrix Index, int iter, int burnin, IntegerMatrix n_cat_obs, IntegerMatrix sufficient_blume_capel, double threshold_alpha, double threshold_beta, bool na_impute, IntegerMatrix missing_index, LogicalVector variable_bool, IntegerVector reference_category, bool save, bool display_progress, bool edge_selection);
-RcppExport SEXP _bgms_gibbs_sampler(SEXP observationsSEXP, SEXP indicatorSEXP, SEXP interactionsSEXP, SEXP thresholdsSEXP, SEXP no_categoriesSEXP, SEXP interaction_scaleSEXP, SEXP proposal_sdSEXP, SEXP proposal_sd_blumecapelSEXP, SEXP edge_priorSEXP, SEXP thetaSEXP, SEXP beta_bernoulli_alphaSEXP, SEXP beta_bernoulli_betaSEXP, SEXP dirichlet_alphaSEXP, SEXP IndexSEXP, SEXP iterSEXP, SEXP burninSEXP, SEXP n_cat_obsSEXP, SEXP sufficient_blume_capelSEXP, SEXP threshold_alphaSEXP, SEXP threshold_betaSEXP, SEXP na_imputeSEXP, SEXP missing_indexSEXP, SEXP variable_boolSEXP, SEXP reference_categorySEXP, SEXP saveSEXP, SEXP display_progressSEXP, SEXP edge_selectionSEXP) {
+List gibbs_sampler(IntegerMatrix observations, IntegerMatrix indicator, NumericMatrix interactions, NumericMatrix thresholds, IntegerVector no_categories, double interaction_scale, NumericMatrix proposal_sd, NumericMatrix proposal_sd_blumecapel, String edge_prior, NumericMatrix theta, double beta_bernoulli_alpha, double beta_bernoulli_beta, double dirichlet_alpha, double lambda, IntegerMatrix Index, int iter, int burnin, IntegerMatrix n_cat_obs, IntegerMatrix sufficient_blume_capel, double threshold_alpha, double threshold_beta, bool na_impute, IntegerMatrix missing_index, LogicalVector variable_bool, IntegerVector reference_category, bool save, bool display_progress, bool edge_selection);
+RcppExport SEXP _bgms_gibbs_sampler(SEXP observationsSEXP, SEXP indicatorSEXP, SEXP interactionsSEXP, SEXP thresholdsSEXP, SEXP no_categoriesSEXP, SEXP interaction_scaleSEXP, SEXP proposal_sdSEXP, SEXP proposal_sd_blumecapelSEXP, SEXP edge_priorSEXP, SEXP thetaSEXP, SEXP beta_bernoulli_alphaSEXP, SEXP beta_bernoulli_betaSEXP, SEXP dirichlet_alphaSEXP, SEXP lambdaSEXP, SEXP IndexSEXP, SEXP iterSEXP, SEXP burninSEXP, SEXP n_cat_obsSEXP, SEXP sufficient_blume_capelSEXP, SEXP threshold_alphaSEXP, SEXP threshold_betaSEXP, SEXP na_imputeSEXP, SEXP missing_indexSEXP, SEXP variable_boolSEXP, SEXP reference_categorySEXP, SEXP saveSEXP, SEXP display_progressSEXP, SEXP edge_selectionSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -63,6 +63,7 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< double >::type beta_bernoulli_alpha(beta_bernoulli_alphaSEXP);
     Rcpp::traits::input_parameter< double >::type beta_bernoulli_beta(beta_bernoulli_betaSEXP);
     Rcpp::traits::input_parameter< double >::type dirichlet_alpha(dirichlet_alphaSEXP);
+    Rcpp::traits::input_parameter< double >::type lambda(lambdaSEXP);
     Rcpp::traits::input_parameter< IntegerMatrix >::type Index(IndexSEXP);
     Rcpp::traits::input_parameter< int >::type iter(iterSEXP);
     Rcpp::traits::input_parameter< int >::type burnin(burninSEXP);
@@ -77,7 +78,7 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< bool >::type save(saveSEXP);
     Rcpp::traits::input_parameter< bool >::type display_progress(display_progressSEXP);
     Rcpp::traits::input_parameter< bool >::type edge_selection(edge_selectionSEXP);
-    rcpp_result_gen = Rcpp::wrap(gibbs_sampler(observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save, display_progress, edge_selection));
+    rcpp_result_gen = Rcpp::wrap(gibbs_sampler(observations, indicator, interactions, thresholds, no_categories, interaction_scale, proposal_sd, proposal_sd_blumecapel, edge_prior, theta, beta_bernoulli_alpha, beta_bernoulli_beta, dirichlet_alpha, lambda, Index, iter, burnin, n_cat_obs, sufficient_blume_capel, threshold_alpha, threshold_beta, na_impute, missing_index, variable_bool, reference_category, save, display_progress, edge_selection));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -124,15 +125,16 @@ BEGIN_RCPP
 END_RCPP
 }
 // compute_Vn_mfm_sbm
-NumericVector compute_Vn_mfm_sbm(int no_variables, double dirichlet_alpha, int t_max);
-RcppExport SEXP _bgms_compute_Vn_mfm_sbm(SEXP no_variablesSEXP, SEXP dirichlet_alphaSEXP, SEXP t_maxSEXP) {
+NumericVector compute_Vn_mfm_sbm(int no_variables, double dirichlet_alpha, int t_max, double lambda);
+RcppExport SEXP _bgms_compute_Vn_mfm_sbm(SEXP no_variablesSEXP, SEXP dirichlet_alphaSEXP, SEXP t_maxSEXP, SEXP lambdaSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< int >::type no_variables(no_variablesSEXP);
     Rcpp::traits::input_parameter< double >::type dirichlet_alpha(dirichlet_alphaSEXP);
     Rcpp::traits::input_parameter< int >::type t_max(t_maxSEXP);
-    rcpp_result_gen = Rcpp::wrap(compute_Vn_mfm_sbm(no_variables, dirichlet_alpha, t_max));
+    Rcpp::traits::input_parameter< double >::type lambda(lambdaSEXP);
+    rcpp_result_gen = Rcpp::wrap(compute_Vn_mfm_sbm(no_variables, dirichlet_alpha, t_max, lambda));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -140,9 +142,9 @@ END_RCPP
 static const R_CallMethodDef CallEntries[] = {
     {"_bgms_sample_omrf_gibbs", (DL_FUNC) &_bgms_sample_omrf_gibbs, 6},
     {"_bgms_sample_bcomrf_gibbs", (DL_FUNC) &_bgms_sample_bcomrf_gibbs, 8},
-    {"_bgms_gibbs_sampler", (DL_FUNC) &_bgms_gibbs_sampler, 27},
+    {"_bgms_gibbs_sampler", (DL_FUNC) &_bgms_gibbs_sampler, 28},
     {"_bgms_compare_gibbs_sampler", (DL_FUNC) &_bgms_compare_gibbs_sampler, 32},
-    {"_bgms_compute_Vn_mfm_sbm", (DL_FUNC) &_bgms_compute_Vn_mfm_sbm, 3},
+    {"_bgms_compute_Vn_mfm_sbm", (DL_FUNC) &_bgms_compute_Vn_mfm_sbm, 4},
     {NULL, NULL, 0}
 };
 
diff --git a/src/gibbs_functions.cpp b/src/gibbs_functions.cpp
index b88ab003..6a8512e6 100644
--- a/src/gibbs_functions.cpp
+++ b/src/gibbs_functions.cpp
@@ -792,6 +792,7 @@ List gibbs_sampler(IntegerMatrix observations,
                    double beta_bernoulli_alpha,
                    double beta_bernoulli_beta,
                    double dirichlet_alpha,
+                   double lambda,
                    IntegerMatrix Index,
                    int iter,
                    int burnin,
@@ -894,7 +895,8 @@ List gibbs_sampler(IntegerMatrix observations,
 
     log_Vn = compute_Vn_mfm_sbm(no_variables,
                                 dirichlet_alpha,
-                                no_variables);
+                                no_variables,
+                                lambda);
   }
 
   //The Gibbs sampler ----------------------------------------------------------
diff --git a/src/gibbs_functions_edge_prior.cpp b/src/gibbs_functions_edge_prior.cpp
index 6da4100a..fef05628 100644
--- a/src/gibbs_functions_edge_prior.cpp
+++ b/src/gibbs_functions_edge_prior.cpp
@@ -85,7 +85,8 @@ NumericMatrix add_row_col_block_prob_matrix(NumericMatrix X,
 // [[Rcpp::export]]
 NumericVector compute_Vn_mfm_sbm(int no_variables,
                                  double dirichlet_alpha,
-                                 int t_max) {
+                                 int t_max,
+                                 double lambda) {
   NumericVector log_Vn(t_max);
   double r;
   double tmp;
@@ -102,6 +103,10 @@ NumericVector compute_Vn_mfm_sbm(int no_variables,
       }
       tmp -= std::lgamma(k + 1);
 
+      // Add the poisson term
+      double log_norm_factor = log(1.0 - exp(R::dpois(0, lambda, true)));
+      tmp += R::dpois(k-1, lambda, true) - log_norm_factor;
+
       // Compute the maximum between r and tmp
       if (tmp > r) {
         r = std::log(std::exp(r - tmp) + 1) + tmp;
diff --git a/src/gibbs_functions_edge_prior.h b/src/gibbs_functions_edge_prior.h
index 827c4ea9..6818dbb8 100644
--- a/src/gibbs_functions_edge_prior.h
+++ b/src/gibbs_functions_edge_prior.h
@@ -6,7 +6,8 @@ using namespace Rcpp;
 // ----------------------------------------------------------------------------|
 Rcpp::NumericVector compute_Vn_mfm_sbm(int no_variables,
                                        double dirichlet_alpha,
-                                       int t_max);
+                                       int t_max,
+                                       double lambda);
 
 // ----------------------------------------------------------------------------|
 // Sample the block allocations for the MFM - SBM

From 12fd23bcde73311867633a6312c19805bcda068e Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Tue, 21 Jan 2025 18:50:17 +0100
Subject: [PATCH 14/21] update SBM

---
 R/bgm.R               | 10 +++++++---
 R/utility_functions.R | 15 ++++++++-------
 bgms.Rproj            |  1 +
 man/bgm.Rd            | 10 +++++++---
 4 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/R/bgm.R b/R/bgm.R
index 3ead1e00..82426849 100644
--- a/R/bgm.R
+++ b/R/bgm.R
@@ -116,9 +116,12 @@
 #' when nodes are expected to fall into distinct clusters. The inclusion
 #' probabilities for the edges are defined at the level of the clusters, with a
 #' beta prior for the unknown inclusion probability with shape parameters
-#' \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, and a Dirichlet
+#' \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, a Dirichlet
 #' prior on the cluster assignment probabilities with a common concentration
-#' parameter \code{dirichlet_alpha}. The default is
+#' parameter \code{dirichlet_alpha} and a zero-truncated Poisson prior on the
+#' number of clusters with a rate parameter \code{lambda}, indicating the
+#' expected number of clusters.
+#' The default is
 #' \code{edge_prior = "Bernoulli"}.
 #' @param inclusion_probability The prior edge inclusion probability for the
 #' Bernoulli model. Can be a single probability, or a matrix of \code{p} rows
@@ -327,7 +330,8 @@ bgm = function(x,
                       inclusion_probability = inclusion_probability,
                       beta_bernoulli_alpha = beta_bernoulli_alpha,
                       beta_bernoulli_beta = beta_bernoulli_beta,
-                      dirichlet_alpha = dirichlet_alpha)
+                      dirichlet_alpha = dirichlet_alpha,
+                      lambda = lambda)
 
   # ----------------------------------------------------------------------------
   # The vector variable_type is now coded as boolean.
diff --git a/R/utility_functions.R b/R/utility_functions.R
index 2829959a..27f9a811 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -13,7 +13,8 @@ check_model = function(x,
                        inclusion_probability = 0.5,
                        beta_bernoulli_alpha = 1,
                        beta_bernoulli_beta = 1,
-                       dirichlet_alpha = dirichlet_alpha) {
+                       dirichlet_alpha = dirichlet_alpha,
+                       lambda = lambda) {
 
   #Check variable type input ---------------------------------------------------
   if(length(variable_type) == 1) {
@@ -187,14 +188,14 @@ check_model = function(x,
     }
     if(edge_prior == "Stochastic-Block") {
       theta = matrix(0.5, nrow = ncol(x), ncol = ncol(x))
-      if(beta_bernoulli_alpha <= 0 || beta_bernoulli_beta <= 0 || dirichlet_alpha <= 0)
-        stop("The scale parameters of the beta and Dirichlet distribution need to be positive.")
-      if(!is.finite(beta_bernoulli_alpha) || !is.finite(beta_bernoulli_beta) || !is.finite(dirichlet_alpha))
-        stop("The scale parameters of the beta and Dirichlet distribution need to be finite.")
+      if(beta_bernoulli_alpha <= 0 || beta_bernoulli_beta <= 0 || dirichlet_alpha <= 0 || lambda <= 0)
+        stop("The scale parameters the beta distrubution, the concentration parameter of the Dirichlet distribution and the rate parameter of the Poisson distribution need to be positive.")
+      if(!is.finite(beta_bernoulli_alpha) || !is.finite(beta_bernoulli_beta) || !is.finite(dirichlet_alpha) || !is.finite(lambda))
+        stop("The scale parameters of the beta distrubution, the concentration parameter of the Dirichlet distribution and the rate parameter of the Poisson distribution need to be finite.")
       if(is.na(beta_bernoulli_alpha) || is.na(beta_bernoulli_beta) ||
          is.null(beta_bernoulli_alpha) || is.null(beta_bernoulli_beta) ||
-         is.null(dirichlet_alpha) || is.null(dirichlet_alpha))
-        stop("Values for both scale parameters of the beta and Dirichlet distribution need to be specified.")
+         is.null(dirichlet_alpha) || is.null(dirichlet_alpha) || is.null(lambda) || is.null(lambda))
+        stop("Values for both scale parameters of the betathe concentration parameter of the Dirichlet distribution and the rate parameter of the Poisson distribution need to be specified.")
     }
   } else {
     theta = matrix(0.5, nrow = 1, ncol = 1)
diff --git a/bgms.Rproj b/bgms.Rproj
index 6291372e..1c78b1f3 100644
--- a/bgms.Rproj
+++ b/bgms.Rproj
@@ -1,4 +1,5 @@
 Version: 1.0
+ProjectId: 2fb4a59f-6132-41a6-9659-f1d3297dc9d0
 
 RestoreWorkspace: Default
 SaveWorkspace: Default
diff --git a/man/bgm.Rd b/man/bgm.Rd
index 0b573a11..635b7e1c 100644
--- a/man/bgm.Rd
+++ b/man/bgm.Rd
@@ -20,6 +20,7 @@ bgm(
   beta_bernoulli_alpha = 1,
   beta_bernoulli_beta = 1,
   dirichlet_alpha = 1,
+  lambda = 1,
   na.action = c("listwise", "impute"),
   save = FALSE,
   display_progress = TRUE
@@ -106,9 +107,12 @@ of allocations as described by @GengEtAl_2019}{bgms}. This model is advantageous
 when nodes are expected to fall into distinct clusters. The inclusion
 probabilities for the edges are defined at the level of the clusters, with a
 beta prior for the unknown inclusion probability with shape parameters
-\code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, and a Dirichlet
+\code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}, a Dirichlet
 prior on the cluster assignment probabilities with a common concentration
-parameter \code{dirichlet_alpha}. The default is
+parameter \code{dirichlet_alpha} and a zero-truncated Poisson prior on the
+number of clusters with a rate parameter \code{lambda}, indicating the
+expected number of clusters.
+The default is
 \code{edge_prior = "Bernoulli"}.}
 
 \item{inclusion_probability}{The prior edge inclusion probability for the
@@ -161,7 +165,7 @@ elements are returned:
     \item A vector \code{allocations} with the estimated cluster assignments
     of the nodes, calculated using a method proposed by \code{Dahl (2009)} and
     also used by \code{Geng et al. (2019)}.
-    \item A table \code{components} with the estimated posterior probability
+    \item A matrix \code{components} with the estimated posterior probability
     of the number of components (clusters) in the network. These probabilities
     are calculated based on Equation 3.7 in \code{Miller (2018)}, which computes
     the conditional probability of the number of components given the

From 42f0e86fca78b2e1c3074ef7d1ee70a5fc2c34ae Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Tue, 21 Jan 2025 18:54:44 +0100
Subject: [PATCH 15/21] update

---
 R/utility_functions.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/utility_functions.R b/R/utility_functions.R
index 27f9a811..3064c929 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -1088,6 +1088,7 @@ compute_p_k_given_t <- function(t,
   }
   # Normalize probabilities
   p_k_given_t <- p_k_given_t / sum(p_k_given_t)
+
   return(p_k_given_t)
 }
 
@@ -1095,6 +1096,7 @@ compute_p_k_given_t <- function(t,
 # A function that computes the posterior probabilities of the
 # number of components K given the cardinality of the partition t
 # and the allocations of the nodes based on Dahl's method
+
 summary_SBM <- function(cluster_allocations,
                         dirichlet_alpha,
                         lambda) {
@@ -1130,7 +1132,6 @@ summary_SBM <- function(cluster_allocations,
   # Compute the allocations of the nodes based on Dahl's method
   allocations <- getDahl(cluster_allocations)
 
-  # Return the results
   return(list(components = components,
               allocations = allocations))
 }

From 2d4a8ac83710464639748df21e5850a8312f3d48 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Tue, 21 Jan 2025 20:05:52 +0100
Subject: [PATCH 16/21] sloving note: "Undefined global functions or variables:
 dpois"

---
 R/utility_functions.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/utility_functions.R b/R/utility_functions.R
index 3064c929..14875b93 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -1,6 +1,7 @@
 #' @importFrom Rcpp evalCpp
 #' @importFrom Rdpack reprompt
 #' @importFrom methods hasArg
+#' @importFrom stats dpois
 
 check_model = function(x,
                        variable_type,

From ce8871d71ae19fc96f4326ef4f80a4342ea97be3 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Tue, 21 Jan 2025 20:15:46 +0100
Subject: [PATCH 17/21] solving notes and warning messages for the checks

---
 NAMESPACE             | 1 +
 R/bgm.R               | 2 ++
 R/utility_functions.R | 2 +-
 man/bgm.Rd            | 3 +++
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index 077d91fe..400b5666 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -26,5 +26,6 @@ export(mrfSampler)
 importFrom(Rcpp,evalCpp)
 importFrom(Rdpack,reprompt)
 importFrom(methods,hasArg)
+importFrom(stats,dpois)
 importFrom(utils,packageVersion)
 useDynLib(bgms, .registration=TRUE)
diff --git a/R/bgm.R b/R/bgm.R
index 82426849..4af346a9 100644
--- a/R/bgm.R
+++ b/R/bgm.R
@@ -133,6 +133,8 @@
 #' \code{beta_bernoulli_beta = 1}.
 #' @param dirichlet_alpha The shape of the Dirichlet prior on the node-to-block
 #' allocation probabilities for the Stochastic Block model.
+#' @param lambda The rate parameter of the zero-truncated Poisson prior on the
+#' number of cluster for the Stochastic Block model.
 #' @param na.action How do you want the function to handle missing data? If
 #' \code{na.action = "listwise"}, listwise deletion is used. If
 #' \code{na.action = "impute"}, missing data are imputed iteratively during the
diff --git a/R/utility_functions.R b/R/utility_functions.R
index 14875b93..12b08278 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -1,7 +1,7 @@
 #' @importFrom Rcpp evalCpp
 #' @importFrom Rdpack reprompt
 #' @importFrom methods hasArg
-#' @importFrom stats dpois
+#' importFrom("stats", "dpois")
 
 check_model = function(x,
                        variable_type,
diff --git a/man/bgm.Rd b/man/bgm.Rd
index 635b7e1c..9f306879 100644
--- a/man/bgm.Rd
+++ b/man/bgm.Rd
@@ -128,6 +128,9 @@ positive numbers. Defaults to \code{beta_bernoulli_alpha = 1} and
 \item{dirichlet_alpha}{The shape of the Dirichlet prior on the node-to-block
 allocation probabilities for the Stochastic Block model.}
 
+\item{lambda}{The rate parameter of the zero-truncated Poisson prior on the
+number of cluster for the Stochastic Block model.}
+
 \item{na.action}{How do you want the function to handle missing data? If
 \code{na.action = "listwise"}, listwise deletion is used. If
 \code{na.action = "impute"}, missing data are imputed iteratively during the

From 1f09f8de6b71ce0da76c9e4d8390478ad9cb79c3 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Wed, 22 Jan 2025 13:04:20 +0100
Subject: [PATCH 18/21] export the summary_SBM function

---
 R/utility_functions.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/utility_functions.R b/R/utility_functions.R
index 12b08278..8e5490f0 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -1097,7 +1097,7 @@ compute_p_k_given_t <- function(t,
 # A function that computes the posterior probabilities of the
 # number of components K given the cardinality of the partition t
 # and the allocations of the nodes based on Dahl's method
-
+#' @export
 summary_SBM <- function(cluster_allocations,
                         dirichlet_alpha,
                         lambda) {

From 75cca47a1076d29452c4b6b04c2ee04a09c89d0b Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Wed, 22 Jan 2025 13:33:06 +0100
Subject: [PATCH 19/21] export summary_SBM function (needed when save = TRUE,
 as well as for the easybgm implementation)

---
 NAMESPACE             |  1 +
 R/utility_functions.R | 24 +++++++++++++++++++-----
 man/summary_SBM.Rd    | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 5 deletions(-)
 create mode 100644 man/summary_SBM.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 400b5666..83f1c915 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -23,6 +23,7 @@ export(extract_pairwise_interactions)
 export(extract_pairwise_thresholds)
 export(extract_posterior_inclusion_probabilities)
 export(mrfSampler)
+export(summary_SBM)
 importFrom(Rcpp,evalCpp)
 importFrom(Rdpack,reprompt)
 importFrom(methods,hasArg)
diff --git a/R/utility_functions.R b/R/utility_functions.R
index 8e5490f0..beaa94f8 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -1,7 +1,7 @@
 #' @importFrom Rcpp evalCpp
 #' @importFrom Rdpack reprompt
 #' @importFrom methods hasArg
-#' importFrom("stats", "dpois")
+#' @importFrom stats dpois
 
 check_model = function(x,
                        variable_type,
@@ -1093,10 +1093,24 @@ compute_p_k_given_t <- function(t,
   return(p_k_given_t)
 }
 
-
-# A function that computes the posterior probabilities of the
-# number of components K given the cardinality of the partition t
-# and the allocations of the nodes based on Dahl's method
+#' Function for summarizing the block allocation vectors
+#'
+#' This function summarizes the sampled allocation vectors from each iteration
+#' of the Gibbs sampler from the output of  \code{bgm}
+#' when \code{edge_prior = "Stochastic-Block"} and \code{save = TRUE}.
+#'
+#' @param cluster_allocations a matrix of \code{iter} rows and \code{no_variables}
+#' columns containing the sampled cluster allocation vectors from each iteration.
+#' @param dirichlet_alpha the concentration parameter of the Dirichlet prior
+#' @param lambda the Poisson rate parameter
+#' @return Returns a list of two elements: \code{components} and \code{allocations},
+#' containing the posterior probabilties for the number of components (clusters)
+#' and the estimated cluster allocation of the nodes using Dahl's method.
+#' @examples
+#' x <- summary_SBM(bgm_object$allocations,
+#'                  bgm_object$arguments$dirichlet_alpha,
+#'                  bgm_object$arguments$dirichlet_alpha)
+#'
 #' @export
 summary_SBM <- function(cluster_allocations,
                         dirichlet_alpha,
diff --git a/man/summary_SBM.Rd b/man/summary_SBM.Rd
new file mode 100644
index 00000000..967910f0
--- /dev/null
+++ b/man/summary_SBM.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utility_functions.R
+\name{summary_SBM}
+\alias{summary_SBM}
+\title{Function for summarizing the block allocation vectors}
+\usage{
+summary_SBM(cluster_allocations, dirichlet_alpha, lambda)
+}
+\arguments{
+\item{cluster_allocations}{a matrix of \code{iter} rows and \code{no_variables}
+columns containing the sampled cluster allocation vectors from each iteration.}
+
+\item{dirichlet_alpha}{the concentration parameter of the Dirichlet prior}
+
+\item{lambda}{the Poisson rate parameter}
+}
+\value{
+Returns a list of two elements: \code{components} and \code{allocations},
+containing the posterior probabilties for the number of components (clusters)
+and the estimated cluster allocation of the nodes using Dahl's method.
+}
+\description{
+This function summarizes the sampled allocation vectors from each iteration
+of the Gibbs sampler from the output of  \code{bgm}
+when \code{edge_prior = "Stochastic-Block"} and \code{save = TRUE}.
+}
+\examples{
+x <- summary_SBM(bgm_object$allocations,
+                 bgm_object$arguments$dirichlet_alpha,
+                 bgm_object$arguments$dirichlet_alpha)
+
+}

From 8564e286b1b11a4edd428e7016076bf23de74fd8 Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Wed, 22 Jan 2025 13:45:43 +0100
Subject: [PATCH 20/21] Udate in the documentation

---
 R/utility_functions.R | 9 ++++++++-
 man/summary_SBM.Rd    | 9 ++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/R/utility_functions.R b/R/utility_functions.R
index beaa94f8..bf38f41f 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -1107,7 +1107,14 @@ compute_p_k_given_t <- function(t,
 #' containing the posterior probabilties for the number of components (clusters)
 #' and the estimated cluster allocation of the nodes using Dahl's method.
 #' @examples
-#' x <- summary_SBM(bgm_object$allocations,
+#'
+#' # fit a model with the SBM prior
+#' bgm_object <- bgm(Wenchuan[, c(1:5)],
+#'                   edge_prior = "Stochastic-Block",
+#'                   save = TRUE,
+#'                   iter = 1e3)
+#'
+#' s <- summary_SBM(bgm_object$allocations,
 #'                  bgm_object$arguments$dirichlet_alpha,
 #'                  bgm_object$arguments$dirichlet_alpha)
 #'
diff --git a/man/summary_SBM.Rd b/man/summary_SBM.Rd
index 967910f0..2e74a130 100644
--- a/man/summary_SBM.Rd
+++ b/man/summary_SBM.Rd
@@ -25,7 +25,14 @@ of the Gibbs sampler from the output of  \code{bgm}
 when \code{edge_prior = "Stochastic-Block"} and \code{save = TRUE}.
 }
 \examples{
-x <- summary_SBM(bgm_object$allocations,
+
+# fit a model with the SBM prior
+bgm_object <- bgm(Wenchuan[, c(1:5)],
+                  edge_prior = "Stochastic-Block",
+                  save = TRUE,
+                  iter = 1e3)
+
+s <- summary_SBM(bgm_object$allocations,
                  bgm_object$arguments$dirichlet_alpha,
                  bgm_object$arguments$dirichlet_alpha)
 

From c7c2568aaf844b1a14770cb57c1bf1bb7319c6ff Mon Sep 17 00:00:00 2001
From: Nikola Sekulovski <nikolasekulovski7@gmail.com>
Date: Wed, 22 Jan 2025 13:58:22 +0100
Subject: [PATCH 21/21] final update in the documentation

---
 R/utility_functions.R | 2 +-
 man/summary_SBM.Rd    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/utility_functions.R b/R/utility_functions.R
index bf38f41f..11047669 100644
--- a/R/utility_functions.R
+++ b/R/utility_functions.R
@@ -1116,7 +1116,7 @@ compute_p_k_given_t <- function(t,
 #'
 #' s <- summary_SBM(bgm_object$allocations,
 #'                  bgm_object$arguments$dirichlet_alpha,
-#'                  bgm_object$arguments$dirichlet_alpha)
+#'                  bgm_object$arguments$lambda)
 #'
 #' @export
 summary_SBM <- function(cluster_allocations,
diff --git a/man/summary_SBM.Rd b/man/summary_SBM.Rd
index 2e74a130..fde02886 100644
--- a/man/summary_SBM.Rd
+++ b/man/summary_SBM.Rd
@@ -34,6 +34,6 @@ bgm_object <- bgm(Wenchuan[, c(1:5)],
 
 s <- summary_SBM(bgm_object$allocations,
                  bgm_object$arguments$dirichlet_alpha,
-                 bgm_object$arguments$dirichlet_alpha)
+                 bgm_object$arguments$lambda)
 
 }