marcelortizv · marcelortizv · Jan 23, 2026 · Jan 18, 2026 · Jan 18, 2026 · Jan 23, 2026
diff --git a/.gitignore b/.gitignore
@@ -22,3 +22,4 @@ test_print_cohorts_direct.R
 test_rcs_all_methods.R
 test_inffunc.R
 /.quarto/
+..Rcheck/00check.log
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: triplediff
 Title: Triple-Difference Estimators
-Version: 0.2.0
+Version: 0.2.1
 Authors@R: c(person("Marcelo", "Ortiz-Villavicencio", email = "[email protected]", role = c("aut", "cre")),
               person("Pedro H. C.", "Sant'Anna", email = "[email protected]", role = c("aut"))
             )

diff --git a/NEWS.md b/NEWS.md
@@ -14,3 +14,10 @@
 
   * Replaced `parglm` with `fastglm` to avoid issues related to parglm's scheduled archival on 2026-01-29.
   * Added support for unbalanced panel data and repeated cross-sectional data by properly implementing the `allow_unbalanced_panel` parameter across all functions.
+
+# triplediff 0.2.1
+  * Add asymmetric propensity score trimming for control units with a propensity score >= 0.995.
+  * Add partition-specific collinearity detection with two-stage checking.
+  * Add a comprehensive test suite, including a Monte Carlo coverage test under propensity score trimming.
+
+
diff --git a/R/compute_nuisances.R b/R/compute_nuisances.R
diff --git a/R/preprocess.R b/R/preprocess.R
@@ -3,6 +3,60 @@
 #' @import BMisc
 #' @export
 NULL
+#--------------------------------------------------
+# Helper: detect covariates that are collinear within a DDD comparison partition.
+#
+# Each comparison partition pools subgroup 4 with one of the subgroups below 4
+# found in `data`. Within each partition the covariates are checked together
+# with an intercept column, because the formula rebuilt from the surviving
+# covariates (`~ x1 + x2`, or `~ 1`) always carries an implicit intercept. A
+# covariate that is constant inside a partition is therefore flagged as well.
+#
+# Arguments:
+#   data         data.table or data.frame holding the subgroup and covariate columns
+#   subgroup_col name of the column with the subgroup codes
+#   cov_cols     character vector of covariate column names (no intercept column)
+#   tol          tolerance passed to qr() for detecting linear dependence
+#
+# Returns a list with:
+#   - collinear_vars: named list mapping each flagged variable to the partition(s) where it is collinear
+#   - all_collinear: names of all flagged variables (NULL when none are flagged)
+check_partition_collinearity <- function(data, subgroup_col, cov_cols, tol = 1e-6) {
+  # Comparison groups are the subgroups below 4 that are present in the data
+  subgroups <- sort(unique(data[[subgroup_col]]))
+  comparison_groups <- subgroups[subgroups < 4]
+
+  # Maps variable name -> labels of the partitions in which it is collinear
+  partition_collinear <- list()
+
+  for (comp_group in comparison_groups) {
+    # Rows belonging to subgroup 4 or to the comparison group
+    subset_data <- data[data[[subgroup_col]] %in% c(4, comp_group), ]
+
+    # Skip if no observations (shouldn't happen, but be safe)
+    if (nrow(subset_data) == 0) next
+
+    # Design matrix with the intercept first; as.data.frame() lets both
+    # data.table and plain data.frame inputs be column-selected the same way
+    design <- cbind(1, as.matrix(as.data.frame(subset_data)[cov_cols]))
+
+    # Pivoted QR: the first `rank` pivots are the linearly independent columns
+    qr_subset <- qr(design, tol = tol)
+    kept <- qr_subset$pivot[seq_len(qr_subset$rank)] - 1L  # 0 marks the intercept
+
+    # Covariates that are not among the independent columns
+    collinear_in_subset <- setdiff(cov_cols, cov_cols[kept[kept > 0L]])
+
+    # c() on a missing entry starts a new vector, so no NULL check is needed
+    partition_name <- paste0("subgroup 4 vs ", comp_group)
+    for (cov_name in collinear_in_subset) {
+      partition_collinear[[cov_name]] <- c(partition_collinear[[cov_name]], partition_name)
+    }
+  }
+
+  list(collinear_vars = partition_collinear, all_collinear = names(partition_collinear))
+}
+
 #--------------------------------------------------
 # Function to pre-process the data to use on ddd estimator
 
@@ -253,13 +307,48 @@ run_nopreprocess_2periods <- function(yname,
   rank_m <- qr_m$rank
   # Get the indices of the non-collinear columns
   non_collinear_indices <- qr_m$pivot[seq_len(rank_m)]
+  # Check if any covariates were dropped due to global collinearity (following DRDID approach)
+  dropped_covariates_global <- setdiff(colnames(cov_m), colnames(cov_m)[non_collinear_indices])
+  if (length(dropped_covariates_global) > 0) {
+    warning("The following covariates were dropped due to global collinearity: ", paste(dropped_covariates_global, collapse = ", "))
+  }
   # Drop the collinear columns from the data.table
   cleaned_data <- cleaned_data[, c(seq(1,idx_static_vars,1), non_collinear_indices + idx_static_vars), with = FALSE]
 
   # drop the intercept
   #cleaned_data[, (idx_static_vars+1) := NULL]
   cleaned_data[, "(Intercept)" := NULL]
 
+  # Check for partition-specific collinearity (after global check)
+  # Get remaining covariate column names (excluding static vars)
+  cov_cols_remaining <- setdiff(names(cleaned_data), c("id", "y", "post", "treat", "period", "partition", "weights", "cluster", "subgroup"))
+  if (length(cov_cols_remaining) > 0) {
+    partition_check <- check_partition_collinearity(cleaned_data, "subgroup", cov_cols_remaining)
+
+    if (length(partition_check$all_collinear) > 0) {
+      # Build informative warning message
+      partition_warnings <- sapply(names(partition_check$collinear_vars), function(var) {
+        partitions <- paste(partition_check$collinear_vars[[var]], collapse = ", ")
+        paste0("  - ", var, " (collinear in: ", partitions, ")")
+      })
+      warning("The following covariates were dropped due to partition-specific collinearity:\n",
+              paste(partition_warnings, collapse = "\n"))
+
+      # Drop these covariates globally
+      cols_to_keep <- setdiff(names(cleaned_data), partition_check$all_collinear)
+      cleaned_data <- cleaned_data[, ..cols_to_keep]
+    }
+  }
+
+  # Update xformula to reflect dropped covariates (following DRDID approach)
+  # Get final covariate column names after all collinearity checks
+  final_cov_cols <- setdiff(names(cleaned_data), c("id", "y", "post", "treat", "period", "partition", "weights", "cluster", "subgroup"))
+  if (length(final_cov_cols) > 0) {
+    xformla <- stats::as.formula(paste("~", paste(final_cov_cols, collapse = " + ")))
+  } else {
+    xformla <- ~1
+  }
+
   out <- list(preprocessed_data = cleaned_data,
               xformula = xformla,
               est_method = est_method,
@@ -603,12 +692,47 @@ run_preprocess_2Periods <- function(yname,
   rank_m <- qr_m$rank
   # Get the indices of the non-collinear columns
   non_collinear_indices <- qr_m$pivot[seq_len(rank_m)]
+  # Check if any covariates were dropped due to global collinearity (following DRDID approach)
+  dropped_covariates_global <- setdiff(colnames(cov_m), colnames(cov_m)[non_collinear_indices])
+  if (length(dropped_covariates_global) > 0) {
+    warning("The following covariates were dropped due to global collinearity: ", paste(dropped_covariates_global, collapse = ", "))
+  }
   # Drop the collinear columns from the data.table
   cleaned_data <- cleaned_data[, c(seq(1,idx_static_vars,1), non_collinear_indices + idx_static_vars), with = FALSE]
 
   # drop the intercept
   cleaned_data[, "(Intercept)" := NULL]
 
+  # Check for partition-specific collinearity (after global check)
+  # Get remaining covariate column names (excluding static vars)
+  cov_cols_remaining <- setdiff(names(cleaned_data), c("id", "y", "post", "treat", "period", "partition", "weights", "cluster", "subgroup"))
+  if (length(cov_cols_remaining) > 0) {
+    partition_check <- check_partition_collinearity(cleaned_data, "subgroup", cov_cols_remaining)
+
+    if (length(partition_check$all_collinear) > 0) {
+      # Build informative warning message
+      partition_warnings <- sapply(names(partition_check$collinear_vars), function(var) {
+        partitions <- paste(partition_check$collinear_vars[[var]], collapse = ", ")
+        paste0("  - ", var, " (collinear in: ", partitions, ")")
+      })
+      warning("The following covariates were dropped due to partition-specific collinearity:\n",
+              paste(partition_warnings, collapse = "\n"))
+
+      # Drop these covariates globally
+      cols_to_keep <- setdiff(names(cleaned_data), partition_check$all_collinear)
+      cleaned_data <- cleaned_data[, ..cols_to_keep]
+    }
+  }
+
+  # Update xformula to reflect dropped covariates (following DRDID approach)
+  # Get final covariate column names after all collinearity checks
+  final_cov_cols <- setdiff(names(cleaned_data), c("id", "y", "post", "treat", "period", "partition", "weights", "cluster", "subgroup"))
+  if (length(final_cov_cols) > 0) {
+    xformla <- stats::as.formula(paste("~", paste(final_cov_cols, collapse = " + ")))
+  } else {
+    xformla <- ~1
+  }
+
   out <- list(preprocessed_data = cleaned_data,
               xformula = xformla,
               tname = tname,
@@ -1037,9 +1161,45 @@ run_preprocess_multPeriods <- function(yname,
                                                             data = dta,
                                                             na.action = na.pass))
   }
+
+  # Remove collinear variables (following DRDID approach)
+  # Determine the number of static columns (before covariates)
+  # Static cols: id, y, first_treat, period, partition, weights, [cluster]
+  idx_static_vars <- ifelse(is.null(cluster), 6, 7)
+
+  # Convert the covariate columns (including intercept) to a matrix
+  cov_m <- as.matrix(cleaned_data[, -c(1:idx_static_vars), with = FALSE])
+
+  # Only check for collinearity if there are covariates beyond the intercept
+  if (ncol(cov_m) > 1) {
+    # Use the qr() function to detect collinear columns
+    qr_m <- qr(cov_m, tol = 1e-6)
+    # Get the rank of the matrix
+    rank_m <- qr_m$rank
+    # Get the indices of the non-collinear columns
+    non_collinear_indices <- qr_m$pivot[seq_len(rank_m)]
+    # Check if any covariates were dropped due to global collinearity
+    dropped_covariates_global <- setdiff(colnames(cov_m), colnames(cov_m)[non_collinear_indices])
+    if (length(dropped_covariates_global) > 0) {
+      warning("The following covariates were dropped due to global collinearity: ", paste(dropped_covariates_global, collapse = ", "))
+    }
+    # Drop the collinear columns from the data.table
+    cleaned_data <- cleaned_data[, c(seq(1, idx_static_vars, 1), non_collinear_indices + idx_static_vars), with = FALSE]
+  }
+
   # drop the intercept
   cleaned_data[, "(Intercept)" := NULL]
 
+  # Update xformula to reflect dropped covariates (following DRDID approach)
+  # Get final covariate column names after collinearity check
+  # Static cols in multi-period: id, y, first_treat, period, partition, weights, [cluster]
+  final_cov_cols <- setdiff(names(cleaned_data), c("id", "y", "first_treat", "period", "partition", "weights", "cluster"))
+  if (length(final_cov_cols) > 0) {
+    xformla <- stats::as.formula(paste("~", paste(final_cov_cols, collapse = " + ")))
+  } else {
+    xformla <- ~1
+  }
+
   # order dataset wrt idname and tname
   setorder(cleaned_data, "id", "period")
 

diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 # Triple Differences Estimators <img src="man/figures/triplediff-logo.png" align="right" alt="" width="155" />
 
 ![](https://img.shields.io/badge/release%20lifecycle-alpha-orange.svg)
-[![](https://img.shields.io/badge/devel%20version-0.2.0-blue.svg)](https://github.com/marcelortizv/triplediff)
+[![](https://img.shields.io/badge/devel%20version-0.2.1-blue.svg)](https://github.com/marcelortizv/triplediff)
 [![](https://img.shields.io/badge/doi-10.48550/arXiv.2505.09942-yellow.svg)](https://doi.org/10.48550/arXiv.2505.09942)
 
 <!-- README.md is generated from README.Rmd. Please edit that file -->

diff --git a/man/figures/README-unnamed-chunk-10-1.png b/man/figures/README-unnamed-chunk-10-1.png