diff --git a/cpp/include/qdk/chemistry/data/hamiltonian.hpp b/cpp/include/qdk/chemistry/data/hamiltonian.hpp
index a2a9258ab..30d8866ce 100644
--- a/cpp/include/qdk/chemistry/data/hamiltonian.hpp
+++ b/cpp/include/qdk/chemistry/data/hamiltonian.hpp
@@ -262,10 +262,11 @@ class HamiltonianContainer {
    * @param filename Path to FCIDUMP file to create/overwrite
    * @param nalpha Number of alpha electrons
    * @param nbeta Number of beta electrons
-   * @throws std::runtime_error if I/O error occurs
+   * @throws std::runtime_error if I/O error occurs or Hamiltonian is
+   * unrestricted
    */
   virtual void to_fcidump_file(const std::string& filename, size_t nalpha,
-                               size_t nbeta) const = 0;
+                               size_t nbeta) const;
 
   /**
    * @brief Check if the Hamiltonian data is complete and consistent
diff --git a/cpp/include/qdk/chemistry/data/hamiltonian_containers/canonical_four_center.hpp b/cpp/include/qdk/chemistry/data/hamiltonian_containers/canonical_four_center.hpp
index 7ac3fc45c..6c8b4fdd1 100644
--- a/cpp/include/qdk/chemistry/data/hamiltonian_containers/canonical_four_center.hpp
+++ b/cpp/include/qdk/chemistry/data/hamiltonian_containers/canonical_four_center.hpp
@@ -181,16 +181,6 @@ class CanonicalFourCenterHamiltonianContainer : public HamiltonianContainer {
   static std::unique_ptr<CanonicalFourCenterHamiltonianContainer> from_json(
       const nlohmann::json& j);
 
-  /**
-   * @brief Save Hamiltonian to an FCIDUMP file
-   * @param filename Path to FCIDUMP file to create/overwrite
-   * @param nalpha Number of alpha electrons
-   * @param nbeta Number of beta electrons
-   * @throws std::runtime_error if I/O error occurs
-   */
-  void to_fcidump_file(const std::string& filename, size_t nalpha,
-                       size_t nbeta) const override;
-
   /**
    * @brief Check if the Hamiltonian data is complete and consistent
    * @return True if all required data is set and dimensions are consistent
@@ -215,16 +205,6 @@ class CanonicalFourCenterHamiltonianContainer : public HamiltonianContainer {
                     std::shared_ptr<Eigen::VectorXd>>
   make_restricted_two_body_integrals(const Eigen::VectorXd& integrals);
 
-  /**
-   * @brief Save FCIDUMP file without filename validation (internal use)
-   * @param filename Path to FCIDUMP file to create/overwrite
-   * @param nalpha Number of alpha electrons
-   * @param nbeta Number of beta electrons
-   * @throws std::runtime_error if I/O error occurs
-   */
-  void _to_fcidump_file(const std::string& filename, size_t nalpha,
-                        size_t nbeta) const;
-
   /// Serialization version
   static constexpr const char* SERIALIZATION_VERSION = "0.1.0";
 };
diff --git a/cpp/include/qdk/chemistry/data/hamiltonian_containers/cholesky.hpp b/cpp/include/qdk/chemistry/data/hamiltonian_containers/cholesky.hpp
index f2181297d..3e148b90c 100644
--- a/cpp/include/qdk/chemistry/data/hamiltonian_containers/cholesky.hpp
+++ b/cpp/include/qdk/chemistry/data/hamiltonian_containers/cholesky.hpp
@@ -8,89 +8,79 @@
 #include <Eigen/Dense>
 #include <memory>
 #include <nlohmann/json.hpp>
+#include <optional>
 #include <qdk/chemistry/data/hamiltonian.hpp>
 #include <qdk/chemistry/data/orbitals.hpp>
 #include <stdexcept>
 #include <string>
 #include <vector>
 
-#include "canonical_four_center.hpp"
-
 namespace qdk::chemistry::data {
 
 /**
  * @class CholeskyHamiltonianContainer
- * @brief Contains a molecular Hamiltonian and AO Cholesky vectors
- *
- * This class stores molecular Hamiltonian data for quantum chemistry
- * calculations, specifically designed for active space methods. It contains:
- * - One-electron integrals (kinetic + nuclear attraction) in MO representation
- * - Two-electron integrals (electron-electron repulsion) in MO representation
- * - Cholesky vectors in AO representation for reconstructing integrals
- * - Molecular orbital information for the active space
- * - Inactive Fock matrix
- * - Core energy contributions from inactive orbitals and nuclear repulsion
+ * @brief Contains a molecular Hamiltonian expressed using three-center
+ * integrals.
  *
- * This class implies that all inactive orbitals are fully occupied for the
- * purpose of computing the core energy and inactive Fock matrix.
+ * In addition to those contained in HamiltonianContainer, this subclass also
+ * contains:
+ * - Three-center two-electron integrals (electron-electron repulsion) in MO
+ * representation.
+ * - Optionally, AO Cholesky vectors for potential reuse in further
+ * transformations.
  *
- * The Hamiltonian is immutable after construction, meaning all data must be
- * provided during construction and cannot be modified afterwards. The
- * Hamiltonian supports both restricted and unrestricted calculations and
- * integrates with the broader quantum chemistry framework for active space
- * methods.
  */
-// TODO: Change Cholesky container to only store MO cholesky vectors and
-// optionally AO cholesky vecs. Make two body ints evaluated on request
-class CholeskyHamiltonianContainer
-    : public CanonicalFourCenterHamiltonianContainer {
+class CholeskyHamiltonianContainer : public HamiltonianContainer {
  public:
   /**
-   * @brief Constructor for restricted Cholesky Hamiltonian
-   *
-   * Creates a restricted Hamiltonian where alpha and beta spin components
-   * share the same integrals.
+   * @brief Constructor for active space Hamiltonian with three center integrals
+   * (ij|Q)
    *
    * @param one_body_integrals One-electron integrals in MO basis [norb x norb]
-   * @param two_body_integrals Two-electron integrals in MO basis [norb x norb x
-   * norb x norb]
+   * @param three_center_integrals Three-center two-electron integrals in MO
+   * basis [(norb x norb) x naux]
    * @param orbitals Shared pointer to molecular orbital data for the system
    * @param core_energy Core energy (nuclear repulsion + inactive orbital
    * energy)
    * @param inactive_fock_matrix Inactive Fock matrix for the selected active
    * space
-   * @param L_ao AO Cholesky vectors for reconstructing integrals
+   * @param ao_cholesky_vectors Optional AO Cholesky vectors for potential reuse
+   * (default: std::nullopt)
    * @param type Type of Hamiltonian (Hermitian by default)
    *
    * @throws std::invalid_argument if orbitals pointer is nullptr
    */
   CholeskyHamiltonianContainer(
       const Eigen::MatrixXd& one_body_integrals,
-      const Eigen::VectorXd& two_body_integrals,
+      const Eigen::MatrixXd& three_center_integrals,
       std::shared_ptr<Orbitals> orbitals, double core_energy,
-      const Eigen::MatrixXd& inactive_fock_matrix, const Eigen::MatrixXd& L_ao,
+      const Eigen::MatrixXd& inactive_fock_matrix,
+      std::optional<Eigen::MatrixXd> ao_cholesky_vectors = std::nullopt,
       HamiltonianType type = HamiltonianType::Hermitian);
 
   /**
-   * @brief Constructor for unrestricted Cholesky Hamiltonian
-   *
-   * Creates an unrestricted Hamiltonian with separate alpha and beta spin
-   * components.
+   * @brief Constructor for active space Hamiltonian with three center integrals
+   * using separate spin components
    *
    * @param one_body_integrals_alpha One-electron integrals for alpha spin in MO
    * basis
    * @param one_body_integrals_beta One-electron integrals for beta spin in MO
    * basis
-   * @param two_body_integrals_aaaa Two-electron alpha-alpha-alpha-alpha
-   * integrals
-   * @param two_body_integrals_aabb Two-electron alpha-beta-alpha-beta integrals
-   * @param two_body_integrals_bbbb Two-electron beta-beta-beta-beta integrals
+   * @param three_center_integrals_aa Three-center two-electron alpha-alpha
+   * integrals (ij|Q), where the orbital pair index ij are stored in row-major
+   * order
+   * @param three_center_integrals_bb Three-center two-electron beta-beta
+   * integrals (ij|Q), where the orbital pair index ij are stored in row-major
+   * order
    * @param orbitals Shared pointer to molecular orbital data for the system
    * @param core_energy Core energy (nuclear repulsion + inactive orbital
    * energy)
-   * @param inactive_fock_matrix_alpha Inactive Fock matrix for alpha spin
-   * @param inactive_fock_matrix_beta Inactive Fock matrix for beta spin
-   * @param L_ao AO Cholesky vectors for reconstructing integrals
+   * @param inactive_fock_matrix_alpha Inactive Fock matrix for alpha spin in
+   * the selected active space
+   * @param inactive_fock_matrix_beta Inactive Fock matrix for beta spin in the
+   * selected active space
+   * @param ao_cholesky_vectors Optional AO Cholesky vectors for potential reuse
+   * (default: std::nullopt)
    * @param type Type of Hamiltonian (Hermitian by default)
    *
    * @throws std::invalid_argument if orbitals pointer is nullptr
@@ -98,19 +88,18 @@ class CholeskyHamiltonianContainer
   CholeskyHamiltonianContainer(
       const Eigen::MatrixXd& one_body_integrals_alpha,
       const Eigen::MatrixXd& one_body_integrals_beta,
-      const Eigen::VectorXd& two_body_integrals_aaaa,
-      const Eigen::VectorXd& two_body_integrals_aabb,
-      const Eigen::VectorXd& two_body_integrals_bbbb,
+      const Eigen::MatrixXd& three_center_integrals_aa,
+      const Eigen::MatrixXd& three_center_integrals_bb,
       std::shared_ptr<Orbitals> orbitals, double core_energy,
       const Eigen::MatrixXd& inactive_fock_matrix_alpha,
       const Eigen::MatrixXd& inactive_fock_matrix_beta,
-      const Eigen::MatrixXd& L_ao,
+      std::optional<Eigen::MatrixXd> ao_cholesky_vectors = std::nullopt,
       HamiltonianType type = HamiltonianType::Hermitian);
 
   /**
    * @brief Destructor
    */
-  ~CholeskyHamiltonianContainer() = default;
+  ~CholeskyHamiltonianContainer() override = default;
 
   /**
    * @brief Create a deep copy of this container
@@ -120,10 +109,67 @@ class CholeskyHamiltonianContainer
 
   /**
    * @brief Get the type of the underlying container
-   * @return String "cholesky" identifying this container type
+   * @return String identifying the container type (e.g.,
+   * "cholesky")
    */
   std::string get_container_type() const override final;
 
+  /**
+   * @brief Get four-center two-electron integrals in MO basis for all spin
+   * channels
+   * @return Tuple of references to (aaaa, aabb, bbbb) four-center two-electron
+   * integrals vectors
+   * @throws std::runtime_error if integrals are not set
+   */
+  std::tuple<const Eigen::VectorXd&, const Eigen::VectorXd&,
+             const Eigen::VectorXd&>
+  get_two_body_integrals() const override;
+
+  /**
+   * @brief Get three-center integrals in MO basis for all spin channels
+   * @return Pair of references to (aa, bb) three-center two-electron
+   * integrals matrices, where each matrix is of dimension [(norb x norb) x
+   * naux], with the (norb x norb) part stored in row-major order
+   * @throws std::runtime_error if integrals are not set
+   */
+  std::pair<const Eigen::MatrixXd&, const Eigen::MatrixXd&>
+  get_three_center_integrals() const;
+
+  /**
+   * @brief Get the optional AO Cholesky vectors
+   * @return Const reference to the optional AO Cholesky vectors matrix
+   * [nao^2 x nchol]. Contains std::nullopt if AO Cholesky vectors were not
+   * provided at construction.
+   */
+  const std::optional<Eigen::MatrixXd>& get_ao_cholesky_vectors() const;
+
+  /**
+   * @brief Get specific four-center two-electron integral element
+   * @param i First orbital index
+   * @param j Second orbital index
+   * @param k Third orbital index
+   * @param l Fourth orbital index
+   * @param channel Spin channel to query (aaaa, aabb, or bbbb), defaults to
+   * aaaa
+   * @return Four-center two-electron integral (ij|kl)
+   * @throws std::out_of_range if indices are invalid
+   */
+  double get_two_body_element(
+      unsigned i, unsigned j, unsigned k, unsigned l,
+      SpinChannel channel = SpinChannel::aaaa) const override;
+
+  /**
+   * @brief Check if two-body integrals are available
+   * @return True if two-body integrals are set
+   */
+  bool has_two_body_integrals() const override;
+
+  /**
+   * @brief Check if the Hamiltonian is restricted
+   * @return True if alpha and beta integrals are identical
+   */
+  bool is_restricted() const override final;
+
   /**
    * @brief Convert Hamiltonian to JSON
    * @return JSON object containing Hamiltonian data
@@ -140,8 +186,9 @@ class CholeskyHamiltonianContainer
   /**
    * @brief Deserialize Hamiltonian data from HDF5 group
    * @param group HDF5 group to read data from
-   * @return Unique pointer to CholeskyHamiltonianContainer loaded from group
-   * @throws std::runtime_error if I/O error occurs or data is malformed
+   * @return Unique pointer to const CholeskyHamiltonianContainer loaded from
+   * group
+   * @throws std::runtime_error if I/O error occurs
    */
   static std::unique_ptr<CholeskyHamiltonianContainer> from_hdf5(
       H5::Group& group);
@@ -149,15 +196,55 @@ class CholeskyHamiltonianContainer
   /**
    * @brief Load Hamiltonian from JSON
    * @param j JSON object containing Hamiltonian data
-   * @return Unique pointer to CholeskyHamiltonianContainer loaded from JSON
-   * @throws std::runtime_error if JSON is malformed or missing required fields
+   * @return Unique pointer to const CholeskyHamiltonianContainer loaded from
+   * JSON
+   * @throws std::runtime_error if JSON is malformed
    */
   static std::unique_ptr<CholeskyHamiltonianContainer> from_json(
       const nlohmann::json& j);
 
+  /**
+   * @brief Check if the Hamiltonian data is complete and consistent
+   * @return True if all required data is set and dimensions are consistent
+   */
+  bool is_valid() const override final;
+
  private:
-  // AO Cholesky vectors for integral reconstruction
-  const std::shared_ptr<Eigen::MatrixXd> _ao_cholesky_vectors;
+  /**
+   * Three-center integrals in MO basis, where each channel is stored as a
+   * matrix of dimension [(norb x norb) x naux] where norb x norb is stored in
+   * row major order
+   */
+  const std::pair<std::shared_ptr<Eigen::MatrixXd>,
+                  std::shared_ptr<Eigen::MatrixXd>>
+      _three_center_integrals;
+
+  /**
+   * Lazily computed four-center integrals cache (built on first access).
+   * Stores (aaaa, aabb, bbbb) as flattened arrays [norb^4].
+   * Uses shared_ptr so restricted case can share the same data for all
+   * channels.
+   */
+  mutable std::tuple<std::shared_ptr<Eigen::VectorXd>,
+                     std::shared_ptr<Eigen::VectorXd>,
+                     std::shared_ptr<Eigen::VectorXd>>
+      _cached_four_center_integrals;
+
+  /** Build four-center integrals from three-center integrals and cache them */
+  void _build_four_center_cache() const;
+
+  /** Validation helper for integral dimensions */
+  void validate_integral_dimensions() const override final;
+
+  /** Optional AO Cholesky vectors for potential reuse */
+  const std::optional<Eigen::MatrixXd> _ao_cholesky_vectors;
+
+  static std::pair<std::shared_ptr<Eigen::MatrixXd>,
+                   std::shared_ptr<Eigen::MatrixXd>>
+  make_restricted_three_center_integrals(const Eigen::MatrixXd& integrals);
+
+  /** Serialization version */
+  static constexpr const char* SERIALIZATION_VERSION = "0.1.0";
 };
 
 }  // namespace qdk::chemistry::data
diff --git a/cpp/src/qdk/chemistry/algorithms/microsoft/cholesky_hamiltonian.cpp b/cpp/src/qdk/chemistry/algorithms/microsoft/cholesky_hamiltonian.cpp
index a28ae3953..2da93502e 100644
--- a/cpp/src/qdk/chemistry/algorithms/microsoft/cholesky_hamiltonian.cpp
+++ b/cpp/src/qdk/chemistry/algorithms/microsoft/cholesky_hamiltonian.cpp
@@ -89,7 +89,15 @@ std::tuple<std::vector<double>, size_t> compute_cholesky_vectors(
 
   // Cholesky decomposition (rank is bounded by num_aos*(num_aos+1)/2)
   const size_t max_rank = num_aos * (num_aos + 1) / 2;
-  QDK_LOGGER().debug("Maximum possible Cholesky rank: {}", max_rank);
+  QDK_LOGGER().info("Maximum possible Cholesky rank: {}", max_rank);
+
+  // Precompute upper bound for shell-pair block columns: n_cols = n1 * n2.
+  // This enables reusing ERI column buffers across iterations.
+  size_t max_shell_size = 0;
+  for (size_t s = 0; s < num_shells; ++s) {
+    max_shell_size = std::max(max_shell_size, obs[s].size());
+  }
+  const size_t max_n_cols = max_shell_size * max_shell_size;
 
   // Fix threshold to (= sqrt(max_rank) * eps), to prevent numerical noise.
   const double min_threshold = std::sqrt(static_cast<double>(max_rank)) *
@@ -147,6 +155,9 @@ std::tuple<std::vector<double>, size_t> compute_cholesky_vectors(
   std::vector<std::vector<size_t>> active_shell_pairs_local(nthreads);
 #endif
 
+  // Reusable ERI column buffer — single shared buffer, no per-thread copies.
+  std::vector<double> eri_col_max(num_aos2 * max_n_cols, 0.0);
+
   // Compute diagonal elements for all shell pairs
   QDK_LOGGER().debug("Computing diagonal elements");
 #ifdef _OPENMP
@@ -207,11 +218,11 @@ std::tuple<std::vector<double>, size_t> compute_cholesky_vectors(
   }
 #endif
 
-  QDK_LOGGER().debug("Cholesky Rank | Max Diagonal Element");
+  QDK_LOGGER().info("Cholesky Rank | Max Diagonal Element");
   double D_max = 0.0;
   while (current_col < max_rank) {
     if (active_shell_pairs.empty()) {
-      QDK_LOGGER().debug("{:>13} | all shell pairs converged", current_col);
+      QDK_LOGGER().info("{:>13} | all shell pairs converged", current_col);
       break;
     }
 
@@ -231,7 +242,7 @@ std::tuple<std::vector<double>, size_t> compute_cholesky_vectors(
         s2_max = s2;
       }
     }
-    QDK_LOGGER().debug("{:>13} | {}", current_col, D_max);
+    QDK_LOGGER().info("{:>13} | {}", current_col, D_max);
 
     // check convergence
     if (D_max < threshold) {
@@ -242,19 +253,19 @@ std::tuple<std::vector<double>, size_t> compute_cholesky_vectors(
     const size_t n1_max = obs[s1_max].size();
     const size_t n2_max = obs[s2_max].size();
     const size_t n_cols = n1_max * n2_max;
-    std::vector<double> eri_col(num_aos2 * n_cols, 0.0);
+    const size_t eri_col_used = num_aos2 * n_cols;
+    std::fill_n(eri_col_max.begin(), eri_col_used, 0.0);
+
+    // Accumulate ERI integrals directly into the shared buffer.
+    // Thread assignment by (s34++) % nthreads maps each (s3, s4) shell pair
+    // to exactly one thread, so each (bf3, bf4) index block is written by a
+    // single thread. No synchronization or per-thread buffers are needed.
 #ifdef _OPENMP
-    std::vector<std::vector<double>> eri_col_threads(nthreads);
-    for (int t = 0; t < nthreads; ++t) {
-      eri_col_threads[t].resize(num_aos2 * n_cols, 0.0);
-    }
 #pragma omp parallel
 #endif
     {
 #ifdef _OPENMP
       const auto thread_id = omp_get_thread_num();
-      // get thread-local eri_col
-      auto& eri_col_local = eri_col_threads[thread_id];
 #else
       const auto thread_id = 0;
 #endif
@@ -282,7 +293,7 @@ std::tuple<std::vector<double>, size_t> compute_cholesky_vectors(
           const auto& res = buf[0];
           if (res == nullptr) continue;
 
-          // fill in eri_col
+          // fill in eri_col — direct write, no thread-local copy
           for (size_t i = 0, ijkl = 0; i < n1_max; ++i) {
             const size_t ind_i = i * n2_max;
             for (size_t j = 0; j < n2_max; ++j) {
@@ -291,11 +302,7 @@ std::tuple<std::vector<double>, size_t> compute_cholesky_vectors(
                 const size_t ind_ijk = ind_ij + (bf3_st + k) * num_aos;
                 for (size_t l = 0; l < n4; ++l, ++ijkl) {
                   const size_t ind_ijkl = ind_ijk + (bf4_st + l);
-#ifdef _OPENMP
-                  eri_col_local[ind_ijkl] += res[ijkl];
-#else
-                  eri_col[ind_ijkl] += res[ijkl];
-#endif
+                  eri_col_max[ind_ijkl] += res[ijkl];
                 }  // l
               }  // k
             }  // j
@@ -303,14 +310,7 @@ std::tuple<std::vector<double>, size_t> compute_cholesky_vectors(
         }  // s4
       }  // s3
     }  // omp parallel
-
-    // merge thread-local eri_col
-#ifdef _OPENMP
-    for (int t = 0; t < nthreads; ++t) {
-      blas::axpy(eri_col.size(), 1.0, eri_col_threads[t].data(), 1,
-                 eri_col.data(), 1);
-    }
-#endif
+    // No merge step — threads wrote directly to shared buffer.
 
     // precompute lookup
     const size_t bf1_max_st = shell2bf[s1_max];
@@ -339,7 +339,7 @@ std::tuple<std::vector<double>, size_t> compute_cholesky_vectors(
       // L_rows^T is current_col x n_cols
       blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::Trans,
                  num_aos2, n_cols, current_col, -1.0, L_data.data(), num_aos2,
-                 L_rows.data(), n_cols, 1.0, eri_col.data(), num_aos2);
+                 L_rows.data(), n_cols, 1.0, eri_col_max.data(), num_aos2);
     }
 
     // form new cholesky vector for each index in shell pair avoid redundant
@@ -359,7 +359,7 @@ std::tuple<std::vector<double>, size_t> compute_cholesky_vectors(
         double Q_max = std::sqrt(1.0 / D_val);
         std::vector<double> L_col_vec(num_aos2);
         // Copy column from eri_col
-        blas::copy(num_aos2, eri_col.data() + local_index * num_aos2, 1,
+        blas::copy(num_aos2, eri_col_max.data() + local_index * num_aos2, 1,
                    L_col_vec.data(), 1);
         // Scale by Q_max
         blas::scal(num_aos2, Q_max, L_col_vec.data(), 1);
@@ -376,7 +376,7 @@ std::tuple<std::vector<double>, size_t> compute_cholesky_vectors(
         for (size_t col = local_index + 1; col < n1_max * n2_max; ++col) {
           const size_t global_col_idx = shell_pairs_to_lookup[col];
           const double scale_factor = -L_col[global_col_idx];
-          double* eri_col_ptr = eri_col.data() + col * num_aos2;
+          double* eri_col_ptr = eri_col_max.data() + col * num_aos2;
           blas::axpy(num_aos2, scale_factor, L_col, 1, eri_col_ptr, 1);
         }
 
@@ -414,7 +414,7 @@ std::tuple<std::vector<double>, size_t> compute_cholesky_vectors(
     }
   }
 
-  QDK_LOGGER().debug("Cholesky rank: {}", current_col);
+  QDK_LOGGER().info("Cholesky rank: {}", current_col);
 
   if (current_col == max_rank) {
     QDK_LOGGER().warn(
@@ -730,16 +730,6 @@ std::shared_ptr<data::Hamiltonian> CholeskyHamiltonianConstructor::_run_impl(
   // Compute integrals (same size for alpha and beta)
   const size_t nactive = nactive_alpha;
 
-  // Declare MOERI vectors
-  Eigen::VectorXd moeri_aaaa;
-  Eigen::VectorXd moeri_aabb;
-  Eigen::VectorXd moeri_bbbb;
-
-  const size_t moeri_size = nactive * nactive * nactive * nactive;
-
-  // Store Cholesky vectors for later use in inactive space computation
-  // Eigen::MatrixXd L_ao;
-
   // Use Cholesky Decomposition
   double cholesky_tol = _settings->get<double>("cholesky_tolerance");
   double eri_tol = _settings->get<double>("eri_threshold");
@@ -752,36 +742,18 @@ std::shared_ptr<data::Hamiltonian> CholeskyHamiltonianConstructor::_run_impl(
   Eigen::Map<const Eigen::MatrixXd> L_ao(
       output.data(), num_atomic_orbitals * num_atomic_orbitals,
       num_cholesky_vectors);
-  // L_ao = L_ao_map;
-  /**
-   * @brief Reconstruct MO ERIs from Cholesky vectors
-   * @param L_left Left Cholesky vectors in MO basis
-   * @param L_right Right Cholesky vectors in MO basis
-   * @param output Output vector to store reconstructed ERIs
-   */
-  auto reconstruct_eris = [moeri_size](const Eigen::MatrixXd& L_left,
-                                       const Eigen::MatrixXd& L_right,
-                                       Eigen::VectorXd& output) {
-    output.resize(moeri_size);
-    size_t n = L_left.rows();  // n_mo * n_mo
-    Eigen::Map<Eigen::MatrixXd> output_matrix(output.data(), n, n);
-    output_matrix.noalias() = L_left * L_right.transpose();
-  };
+
+  Eigen::MatrixXd L_mo;
+  Eigen::MatrixXd L_mo_alpha;
+  Eigen::MatrixXd L_mo_beta;
 
   if (is_restricted_calc) {
     // Transform to MO
-    Eigen::MatrixXd L_mo = detail::transform_cholesky_to_mo(L_ao, Ca_active);
-    reconstruct_eris(L_mo, L_mo, moeri_aaaa);
+    L_mo = detail::transform_cholesky_to_mo(L_ao, Ca_active);
   } else {
     // Transform to MO (Alpha and Beta)
-    Eigen::MatrixXd L_mo_alpha =
-        detail::transform_cholesky_to_mo(L_ao, Ca_active);
-    Eigen::MatrixXd L_mo_beta =
-        detail::transform_cholesky_to_mo(L_ao, Cb_active);
-
-    reconstruct_eris(L_mo_alpha, L_mo_alpha, moeri_aaaa);  // (aaaa)
-    reconstruct_eris(L_mo_beta, L_mo_beta, moeri_bbbb);    // (bbbb)
-    reconstruct_eris(L_mo_beta, L_mo_alpha, moeri_aabb);   // (aabb)
+    L_mo_alpha = detail::transform_cholesky_to_mo(L_ao, Ca_active);
+    L_mo_beta = detail::transform_cholesky_to_mo(L_ao, Cb_active);
   }
 
   // Get inactive space indices for both alpha and beta
@@ -804,17 +776,17 @@ std::shared_ptr<data::Hamiltonian> CholeskyHamiltonianConstructor::_run_impl(
       Eigen::MatrixXd H_active(nactive, nactive);
       H_active = Ca_active.transpose() * H_full * Ca_active;
       Eigen::MatrixXd dummy_fock = Eigen::MatrixXd::Zero(0, 0);
-      if (!_settings->get<bool>("store_cholesky_vectors")) {
+      if (_settings->get<bool>("store_ao_cholesky_vectors")) {
         return std::make_shared<data::Hamiltonian>(
-            std::make_unique<data::CanonicalFourCenterHamiltonianContainer>(
-                H_active, moeri_aaaa, orbitals,
-                structure->calculate_nuclear_repulsion_energy(), dummy_fock));
+            std::make_unique<data::CholeskyHamiltonianContainer>(
+                H_active, L_mo, orbitals,
+                structure->calculate_nuclear_repulsion_energy(), dummy_fock,
+                L_ao));
       }
       return std::make_shared<data::Hamiltonian>(
           std::make_unique<data::CholeskyHamiltonianContainer>(
-              H_active, moeri_aaaa, orbitals,
-              structure->calculate_nuclear_repulsion_energy(), dummy_fock,
-              L_ao));
+              H_active, L_mo, orbitals,
+              structure->calculate_nuclear_repulsion_energy(), dummy_fock));
     } else {
       // Use unrestricted constructor
       Eigen::MatrixXd H_active_alpha(nactive, nactive);
@@ -823,19 +795,18 @@ std::shared_ptr<data::Hamiltonian> CholeskyHamiltonianConstructor::_run_impl(
       H_active_beta = Cb_active.transpose() * H_full * Cb_active;
       Eigen::MatrixXd dummy_fock_alpha = Eigen::MatrixXd::Zero(0, 0);
       Eigen::MatrixXd dummy_fock_beta = Eigen::MatrixXd::Zero(0, 0);
-      if (!_settings->get<bool>("store_cholesky_vectors")) {
+      if (_settings->get<bool>("store_ao_cholesky_vectors")) {
         return std::make_shared<data::Hamiltonian>(
-            std::make_unique<data::CanonicalFourCenterHamiltonianContainer>(
-                H_active_alpha, H_active_beta, moeri_aaaa, moeri_aabb,
-                moeri_bbbb, orbitals,
+            std::make_unique<data::CholeskyHamiltonianContainer>(
+                H_active_alpha, H_active_beta, L_mo_alpha, L_mo_beta, orbitals,
                 structure->calculate_nuclear_repulsion_energy(),
-                dummy_fock_alpha, dummy_fock_beta));
+                dummy_fock_alpha, dummy_fock_beta, L_ao));
       }
       return std::make_shared<data::Hamiltonian>(
           std::make_unique<data::CholeskyHamiltonianContainer>(
-              H_active_alpha, H_active_beta, moeri_aaaa, moeri_aabb, moeri_bbbb,
-              orbitals, structure->calculate_nuclear_repulsion_energy(),
-              dummy_fock_alpha, dummy_fock_beta, L_ao));
+              H_active_alpha, H_active_beta, L_mo_alpha, L_mo_beta, orbitals,
+              structure->calculate_nuclear_repulsion_energy(), dummy_fock_alpha,
+              dummy_fock_beta));
     }
   }
 
@@ -893,18 +864,18 @@ std::shared_ptr<data::Hamiltonian> CholeskyHamiltonianConstructor::_run_impl(
       }
     }
 
-    if (!_settings->get<bool>("store_cholesky_vectors")) {
+    if (_settings->get<bool>("store_ao_cholesky_vectors")) {
       return std::make_shared<data::Hamiltonian>(
-          std::make_unique<data::CanonicalFourCenterHamiltonianContainer>(
-              H_active, moeri_aaaa, orbitals,
+          std::make_unique<data::CholeskyHamiltonianContainer>(
+              H_active, L_mo, orbitals,
               E_inactive + structure->calculate_nuclear_repulsion_energy(),
-              F_inactive));
+              F_inactive, L_ao));
     }
     return std::make_shared<data::Hamiltonian>(
         std::make_unique<data::CholeskyHamiltonianContainer>(
-            H_active, moeri_aaaa, orbitals,
+            H_active, L_mo, orbitals,
             E_inactive + structure->calculate_nuclear_repulsion_energy(),
-            F_inactive, L_ao));
+            F_inactive));
   } else {
     // Unrestricted case
 
@@ -1005,20 +976,18 @@ std::shared_ptr<data::Hamiltonian> CholeskyHamiltonianConstructor::_run_impl(
       }
     }
 
-    if (!_settings->get<bool>("store_cholesky_vectors")) {
+    if (_settings->get<bool>("store_ao_cholesky_vectors")) {
       return std::make_shared<data::Hamiltonian>(
-          std::make_unique<data::CanonicalFourCenterHamiltonianContainer>(
-              H_active_alpha, H_active_beta, moeri_aaaa, moeri_aabb, moeri_bbbb,
-              orbitals,
+          std::make_unique<data::CholeskyHamiltonianContainer>(
+              H_active_alpha, H_active_beta, L_mo_alpha, L_mo_beta, orbitals,
               E_inactive + structure->calculate_nuclear_repulsion_energy(),
-              F_inactive_alpha, F_inactive_beta));
+              F_inactive_alpha, F_inactive_beta, L_ao));
     }
     return std::make_shared<data::Hamiltonian>(
         std::make_unique<data::CholeskyHamiltonianContainer>(
-            H_active_alpha, H_active_beta, moeri_aaaa, moeri_aabb, moeri_bbbb,
-            orbitals,
+            H_active_alpha, H_active_beta, L_mo_alpha, L_mo_beta, orbitals,
             E_inactive + structure->calculate_nuclear_repulsion_energy(),
-            F_inactive_alpha, F_inactive_beta, L_ao));
+            F_inactive_alpha, F_inactive_beta));
   }
 }
 }  // namespace qdk::chemistry::algorithms::microsoft
diff --git a/cpp/src/qdk/chemistry/algorithms/microsoft/cholesky_hamiltonian.hpp b/cpp/src/qdk/chemistry/algorithms/microsoft/cholesky_hamiltonian.hpp
index 0475a7fae..3a981ee2a 100644
--- a/cpp/src/qdk/chemistry/algorithms/microsoft/cholesky_hamiltonian.hpp
+++ b/cpp/src/qdk/chemistry/algorithms/microsoft/cholesky_hamiltonian.hpp
@@ -59,7 +59,7 @@ class CholeskyHamiltonianSettings : public qdk::chemistry::data::Settings {
                 "ERI screening threshold for skipping negligible shell "
                 "quartets during Cholesky decomposition",
                 qdk::chemistry::data::BoundConstraint<double>{0.0, 1.0});
-    set_default("store_cholesky_vectors", false);
+    set_default("store_ao_cholesky_vectors", false);
   }
   ~CholeskyHamiltonianSettings() override = default;
 };
diff --git a/cpp/src/qdk/chemistry/algorithms/microsoft/macis_asci.cpp b/cpp/src/qdk/chemistry/algorithms/microsoft/macis_asci.cpp
index 232ba6b01..49253c496 100644
--- a/cpp/src/qdk/chemistry/algorithms/microsoft/macis_asci.cpp
+++ b/cpp/src/qdk/chemistry/algorithms/microsoft/macis_asci.cpp
@@ -7,9 +7,9 @@
 #include <macis/asci/determinant_search.hpp>
 #include <macis/asci/grow.hpp>
 #include <macis/asci/refine.hpp>
-#include <macis/hamiltonian_generator/sorted_double_loop.hpp>
-#include <macis/hamiltonian_generator/residue_arrays.hpp>
 #include <macis/hamiltonian_generator/dynamic_bit_masking.hpp>
+#include <macis/hamiltonian_generator/residue_arrays.hpp>
+#include <macis/hamiltonian_generator/sorted_double_loop.hpp>
 #include <macis/mcscf/cas.hpp>
 #include <macis/util/mpi.hpp>
 #include <qdk/chemistry/data/structure.hpp>
@@ -66,7 +66,7 @@ struct asci_helper {
     macis::MCSCFSettings mcscf_settings = get_mcscf_settings_(settings_);
     macis::ASCISettings asci_settings = get_asci_settings_(settings_);
 
-    QDK_LOGGER().debug(
+    QDK_LOGGER().info(
         "MACIS ASCI helper prepared: norb={}, nalpha={}, nbeta={}, "
         "ntdets_max={}, "
         "ntdets_min={}, max_refine_iter={}, grow_factor={}, rv_prune_tol={}",
@@ -74,16 +74,15 @@ struct asci_helper {
         asci_settings.ntdets_min, asci_settings.max_refine_iter,
         asci_settings.grow_factor, asci_settings.rv_prune_tol);
 
-    macis::matrix_span<double> T_span(
-        const_cast<double*>(T_a.data()),
-        num_molecular_orbitals, num_molecular_orbitals);
+    macis::matrix_span<double> T_span(const_cast<double*>(T_a.data()),
+                                      num_molecular_orbitals,
+                                      num_molecular_orbitals);
     macis::rank4_span<double> V_span(
-        const_cast<double*>(V_aaaa.data()),
-        num_molecular_orbitals, num_molecular_orbitals,
-        num_molecular_orbitals, num_molecular_orbitals);
+        const_cast<double*>(V_aaaa.data()), num_molecular_orbitals,
+        num_molecular_orbitals, num_molecular_orbitals, num_molecular_orbitals);
 
     // Select Hamiltonian generator based on h_build_algo
-    QDK_LOGGER().debug("Constructing MACIS Hamiltonian generator.");
+    QDK_LOGGER().info("Constructing MACIS Hamiltonian generator.");
     std::unique_ptr<macis::HamiltonianGenerator<wfn_type>> ham_gen_ptr;
     const auto& algo = asci_settings.h_build_algo;
     if (algo == "residue_arrays") {
@@ -109,7 +108,7 @@ struct asci_helper {
       ham_gen_ptr = std::make_unique<sdl_gen_t>(T_span, V_span);
     }
     auto& ham_gen = *ham_gen_ptr;
-    QDK_LOGGER().debug("MACIS Hamiltonian generator constructed.");
+    QDK_LOGGER().info("MACIS Hamiltonian generator constructed.");
 
     std::vector<double> C_casci;
     std::vector<wfn_type> dets;
@@ -121,7 +120,7 @@ struct asci_helper {
         qdk::chemistry::utils::microsoft::binomial_coefficient(
             num_molecular_orbitals, nbeta);
 
-    QDK_LOGGER().debug("MACIS ASCI FCI dimension estimate: {}", fci_dimension);
+    QDK_LOGGER().info("MACIS ASCI FCI dimension estimate: {}", fci_dimension);
 
     if (asci_settings.ntdets_max > fci_dimension) {
       QDK_LOGGER().info(
@@ -143,23 +142,23 @@ struct asci_helper {
       C_casci = {1.0};
 
       // Growth phase
-      QDK_LOGGER().debug("Starting MACIS ASCI growth phase.");
+      QDK_LOGGER().info("Starting MACIS ASCI growth phase.");
       std::tie(E_casci, dets, C_casci) = macis::asci_grow<N, int64_t>(
           asci_settings, mcscf_settings, E_casci, std::move(dets),
           std::move(C_casci), ham_gen,
           num_molecular_orbitals MACIS_MPI_CODE(, MPI_COMM_WORLD));
-      QDK_LOGGER().debug(
+      QDK_LOGGER().info(
           "Completed MACIS ASCI growth phase with {} determinants.",
           dets.size());
 
       // Refinement phase
       if (asci_settings.max_refine_iter) {
-        QDK_LOGGER().debug("Starting MACIS ASCI refinement phase.");
+        QDK_LOGGER().info("Starting MACIS ASCI refinement phase.");
         std::tie(E_casci, dets, C_casci) = macis::asci_refine<N, int64_t>(
             asci_settings, mcscf_settings, E_casci, std::move(dets),
             std::move(C_casci), ham_gen,
             num_molecular_orbitals MACIS_MPI_CODE(, MPI_COMM_WORLD));
-        QDK_LOGGER().debug(
+        QDK_LOGGER().info(
             "Completed MACIS ASCI refinement phase with {} determinants.",
             dets.size());
       }
diff --git a/cpp/src/qdk/chemistry/algorithms/microsoft/macis_asci.hpp b/cpp/src/qdk/chemistry/algorithms/microsoft/macis_asci.hpp
index 08bff27d9..8d344c1b0 100644
--- a/cpp/src/qdk/chemistry/algorithms/microsoft/macis_asci.hpp
+++ b/cpp/src/qdk/chemistry/algorithms/microsoft/macis_asci.hpp
@@ -120,12 +120,10 @@ class MacisAsciSettings : public MultiConfigurationSettings {
                             std::numeric_limits<double>::epsilon(), 1.0});
 
     // Warm-start and tolerance controls
-    set_default<bool>("warm_start_davidson",
-                      macis_defaults.warm_start_davidson,
+    set_default<bool>("warm_start_davidson", macis_defaults.warm_start_davidson,
                       "Warm-start Davidson from previous eigenvector");
     set_default<double>(
-        "min_warm_start_overlap",
-        macis_defaults.min_warm_start_overlap,
+        "min_warm_start_overlap", macis_defaults.min_warm_start_overlap,
         "Minimum projected vector norm for warm-start Davidson. "
         "Measures how much of the previous eigenvector's weight is "
         "retained in the new determinant set (0=never, 1=always reject)",
@@ -137,21 +135,21 @@ class MacisAsciSettings : public MultiConfigurationSettings {
         "triggered only when overlap drops below this threshold",
         data::BoundConstraint<double>{0.0, 1.0});
     set_default<double>(
-        "grow_ci_residual_tolerance",
-        macis_defaults.grow_ci_residual_tolerance,
+        "grow_ci_residual_tolerance", macis_defaults.grow_ci_residual_tolerance,
         "CI residual tolerance during grow phase (0 = use refine tolerance)");
-    // I still don't understand this option very well, can you document is better and explain it to me in the chat?
+    // I still don't understand this option very well, can you document is
+    // better and explain it to me in the chat?
     set_default<double>(
         "taper_grow_factor", macis_defaults.taper_grow_factor,
         "Growth factor for final expansion near ntdets_max (0 = disabled)");
 
     // Hamiltonian build algorithm selection
-    // This is a bad variable name and should use the "constraint" type to enumerate the allowed options
-    set_default<std::string>(
-        "h_build_algo", macis_defaults.h_build_algo,
-        "Algorithm for diagonal Hamiltonian construction: "
-        "'' or 'sorted_double_loop' (default), "
-        "'residue_arrays', 'dynamic_bit_masking'");
+    // This is a bad variable name and should use the "constraint" type to
+    // enumerate the allowed options
+    set_default<std::string>("h_build_algo", macis_defaults.h_build_algo,
+                             "Algorithm for diagonal Hamiltonian construction: "
+                             "'' or 'sorted_double_loop' (default), "
+                             "'residue_arrays', 'dynamic_bit_masking'");
   }
 
   /**
diff --git a/cpp/src/qdk/chemistry/algorithms/microsoft/macis_base.hpp b/cpp/src/qdk/chemistry/algorithms/microsoft/macis_base.hpp
index 87f9463e2..72913f3a0 100644
--- a/cpp/src/qdk/chemistry/algorithms/microsoft/macis_base.hpp
+++ b/cpp/src/qdk/chemistry/algorithms/microsoft/macis_base.hpp
@@ -123,9 +123,10 @@ inline data::Wavefunction build_wavefunction(
 
   // General Wavefunction construction
   // All the new logging in this file should be debug, not info
-  QDK_LOGGER().info("Building wavefunction: converting {} determinants to "
-                    "configurations...",
-                    dets.size());
+  QDK_LOGGER().info(
+      "Building wavefunction: converting {} determinants to "
+      "configurations...",
+      dets.size());
   Eigen::VectorXd C_vector(coeffs.size());
   std::copy(coeffs.begin(), coeffs.end(), C_vector.data());
   std::vector<data::Configuration> dets_configs;
@@ -173,9 +174,8 @@ inline data::Wavefunction build_wavefunction(
             eval_two_rdm ? active_two_aabb.data() : nullptr, nmo, nmo, nmo,
             nmo));
     auto rdm_en = std::chrono::high_resolution_clock::now();
-    QDK_LOGGER().info(
-        "RDM computation complete ({:.1f}s).",
-        std::chrono::duration<double>(rdm_en - rdm_st).count());
+    QDK_LOGGER().info("RDM computation complete ({:.1f}s).",
+                      std::chrono::duration<double>(rdm_en - rdm_st).count());
 
     if (eval_one_rdm) {
       one_aa = Eigen::Map<Eigen::MatrixXd>(active_one_aa.data(), nmo, nmo);
@@ -198,9 +198,10 @@ inline data::Wavefunction build_wavefunction(
   // information
   data::OrbitalEntropies computed_entropies;
   if (eval_s1 || eval_s2 || eval_mutual_info) {
-    QDK_LOGGER().info("Computing orbital entropies ({} determinants, {} "
-                      "orbitals)...",
-                      dets.size(), nmo);
+    QDK_LOGGER().info(
+        "Computing orbital entropies ({} determinants, {} "
+        "orbitals)...",
+        dets.size(), nmo);
     auto ent_st = std::chrono::high_resolution_clock::now();
     std::vector<double> s1_vec(nmo, 0.0);
     std::vector<double> s2_data(eval_s2 ? nmo * nmo : 0, 0.0);
@@ -214,9 +215,8 @@ inline data::Wavefunction build_wavefunction(
         macis::matrix_span<double>(eval_mutual_info ? mi_data.data() : nullptr,
                                    nmo, nmo));
     auto ent_en = std::chrono::high_resolution_clock::now();
-    QDK_LOGGER().info(
-        "Entropy computation complete ({:.1f}s).",
-        std::chrono::duration<double>(ent_en - ent_st).count());
+    QDK_LOGGER().info("Entropy computation complete ({:.1f}s).",
+                      std::chrono::duration<double>(ent_en - ent_st).count());
 
     if (eval_s1) {
       computed_entropies.single_orbital =
diff --git a/cpp/src/qdk/chemistry/algorithms/microsoft/scf.cpp b/cpp/src/qdk/chemistry/algorithms/microsoft/scf.cpp
index 89cd150e2..438098060 100644
--- a/cpp/src/qdk/chemistry/algorithms/microsoft/scf.cpp
+++ b/cpp/src/qdk/chemistry/algorithms/microsoft/scf.cpp
@@ -308,8 +308,10 @@ std::pair<double, std::shared_ptr<data::Wavefunction>> ScfSolver::_run_impl(
 
     const bool is_unrestricted =
         (ms_scf_config->scf_orbital_type == SCFOrbitalType::Unrestricted);
-    if (is_unrestricted) {
-      if (initial_guess->is_restricted())
+    const bool is_restricted_open_shell = (ms_scf_config->scf_orbital_type ==
+                                           SCFOrbitalType::RestrictedOpenShell);
+    if (is_unrestricted || is_restricted_open_shell) {
+      if (is_unrestricted && initial_guess->is_restricted())
         QDK_LOGGER().warn(
             "Unrestricted calculation requested but restricted "
             "initial guess provided.");
diff --git a/cpp/src/qdk/chemistry/data/basis_set.cpp b/cpp/src/qdk/chemistry/data/basis_set.cpp
index 9ceb43bc2..44b72fd13 100644
--- a/cpp/src/qdk/chemistry/data/basis_set.cpp
+++ b/cpp/src/qdk/chemistry/data/basis_set.cpp
@@ -62,6 +62,13 @@ std::filesystem::path unpack_basis_set_archive(std::string& basis_set_name) {
     std::filesystem::create_directories(temp_dir);
   }
 
+  // Check if basis set JSON already exists (avoids tar race in parallel runs)
+  std::filesystem::path expected_json =
+      temp_dir / "basis" / (normalized_name + ".json");
+  if (std::filesystem::exists(expected_json)) {
+    return temp_dir;
+  }
+
   // unpack the tar.gz file
   auto cmd = "tar xzf \"" + file_path.generic_string() + "\" --directory \"" +
              temp_dir.generic_string() + "\"";
diff --git a/cpp/src/qdk/chemistry/data/hamiltonian.cpp b/cpp/src/qdk/chemistry/data/hamiltonian.cpp
index 54a186701..48ffb1267 100644
--- a/cpp/src/qdk/chemistry/data/hamiltonian.cpp
+++ b/cpp/src/qdk/chemistry/data/hamiltonian.cpp
@@ -4,7 +4,9 @@
 
 #include <algorithm>
 #include <fstream>
+#include <iomanip>
 #include <iostream>
+#include <limits>
 #include <macis/util/fcidump.hpp>
 #include <qdk/chemistry/data/hamiltonian.hpp>
 #include <qdk/chemistry/data/hamiltonian_containers/canonical_four_center.hpp>
@@ -295,6 +297,115 @@ HamiltonianContainer::make_restricted_inactive_fock_matrix(
       shared_matrix);  // Both alpha and beta point to same data
 }
 
+void HamiltonianContainer::to_fcidump_file(const std::string& filename,
+                                           size_t nalpha, size_t nbeta) const {
+  QDK_LOG_TRACE_ENTERING();
+
+  if (is_unrestricted()) {
+    throw std::runtime_error(
+        "FCIDUMP format is not supported for unrestricted Hamiltonians.");
+  }
+
+  std::ofstream file(filename);
+  if (!file.is_open()) {
+    throw std::runtime_error("Cannot open file for writing: " + filename);
+  }
+
+  size_t num_molecular_orbitals;
+  if (has_orbitals()) {
+    if (_orbitals->has_active_space()) {
+      auto active_indices = _orbitals->get_active_space_indices();
+      size_t n_active_alpha = active_indices.first.size();
+      size_t n_active_beta = active_indices.second.size();
+
+      if (n_active_alpha != n_active_beta) {
+        throw std::invalid_argument(
+            "For restricted Hamiltonian, alpha and beta active spaces must "
+            "have same size");
+      }
+      num_molecular_orbitals = n_active_alpha;
+    } else {
+      num_molecular_orbitals = _orbitals->get_num_molecular_orbitals();
+    }
+  } else {
+    throw std::runtime_error("Orbitals are not set");
+  }
+
+  const size_t nelec = nalpha + nbeta;
+  const size_t num_molecular_orbitals2 =
+      num_molecular_orbitals * num_molecular_orbitals;
+  const size_t num_molecular_orbitals3 =
+      num_molecular_orbitals2 * num_molecular_orbitals;
+  const double print_thresh = std::numeric_limits<double>::epsilon();
+
+  // C1 symmetry
+  std::string orb_string;
+  for (size_t i = 0; i < num_molecular_orbitals - 1; ++i) {
+    orb_string += "1,";
+  }
+  orb_string += "1";
+
+  // Write header
+  file << "&FCI ";
+  file << "NORB=" << num_molecular_orbitals << ", ";
+  file << "NELEC=" << nelec << ", ";
+  file << "MS2=" << (nalpha - nbeta) << ",\n";
+  file << "ORBSYM=" << orb_string << ",\n";
+  file << "ISYM=1,\n";
+  file << "&END\n";
+
+  auto formatted_line = [&](size_t i, size_t j, size_t k, size_t l,
+                            double val) {
+    if (std::abs(val) < print_thresh) return;
+
+    file << std::setw(28) << std::scientific << std::setprecision(16)
+         << std::right << val << " ";
+    file << std::setw(4) << i << " ";
+    file << std::setw(4) << j << " ";
+    file << std::setw(4) << k << " ";
+    file << std::setw(4) << l;
+  };
+
+  // Get the two-electron integrals via the virtual accessor
+  auto [eri_aaaa, eri_aabb, eri_bbbb] = get_two_body_integrals();
+
+  auto write_eri = [&](size_t i, size_t j, size_t k, size_t l) {
+    auto eri =
+        eri_aaaa(i * num_molecular_orbitals3 + j * num_molecular_orbitals2 +
+                 k * num_molecular_orbitals + l);
+
+    formatted_line(i + 1, j + 1, k + 1, l + 1, eri);
+    file << "\n";
+  };
+
+  auto write_1body = [&](size_t i, size_t j) {
+    auto hel = (*_one_body_integrals.first)(i, j);
+
+    formatted_line(i + 1, j + 1, 0, 0, hel);
+    file << "\n";
+  };
+
+  // Write permutationally unique MO ERIs
+  for (size_t i = 0, ij = 0; i < num_molecular_orbitals; ++i)
+    for (size_t j = i; j < num_molecular_orbitals; ++j, ij++) {
+      for (size_t k = 0, kl = 0; k < num_molecular_orbitals; ++k)
+        for (size_t l = k; l < num_molecular_orbitals; ++l, kl++) {
+          if (ij <= kl) {
+            write_eri(i, j, k, l);
+          }
+        }
+    }
+
+  // Write permutationally unique MO 1-body integrals
+  for (size_t i = 0; i < num_molecular_orbitals; ++i)
+    for (size_t j = 0; j <= i; ++j) {
+      write_1body(i, j);
+    }
+
+  // Write core energy
+  formatted_line(0, 0, 0, 0, _core_energy);
+}
+
 std::string Hamiltonian::get_summary() const {
   QDK_LOG_TRACE_ENTERING();
   std::string summary = "Hamiltonian Summary:\n";
diff --git a/cpp/src/qdk/chemistry/data/hamiltonian_containers/canonical_four_center.cpp b/cpp/src/qdk/chemistry/data/hamiltonian_containers/canonical_four_center.cpp
index 38064b2ed..7c3798c7b 100644
--- a/cpp/src/qdk/chemistry/data/hamiltonian_containers/canonical_four_center.cpp
+++ b/cpp/src/qdk/chemistry/data/hamiltonian_containers/canonical_four_center.cpp
@@ -226,12 +226,6 @@ CanonicalFourCenterHamiltonianContainer::make_restricted_two_body_integrals(
       shared_integrals);  // aaaa, aabb, bbbb all point to same data
 }
 
-void CanonicalFourCenterHamiltonianContainer::to_fcidump_file(
-    const std::string& filename, size_t nalpha, size_t nbeta) const {
-  QDK_LOG_TRACE_ENTERING();
-  _to_fcidump_file(filename, nalpha, nbeta);
-}
-
 nlohmann::json CanonicalFourCenterHamiltonianContainer::to_json() const {
   QDK_LOG_TRACE_ENTERING();
   nlohmann::json j;
@@ -728,113 +722,4 @@ CanonicalFourCenterHamiltonianContainer::from_hdf5(H5::Group& group) {
   }
 }
 
-void CanonicalFourCenterHamiltonianContainer::_to_fcidump_file(
-    const std::string& filename, size_t nalpha, size_t nbeta) const {
-  QDK_LOG_TRACE_ENTERING();
-  // Check if this is an unrestricted Hamiltonian and throw error
-  if (is_unrestricted()) {
-    throw std::runtime_error(
-        "FCIDUMP format is not supported for unrestricted Hamiltonians.");
-  }
-
-  std::ofstream file(filename);
-  if (!file.is_open()) {
-    throw std::runtime_error("Cannot open file for writing: " + filename);
-  }
-
-  size_t num_molecular_orbitals;
-  if (has_orbitals()) {
-    if (_orbitals->has_active_space()) {
-      auto active_indices = _orbitals->get_active_space_indices();
-      size_t n_active_alpha = active_indices.first.size();
-      size_t n_active_beta = active_indices.second.size();
-
-      // For restricted case, alpha and beta should be the same
-      if (n_active_alpha != n_active_beta) {
-        throw std::invalid_argument(
-            "For restricted Hamiltonian, alpha and beta active spaces must "
-            "have "
-            "same size");
-      }
-      num_molecular_orbitals =
-          n_active_alpha;  // Can use either alpha or beta since they're equal
-    } else {
-      num_molecular_orbitals = _orbitals->get_num_molecular_orbitals();
-    }
-  } else {
-    throw std::runtime_error("Orbitals are not set");
-  }
-
-  const size_t nelec = nalpha + nbeta;
-  const size_t num_molecular_orbitals2 =
-      num_molecular_orbitals * num_molecular_orbitals;
-  const size_t num_molecular_orbitals3 =
-      num_molecular_orbitals2 * num_molecular_orbitals;
-  const double print_thresh = std::numeric_limits<double>::epsilon();
-
-  // We don't use symmetry, so populate with C1 data
-  std::string orb_string;
-  for (auto i = 0ul; i < num_molecular_orbitals - 1; ++i) {
-    orb_string += "1,";
-  }
-  orb_string += "1";
-
-  // Write the header of the FCIDUMP file
-  file << "&FCI ";
-  file << "NORB=" << num_molecular_orbitals << ", ";
-  file << "NELEC=" << nelec << ", ";
-  file << "MS2=" << (nalpha - nbeta) << ",\n";
-  file << "ORBSYM=" << orb_string << ",\n";
-  file << "ISYM=1,\n";
-  file << "&END\n";
-
-  auto formatted_line = [&](size_t i, size_t j, size_t k, size_t l,
-                            double val) {
-    if (std::abs(val) < print_thresh) return;
-
-    file << std::setw(28) << std::scientific << std::setprecision(16)
-         << std::right << val << " ";
-    file << std::setw(4) << i << " ";
-    file << std::setw(4) << j << " ";
-    file << std::setw(4) << k << " ";
-    file << std::setw(4) << l;
-  };
-
-  auto write_eri = [&](size_t i, size_t j, size_t k, size_t l) {
-    auto eri = (*std::get<0>(_two_body_integrals))(
-        i * num_molecular_orbitals3 + j * num_molecular_orbitals2 +
-        k * num_molecular_orbitals + l);
-
-    formatted_line(i + 1, j + 1, k + 1, l + 1, eri);
-    file << "\n";
-  };
-
-  auto write_1body = [&](size_t i, size_t j) {
-    auto hel = (*_one_body_integrals.first)(i, j);
-
-    formatted_line(i + 1, j + 1, 0, 0, hel);
-    file << "\n";
-  };
-
-  // Write permutationally unique MO ERIs
-  for (size_t i = 0, ij = 0; i < num_molecular_orbitals; ++i)
-    for (size_t j = i; j < num_molecular_orbitals; ++j, ij++) {
-      for (size_t k = 0, kl = 0; k < num_molecular_orbitals; ++k)
-        for (size_t l = k; l < num_molecular_orbitals; ++l, kl++) {
-          if (ij <= kl) {
-            write_eri(i, j, k, l);
-          }
-        }  // kl loop
-    }  // ij loop
-
-  // Write permutationally unique MO 1-body integrals
-  for (size_t i = 0; i < num_molecular_orbitals; ++i)
-    for (size_t j = 0; j <= i; ++j) {
-      write_1body(i, j);
-    }
-
-  // Write core energy
-  formatted_line(0, 0, 0, 0, _core_energy);
-}
-
 }  // namespace qdk::chemistry::data
diff --git a/cpp/src/qdk/chemistry/data/hamiltonian_containers/cholesky.cpp b/cpp/src/qdk/chemistry/data/hamiltonian_containers/cholesky.cpp
index 210a91793..0ef9cae26 100644
--- a/cpp/src/qdk/chemistry/data/hamiltonian_containers/cholesky.cpp
+++ b/cpp/src/qdk/chemistry/data/hamiltonian_containers/cholesky.cpp
@@ -3,9 +3,14 @@
 // license information.
 
 #include <algorithm>
+#include <blas.hh>
+#include <cstddef>
 #include <fstream>
+#include <iomanip>
 #include <iostream>
+#include <limits>
 #include <macis/util/fcidump.hpp>
+#include <memory>
 #include <qdk/chemistry/data/hamiltonian_containers/cholesky.hpp>
 #include <qdk/chemistry/data/orbitals.hpp>
 #include <qdk/chemistry/utils/logger.hpp>
@@ -20,34 +25,53 @@ namespace qdk::chemistry::data {
 
 CholeskyHamiltonianContainer::CholeskyHamiltonianContainer(
     const Eigen::MatrixXd& one_body_integrals,
-    const Eigen::VectorXd& two_body_integrals,
+    const Eigen::MatrixXd& three_center_integrals,
     std::shared_ptr<Orbitals> orbitals, double core_energy,
-    const Eigen::MatrixXd& inactive_fock_matrix, const Eigen::MatrixXd& L_ao,
-    HamiltonianType type)
-    : CanonicalFourCenterHamiltonianContainer(
-          one_body_integrals, two_body_integrals, orbitals, core_energy,
-          inactive_fock_matrix, type),
-      _ao_cholesky_vectors(std::make_shared<Eigen::MatrixXd>(L_ao)) {
+    const Eigen::MatrixXd& inactive_fock_matrix,
+    std::optional<Eigen::MatrixXd> ao_cholesky_vectors, HamiltonianType type)
+    : HamiltonianContainer(one_body_integrals, orbitals, core_energy,
+                           inactive_fock_matrix, type),
+      _three_center_integrals(
+          make_restricted_three_center_integrals(three_center_integrals)),
+      _ao_cholesky_vectors(std::move(ao_cholesky_vectors)) {
   QDK_LOG_TRACE_ENTERING();
+
+  validate_integral_dimensions();
+  validate_restrictedness_consistency();
+  validate_active_space_dimensions();
+
+  if (!is_valid()) {
+    throw std::invalid_argument(
+        "Tried to generate invalid Hamiltonian object.");
+  }
 }
 
 CholeskyHamiltonianContainer::CholeskyHamiltonianContainer(
     const Eigen::MatrixXd& one_body_integrals_alpha,
     const Eigen::MatrixXd& one_body_integrals_beta,
-    const Eigen::VectorXd& two_body_integrals_aaaa,
-    const Eigen::VectorXd& two_body_integrals_aabb,
-    const Eigen::VectorXd& two_body_integrals_bbbb,
+    const Eigen::MatrixXd& three_center_integrals_aa,
+    const Eigen::MatrixXd& three_center_integrals_bb,
     std::shared_ptr<Orbitals> orbitals, double core_energy,
     const Eigen::MatrixXd& inactive_fock_matrix_alpha,
     const Eigen::MatrixXd& inactive_fock_matrix_beta,
-    const Eigen::MatrixXd& L_ao, HamiltonianType type)
-    : CanonicalFourCenterHamiltonianContainer(
-          one_body_integrals_alpha, one_body_integrals_beta,
-          two_body_integrals_aaaa, two_body_integrals_aabb,
-          two_body_integrals_bbbb, orbitals, core_energy,
-          inactive_fock_matrix_alpha, inactive_fock_matrix_beta, type),
-      _ao_cholesky_vectors(std::make_shared<Eigen::MatrixXd>(L_ao)) {
+    std::optional<Eigen::MatrixXd> ao_cholesky_vectors, HamiltonianType type)
+    : HamiltonianContainer(one_body_integrals_alpha, one_body_integrals_beta,
+                           orbitals, core_energy, inactive_fock_matrix_alpha,
+                           inactive_fock_matrix_beta, type),
+      _three_center_integrals(
+          std::make_unique<Eigen::MatrixXd>(three_center_integrals_aa),
+          std::make_unique<Eigen::MatrixXd>(three_center_integrals_bb)),
+      _ao_cholesky_vectors(std::move(ao_cholesky_vectors)) {
   QDK_LOG_TRACE_ENTERING();
+
+  validate_integral_dimensions();
+  validate_restrictedness_consistency();
+  validate_active_space_dimensions();
+
+  if (!is_valid()) {
+    throw std::invalid_argument(
+        "Tried to generate invalid Hamiltonian object.");
+  }
 }
 
 std::unique_ptr<HamiltonianContainer> CholeskyHamiltonianContainer::clone()
@@ -55,16 +79,15 @@ std::unique_ptr<HamiltonianContainer> CholeskyHamiltonianContainer::clone()
   QDK_LOG_TRACE_ENTERING();
   if (is_restricted()) {
     return std::make_unique<CholeskyHamiltonianContainer>(
-        *_one_body_integrals.first, *std::get<0>(_two_body_integrals),
-        _orbitals, _core_energy, *_inactive_fock_matrix.first,
-        *_ao_cholesky_vectors, _type);
+        *_one_body_integrals.first, *_three_center_integrals.first, _orbitals,
+        _core_energy, *_inactive_fock_matrix.first, _ao_cholesky_vectors,
+        _type);
   }
   return std::make_unique<CholeskyHamiltonianContainer>(
       *_one_body_integrals.first, *_one_body_integrals.second,
-      *std::get<0>(_two_body_integrals), *std::get<1>(_two_body_integrals),
-      *std::get<2>(_two_body_integrals), _orbitals, _core_energy,
-      *_inactive_fock_matrix.first, *_inactive_fock_matrix.second,
-      *_ao_cholesky_vectors, _type);
+      *_three_center_integrals.first, *_three_center_integrals.second,
+      _orbitals, _core_energy, *_inactive_fock_matrix.first,
+      *_inactive_fock_matrix.second, _ao_cholesky_vectors, _type);
 }
 
 std::string CholeskyHamiltonianContainer::get_container_type() const {
@@ -72,32 +95,339 @@ std::string CholeskyHamiltonianContainer::get_container_type() const {
   return "cholesky";
 }
 
+std::tuple<const Eigen::VectorXd&, const Eigen::VectorXd&,
+           const Eigen::VectorXd&>
+CholeskyHamiltonianContainer::get_two_body_integrals() const {
+  QDK_LOG_TRACE_ENTERING();
+  if (!has_two_body_integrals()) {
+    throw std::runtime_error("Three-center integrals are not set");
+  }
+
+  // Lazily build and cache the four-center integrals on first access
+  if (!std::get<0>(_cached_four_center_integrals)) {
+    _build_four_center_cache();
+  }
+
+  return std::make_tuple(
+      std::cref(*std::get<0>(_cached_four_center_integrals)),
+      std::cref(*std::get<1>(_cached_four_center_integrals)),
+      std::cref(*std::get<2>(_cached_four_center_integrals)));
+}
+
+void CholeskyHamiltonianContainer::_build_four_center_cache() const {
+  QDK_LOG_TRACE_ENTERING();
+
+  size_t norb = _orbitals->get_active_space_indices().first.size();
+  size_t norb2 = norb * norb;
+  size_t norb4 = norb2 * norb2;
+
+  // Helper lambda to build 4-center from 3-center: (ij|kl) = sum_Q L_ij,Q *
+  // R_Q,kl, where L and R are column-major, and (ij|kl) is row-major
+  auto build_four_center =
+      [&](std::shared_ptr<const Eigen::MatrixXd> three_center_left,
+          std::shared_ptr<const Eigen::MatrixXd> three_center_right)
+      -> std::shared_ptr<Eigen::VectorXd> {
+    // Allocate output vector
+    auto four_center = std::make_shared<Eigen::VectorXd>(norb4);
+
+    size_t naux = three_center_left->cols();
+
+    // To make the resulting four-center integrals compatible with the row major
+    // storage, we calculate (kl|ij) = sum_Q R_kl,Q * L_Q,ij and store the
+    // result in col-major order, effectively (ij|kl) in row-major order.
+    blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::Trans,
+               norb2, norb2, naux, 1.0, three_center_right->data(), norb2,
+               three_center_left->data(), norb2, 0.0, four_center->data(),
+               norb2);
+    return four_center;
+  };
+
+  // Build four-center integrals from three-center
+  auto aaaa = build_four_center(_three_center_integrals.first,
+                                _three_center_integrals.first);
+
+  if (is_restricted()) {
+    _cached_four_center_integrals = std::make_tuple(aaaa, aaaa, aaaa);
+  } else {
+    auto aabb = build_four_center(_three_center_integrals.first,
+                                  _three_center_integrals.second);
+    auto bbbb = build_four_center(_three_center_integrals.second,
+                                  _three_center_integrals.second);
+    _cached_four_center_integrals =
+        std::make_tuple(std::move(aaaa), std::move(aabb), std::move(bbbb));
+  }
+}
+
+std::pair<const Eigen::MatrixXd&, const Eigen::MatrixXd&>
+CholeskyHamiltonianContainer::get_three_center_integrals() const {
+  QDK_LOG_TRACE_ENTERING();
+  if (!has_two_body_integrals()) {
+    throw std::runtime_error("Three-center two-body integrals are not set");
+  }
+  return std::make_pair(std::cref(*_three_center_integrals.first),
+                        std::cref(*_three_center_integrals.second));
+}
+
+const std::optional<Eigen::MatrixXd>&
+CholeskyHamiltonianContainer::get_ao_cholesky_vectors() const {
+  QDK_LOG_TRACE_ENTERING();
+  return _ao_cholesky_vectors;
+}
+
+double CholeskyHamiltonianContainer::get_two_body_element(
+    unsigned i, unsigned j, unsigned k, unsigned l, SpinChannel channel) const {
+  QDK_LOG_TRACE_ENTERING();
+
+  if (!has_two_body_integrals()) {
+    throw std::runtime_error("Two-body integrals are not set");
+  }
+
+  size_t norb = _orbitals->get_active_space_indices().first.size();
+  if (i >= norb || j >= norb || k >= norb || l >= norb) {
+    throw std::out_of_range("Orbital index out of range");
+  }
+
+  if (!std::get<0>(_cached_four_center_integrals)) {
+    _build_four_center_cache();
+  }
+
+  size_t ij = i * norb + j;
+  size_t kl = k * norb + l;
+
+  // Select the appropriate integral based on spin channel
+  switch (channel) {
+    case SpinChannel::aaaa:
+      return (*std::get<0>(_cached_four_center_integrals))(ij * norb * norb +
+                                                           kl);
+    case SpinChannel::aabb:
+      return (*std::get<1>(_cached_four_center_integrals))(ij * norb * norb +
+                                                           kl);
+    case SpinChannel::bbbb:
+      return (*std::get<2>(_cached_four_center_integrals))(ij * norb * norb +
+                                                           kl);
+
+    default:
+      throw std::invalid_argument("Invalid spin channel");
+  }
+}
+
+bool CholeskyHamiltonianContainer::has_two_body_integrals() const {
+  QDK_LOG_TRACE_ENTERING();
+  return _three_center_integrals.first != nullptr &&
+         _three_center_integrals.first->size() > 0;
+}
+
+bool CholeskyHamiltonianContainer::is_restricted() const {
+  QDK_LOG_TRACE_ENTERING();
+  // Hamiltonian is restricted if alpha and beta components point to the same
+  // data
+  return (_one_body_integrals.first == _one_body_integrals.second) &&
+         (_three_center_integrals.first == _three_center_integrals.second) &&
+         (_inactive_fock_matrix.first == _inactive_fock_matrix.second ||
+          (!_inactive_fock_matrix.first && !_inactive_fock_matrix.second));
+}
+
+bool CholeskyHamiltonianContainer::is_valid() const {
+  QDK_LOG_TRACE_ENTERING();
+  // Check if essential data is present
+  if (!has_one_body_integrals() || !has_two_body_integrals()) {
+    return false;
+  }
+
+  // Check dimension consistency
+  try {
+    validate_integral_dimensions();
+  } catch (const std::exception&) {
+    return false;
+  }
+
+  return true;
+}
+
+void CholeskyHamiltonianContainer::validate_integral_dimensions() const {
+  QDK_LOG_TRACE_ENTERING();
+  // Check alpha one-body integrals
+  HamiltonianContainer::validate_integral_dimensions();
+
+  if (!has_two_body_integrals()) {
+    return;
+  }
+
+  // Check two-body integrals dimensions
+  // Three-center integrals have shape [n_orb_pairs x n_aux] where n_orb_pairs =
+  // norb^2
+  auto norb_alpha = _one_body_integrals.first->rows();
+  auto orb_pair_size = norb_alpha * norb_alpha;
+
+  // Check alpha-alpha integrals - rows should equal orb_pair_size
+  if (static_cast<size_t>(_three_center_integrals.first->rows()) !=
+      orb_pair_size) {
+    throw std::invalid_argument(
+        "Alpha-alpha three-center integrals rows (" +
+        std::to_string(_three_center_integrals.first->rows()) +
+        ") does not match expected orb_pair size (" +
+        std::to_string(orb_pair_size) + " for " + std::to_string(norb_alpha) +
+        " orbitals)");
+  }
+
+  // Check beta-beta integrals (if different from alpha-alpha)
+  if (_three_center_integrals.second != _three_center_integrals.first) {
+    if (static_cast<size_t>(_three_center_integrals.second->rows()) !=
+            orb_pair_size ||
+        static_cast<size_t>(_three_center_integrals.second->cols()) !=
+            static_cast<size_t>(_three_center_integrals.first->cols())) {
+      throw std::invalid_argument(
+          "Beta three-center integrals size does not match with Alpha");
+    }
+  }
+}
+
+std::pair<std::shared_ptr<Eigen::MatrixXd>, std::shared_ptr<Eigen::MatrixXd>>
+CholeskyHamiltonianContainer::make_restricted_three_center_integrals(
+    const Eigen::MatrixXd& integrals) {
+  QDK_LOG_TRACE_ENTERING();
+  auto shared_integrals = std::make_shared<Eigen::MatrixXd>(integrals);
+  return std::make_pair(shared_integrals, shared_integrals);
+}
+
 nlohmann::json CholeskyHamiltonianContainer::to_json() const {
   QDK_LOG_TRACE_ENTERING();
+  nlohmann::json j;
 
-  // Start with base class serialization
-  nlohmann::json j = CanonicalFourCenterHamiltonianContainer::to_json();
+  // Store version first
+  j["version"] = SERIALIZATION_VERSION;
 
-  // Override container type
+  // Store container type
   j["container_type"] = get_container_type();
 
-  // Store ao cholesky vectors (Cholesky-specific data)
-  if (_ao_cholesky_vectors != nullptr && _ao_cholesky_vectors->size() > 0) {
-    j["has_ao_cholesky_vectors"] = true;
-    // Store ao cholesky vectors
-    std::vector<std::vector<double>> L_ao_vec;
+  // Store metadata
+  j["core_energy"] = _core_energy;
+  j["type"] =
+      (_type == HamiltonianType::Hermitian) ? "Hermitian" : "NonHermitian";
+
+  // Store restrictedness information
+  j["is_restricted"] = is_restricted();
+
+  // Store one-body integrals
+  if (has_one_body_integrals()) {
+    j["has_one_body_integrals"] = true;
+
+    // Store alpha one-body integrals
+    std::vector<std::vector<double>> one_body_alpha_vec;
+    for (int i = 0; i < _one_body_integrals.first->rows(); ++i) {
+      std::vector<double> row;
+      for (int j_idx = 0; j_idx < _one_body_integrals.first->cols(); ++j_idx) {
+        row.push_back((*_one_body_integrals.first)(i, j_idx));
+      }
+      one_body_alpha_vec.push_back(row);
+    }
+    j["one_body_integrals_alpha"] = one_body_alpha_vec;
+
+    // Store beta one-body integrals (only if unrestricted)
+    if (is_unrestricted()) {
+      std::vector<std::vector<double>> one_body_beta_vec;
+      for (int i = 0; i < _one_body_integrals.second->rows(); ++i) {
+        std::vector<double> row;
+        for (int j_idx = 0; j_idx < _one_body_integrals.second->cols();
+             ++j_idx) {
+          row.push_back((*_one_body_integrals.second)(i, j_idx));
+        }
+        one_body_beta_vec.push_back(row);
+      }
+      j["one_body_integrals_beta"] = one_body_beta_vec;
+    }
+  } else {
+    j["has_one_body_integrals"] = false;
+  }
+
+  // Store two-body integrals
+  if (has_two_body_integrals()) {
+    j["has_two_body_integrals"] = true;
+
+    // Store as object {"aa": [...], "bb": [...]}
+    nlohmann::json two_body_obj;
+
+    // Store aa
+    std::vector<std::vector<double>> three_center_aa_vec;
+    for (int i = 0; i < _three_center_integrals.first->rows(); ++i) {
+      std::vector<double> row;
+      for (int j_idx = 0; j_idx < _three_center_integrals.first->cols();
+           ++j_idx) {
+        row.push_back((*_three_center_integrals.first)(i, j_idx));
+      }
+      three_center_aa_vec.push_back(row);
+    }
+    two_body_obj["aa"] = three_center_aa_vec;
+
+    if (is_unrestricted()) {
+      // Store bb
+      std::vector<std::vector<double>> three_center_bb_vec;
+      for (int i = 0; i < _three_center_integrals.second->rows(); ++i) {
+        std::vector<double> row;
+        for (int j_idx = 0; j_idx < _three_center_integrals.second->cols();
+             ++j_idx) {
+          row.push_back((*_three_center_integrals.second)(i, j_idx));
+        }
+        three_center_bb_vec.push_back(row);
+      }
+      two_body_obj["bb"] = three_center_bb_vec;
+    }
+    j["three_center_integrals"] = two_body_obj;
+  } else {
+    j["has_two_body_integrals"] = false;
+  }
+
+  // Store inactive Fock matrix
+  if (has_inactive_fock_matrix()) {
+    j["has_inactive_fock_matrix"] = true;
+    // Store alpha inactive Fock matrix
+    std::vector<std::vector<double>> inactive_fock_alpha_vec;
+    for (int i = 0; i < _inactive_fock_matrix.first->rows(); ++i) {
+      std::vector<double> row;
+      for (int j_idx = 0; j_idx < _inactive_fock_matrix.first->cols();
+           ++j_idx) {
+        row.push_back((*_inactive_fock_matrix.first)(i, j_idx));
+      }
+      inactive_fock_alpha_vec.push_back(row);
+    }
+    j["inactive_fock_matrix_alpha"] = inactive_fock_alpha_vec;
+
+    // Store beta inactive Fock matrix (only if unrestricted)
+    if (is_unrestricted()) {
+      std::vector<std::vector<double>> inactive_fock_beta_vec;
+      for (int i = 0; i < _inactive_fock_matrix.second->rows(); ++i) {
+        std::vector<double> row;
+        for (int j_idx = 0; j_idx < _inactive_fock_matrix.second->cols();
+             ++j_idx) {
+          row.push_back((*_inactive_fock_matrix.second)(i, j_idx));
+        }
+        inactive_fock_beta_vec.push_back(row);
+      }
+      j["inactive_fock_matrix_beta"] = inactive_fock_beta_vec;
+    }
+  } else {
+    j["has_inactive_fock_matrix"] = false;
+  }
+
+  // Store orbital data
+  if (has_orbitals()) {
+    j["has_orbitals"] = true;
+    j["orbitals"] = _orbitals->to_json();
+  } else {
+    j["has_orbitals"] = false;
+  }
+
+  if (_ao_cholesky_vectors) {
+    std::vector<std::vector<double>> ao_cholesky_vectors_vec;
     for (int i = 0; i < _ao_cholesky_vectors->rows(); ++i) {
       std::vector<double> row;
       for (int j_idx = 0; j_idx < _ao_cholesky_vectors->cols(); ++j_idx) {
         row.push_back((*_ao_cholesky_vectors)(i, j_idx));
       }
-      L_ao_vec.push_back(row);
+      ao_cholesky_vectors_vec.push_back(row);
     }
-    j["ao_cholesky_vectors"] = L_ao_vec;
-  } else {
-    j["has_ao_cholesky_vectors"] = false;
+    j["ao_cholesky_vectors"] = ao_cholesky_vectors_vec;
   }
-
   return j;
 }
 
@@ -130,33 +460,21 @@ CholeskyHamiltonianContainer::from_json(const nlohmann::json& j) {
     auto load_matrix =
         [](const nlohmann::json& matrix_json) -> Eigen::MatrixXd {
       auto matrix_vec = matrix_json.get<std::vector<std::vector<double>>>();
-      if (matrix_vec.empty()) {
-        return Eigen::MatrixXd(0, 0);
-      }
-
-      Eigen::MatrixXd matrix(matrix_vec.size(), matrix_vec[0].size());
-      for (Eigen::Index i = 0; i < matrix.rows(); ++i) {
-        if (static_cast<Eigen::Index>(matrix_vec[i].size()) != matrix.cols()) {
+      int rows = matrix_vec.size();
+      int cols = rows > 0 ? matrix_vec[0].size() : 0;
+      Eigen::MatrixXd matrix(rows, cols);
+      for (int i = 0; i < rows; ++i) {
+        if (static_cast<int>(matrix_vec[i].size()) != cols) {
           throw std::runtime_error(
               "Matrix rows have inconsistent column counts");
         }
-        matrix.row(i) =
-            Eigen::VectorXd::Map(matrix_vec[i].data(), matrix.cols());
+        for (int j_idx = 0; j_idx < cols; ++j_idx) {
+          matrix(i, j_idx) = matrix_vec[i][j_idx];
+        }
       }
       return matrix;
     };
 
-    // Helper function to load vector from JSON
-    auto load_vector =
-        [](const nlohmann::json& vector_json) -> Eigen::VectorXd {
-      auto vector_vec = vector_json.get<std::vector<double>>();
-      Eigen::VectorXd vector(vector_vec.size());
-      for (size_t i = 0; i < vector_vec.size(); ++i) {
-        vector(i) = vector_vec[i];
-      }
-      return vector;
-    };
-
     // Load one-body integrals
     Eigen::MatrixXd one_body_alpha, one_body_beta;
     if (j.value("has_one_body_integrals", false)) {
@@ -174,28 +492,34 @@ CholeskyHamiltonianContainer::from_json(const nlohmann::json& j) {
     }
 
     // Load two-body integrals
-    Eigen::VectorXd two_body_aaaa, two_body_aabb, two_body_bbbb;
+    Eigen::MatrixXd three_center_aa, three_center_bb;
     bool has_two_body = j.value("has_two_body_integrals", false);
     if (has_two_body) {
-      if (!j.contains("two_body_integrals")) {
+      if (!j.contains("three_center_integrals")) {
         throw std::runtime_error("Two-body integrals data not found in JSON");
       }
 
-      auto two_body_obj = j["two_body_integrals"];
+      auto two_body_obj = j["three_center_integrals"];
       if (!two_body_obj.is_object()) {
         throw std::runtime_error(
-            "two_body_integrals must be an object with aaaa, aabb, bbbb keys");
+            "three_center_integrals must be an object with aa, bb "
+            "keys");
       }
 
-      if (!two_body_obj.contains("aaaa") || !two_body_obj.contains("aabb") ||
-          !two_body_obj.contains("bbbb")) {
+      if (!two_body_obj.contains("aa")) {
         throw std::runtime_error(
-            "two_body_integrals must contain aaaa, aabb, and bbbb keys");
+            "three_center_integrals must contain at least aa key");
       }
 
-      two_body_aaaa = load_vector(two_body_obj["aaaa"]);
-      two_body_aabb = load_vector(two_body_obj["aabb"]);
-      two_body_bbbb = load_vector(two_body_obj["bbbb"]);
+      three_center_aa = load_matrix(two_body_obj["aa"]);
+      if (!is_restricted_data) {
+        if (!two_body_obj.contains("bb")) {
+          throw std::runtime_error(
+              "three_center_integrals must contain bb key for unrestricted "
+              "Hamiltonian");
+        }
+        three_center_bb = load_matrix(two_body_obj["bb"]);
+      }
     }
 
     // Load inactive Fock matrix
@@ -220,15 +544,6 @@ CholeskyHamiltonianContainer::from_json(const nlohmann::json& j) {
     }
     auto orbitals = Orbitals::from_json(j["orbitals"]);
 
-    // Load ao cholesky vectors
-    Eigen::MatrixXd L_ao;
-    bool has_ao_cholesky_vectors = j.value("has_ao_cholesky_vectors", false);
-    if (has_ao_cholesky_vectors) {
-      if (j.contains("ao_cholesky_vectors")) {
-        L_ao = load_matrix(j["ao_cholesky_vectors"]);
-      }
-    }
-
     // Validate consistency: if orbitals have inactive indices,
     // then inactive fock matrix must be present
     if (orbitals->has_inactive_space()) {
@@ -253,19 +568,24 @@ CholeskyHamiltonianContainer::from_json(const nlohmann::json& j) {
       }
     }
 
+    std::optional<Eigen::MatrixXd> ao_cholesky_vectors;
+    if (j.contains("ao_cholesky_vectors")) {
+      ao_cholesky_vectors = load_matrix(j["ao_cholesky_vectors"]);
+    }
+
     // Create and return appropriate Hamiltonian using the correct constructor
     if (is_restricted_data) {
       // Use restricted constructor - it will create shared pointers internally
       // so alpha and beta point to the same data
       return std::make_unique<CholeskyHamiltonianContainer>(
-          one_body_alpha, two_body_aaaa, orbitals, core_energy,
-          inactive_fock_alpha, L_ao, type);
+          one_body_alpha, three_center_aa, orbitals, core_energy,
+          inactive_fock_alpha, std::move(ao_cholesky_vectors), type);
     } else {
       // Use unrestricted constructor with separate alpha and beta data
       return std::make_unique<CholeskyHamiltonianContainer>(
-          one_body_alpha, one_body_beta, two_body_aaaa, two_body_aabb,
-          two_body_bbbb, orbitals, core_energy, inactive_fock_alpha,
-          inactive_fock_beta, L_ao, type);
+          one_body_alpha, one_body_beta, three_center_aa, three_center_bb,
+          orbitals, core_energy, inactive_fock_alpha, inactive_fock_beta,
+          std::move(ao_cholesky_vectors), type);
     }
 
   } catch (const std::exception& e) {
@@ -277,24 +597,79 @@ CholeskyHamiltonianContainer::from_json(const nlohmann::json& j) {
 void CholeskyHamiltonianContainer::to_hdf5(H5::Group& group) const {
   QDK_LOG_TRACE_ENTERING();
   try {
-    // Start with base class serialization
-    CanonicalFourCenterHamiltonianContainer::to_hdf5(group);
-
-    // Override container type attribute
+    // Save version first
     H5::DataSpace scalar_space(H5S_SCALAR);
     H5::StrType string_type(H5::PredType::C_S1, H5T_VARIABLE);
 
-    // Remove and recreate container_type attribute with correct value
-    if (group.attrExists("container_type")) {
-      group.removeAttr("container_type");
-    }
+    H5::Attribute version_attr =
+        group.createAttribute("version", string_type, scalar_space);
+    std::string version_str = SERIALIZATION_VERSION;
+    version_attr.write(string_type, version_str);
+
+    // Add container type attribute
     H5::Attribute container_type_attr =
         group.createAttribute("container_type", string_type, scalar_space);
     std::string container_type_str = get_container_type();
     container_type_attr.write(string_type, container_type_str);
 
-    // Save ao cholesky vectors (Cholesky-specific data)
-    if (_ao_cholesky_vectors != nullptr && _ao_cholesky_vectors->size() > 0) {
+    // Save metadata
+    H5::Group metadata_group = group.createGroup("metadata");
+
+    // Save core energy
+    H5::Attribute core_energy_attr = metadata_group.createAttribute(
+        "core_energy", H5::PredType::NATIVE_DOUBLE, scalar_space);
+    core_energy_attr.write(H5::PredType::NATIVE_DOUBLE, &_core_energy);
+
+    // Save Hamiltonian type
+    std::string type_str =
+        (_type == HamiltonianType::Hermitian) ? "Hermitian" : "NonHermitian";
+    H5::StrType type_string_type(H5::PredType::C_S1, type_str.length() + 1);
+    H5::Attribute type_attr =
+        metadata_group.createAttribute("type", type_string_type, scalar_space);
+    type_attr.write(type_string_type, type_str.c_str());
+
+    // Save restrictedness information
+    hbool_t is_restricted_flag = is_restricted() ? 1 : 0;
+    H5::Attribute restricted_attr = metadata_group.createAttribute(
+        "is_restricted", H5::PredType::NATIVE_HBOOL, scalar_space);
+    restricted_attr.write(H5::PredType::NATIVE_HBOOL, &is_restricted_flag);
+
+    // Save integrals data
+    if (has_one_body_integrals()) {
+      save_matrix_to_group(group, "one_body_integrals_alpha",
+                           *_one_body_integrals.first);
+      if (is_unrestricted()) {
+        save_matrix_to_group(group, "one_body_integrals_beta",
+                             *_one_body_integrals.second);
+      }
+    }
+
+    if (has_two_body_integrals()) {
+      save_matrix_to_group(group, "three_center_integrals_aa",
+                           *_three_center_integrals.first);
+      if (is_unrestricted()) {
+        save_matrix_to_group(group, "three_center_integrals_bb",
+                             *_three_center_integrals.second);
+      }
+    }
+
+    // Save inactive Fock matrix
+    if (has_inactive_fock_matrix()) {
+      save_matrix_to_group(group, "inactive_fock_matrix_alpha",
+                           *_inactive_fock_matrix.first);
+      if (is_unrestricted()) {
+        save_matrix_to_group(group, "inactive_fock_matrix_beta",
+                             *_inactive_fock_matrix.second);
+      }
+    }
+
+    // Save nested orbitals data using HDF5 group
+    if (_orbitals) {
+      H5::Group orbitals_group = group.createGroup("orbitals");
+      _orbitals->to_hdf5(orbitals_group);
+    }
+
+    if (_ao_cholesky_vectors) {
       save_matrix_to_group(group, "ao_cholesky_vectors", *_ao_cholesky_vectors);
     }
 
@@ -363,7 +738,7 @@ CholeskyHamiltonianContainer::from_hdf5(H5::Group& group) {
 
     // Load integral data based on restrictedness
     Eigen::MatrixXd one_body_alpha, one_body_beta;
-    Eigen::VectorXd two_body_aaaa, two_body_aabb, two_body_bbbb;
+    Eigen::MatrixXd three_center_aa, three_center_bb;
     Eigen::MatrixXd inactive_fock_alpha, inactive_fock_beta;
 
     // Load one-body integrals
@@ -379,19 +754,16 @@ CholeskyHamiltonianContainer::from_hdf5(H5::Group& group) {
     }
 
     // Load two-body integrals
-    if (dataset_exists_in_group(group, "two_body_integrals_aaaa")) {
-      two_body_aaaa = load_vector_from_group(group, "two_body_integrals_aaaa");
+    if (dataset_exists_in_group(group, "three_center_integrals_aa")) {
+      three_center_aa =
+          load_matrix_from_group(group, "three_center_integrals_aa");
     }
 
-    // For unrestricted, load aabb and bbbb separately
+    // For unrestricted, load bb separately
     if (!is_restricted_data) {
-      if (dataset_exists_in_group(group, "two_body_integrals_aabb")) {
-        two_body_aabb =
-            load_vector_from_group(group, "two_body_integrals_aabb");
-      }
-      if (dataset_exists_in_group(group, "two_body_integrals_bbbb")) {
-        two_body_bbbb =
-            load_vector_from_group(group, "two_body_integrals_bbbb");
+      if (dataset_exists_in_group(group, "three_center_integrals_bb")) {
+        three_center_bb =
+            load_matrix_from_group(group, "three_center_integrals_bb");
       }
     }
 
@@ -408,24 +780,25 @@ CholeskyHamiltonianContainer::from_hdf5(H5::Group& group) {
           load_matrix_from_group(group, "inactive_fock_matrix_beta");
     }
 
-    // load ao cholesky vectors
-    Eigen::MatrixXd L_ao;
+    // Load AO Cholesky vectors
+    std::optional<Eigen::MatrixXd> ao_cholesky_vectors;
     if (dataset_exists_in_group(group, "ao_cholesky_vectors")) {
-      L_ao = load_matrix_from_group(group, "ao_cholesky_vectors");
+      ao_cholesky_vectors =
+          load_matrix_from_group(group, "ao_cholesky_vectors");
     }
 
     // Create and return appropriate Hamiltonian using the correct constructor
     if (is_restricted_data) {
       // Use restricted constructor - it will create shared pointers internally
       return std::make_unique<CholeskyHamiltonianContainer>(
-          one_body_alpha, two_body_aaaa, orbitals, core_energy,
-          inactive_fock_alpha, L_ao, type);
+          one_body_alpha, three_center_aa, orbitals, core_energy,
+          inactive_fock_alpha, std::move(ao_cholesky_vectors), type);
     } else {
       // Use unrestricted constructor with separate alpha and beta data
       return std::make_unique<CholeskyHamiltonianContainer>(
-          one_body_alpha, one_body_beta, two_body_aaaa, two_body_aabb,
-          two_body_bbbb, orbitals, core_energy, inactive_fock_alpha,
-          inactive_fock_beta, L_ao, type);
+          one_body_alpha, one_body_beta, three_center_aa, three_center_bb,
+          orbitals, core_energy, inactive_fock_alpha, inactive_fock_beta,
+          std::move(ao_cholesky_vectors), type);
     }
 
   } catch (const H5::Exception& e) {
diff --git a/cpp/tests/test_hamiltonian.cpp b/cpp/tests/test_hamiltonian.cpp
index 9b9de9dd4..a273ee9ce 100644
--- a/cpp/tests/test_hamiltonian.cpp
+++ b/cpp/tests/test_hamiltonian.cpp
@@ -88,7 +88,7 @@ auto run_restricted_o2 = [](const std::string& factory_name = "qdk") {
 
   auto ham_factory = HamiltonianConstructorFactory::create(factory_name);
   if (factory_name == "qdk_cholesky") {
-    ham_factory->settings().set("store_cholesky_vectors", true);
+    ham_factory->settings().set("store_ao_cholesky_vectors", true);
   }
   auto rhf_hamiltonian = ham_factory->run(rhf_orbitals);
 
@@ -112,7 +112,7 @@ auto run_unrestricted_o2 = [](const std::string& factory_name = "qdk") {
 
   auto ham_factory = HamiltonianConstructorFactory::create(factory_name);
   if (factory_name == "qdk_cholesky") {
-    ham_factory->settings().set("store_cholesky_vectors", true);
+    ham_factory->settings().set("store_ao_cholesky_vectors", true);
   }
   auto uhf_hamiltonian = ham_factory->run(uhf_orbitals);
 
@@ -960,12 +960,12 @@ TEST_F(HamiltonianTest, CholeskyContainerConstruction) {
   double core_energy = 1.5;
   Eigen::MatrixXd inactive_fock = Eigen::MatrixXd::Zero(0, 0);
 
-  // Create cholesky vectors (2x2 AO basis, 3 cholesky vectors)
-  Eigen::MatrixXd L_ao = Eigen::MatrixXd::Random(4, 3);
+  // Create cholesky vectors (2x2 MO basis, 3 cholesky vectors)
+  Eigen::MatrixXd L_mo = Eigen::MatrixXd::Random(4, 3);
 
   // Test restricted constructor
   Hamiltonian h(std::make_unique<CholeskyHamiltonianContainer>(
-      one_body, two_body, orbitals, core_energy, inactive_fock, L_ao));
+      one_body, L_mo, orbitals, core_energy, inactive_fock));
 
   EXPECT_TRUE(h.has_one_body_integrals());
   EXPECT_TRUE(h.has_two_body_integrals());
@@ -992,13 +992,14 @@ TEST_F(HamiltonianTest, CholeskyContainerUnrestrictedConstruction) {
   double core_energy = 1.5;
 
   // Create cholesky vectors
-  Eigen::MatrixXd L_ao = Eigen::MatrixXd::Random(4, 3);
+  Eigen::MatrixXd L_mo_alpha = Eigen::MatrixXd::Random(4, 3);
+  Eigen::MatrixXd L_mo_beta = Eigen::MatrixXd::Random(4, 3);
 
   // Test unrestricted constructor
   Hamiltonian h(std::make_unique<CholeskyHamiltonianContainer>(
-      one_body_alpha, one_body_beta, two_body_aaaa, two_body_aabb,
-      two_body_bbbb, unrestricted_orbitals, core_energy, inactive_fock_alpha,
-      inactive_fock_beta, L_ao));
+      one_body_alpha, one_body_beta, L_mo_alpha, L_mo_beta,
+      unrestricted_orbitals, core_energy, inactive_fock_alpha,
+      inactive_fock_beta));
 
   EXPECT_TRUE(h.has_one_body_integrals());
   EXPECT_TRUE(h.has_two_body_integrals());
@@ -1014,10 +1015,11 @@ TEST_F(HamiltonianTest, CholeskyContainerJSONSerialization) {
   auto orbitals = std::make_shared<ModelOrbitals>(2, true);
   double core_energy = 1.5;
   Eigen::MatrixXd inactive_fock = Eigen::MatrixXd::Zero(0, 0);
+  Eigen::MatrixXd L_mo = Eigen::MatrixXd::Random(4, 3);
   Eigen::MatrixXd L_ao = Eigen::MatrixXd::Random(4, 3);
 
   Hamiltonian h(std::make_unique<CholeskyHamiltonianContainer>(
-      one_body, two_body, orbitals, core_energy, inactive_fock, L_ao));
+      one_body, L_mo, orbitals, core_energy, inactive_fock, L_ao));
 
   // Test JSON conversion
   nlohmann::json j = h.to_json();
@@ -1026,7 +1028,7 @@ TEST_F(HamiltonianTest, CholeskyContainerJSONSerialization) {
   EXPECT_EQ(j["container"]["core_energy"], 1.5);
   EXPECT_TRUE(j["container"]["has_one_body_integrals"]);
   EXPECT_TRUE(j["container"]["has_two_body_integrals"]);
-  EXPECT_TRUE(j["container"]["has_ao_cholesky_vectors"]);
+  EXPECT_TRUE(j["container"].contains("ao_cholesky_vectors"));
 
   // Test deserialization
   auto h_loaded = Hamiltonian::from_json(j);
@@ -1043,10 +1045,10 @@ TEST_F(HamiltonianTest, CholeskyContainerHDF5Serialization) {
   auto orbitals = std::make_shared<ModelOrbitals>(2, true);
   double core_energy = 1.5;
   Eigen::MatrixXd inactive_fock = Eigen::MatrixXd::Zero(0, 0);
-  Eigen::MatrixXd L_ao = Eigen::MatrixXd::Random(4, 3);
+  Eigen::MatrixXd L_mo = Eigen::MatrixXd::Random(4, 3);
 
   Hamiltonian h(std::make_unique<CholeskyHamiltonianContainer>(
-      one_body, two_body, orbitals, core_energy, inactive_fock, L_ao));
+      one_body, L_mo, orbitals, core_energy, inactive_fock));
 
   // Save to HDF5
   std::string filename = "test.cholesky.hamiltonian.h5";
@@ -1071,10 +1073,10 @@ TEST_F(HamiltonianTest, CholeskyContainerClone) {
   auto orbitals = std::make_shared<ModelOrbitals>(2, true);
   double core_energy = 1.5;
   Eigen::MatrixXd inactive_fock = Eigen::MatrixXd::Zero(0, 0);
-  Eigen::MatrixXd L_ao = Eigen::MatrixXd::Random(4, 3);
+  Eigen::MatrixXd L_mo = Eigen::MatrixXd::Random(4, 3);
 
   Hamiltonian h1(std::make_unique<CholeskyHamiltonianContainer>(
-      one_body, two_body, orbitals, core_energy, inactive_fock, L_ao));
+      one_body, L_mo, orbitals, core_energy, inactive_fock));
 
   // Test copy constructor (uses clone internally)
   Hamiltonian h2(h1);
@@ -2227,7 +2229,6 @@ class DummyHamiltonianContainer : public HamiltonianContainer {
   bool is_restricted() const override { return true; }
   nlohmann::json to_json() const override { return {}; }
   void to_hdf5(H5::Group&) const override {}
-  void to_fcidump_file(const std::string&, size_t, size_t) const override {}
   bool is_valid() const override { return true; }
 };
 
diff --git a/docs/source/_static/examples/python/example_circuit.circuit.h5 b/docs/source/_static/examples/python/example_circuit.circuit.h5
new file mode 100644
index 000000000..317653c38
Binary files /dev/null and b/docs/source/_static/examples/python/example_circuit.circuit.h5 differ
diff --git a/docs/source/_static/examples/python/example_circuit.circuit.json b/docs/source/_static/examples/python/example_circuit.circuit.json
new file mode 100644
index 000000000..da0f1281a
--- /dev/null
+++ b/docs/source/_static/examples/python/example_circuit.circuit.json
@@ -0,0 +1,5 @@
+{
+  "qir": "%Result = type opaque\n%Qubit = type opaque\n\n@0 = internal constant [4 x i8] c\"0_t\\00\"\n\ndefine i64 @ENTRYPOINT__main() #0 {\nblock_0:\n  call void @__quantum__rt__initialize(i8* null)\n  call void @__quantum__qis__s__adj(%Qubit* inttoptr (i64 2 to %Qubit*))\n  call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 2 to %Qubit*))\n  call void @__quantum__qis__rz__body(double 2.915681460760505, %Qubit* inttoptr (i64 2 to %Qubit*))\n  call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 2 to %Qubit*))\n  call void @__quantum__qis__s__body(%Qubit* inttoptr (i64 2 to %Qubit*))\n  call void @__quantum__qis__rz__body(double 1.5707963267948966, %Qubit* inttoptr (i64 2 to %Qubit*))\n  call void @__quantum__qis__s__adj(%Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__rz__body(double 3.141592653589793, %Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__s__body(%Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__rz__body(double -1.5707963267948966, %Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__rz__body(double 1.5707963267948966, %Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 1 to %Qubit*))\n  call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Qubit* inttoptr (i64 1 to %Qubit*))\n  call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Qubit* inttoptr (i64 2 to %Qubit*))\n  call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Qubit* inttoptr (i64 3 to %Qubit*))\n  call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Qubit* inttoptr (i64 2 to %Qubit*))\n  call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Qubit* inttoptr (i64 3 to %Qubit*))\n  call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Qubit* inttoptr (i64 0 to %Qubit*))\n  call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Qubit* inttoptr (i64 2 to %Qubit*))\n  call void @__quantum__rt__tuple_record_output(i64 0, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @0, i64 0, i64 0))\n  ret i64 0\n}\n\ndeclare void @__quantum__rt__initialize(i8*)\n\ndeclare void @__quantum__qis__s__adj(%Qubit*)\n\ndeclare void @__quantum__qis__h__body(%Qubit*)\n\ndeclare void @__quantum__qis__rz__body(double, %Qubit*)\n\ndeclare void @__quantum__qis__s__body(%Qubit*)\n\ndeclare void @__quantum__qis__cx__body(%Qubit*, %Qubit*)\n\ndeclare void @__quantum__qis__x__body(%Qubit*)\n\ndeclare void @__quantum__rt__tuple_record_output(i64, i8*)\n\nattributes #0 = { \"entry_point\" \"output_labeling_schema\" \"qir_profiles\"=\"base_profile\" \"required_num_qubits\"=\"4\" \"required_num_results\"=\"0\" }\nattributes #1 = { \"irreversible\" }\n\n; module flags\n\n!llvm.module.flags = !{!0, !1, !2, !3}\n\n!0 = !{i32 1, !\"qir_major_version\", i32 1}\n!1 = !{i32 7, !\"qir_minor_version\", i32 0}\n!2 = !{i32 1, !\"dynamic_qubit_management\", i1 false}\n!3 = !{i32 1, !\"dynamic_result_management\", i1 false}\n",
+  "encoding": "jordan-wigner",
+  "version": "0.1.0"
+}
diff --git a/docs/source/_static/examples/python/release_notes_v1_1.py b/docs/source/_static/examples/python/release_notes_v1_1.py
index a6a296bcc..db68ce4ed 100644
--- a/docs/source/_static/examples/python/release_notes_v1_1.py
+++ b/docs/source/_static/examples/python/release_notes_v1_1.py
@@ -165,7 +165,7 @@
 constructor = create("hamiltonian_constructor", "qdk_cholesky")
 constructor.settings().set("cholesky_tolerance", 1e-8)
 constructor.settings().set("eri_threshold", 1e-12)
-constructor.settings().set("store_cholesky_vectors", True)
+constructor.settings().set("store_ao_cholesky_vectors", True)
 cholesky_hamiltonian = constructor.run(orbitals)
 # end-cell-cholesky
 ################################################################################
diff --git a/docs/source/user/comprehensive/algorithms/hamiltonian_constructor.rst b/docs/source/user/comprehensive/algorithms/hamiltonian_constructor.rst
index d50f048a7..d14918caa 100644
--- a/docs/source/user/comprehensive/algorithms/hamiltonian_constructor.rst
+++ b/docs/source/user/comprehensive/algorithms/hamiltonian_constructor.rst
@@ -133,7 +133,10 @@ QDK Cholesky
 
 A Cholesky decomposition-based implementation for Hamiltonian construction.
 This method uses Cholesky decomposition of the electron repulsion integral (ERI) tensor to reduce memory requirements and computational cost while maintaining high accuracy.
-The decomposition represents the four-center ERIs as products of three-center integrals (Cholesky vectors), which are then transformed to the molecular one- and two-electron integrals.
+The decomposition represents the four-center ERIs as products of three-center integrals (Cholesky vectors), which are transformed to the MO basis.
+The output Hamiltonian stores the MO three-center integrals directly in a ``CholeskyHamiltonianContainer``, avoiding expansion to the full four-center representation.
+Additionally, the original AO Cholesky vectors are preserved in the container when ``store_ao_cholesky_vectors`` is enabled, and can be retrieved via ``get_ao_cholesky_vectors()``.
+Four-center integrals are lazily computed from the three-center integrals on demand.
 
 .. rubric:: Settings
 
@@ -153,9 +156,9 @@ The decomposition represents the four-center ERIs as products of three-center in
    * - ``eri_threshold``
      - float
      - ERI screening threshold for skipping negligible shell quartets during Cholesky decomposition. Default: 1e-12
-   * - ``store_cholesky_vectors``
+   * - ``store_ao_cholesky_vectors``
      - bool
-     - Whether to store the AO Cholesky vectors in the output Hamiltonian container for potential reuse. Default: false
+     - Whether to store the AO three-center integrals in a ``CholeskyHamiltonianContainer`` in addition to the MO three-center integrals, which are always saved. Default: false
 
 Related classes
 ---------------
diff --git a/external/macis/cmake/macis-ips4o.cmake b/external/macis/cmake/macis-ips4o.cmake
index e1dfd0db4..136156270 100644
--- a/external/macis/cmake/macis-ips4o.cmake
+++ b/external/macis/cmake/macis-ips4o.cmake
@@ -13,4 +13,6 @@ FetchContent_Declare( ips4o
 FetchContent_MakeAvailable( ips4o )
 add_library( ips4o INTERFACE )
 target_include_directories( ips4o INTERFACE ${ips4o_SOURCE_DIR} )
-target_link_libraries( ips4o INTERFACE atomic )
+if(NOT APPLE)
+  target_link_libraries( ips4o INTERFACE atomic )
+endif()
diff --git a/external/macis/examples/src/standalone_driver.cxx b/external/macis/examples/src/standalone_driver.cxx
index 3267f1329..6d383bbb3 100644
--- a/external/macis/examples/src/standalone_driver.cxx
+++ b/external/macis/examples/src/standalone_driver.cxx
@@ -436,7 +436,7 @@ int main(int argc, char** argv) {
           EPT2 = macis::asci_pt2_constraint(
               asci_settings, dets.begin(), dets.end(),
               E0 - (E_inactive + E_core), C, n_active, ham_gen.T(),
-              ham_gen.G_red(), ham_gen.V_red(), ham_gen.G(), ham_gen.V(),
+              ham_gen.G_red(), ham_gen.V_red(), ham_gen.V(),
               ham_gen MACIS_MPI_CODE(, MPI_COMM_WORLD));
           MPI_Barrier(MPI_COMM_WORLD);
           auto pt2_en = hrt_t::now();
diff --git a/external/macis/include/macis/asci/determinant_contributions.hpp b/external/macis/include/macis/asci/determinant_contributions.hpp
index be3a55f4d..d6735cf35 100644
--- a/external/macis/include/macis/asci/determinant_contributions.hpp
+++ b/external/macis/include/macis/asci/determinant_contributions.hpp
@@ -152,8 +152,9 @@ void append_singles_asci_contributions(
  * @param[in] vir Virtual orbital indices
  * @param[in] os_occ Opposite-spin occupied orbital indices
  * @param[in] eps_same Orbital energies for the same spin
- * @param[in] G Same-spin two-electron integral tensor
- * @param[in] LDG Leading dimension of G tensor
+ * @param[in] V Two-electron integral tensor used to build same-spin
+ * antisymmetrized terms on the fly
+ * @param[in] LDV Leading dimension of V tensor
  * @param[in] h_el_tol Threshold for matrix element magnitude
  * @param[in] root_diag Root diagonal correction term
  * @param[in] E0 Reference energy
@@ -166,7 +167,7 @@ void append_ss_doubles_asci_contributions(
     double coeff, WfnType state_full, SpinWfnType state_same,
     SpinWfnType state_other, const std::vector<uint32_t>& ss_occ,
     const std::vector<uint32_t>& vir, const std::vector<uint32_t>& os_occ,
-    const double* eps_same, const double* G, size_t LDG, double h_el_tol,
+    const double* eps_same, const double* V, size_t LDV, double h_el_tol,
     double root_diag, double E0,
     const HamiltonianGeneratorBase<double>& ham_gen,
     asci_contrib_container<WfnType>& asci_contributions) {
@@ -175,19 +176,22 @@ void append_ss_doubles_asci_contributions(
   const size_t num_occupied_orbitals = ss_occ.size();
   const size_t num_virtual_orbitals = vir.size();
 
-  const size_t LDG2 = LDG * LDG;
+  const size_t LDV2 = LDV * LDV;
   for (auto ii = 0; ii < num_occupied_orbitals; ++ii)
     for (auto aa = 0; aa < num_virtual_orbitals; ++aa) {
       const auto i = ss_occ[ii];
       const auto a = vir[aa];
-      const auto G_ai = G + (a + i * LDG) * LDG2;
+      const auto V_ai = V + (a + i * LDV) * LDV2;
 
       for (auto jj = ii + 1; jj < num_occupied_orbitals; ++jj)
         for (auto bb = aa + 1; bb < num_virtual_orbitals; ++bb) {
           const auto j = ss_occ[jj];
           const auto b = vir[bb];
-          const auto jb = b + j * LDG;
-          const auto G_aibj = G_ai[jb];
+          const auto jb = b + j * LDV;
+          const auto ib = b + i * LDV;
+          const auto V_aibj = V_ai[jb];
+          const auto V_ajbi = (V + (a + j * LDV) * LDV2)[ib];
+          const auto G_aibj = V_aibj - V_ajbi;
 
           if (std::abs(coeff * G_aibj) < h_el_tol) continue;
 
diff --git a/external/macis/include/macis/asci/determinant_search.hpp b/external/macis/include/macis/asci/determinant_search.hpp
index 4681fc5e8..20c55a6aa 100644
--- a/external/macis/include/macis/asci/determinant_search.hpp
+++ b/external/macis/include/macis/asci/determinant_search.hpp
@@ -218,7 +218,6 @@ struct ASCISettings {
  * @param[in] T_pq One-electron integral matrix
  * @param[in] G_red Reduced same-spin two-electron integral tensor
  * @param[in] V_red Reduced opposite-spin two-electron integral tensor
- * @param[in] G_pqrs Full same-spin two-electron integral tensor
  * @param[in] V_pqrs Full opposite-spin two-electron integral tensor
  * @param[in] ham_gen Hamiltonian generator for matrix element evaluation
  * @return Container of ASCI contributions with their associated scores
@@ -228,8 +227,8 @@ asci_contrib_container<wfn_t<N>> asci_contributions_standard(
     ASCISettings asci_settings, wavefunction_iterator_t<N> cdets_begin,
     wavefunction_iterator_t<N> cdets_end, const double E_ASCI,
     const std::vector<double>& C, size_t norb, const double* T_pq,
-    const double* G_red, const double* V_red, const double* G_pqrs,
-    const double* V_pqrs, HamiltonianGenerator<wfn_t<N>>& ham_gen) {
+    const double* G_red, const double* V_red, const double* V_pqrs,
+    HamiltonianGenerator<wfn_t<N>>& ham_gen) {
   using wfn_traits = wavefunction_traits<wfn_t<N>>;
   using spin_wfn_type = spin_wfn_t<wfn_t<N>>;
   using spin_wfn_traits = wavefunction_traits<spin_wfn_type>;
@@ -277,13 +276,13 @@ asci_contrib_container<wfn_t<N>> asci_contributions_standard(
       // Doubles - AAAA
       append_ss_doubles_asci_contributions<Spin::Alpha>(
           coeff, state, state_alpha, state_beta, occ_alpha, vir_alpha, occ_beta,
-          eps_alpha.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, ham_gen,
+          eps_alpha.data(), V_pqrs, norb, h_el_tol, h_diag, E_ASCI, ham_gen,
           asci_pairs);
 
       // Doubles - BBBB
       append_ss_doubles_asci_contributions<Spin::Beta>(
           coeff, state, state_beta, state_alpha, occ_beta, vir_beta, occ_alpha,
-          eps_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, ham_gen,
+          eps_beta.data(), V_pqrs, norb, h_el_tol, h_diag, E_ASCI, ham_gen,
           asci_pairs);
 
       // Doubles - AABB
@@ -343,7 +342,6 @@ asci_contrib_container<wfn_t<N>> asci_contributions_standard(
  * @param[in] T_pq One-electron integral matrix
  * @param[in] G_red Reduced same-spin two-electron integral tensor
  * @param[in] V_red Reduced opposite-spin two-electron integral tensor
- * @param[in] G_pqrs Full same-spin two-electron integral tensor
  * @param[in] V_pqrs Full opposite-spin two-electron integral tensor
  * @param[in] ham_gen Hamiltonian generator for matrix element evaluation
  * @param[in] comm MPI communicator for parallel execution (if MPI enabled)
@@ -355,8 +353,7 @@ asci_contrib_container<wfn_t<N>> asci_contributions_constraint(
     wavefunction_iterator_t<N> cdets_begin,
     wavefunction_iterator_t<N> cdets_end, const double E_ASCI,
     const std::vector<double>& C, size_t norb, const double* T_pq,
-    const double* G_red, const double* V_red, const double* G_pqrs,
-    const double* V_pqrs,
+    const double* G_red, const double* V_red, const double* V_pqrs,
     HamiltonianGenerator<wfn_t<N>>& ham_gen MACIS_MPI_CODE(, MPI_Comm comm)) {
   using clock_type = std::chrono::high_resolution_clock;
   using duration_type = std::chrono::duration<double, std::milli>;
@@ -559,7 +556,11 @@ asci_contrib_container<wfn_t<N>> asci_contributions_constraint(
   std::vector<double> thread_wall(debug_timing ? num_threads : 0, 0.0);
 #pragma omp parallel
   {
+#ifdef _OPENMP
     const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
     const asci_contrib_topk_comparator<wfn_t<N>> contrib_cmp{};
     auto t_wall_st = clock_type::now();
 
@@ -569,6 +570,10 @@ asci_contrib_container<wfn_t<N>> asci_contributions_constraint(
         std::max(size_t(1024), max_size / num_threads);
     accumulated_pairs.reserve(per_thread_reserve);
 
+    // Running top-k cutoff for this thread-local accumulator.
+    bool threshold_ready = false;
+    double threshold_abs_rv = 0.0;
+
     // Working set for the current constraint — grows during excitation
     // generation, then gets S&A'd and merged into accumulated_pairs.
     // clear() preserves capacity, so after the first constraint this
@@ -628,7 +633,7 @@ asci_contrib_container<wfn_t<N>> asci_contributions_constraint(
 
             // AAAA excitations
             generate_constraint_doubles_contributions_ss(
-                c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), G_pqrs,
+                c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), V_pqrs,
                 norb, h_el_tol, h_diag, E_ASCI, ham_gen, working_pairs, O_buf,
                 V_buf, virt_ind_buf, occ_ind_buf);
 
@@ -648,7 +653,7 @@ asci_contrib_container<wfn_t<N>> asci_contributions_constraint(
               // BBBB excitations
               append_ss_doubles_asci_contributions<Spin::Beta>(
                   c, w, beta_det, alpha_det, occ_beta, vir_beta, occ_alpha,
-                  orb_ens_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI,
+                  orb_ens_beta.data(), V_pqrs, norb, h_el_tol, h_diag, E_ASCI,
                   ham_gen, working_pairs);
 
               // No excitation (push inf to remove from list)
@@ -677,27 +682,51 @@ asci_contrib_container<wfn_t<N>> asci_contributions_constraint(
           working_pairs.erase(uit, working_pairs.end());
         }
 
-        // Merge into accumulated results
-        accumulated_pairs.insert(
-            accumulated_pairs.end(),
-            std::make_move_iterator(working_pairs.begin()),
-            std::make_move_iterator(working_pairs.end()));
+        // If top-k cutoff is active, keep only candidates that can survive.
+        if (threshold_ready) {
+          auto keep_end = std::partition(
+              working_pairs.begin(), working_pairs.end(),
+              [threshold_abs_rv](const auto& contrib) {
+                return std::abs(contrib.rv()) >= threshold_abs_rv;
+              });
+          working_pairs.erase(keep_end, working_pairs.end());
+        }
+
+        bool merged_new_candidates = !working_pairs.empty();
+        if (merged_new_candidates) {
+          // Merge filtered candidates into accumulated results.
+          accumulated_pairs.insert(
+              accumulated_pairs.end(),
+              std::make_move_iterator(working_pairs.begin()),
+              std::make_move_iterator(working_pairs.end()));
+        }
         working_pairs.clear();
         size_t accumulated_size = accumulated_pairs.size();
+        if (ic % 10 == 0) {
+          logger->info(
+              "  * After Merge at CON = {}, accumulated_size = {} at CON = {}",
+              ic, accumulated_size, ic);
+        }
+
+        const bool need_initial_threshold =
+            (!threshold_ready && ntdets > 0 && accumulated_size >= ntdets);
+        const bool need_threshold_refresh =
+            (threshold_ready && merged_new_candidates && ntdets > 0 &&
+             accumulated_size > ntdets);
 
-        if (accumulated_size > ntdets) {
+        if (need_initial_threshold || need_threshold_refresh) {
           const size_t cutoff_idx = ntdets - 1;
           std::nth_element(accumulated_pairs.begin(),
                            accumulated_pairs.begin() + cutoff_idx,
                            accumulated_pairs.end(), contrib_cmp);
-          const double threshold =
-              std::abs(accumulated_pairs[cutoff_idx].rv());
+          threshold_abs_rv = std::abs(accumulated_pairs[cutoff_idx].rv());
+          threshold_ready = true;
           // Repartition only the tail so entries tied at the cutoff are moved
           // directly behind the nth position before trimming.
           auto new_end = std::partition(
               accumulated_pairs.begin() + cutoff_idx + 1,
-              accumulated_pairs.end(), [threshold](const auto& contrib) {
-                return std::abs(contrib.rv()) >= threshold;
+              accumulated_pairs.end(), [threshold_abs_rv](const auto& contrib) {
+                return std::abs(contrib.rv()) >= threshold_abs_rv;
               });
           accumulated_pairs.erase(new_end, accumulated_pairs.end());
         }
@@ -779,7 +808,6 @@ asci_contrib_container<wfn_t<N>> asci_contributions_constraint(
  * @param[in] T_pq One-electron integral matrix
  * @param[in] G_red Reduced same-spin two-electron integral tensor
  * @param[in] V_red Reduced opposite-spin two-electron integral tensor
- * @param[in] G_pqrs Full same-spin two-electron integral tensor
  * @param[in] V_pqrs Full opposite-spin two-electron integral tensor
  * @param[in] ham_gen Hamiltonian generator for matrix element evaluation
  * @param[in] comm MPI communicator for parallel execution (if MPI enabled)
@@ -791,8 +819,7 @@ std::vector<wfn_t<N>> asci_search(
     wavefunction_iterator_t<N> cdets_begin,
     wavefunction_iterator_t<N> cdets_end, const double E_ASCI,
     const std::vector<double>& C, size_t norb, const double* T_pq,
-    const double* G_red, const double* V_red, const double* G_pqrs,
-    const double* V_pqrs,
+    const double* G_red, const double* V_red, const double* V_pqrs,
     HamiltonianGenerator<wfn_t<N>>& ham_gen MACIS_MPI_CODE(, MPI_Comm comm)) {
   using clock_type = std::chrono::high_resolution_clock;
   using duration_type = std::chrono::duration<double>;
@@ -877,7 +904,7 @@ std::vector<wfn_t<N>> asci_search(
   asci_contrib_container<wfn_t<N>> asci_pairs;
   asci_pairs = asci_contributions_constraint(
       asci_settings, ndets_max, cdets_begin, cdets_end, E_ASCI, C, norb, T_pq,
-      G_red, V_red, G_pqrs, V_pqrs, ham_gen MACIS_MPI_CODE(, comm));
+      G_red, V_red, V_pqrs, ham_gen MACIS_MPI_CODE(, comm));
   auto pairs_en = clock_type::now();
 
   {
diff --git a/external/macis/include/macis/asci/grow.hpp b/external/macis/include/macis/asci/grow.hpp
index 139d6ac73..f5c70bfe2 100644
--- a/external/macis/include/macis/asci/grow.hpp
+++ b/external/macis/include/macis/asci/grow.hpp
@@ -100,9 +100,8 @@ auto asci_grow(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
     if (asci_settings.taper_grow_factor > 0 &&
         static_cast<size_t>(std::ceil(wfn.size() * current_grow_factor)) >
             asci_settings.ntdets_max) {
-      effective_grow_factor =
-          std::max(asci_settings.min_grow_factor,
-                   asci_settings.taper_grow_factor);
+      effective_grow_factor = std::max(asci_settings.min_grow_factor,
+                                       asci_settings.taper_grow_factor);
       logger->info("  * Tapering grow_factor from {:.2f} to {:.2f}",
                    current_grow_factor, effective_grow_factor);
     }
@@ -128,8 +127,8 @@ auto asci_grow(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
     double E;
     auto ai_st = hrt_t::now();
     std::tie(E, wfn, X) = asci_iter<N, index_t>(
-        asci_settings, grow_mcscf, ndets_new, E0, std::move(wfn),
-        std::move(X), ham_gen, norb, &h_cache MACIS_MPI_CODE(, comm));
+        asci_settings, grow_mcscf, ndets_new, E0, std::move(wfn), std::move(X),
+        ham_gen, norb, &h_cache MACIS_MPI_CODE(, comm));
     auto ai_en = hrt_t::now();
     dur_t ai_dur = ai_en - ai_st;
     logger->trace("  * ASCI_ITER_DUR = {:.2e} ms", ai_dur.count());
diff --git a/external/macis/include/macis/asci/iteration.hpp b/external/macis/include/macis/asci/iteration.hpp
index e1e454b24..189789c95 100644
--- a/external/macis/include/macis/asci/iteration.hpp
+++ b/external/macis/include/macis/asci/iteration.hpp
@@ -8,12 +8,11 @@
  */
 
 #pragma once
-#include <numeric>
-
 #include <blas.hh>
 #include <macis/asci/determinant_search.hpp>
 #include <macis/mcscf/mcscf.hpp>
 #include <macis/solvers/selected_ci_diag.hpp>
+#include <numeric>
 
 namespace macis {
 
@@ -51,8 +50,8 @@ auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
                size_t ndets_max, double E0, std::vector<wfn_t<N>> wfn,
                std::vector<double> X, HamiltonianGenerator<wfn_t<N>>& ham_gen,
                size_t norb,
-               CachedHamiltonianState<wfn_t<N>, index_t>* h_cache = nullptr
-               MACIS_MPI_CODE(, MPI_Comm comm = MPI_COMM_WORLD)) {
+               CachedHamiltonianState<wfn_t<N>, index_t>* h_cache =
+                   nullptr MACIS_MPI_CODE(, MPI_Comm comm = MPI_COMM_WORLD)) {
   // Sort wfn on coefficient weights
   if (wfn.size() > 1) reorder_ci_on_coeff(wfn, X);
 
@@ -120,7 +119,7 @@ auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
   // Perform the ASCI search
   wfn = asci_search(asci_settings, ndets_max, wfn.begin(), wfn.begin() + nkeep,
                     E0, X, norb, ham_gen.T(), ham_gen.G_red(), ham_gen.V_red(),
-                    ham_gen.G(), ham_gen.V(), ham_gen MACIS_MPI_CODE(, comm));
+                    ham_gen.V(), ham_gen MACIS_MPI_CODE(, comm));
 
   std::sort(wfn.begin(), wfn.end(), wfn_comp{});
 
@@ -138,8 +137,8 @@ auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
     size_t n_matched = 0;
     const size_t old_size = old_wfn.size();
     for (size_t i = 0; i < old_size; ++i) {
-      auto it = std::lower_bound(wfn.begin(), wfn.end(),
-                                 old_wfn[i], wfn_comp{});
+      auto it =
+          std::lower_bound(wfn.begin(), wfn.end(), old_wfn[i], wfn_comp{});
       if (it != wfn.end() && *it == old_wfn[i]) {
         size_t new_idx = static_cast<size_t>(std::distance(wfn.begin(), it));
         X_local[new_idx] = old_X[i];
@@ -161,8 +160,10 @@ auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
     if (norm < asci_settings.min_warm_start_overlap) {
       X_local.clear();  // triggers identity guess in selected_ci_diag
       if (logger)
-        logger->info("  WARM_START: norm {:.4f} < {:.4f} threshold, using diagonal guess",
-                     norm, asci_settings.min_warm_start_overlap);
+        logger->info(
+            "  WARM_START: norm {:.4f} < {:.4f} threshold, using diagonal "
+            "guess",
+            norm, asci_settings.min_warm_start_overlap);
     } else {
       blas::scal(X_local.size(), 1.0 / norm, X_local.data(), 1);
     }
@@ -170,9 +171,8 @@ auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
 
   double E = selected_ci_diag<index_t>(
       wfn.begin(), wfn.end(), ham_gen, mcscf_settings.ci_matel_tol,
-      mcscf_settings.ci_max_subspace, mcscf_settings.ci_res_tol,
-      X_local, h_cache, asci_settings.min_patch_overlap
-      MACIS_MPI_CODE(, comm));
+      mcscf_settings.ci_max_subspace, mcscf_settings.ci_res_tol, X_local,
+      h_cache, asci_settings.min_patch_overlap MACIS_MPI_CODE(, comm));
 
 #ifdef MACIS_ENABLE_MPI
   auto world_size = comm_size(comm);
diff --git a/external/macis/include/macis/asci/mask_constraints.hpp b/external/macis/include/macis/asci/mask_constraints.hpp
index 8d20ac522..6abc5e8a7 100644
--- a/external/macis/include/macis/asci/mask_constraints.hpp
+++ b/external/macis/include/macis/asci/mask_constraints.hpp
@@ -8,11 +8,12 @@
  */
 
 #pragma once
+#include <spdlog/spdlog.h>
+
 #include <macis/asci/alpha_constraint.hpp>
 #include <macis/sd_operations.hpp>
 #include <macis/types.hpp>
 #include <macis/util/mpi.hpp>
-#include <spdlog/spdlog.h>
 #include <variant>
 
 namespace macis {
@@ -467,8 +468,9 @@ void generate_constraint_singles_contributions_ss(
  * @param[in] occ_same Occupied orbital indices for the same spin
  * @param[in] occ_othr Occupied orbital indices for the opposite spin
  * @param[in] eps Orbital energies for the same spin
- * @param[in] G Same-spin two-electron integral tensor
- * @param[in] LDG Leading dimension of G tensor
+ * @param[in] V Two-electron integral tensor used to build same-spin
+ * antisymmetrized terms on the fly
+ * @param[in] LDV Leading dimension of V tensor
  * @param[in] h_el_tol Threshold for matrix element magnitude
  * @param[in] root_diag Root diagonal correction term
  * @param[in] E0 Reference energy
@@ -486,8 +488,8 @@ template <typename WfnType, typename ConType>
 void generate_constraint_doubles_contributions_ss(
     double coeff, WfnType det, ConType constraint,
     const std::vector<uint32_t>& occ_same,
-    const std::vector<uint32_t>& occ_othr, const double* eps, const double* G,
-    const size_t LDG, double h_el_tol, double root_diag, double E0,
+    const std::vector<uint32_t>& occ_othr, const double* eps, const double* V,
+    const size_t LDV, double h_el_tol, double root_diag, double E0,
     HamiltonianGeneratorBase<double>& ham_gen,
     asci_contrib_container<WfnType>& asci_contributions,
     std::vector<typename ConType::constraint_type>& O_buf,
@@ -502,12 +504,11 @@ void generate_constraint_doubles_contributions_ss(
   const auto nv_pairs = V_buf.size();
   if (!no_pairs or !nv_pairs) return;
 
-  const size_t LDG2 = LDG * LDG;
+  const size_t LDV2 = LDV * LDV;
   for (int _ij = 0; _ij < no_pairs; ++_ij) {
     const auto ij = O_buf[_ij];
     const auto i = ffs(ij) - 1;
     const auto j = fls(ij);
-    const auto G_ij = G + (j + i * LDG2) * LDG;
     const auto ex_ij =
         wfn_traits::template single_excitation_no_check<Spin::Alpha>(
             det, i, j);  // det ^ ij;
@@ -516,7 +517,11 @@ void generate_constraint_doubles_contributions_ss(
       const auto a = ffs(ab) - 1;
       const auto b = fls(ab);
 
-      const auto G_aibj = G_ij[b + a * LDG2];
+      const auto jb = b + j * LDV;
+      const auto ib = b + i * LDV;
+      const auto V_aibj = (V + (a + i * LDV) * LDV2)[jb];
+      const auto V_ajbi = (V + (a + j * LDV) * LDV2)[ib];
+      const auto G_aibj = V_aibj - V_ajbi;
 
       // Early Exit
       if (std::abs(coeff * G_aibj) < h_el_tol) continue;
@@ -555,15 +560,15 @@ template <typename WfnType, typename ConType>
 void generate_constraint_doubles_contributions_ss(
     double coeff, WfnType det, ConType constraint,
     const std::vector<uint32_t>& occ_same,
-    const std::vector<uint32_t>& occ_othr, const double* eps, const double* G,
-    const size_t LDG, double h_el_tol, double root_diag, double E0,
+    const std::vector<uint32_t>& occ_othr, const double* eps, const double* V,
+    const size_t LDV, double h_el_tol, double root_diag, double E0,
     HamiltonianGeneratorBase<double>& ham_gen,
     asci_contrib_container<WfnType>& asci_contributions) {
   using constraint_type = typename ConType::constraint_type;
   std::vector<constraint_type> O_buf, V_buf;
   std::vector<uint32_t> virt_ind_buf, occ_ind_buf;
   generate_constraint_doubles_contributions_ss(
-      coeff, det, constraint, occ_same, occ_othr, eps, G, LDG, h_el_tol,
+      coeff, det, constraint, occ_same, occ_othr, eps, V, LDV, h_el_tol,
       root_diag, E0, ham_gen, asci_contributions, O_buf, V_buf, virt_ind_buf,
       occ_ind_buf);
 }
@@ -834,7 +839,8 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr,
 
   auto cgen_logger = spdlog::get("asci_search");
   if (cgen_logger) {
-    size_t max_w = constraint_sizes.empty() ? 0 : constraint_sizes.front().second;
+    size_t max_w =
+        constraint_sizes.empty() ? 0 : constraint_sizes.front().second;
     cgen_logger->debug(
         "  * CGEN: ncon={}, total_work={}, avg/worker={}, max_single={}, "
         "nlevels={}",
@@ -948,9 +954,9 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr,
       // Constraints with C_min == 0 cannot be further decomposed.
       // Keep them in-place even when they exceed local_average.
       auto it = std::partition(
-          constraint_sizes.begin(), constraint_sizes.end(),
-          [=](const auto& a) {
-            return a.second <= local_average or a.first.C_min() == 0 or a.first.C_min() == 1;
+          constraint_sizes.begin(), constraint_sizes.end(), [=](const auto& a) {
+            return a.second <= local_average or a.first.C_min() == 0 or
+                   a.first.C_min() == 1;
           });
 
       // Remove constraints from full list
@@ -989,9 +995,9 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr,
     }
 
     // Recompute average with updated total_work and constraint count
-    total_work = std::accumulate(
-        constraint_sizes.begin(), constraint_sizes.end(), 0ul,
-        [](auto s, const auto& p) { return s + p.second; });
+    total_work =
+        std::accumulate(constraint_sizes.begin(), constraint_sizes.end(), 0ul,
+                        [](auto s, const auto& p) { return s + p.second; });
     local_average = std::max(1ul, total_work / world_size);
 
     if (cgen_logger) {
@@ -1001,8 +1007,8 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr,
       cgen_logger->debug(
           "  * CGEN level {}: split {} -> ncon={}, total_work={}, "
           "avg/worker={}, max_single={}",
-          ilevel, tps_to_next.size(), constraint_sizes.size(),
-          total_work, local_average, max_w);
+          ilevel, tps_to_next.size(), constraint_sizes.size(), total_work,
+          local_average, max_w);
     }
   }  // Recurse into constraints
 
diff --git a/external/macis/include/macis/asci/pt2.hpp b/external/macis/include/macis/asci/pt2.hpp
index 62045daf0..4ce63a390 100644
--- a/external/macis/include/macis/asci/pt2.hpp
+++ b/external/macis/include/macis/asci/pt2.hpp
@@ -38,7 +38,6 @@ namespace macis {
  * @param[in] T_pq One-electron integral matrix (kinetic + nuclear attraction)
  * @param[in] G_red Reduced two-electron repulsion integrals (same-spin)
  * @param[in] V_red Reduced two-electron exchange integrals (same-spin)
- * @param[in] G_pqrs Full two-electron repulsion integral tensor (same-spin)
  * @param[in] V_pqrs Full two-electron repulsion integral tensor (opposite-spin)
  * @param[in] ham_gen Hamiltonian generator for matrix element evaluation
  * @param[in] comm MPI communicator for parallel execution
@@ -63,8 +62,7 @@ double asci_pt2_constraint(ASCISettings asci_settings,
                            wavefunction_iterator_t<N> cdets_end,
                            const double E_ASCI, const std::vector<double>& C,
                            size_t norb, const double* T_pq, const double* G_red,
-                           const double* V_red, const double* G_pqrs,
-                           const double* V_pqrs,
+                           const double* V_red, const double* V_pqrs,
                            HamiltonianGenerator<wfn_t<N>>& ham_gen,
                            MPI_Comm comm) {
   using clock_type = std::chrono::high_resolution_clock;
@@ -244,7 +242,11 @@ double asci_pt2_constraint(ASCISettings asci_settings,
   //     5, norb, n_sing_beta, n_doub_beta, uniq_alpha, comm);
   auto constraints = gen_constraints_general<wfn_t<N>>(
       asci_settings.pt2_max_constraint_level, norb, n_sing_beta, n_doub_beta,
+#ifdef _OPENMP
       uniq_alpha, world_size * omp_get_max_threads(),
+#else
+      uniq_alpha, world_size,
+#endif
       asci_settings.pt2_min_constraint_level,
       asci_settings.pt2_constraint_refine_force);
   auto gen_c_en = clock_type::now();
@@ -322,7 +324,7 @@ double asci_pt2_constraint(ASCISettings asci_settings,
 
             // AAAA excitations
             generate_constraint_doubles_contributions_ss(
-                c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), G_pqrs,
+                c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), V_pqrs,
                 norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs);
 
             // AABB excitations
@@ -341,7 +343,7 @@ double asci_pt2_constraint(ASCISettings asci_settings,
               // BBBB excitations
               append_ss_doubles_asci_contributions<Spin::Beta>(
                   c, w, beta_det, alpha_det, occ_beta, vir_beta, occ_alpha,
-                  orb_ens_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI,
+                  orb_ens_beta.data(), V_pqrs, norb, h_el_tol, h_diag, E_ASCI,
                   ham_gen, asci_pairs);
 
               // No excitation (push inf to remove from list)
@@ -412,9 +414,15 @@ double asci_pt2_constraint(ASCISettings asci_settings,
       const size_t c_end = std::min(ncon_total, ic + ntake);
       for (; ic < c_end; ++ic) {
         const auto& con = constraints[ic].first;
-        if (asci_settings.pt2_print_progress)
+        if (asci_settings.pt2_print_progress) {
+#ifdef _OPENMP
+          const int tid = omp_get_thread_num();
+#else
+          const int tid = 0;
+#endif
           logger->info("[pt2_small rank {:4d} tid:{:4d}] {:10d} / {:10d}",
-                       world_rank, omp_get_thread_num(), ic, ncon_total);
+                       world_rank, tid, ic, ncon_total);
+        }
 
         for (size_t i_alpha = 0; i_alpha < nuniq_alpha; ++i_alpha) {
           const size_t old_pair_size = asci_pairs.size();
@@ -456,7 +464,7 @@ double asci_pt2_constraint(ASCISettings asci_settings,
 
             // AAAA excitations
             generate_constraint_doubles_contributions_ss(
-                c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), G_pqrs,
+                c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), V_pqrs,
                 norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs);
 
             // AABB excitations
@@ -475,7 +483,7 @@ double asci_pt2_constraint(ASCISettings asci_settings,
               // BBBB excitations
               append_ss_doubles_asci_contributions<Spin::Beta>(
                   c, w, beta_det, alpha_det, occ_beta, vir_beta, occ_alpha,
-                  orb_ens_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI,
+                  orb_ens_beta.data(), V_pqrs, norb, h_el_tol, h_diag, E_ASCI,
                   ham_gen, asci_pairs);
 
               // No excitation (push inf to remove from list)
@@ -490,12 +498,18 @@ double asci_pt2_constraint(ASCISettings asci_settings,
             auto uit = stable_sort_and_accumulate_asci_pairs(asci_pairs.begin(),
                                                              asci_pairs.end());
             asci_pairs.erase(uit, asci_pairs.end());
-            if (asci_settings.pt2_print_progress)
+            if (asci_settings.pt2_print_progress) {
+#ifdef _OPENMP
+              const int tid_prune = omp_get_thread_num();
+#else
+              const int tid_prune = 0;
+#endif
               logger->info(
                   "[pt2_prune rank {:4d} tid:{:4d}] IC = {} / {} IA = {} / {} "
                   "SZ = {}",
-                  world_rank, omp_get_thread_num(), ic, ncon_total, i_alpha,
-                  nuniq_alpha, asci_pairs.size());
+                  world_rank, tid_prune, ic, ncon_total, i_alpha, nuniq_alpha,
+                  asci_pairs.size());
+            }
 
             if (asci_pairs.size() > asci_settings.pt2_reserve_count) {
               logger->warn("PRUNED SIZE LARGER THAN RESERVE COUNT");
diff --git a/external/macis/include/macis/asci/refine.hpp b/external/macis/include/macis/asci/refine.hpp
index e44fbb712..434faf845 100644
--- a/external/macis/include/macis/asci/refine.hpp
+++ b/external/macis/include/macis/asci/refine.hpp
@@ -9,7 +9,6 @@
 
 #pragma once
 #include <algorithm>
-
 #include <macis/asci/iteration.hpp>
 
 namespace macis {
@@ -131,8 +130,8 @@ auto asci_refine(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
 
         std::vector<wfn_t<N>> union_wfn;
         union_wfn.reserve(wfn.size() + prev_wfn.size());
-        std::set_union(prev_wfn.begin(), prev_wfn.end(), wfn.begin(),
-                       wfn.end(), std::back_inserter(union_wfn), wfn_comp{});
+        std::set_union(prev_wfn.begin(), prev_wfn.end(), wfn.begin(), wfn.end(),
+                       std::back_inserter(union_wfn), wfn_comp{});
 
         logger->info(
             "  Oscillation detected — stabilizing via union of last two "
@@ -140,8 +139,8 @@ auto asci_refine(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
             prev_wfn.size(), wfn.size(), union_wfn.size());
 
         // Rediagonalize in the enlarged space — keep the cache since the
-        // union is a superset of the cached dets (high overlap → patched build).
-        // Warm-start from current X mapped onto the union ordering.
+        // union is a superset of the cached dets (high overlap → patched
+        // build). Warm-start from current X mapped onto the union ordering.
         std::vector<double> X_union(union_wfn.size(), 0.0);
         {
           // wfn is sorted and is a subset of union_wfn (also sorted).
@@ -174,8 +173,7 @@ auto asci_refine(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
             if (n % ws) {
               auto wr = comm_rank(comm);
               auto* rem = X_full.data() + ws * lc;
-              if (wr == ws - 1)
-                std::copy_n(X_union.data() + lc, n % ws, rem);
+              if (wr == ws - 1) std::copy_n(X_union.data() + lc, n % ws, rem);
               MPI_Bcast(rem, n % ws, MPI_DOUBLE, ws - 1, comm);
             }
             X_union = std::move(X_full);
@@ -187,9 +185,9 @@ auto asci_refine(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
 
         // Extend iteration budget by the number of oscillation iterations
         // that were wasted, capped so we don't loop forever.
-        const size_t extension = std::min(
-            static_cast<size_t>(oscillation_count),
-            max_extensions - total_extensions);
+        const size_t extension =
+            std::min(static_cast<size_t>(oscillation_count),
+                     max_extensions - total_extensions);
         if (extension > 0) {
           max_iter += extension;
           total_extensions += extension;
@@ -221,11 +219,10 @@ auto asci_refine(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
   else {
     std::string msg = "ASCI Refine did not converge";
     if (total_extensions > 0) {
-      msg +=
-          " (oscillation detected, " + std::to_string(total_extensions) +
-          " extra iterations granted). "
-          "Consider using percentage core_selection_strategy, increasing "
-          "ncdets_max, or loosening refine_energy_tol.";
+      msg += " (oscillation detected, " + std::to_string(total_extensions) +
+             " extra iterations granted). "
+             "Consider using percentage core_selection_strategy, increasing "
+             "ncdets_max, or loosening refine_energy_tol.";
     }
     throw std::runtime_error(msg);
   }
diff --git a/external/macis/include/macis/hamiltonian_generator/base.hpp b/external/macis/include/macis/hamiltonian_generator/base.hpp
index 203e3926c..87699fd7f 100644
--- a/external/macis/include/macis/hamiltonian_generator/base.hpp
+++ b/external/macis/include/macis/hamiltonian_generator/base.hpp
@@ -49,12 +49,6 @@ class HamiltonianGeneratorBase {
   /// @brief Two-electron integrals V_pqrs = (pq|rs)
   rank4_span_t V_pqrs_;
 
-  /// @brief Storage for antisymmetrized two-electron integrals G(i,j,k,l) =
-  /// (ij|kl) - (il|kj)
-  std::vector<Scalar> G_pqrs_data_;
-  /// @brief Span view of antisymmetrized two-electron integrals
-  rank4_span_t G_pqrs_;
-
   /// @brief Storage for reduced antisymmetrized integrals G_red(i,j,k) =
   /// G(i,j,k,k)
   std::vector<Scalar> G_red_data_;
@@ -116,12 +110,6 @@ class HamiltonianGeneratorBase {
    */
   inline auto* V_red() const { return V_red_data_.data(); }
 
-  /**
-   * @brief Get pointer to antisymmetrized integral data
-   * @return Pointer to the underlying data of G_pqrs_data_
-   */
-  inline auto* G() const { return G_pqrs_data_.data(); }
-
   /**
    * @brief Get pointer to two-electron integral data
    * @return Pointer to the underlying data of V_pqrs_
diff --git a/external/macis/include/macis/hamiltonian_generator/connection_build_utils.hpp b/external/macis/include/macis/hamiltonian_generator/connection_build_utils.hpp
index d1ae7a9a0..b163c945b 100644
--- a/external/macis/include/macis/hamiltonian_generator/connection_build_utils.hpp
+++ b/external/macis/include/macis/hamiltonian_generator/connection_build_utils.hpp
@@ -8,12 +8,11 @@
  */
 
 #pragma once
+#include <algorithm>
+#include <cstring>
 #include <macis/hamiltonian_generator.hpp>
 #include <macis/types.hpp>
 #include <macis/wfn/raw_bitset.hpp>
-
-#include <algorithm>
-#include <cstring>
 #include <numeric>
 #include <vector>
 
@@ -74,8 +73,7 @@ struct SpinCache {
         std::copy_n(tmp.data(), n_alpha_elec,
                     &occ_alpha_flat[i * n_alpha_elec]);
         spin_wfn_traits::state_to_occ(beta[i], tmp);
-        std::copy_n(tmp.data(), n_beta_elec,
-                    &occ_beta_flat[i * n_beta_elec]);
+        std::copy_n(tmp.data(), n_beta_elec, &occ_beta_flat[i * n_beta_elec]);
       }
     }
   }
@@ -95,9 +93,7 @@ struct SpinCache {
 inline uint64_t pack_pair(uint32_t lo, uint32_t hi) {
   return (static_cast<uint64_t>(lo) << 32) | hi;
 }
-inline uint32_t pair_lo(uint64_t p) {
-  return static_cast<uint32_t>(p >> 32);
-}
+inline uint32_t pair_lo(uint64_t p) { return static_cast<uint32_t>(p >> 32); }
 inline uint32_t pair_hi(uint64_t p) {
   return static_cast<uint32_t>(p & 0xFFFFFFFF);
 }
@@ -125,8 +121,8 @@ inline void sort_unique_pairs(std::vector<uint64_t>& pairs) {
 template <typename index_t, typename WfnType>
 sparsexx::csr_matrix<double, index_t> build_csr_from_pairs(
     size_t ndets, const std::vector<uint64_t>& pairs,
-    const SpinCache<WfnType>& cache,
-    HamiltonianGenerator<WfnType>& ham_gen, double H_thresh) {
+    const SpinCache<WfnType>& cache, HamiltonianGenerator<WfnType>& ham_gen,
+    double H_thresh) {
   using csr_type = sparsexx::csr_matrix<double, index_t>;
   if (ndets == 0) return csr_type(0, 0, 0, 0);
 
@@ -156,8 +152,8 @@ sparsexx::csr_matrix<double, index_t> build_csr_from_pairs(
       auto ex_a = cache.alpha[i] ^ cache.alpha[j];
       auto ex_b = cache.beta[i] ^ cache.beta[j];
       pair_vals[k] = ham_gen.matrix_element(cache.alpha[i], cache.alpha[j],
-                                            ex_a, cache.beta[i],
-                                            cache.beta[j], ex_b, bra_oa, bra_ob);
+                                            ex_a, cache.beta[i], cache.beta[j],
+                                            ex_b, bra_oa, bra_ob);
     }
   }
 
@@ -230,10 +226,9 @@ sparsexx::csr_matrix<double, index_t> build_csr_from_pairs(
       if (rlen <= 1) continue;
       perm.resize(rlen);
       std::iota(perm.begin(), perm.end(), size_t(0));
-      std::sort(perm.begin(), perm.end(),
-                [&](size_t a, size_t b) {
-                  return colind[rs + a] < colind[rs + b];
-                });
+      std::sort(perm.begin(), perm.end(), [&](size_t a, size_t b) {
+        return colind[rs + a] < colind[rs + b];
+      });
       tmp_ci.resize(rlen);
       tmp_nz.resize(rlen);
       for (size_t k = 0; k < rlen; ++k) {
@@ -255,15 +250,11 @@ sparsexx::csr_matrix<double, index_t> build_csr_from_pairs(
 // is protected) and for any logging.
 // -----------------------------------------------------------------------
 template <typename index_t, typename WfnType>
-auto pair_based_build_impl_(
-    size_t ndets,
-    std::vector<uint64_t>& all_pairs,
-    SpinCache<WfnType>& cache,
-    HamiltonianGenerator<WfnType>& gen,
-    double H_thresh)
+auto pair_based_build_impl_(size_t ndets, std::vector<uint64_t>& all_pairs,
+                            SpinCache<WfnType>& cache,
+                            HamiltonianGenerator<WfnType>& gen, double H_thresh)
     -> sparsexx::csr_matrix<double, index_t> {
-  if (ndets == 0)
-    return sparsexx::csr_matrix<double, index_t>(0, 0, 0, 0);
+  if (ndets == 0) return sparsexx::csr_matrix<double, index_t>(0, 0, 0, 0);
   return build_csr_from_pairs<index_t>(ndets, all_pairs, cache, gen, H_thresh);
 }
 
diff --git a/external/macis/include/macis/hamiltonian_generator/dynamic_bit_masking.hpp b/external/macis/include/macis/hamiltonian_generator/dynamic_bit_masking.hpp
index 86b8fc128..b6a62cbdb 100644
--- a/external/macis/include/macis/hamiltonian_generator/dynamic_bit_masking.hpp
+++ b/external/macis/include/macis/hamiltonian_generator/dynamic_bit_masking.hpp
@@ -8,11 +8,10 @@
  */
 
 #pragma once
+#include <array>
 #include <macis/hamiltonian_generator/connection_build_utils.hpp>
 #include <macis/hamiltonian_generator/sorted_double_loop.hpp>
 
-#include <array>
-
 namespace macis {
 
 /**
@@ -152,7 +151,9 @@ class DynamicBitMaskHamiltonianGenerator
     all_pairs.reserve(total_raw);
     for (auto& tp : thread_pairs) {
       all_pairs.insert(all_pairs.end(), tp.begin(), tp.end());
-      { std::vector<uint64_t>().swap(tp); }
+      {
+        std::vector<uint64_t>().swap(tp);
+      }
     }
     detail::sort_unique_pairs(all_pairs);
 
@@ -167,8 +168,8 @@ class DynamicBitMaskHamiltonianGenerator
 
   // ---- Combo descriptor ----
   struct Combo {
-    std::vector<int> keep;   // indices of masks to keep
-    int key_bits;            // total packed bits in composite key
+    std::vector<int> keep;  // indices of masks to keep
+    int key_bits;           // total packed bits in composite key
   };
 
   // ---- Generate all C(n_masks, 4) combinations ----
@@ -225,10 +226,10 @@ class DynamicBitMaskHamiltonianGenerator
   }
 
   // ---- Assign variable orbitals to masks (round-robin by activity) ----
-  static void assign_masks(
-      const std::vector<uint32_t>& variable_orbs, int n_masks,
-      std::vector<std::vector<uint32_t>>& mask_bits,
-      std::vector<int>& bits_per_mask) {
+  static void assign_masks(const std::vector<uint32_t>& variable_orbs,
+                           int n_masks,
+                           std::vector<std::vector<uint32_t>>& mask_bits,
+                           std::vector<int>& bits_per_mask) {
     mask_bits.resize(n_masks);
     bits_per_mask.resize(n_masks, 0);
     for (auto& mb : mask_bits) mb.clear();
@@ -253,10 +254,9 @@ class DynamicBitMaskHamiltonianGenerator
   }
 
   // ---- Compose a uint64 key from kept masks ----
-  static uint64_t compose_key_u64(
-      const uint64_t* mv, int n_masks_stride,
-      const std::vector<int>& keep,
-      const std::vector<int>& bits_per_mask) {
+  static uint64_t compose_key_u64(const uint64_t* mv, int n_masks_stride,
+                                  const std::vector<int>& keep,
+                                  const std::vector<int>& bits_per_mask) {
     uint64_t key = 0;
     int shift = 0;
     for (int ki = 0; ki < static_cast<int>(keep.size()); ++ki) {
@@ -277,10 +277,10 @@ class DynamicBitMaskHamiltonianGenerator
     bool key_eq(const Key128& o) const { return lo == o.lo && hi == o.hi; }
   };
 
-  static Key128 compose_key_128(
-      const uint64_t* mv, int n_masks_stride,
-      const std::vector<int>& keep,
-      const std::vector<int>& bits_per_mask, uint32_t det_idx) {
+  static Key128 compose_key_128(const uint64_t* mv, int n_masks_stride,
+                                const std::vector<int>& keep,
+                                const std::vector<int>& bits_per_mask,
+                                uint32_t det_idx) {
     uint64_t lo = 0, hi = 0;
     int shift = 0;
     for (int ki = 0; ki < static_cast<int>(keep.size()); ++ki) {
@@ -299,12 +299,12 @@ class DynamicBitMaskHamiltonianGenerator
   }
 
   // ---- Process one combo (uint64 key path) ----
-  void process_combo_u64_(
-      const Combo& combo, size_t ndets,
-      const std::vector<uint64_t>& masked_values, int n_masks_total,
-      const std::vector<int>& bits_per_mask,
-      const WfnType* dets,
-      std::vector<uint64_t>& out_pairs) const {
+  void process_combo_u64_(const Combo& combo, size_t ndets,
+                          const std::vector<uint64_t>& masked_values,
+                          int n_masks_total,
+                          const std::vector<int>& bits_per_mask,
+                          const WfnType* dets,
+                          std::vector<uint64_t>& out_pairs) const {
     // Build (key, det_idx) pairs
     struct KeyIdx {
       uint64_t key;
@@ -314,7 +314,7 @@ class DynamicBitMaskHamiltonianGenerator
     std::vector<KeyIdx> keys(ndets);
     for (size_t d = 0; d < ndets; ++d) {
       keys[d] = {compose_key_u64(&masked_values[d * n_masks_total],
-                                  n_masks_total, combo.keep, bits_per_mask),
+                                 n_masks_total, combo.keep, bits_per_mask),
                  static_cast<uint32_t>(d)};
     }
 
@@ -343,17 +343,17 @@ class DynamicBitMaskHamiltonianGenerator
   }
 
   // ---- Process one combo (128-bit key path) ----
-  void process_combo_128_(
-      const Combo& combo, size_t ndets,
-      const std::vector<uint64_t>& masked_values, int n_masks_total,
-      const std::vector<int>& bits_per_mask,
-      const WfnType* dets,
-      std::vector<uint64_t>& out_pairs) const {
+  void process_combo_128_(const Combo& combo, size_t ndets,
+                          const std::vector<uint64_t>& masked_values,
+                          int n_masks_total,
+                          const std::vector<int>& bits_per_mask,
+                          const WfnType* dets,
+                          std::vector<uint64_t>& out_pairs) const {
     std::vector<Key128> keys(ndets);
     for (size_t d = 0; d < ndets; ++d) {
-      keys[d] = compose_key_128(&masked_values[d * n_masks_total],
-                                n_masks_total, combo.keep, bits_per_mask,
-                                static_cast<uint32_t>(d));
+      keys[d] =
+          compose_key_128(&masked_values[d * n_masks_total], n_masks_total,
+                          combo.keep, bits_per_mask, static_cast<uint32_t>(d));
     }
 
     std::sort(keys.begin(), keys.end());
@@ -384,8 +384,8 @@ class DynamicBitMaskHamiltonianGenerator
       double H_thresh) {
     const size_t ndets = std::distance(dets_begin, dets_end);
     auto [all_pairs, cache] = enumerate_connected_pairs_(dets_begin, dets_end);
-    return detail::pair_based_build_impl_<index_t>(
-        ndets, all_pairs, cache, *this, H_thresh);
+    return detail::pair_based_build_impl_<index_t>(ndets, all_pairs, cache,
+                                                   *this, H_thresh);
   }
 };
 
diff --git a/external/macis/include/macis/hamiltonian_generator/matrix_elements.hpp b/external/macis/include/macis/hamiltonian_generator/matrix_elements.hpp
index 2ffebf628..c5bad93da 100644
--- a/external/macis/include/macis/hamiltonian_generator/matrix_elements.hpp
+++ b/external/macis/include/macis/hamiltonian_generator/matrix_elements.hpp
@@ -101,8 +101,8 @@ double HamiltonianGenerator<WfnType>::matrix_element(
  * in one spin)
  *
  * Calculates the matrix element for determinants that differ by a double
- * excitation within the same spin channel. Uses antisymmetrized integrals
- * G_pqrs.
+ * excitation within the same spin channel. Uses antisymmetrized values built
+ * on the fly from V_pqrs.
  *
  * @tparam WfnType Wavefunction type defining determinant representation
  * @param bra Bra spin determinant
@@ -116,7 +116,8 @@ double HamiltonianGenerator<WfnType>::matrix_element_4(spin_det_t bra,
                                                        spin_det_t ex) const {
   auto [o1, v1, o2, v2, sign] = doubles_sign_indices(bra, ket, ex);
 
-  return sign * G_pqrs_(v1, o1, v2, o2);
+  const auto g_v1o1v2o2 = V_pqrs_(v1, o1, v2, o2) - V_pqrs_(v1, o2, v2, o1);
+  return sign * g_v1o1v2o2;
 }
 
 /**
diff --git a/external/macis/include/macis/hamiltonian_generator/residue_arrays.hpp b/external/macis/include/macis/hamiltonian_generator/residue_arrays.hpp
index 381533f6d..2c83860fe 100644
--- a/external/macis/include/macis/hamiltonian_generator/residue_arrays.hpp
+++ b/external/macis/include/macis/hamiltonian_generator/residue_arrays.hpp
@@ -77,20 +77,18 @@ class ResidueArrayHamiltonianGenerator
         if (std::abs(val) < 1e-16) continue;
 
         if (i != last_bra) {
-          bra_oa.assign(cache.occ_a(i),
-                        cache.occ_a(i) + cache.n_alpha_elec);
-          bra_ob.assign(cache.occ_b(i),
-                        cache.occ_b(i) + cache.n_beta_elec);
+          bra_oa.assign(cache.occ_a(i), cache.occ_a(i) + cache.n_alpha_elec);
+          bra_ob.assign(cache.occ_b(i), cache.occ_b(i) + cache.n_beta_elec);
           last_bra = i;
         }
 
         auto ex_a = cache.alpha[i] ^ cache.alpha[j];
         auto ex_b = cache.beta[i] ^ cache.beta[j];
 
-        rdm_contributions_spin_dep<true>(
-            cache.alpha[i], cache.alpha[j], ex_a, cache.beta[i],
-            cache.beta[j], ex_b, bra_oa, bra_ob, val, ordm_aa, ordm_bb,
-            trdm_aaaa, trdm_bbbb, trdm_aabb);
+        rdm_contributions_spin_dep<true>(cache.alpha[i], cache.alpha[j], ex_a,
+                                         cache.beta[i], cache.beta[j], ex_b,
+                                         bra_oa, bra_ob, val, ordm_aa, ordm_bb,
+                                         trdm_aaaa, trdm_bbbb, trdm_aabb);
       }
 
 #pragma omp for schedule(static)
@@ -98,13 +96,10 @@ class ResidueArrayHamiltonianGenerator
         double val = C[i] * C[i];
         if (std::abs(val) < 1e-16) continue;
 
-        bra_oa.assign(cache.occ_a(i),
-                      cache.occ_a(i) + cache.n_alpha_elec);
-        bra_ob.assign(cache.occ_b(i),
-                      cache.occ_b(i) + cache.n_beta_elec);
-        rdm_contributions_diag_spin_dep(bra_oa, bra_ob, val, ordm_aa,
-                                        ordm_bb, trdm_aaaa, trdm_bbbb,
-                                        trdm_aabb);
+        bra_oa.assign(cache.occ_a(i), cache.occ_a(i) + cache.n_alpha_elec);
+        bra_ob.assign(cache.occ_b(i), cache.occ_b(i) + cache.n_beta_elec);
+        rdm_contributions_diag_spin_dep(bra_oa, bra_ob, val, ordm_aa, ordm_bb,
+                                        trdm_aaaa, trdm_bbbb, trdm_aabb);
       }
     }
 
@@ -240,7 +235,9 @@ class ResidueArrayHamiltonianGenerator
       }
     }
 
-    { std::vector<ResiduePair>().swap(residue_arr); }
+    {
+      std::vector<ResiduePair>().swap(residue_arr);
+    }
 
     size_t total_raw = 0;
     for (auto& tp : thread_pairs) total_raw += tp.size();
@@ -248,7 +245,9 @@ class ResidueArrayHamiltonianGenerator
     all_pairs.reserve(total_raw);
     for (auto& tp : thread_pairs) {
       all_pairs.insert(all_pairs.end(), tp.begin(), tp.end());
-      { std::vector<uint64_t>().swap(tp); }
+      {
+        std::vector<uint64_t>().swap(tp);
+      }
     }
     detail::sort_unique_pairs(all_pairs);
 
@@ -257,13 +256,13 @@ class ResidueArrayHamiltonianGenerator
 
  private:
   template <typename index_t>
-  sparse_matrix_type<index_t> residue_array_build_(
-      full_det_iterator dets_begin, full_det_iterator dets_end,
-      double H_thresh) {
+  sparse_matrix_type<index_t> residue_array_build_(full_det_iterator dets_begin,
+                                                   full_det_iterator dets_end,
+                                                   double H_thresh) {
     const size_t ndets = std::distance(dets_begin, dets_end);
     auto [all_pairs, cache] = enumerate_connected_pairs_(dets_begin, dets_end);
-    return detail::pair_based_build_impl_<index_t>(
-        ndets, all_pairs, cache, *this, H_thresh);
+    return detail::pair_based_build_impl_<index_t>(ndets, all_pairs, cache,
+                                                   *this, H_thresh);
   }
 };
 
diff --git a/external/macis/include/macis/hamiltonian_generator/sorted_double_loop.hpp b/external/macis/include/macis/hamiltonian_generator/sorted_double_loop.hpp
index 0e46070e5..ee3750cae 100644
--- a/external/macis/include/macis/hamiltonian_generator/sorted_double_loop.hpp
+++ b/external/macis/include/macis/hamiltonian_generator/sorted_double_loop.hpp
@@ -8,12 +8,13 @@
  */
 
 #pragma once
+#include <spdlog/spdlog.h>
+
 #include <chrono>
 #include <macis/hamiltonian_generator.hpp>
 #include <macis/sd_operations.hpp>
 #include <macis/util/entropies.hpp>
 #include <macis/util/rdms.hpp>
-#include <spdlog/spdlog.h>
 
 #ifdef _OPENMP
 #include <omp.h>
diff --git a/external/macis/include/macis/mcscf/cas.hpp b/external/macis/include/macis/mcscf/cas.hpp
index 477beda60..1c9b79eaa 100644
--- a/external/macis/include/macis/mcscf/cas.hpp
+++ b/external/macis/include/macis/mcscf/cas.hpp
@@ -52,8 +52,7 @@ double compute_casci_rdms(
   auto dets = generate_hilbert_space<wfn_type>(norb.get(), nalpha, nbeta);
   double E0 = selected_ci_diag<int64_t>(
       dets.begin(), dets.end(), ham_gen, settings.ci_matel_tol,
-      settings.ci_max_subspace, settings.ci_res_tol, C
-      MACIS_MPI_CODE(, comm));
+      settings.ci_max_subspace, settings.ci_res_tol, C MACIS_MPI_CODE(, comm));
 
   // Compute RDMs
   if (ORDM and TRDM)
diff --git a/external/macis/include/macis/solvers/incremental_h_build.hpp b/external/macis/include/macis/solvers/incremental_h_build.hpp
index 399956b1a..f19f21cec 100644
--- a/external/macis/include/macis/solvers/incremental_h_build.hpp
+++ b/external/macis/include/macis/solvers/incremental_h_build.hpp
@@ -8,13 +8,12 @@
  */
 
 #pragma once
-#include <macis/csr_hamiltonian.hpp>
-#include <macis/types.hpp>
-
 #include <algorithm>
 #include <cassert>
 #include <chrono>
 #include <cstring>
+#include <macis/csr_hamiltonian.hpp>
+#include <macis/types.hpp>
 #include <numeric>
 #include <optional>
 #include <vector>
@@ -39,13 +38,11 @@ class PatchedSparseOperator {
  public:
   using csr_type = sparsexx::csr_matrix<double, index_t>;
 
-  PatchedSparseOperator(
-      const csr_type& old_H, size_t n_new,
-      std::vector<size_t> old_to_new,
-      csr_type H_nn, csr_type H_on,
-      std::vector<size_t> kept_new_indices,
-      std::vector<size_t> added_new_indices,
-      std::vector<double> diagonal)
+  PatchedSparseOperator(const csr_type& old_H, size_t n_new,
+                        std::vector<size_t> old_to_new, csr_type H_nn,
+                        csr_type H_on, std::vector<size_t> kept_new_indices,
+                        std::vector<size_t> added_new_indices,
+                        std::vector<double> diagonal)
       : old_H_(old_H),
         n_new_(n_new),
         n_old_(old_H.m()),
@@ -135,8 +132,8 @@ class PatchedSparseOperator {
         // Lazily allocate / resize cached scratch vectors
         if (transpose_scratch_.size() != static_cast<size_t>(nthreads) ||
             transpose_scratch_n_added_ != n_added) {
-          transpose_scratch_.assign(
-              nthreads, std::vector<double>(n_added, 0.0));
+          transpose_scratch_.assign(nthreads,
+                                    std::vector<double>(n_added, 0.0));
           transpose_scratch_n_added_ = n_added;
         } else {
           for (auto& v : transpose_scratch_)
@@ -144,7 +141,11 @@ class PatchedSparseOperator {
         }
 #pragma omp parallel
         {
+#ifdef _OPENMP
           auto& local = transpose_scratch_[omp_get_thread_num()];
+#else
+          auto& local = transpose_scratch_[0];
+#endif
 #pragma omp for schedule(static)
           for (size_t i_local = 0; i_local < n_kept; ++i_local) {
             double v_kept = alpha * V[kept_new_[i_local]];
@@ -221,7 +222,8 @@ std::optional<PatchedSparseOperator<index_t>> build_patched_operator(
     double min_overlap = 0.3) {
   using csr_type = sparsexx::csr_matrix<double, index_t>;
   using wfn_traits = wavefunction_traits<WfnType>;
-  using spin_wfn_traits = wavefunction_traits<typename wfn_traits::spin_wfn_type>;
+  using spin_wfn_traits =
+      wavefunction_traits<typename wfn_traits::spin_wfn_type>;
   using wfn_comp = typename wfn_traits::spin_comparator;
   using clock_type = std::chrono::high_resolution_clock;
   using dur_s = std::chrono::duration<double>;
@@ -264,14 +266,17 @@ std::optional<PatchedSparseOperator<index_t>> build_patched_operator(
                 : 0.0;
 
   if (logger) {
-    logger->info("  PATCH: n_old={}, n_new={}, n_kept={}, n_added={}, overlap={:.1f}%",
-                 n_old, n_new, n_kept, n_added, 100.0 * overlap_frac);
+    logger->info(
+        "  PATCH: n_old={}, n_new={}, n_kept={}, n_added={}, overlap={:.1f}%",
+        n_old, n_new, n_kept, n_added, 100.0 * overlap_frac);
   }
 
   if (overlap_frac < min_overlap) {
     if (logger)
-      logger->info("  PATCH: overlap {:.1f}% < {:.0f}% threshold, falling back to full build",
-                   100.0 * overlap_frac, 100.0 * min_overlap);
+      logger->info(
+          "  PATCH: overlap {:.1f}% < {:.0f}% threshold, falling back to full "
+          "build",
+          100.0 * overlap_frac, 100.0 * min_overlap);
     return std::nullopt;
   }
 
@@ -283,8 +288,8 @@ std::optional<PatchedSparseOperator<index_t>> build_patched_operator(
   csr_type H_nn(0, 0, 0, 0);
   if (n_added > 0) {
     H_nn = make_csr_hamiltonian_block<index_t>(
-        added_dets.begin(), added_dets.end(),
-        added_dets.begin(), added_dets.end(), ham_gen, h_el_tol);
+        added_dets.begin(), added_dets.end(), added_dets.begin(),
+        added_dets.end(), ham_gen, h_el_tol);
   }
   auto nn_en = clock_type::now();
 
@@ -296,8 +301,8 @@ std::optional<PatchedSparseOperator<index_t>> build_patched_operator(
   csr_type H_on(0, 0, 0, 0);
   if (n_kept > 0 && n_added > 0) {
     H_on = make_csr_hamiltonian_block<index_t>(
-        kept_dets.begin(), kept_dets.end(),
-        added_dets.begin(), added_dets.end(), ham_gen, h_el_tol);
+        kept_dets.begin(), kept_dets.end(), added_dets.begin(),
+        added_dets.end(), ham_gen, h_el_tol);
   }
   auto on_en = clock_type::now();
 
@@ -336,18 +341,16 @@ std::optional<PatchedSparseOperator<index_t>> build_patched_operator(
   auto diag_en = clock_type::now();
 
   if (logger) {
-    logger->info("  PATCH: nn_nnz={}, nn_dur={:.3e}s, on_nnz={}, on_dur={:.3e}s, diag_dur={:.3e}s",
-                 H_nn.nnz(), dur_s(nn_en - nn_st).count(),
-                 H_on.nnz(), dur_s(on_en - on_st).count(),
-                 dur_s(diag_en - diag_st).count());
+    logger->info(
+        "  PATCH: nn_nnz={}, nn_dur={:.3e}s, on_nnz={}, on_dur={:.3e}s, "
+        "diag_dur={:.3e}s",
+        H_nn.nnz(), dur_s(nn_en - nn_st).count(), H_on.nnz(),
+        dur_s(on_en - on_st).count(), dur_s(diag_en - diag_st).count());
   }
 
   return PatchedSparseOperator<index_t>(
-      cache.H, n_new,
-      std::move(old_to_new),
-      std::move(H_nn), std::move(H_on),
-      std::move(kept_new), std::move(added_new),
-      std::move(diagonal));
+      cache.H, n_new, std::move(old_to_new), std::move(H_nn), std::move(H_on),
+      std::move(kept_new), std::move(added_new), std::move(diagonal));
 }
 
 }  // namespace macis
diff --git a/external/macis/include/macis/solvers/selected_ci_diag.hpp b/external/macis/include/macis/solvers/selected_ci_diag.hpp
index 3890a0bc6..c44372649 100644
--- a/external/macis/include/macis/solvers/selected_ci_diag.hpp
+++ b/external/macis/include/macis/solvers/selected_ci_diag.hpp
@@ -9,14 +9,13 @@
 
 #pragma once
 #include <chrono>
-#include <optional>
-
 #include <macis/csr_hamiltonian.hpp>
 #include <macis/hamiltonian_generator.hpp>
 #include <macis/solvers/davidson.hpp>
 #include <macis/solvers/incremental_h_build.hpp>
 #include <macis/types.hpp>
 #include <macis/util/mpi.hpp>
+#include <optional>
 #include <sparsexx/io/write_dist_mm.hpp>
 #include <sparsexx/matrix_types/dense_conversions.hpp>
 #include <sparsexx/util/submatrix.hpp>
@@ -170,23 +169,20 @@ double serial_selected_ci_diag(const SpMatType& H, size_t davidson_max_m,
  *                           when cache is nullptr).
  */
 template <typename index_t, typename WfnType, typename WfnIterator>
-double selected_ci_diag(
-    WfnIterator dets_begin, WfnIterator dets_end,
-    HamiltonianGenerator<WfnType>& ham_gen, double h_el_tol,
-    size_t davidson_max_m, double davidson_res_tol,
-    std::vector<double>& C_local,
-    CachedHamiltonianState<WfnType, index_t>* cache,
-    double min_patch_overlap
-    MACIS_MPI_CODE(, MPI_Comm comm = MPI_COMM_WORLD)) {
+double selected_ci_diag(WfnIterator dets_begin, WfnIterator dets_end,
+                        HamiltonianGenerator<WfnType>& ham_gen, double h_el_tol,
+                        size_t davidson_max_m, double davidson_res_tol,
+                        std::vector<double>& C_local,
+                        CachedHamiltonianState<WfnType, index_t>* cache,
+                        double min_patch_overlap
+                            MACIS_MPI_CODE(, MPI_Comm comm = MPI_COMM_WORLD)) {
   auto logger = spdlog::get("ci_solver");
   if (!logger) {
     logger = spdlog::stdout_color_mt("ci_solver");
   }
   // Ensure sub-loggers exist for downstream code
-  if (!spdlog::get("h_build"))
-    spdlog::stdout_color_mt("h_build");
-  if (!spdlog::get("h_build_inc"))
-    spdlog::stdout_color_mt("h_build_inc");
+  if (!spdlog::get("h_build")) spdlog::stdout_color_mt("h_build");
+  if (!spdlog::get("h_build_inc")) spdlog::stdout_color_mt("h_build_inc");
 
   const size_t ndets = std::distance(dets_begin, dets_end);
   const bool incremental = cache != nullptr;
@@ -210,8 +206,7 @@ double selected_ci_diag(
     auto patched_op = build_patched_operator<index_t>(
         *cache, new_dets, ham_gen, h_el_tol, min_patch_overlap);
     auto H_en = clock_type::now();
-    logger->info("  PATCH_DUR = {:.5e} ms",
-                 duration_type(H_en - H_st).count());
+    logger->info("  PATCH_DUR = {:.5e} ms", duration_type(H_en - H_st).count());
 
     if (patched_op) {
       // Set up guess
@@ -231,9 +226,9 @@ double selected_ci_diag(
       }
 
       auto dav_st = clock_type::now();
-      auto [niter, eigval] = davidson(
-          ndets, davidson_max_m, *patched_op,
-          patched_op->diagonal().data(), davidson_res_tol, C_local.data());
+      auto [niter, eigval] = davidson(ndets, davidson_max_m, *patched_op,
+                                      patched_op->diagonal().data(),
+                                      davidson_res_tol, C_local.data());
       auto dav_en = clock_type::now();
 
       logger->info("  {} = {:4}, {} = {:.6e} Eh, {} = {:.5e} ms", "DAV_NITER",
@@ -241,7 +236,6 @@ double selected_ci_diag(
                    duration_type(dav_en - dav_st).count());
       E = eigval;
 
-
       return E;
     }
     // patched_op is nullopt — fall through to full build
@@ -273,8 +267,8 @@ double selected_ci_diag(
     logger->info(
         "  H_DUR_MAX = {:.2e} ms, H_DUR_MIN = {:.2e} ms, H_DUR_AVG = {:.2e} ms",
         max_hdur, min_hdur, avg_hdur);
-    logger->info("  NNZ_MAX = {}, NNZ_MIN = {}, NNZ_AVG = {}", max_nnz,
-                 min_nnz, total_nnz / double(world_size));
+    logger->info("  NNZ_MAX = {}, NNZ_MIN = {}, NNZ_AVG = {}", max_nnz, min_nnz,
+                 total_nnz / double(world_size));
   }
 #else
   size_t total_nnz = H.nnz();
@@ -304,17 +298,15 @@ double selected_ci_diag(
 
 /// @brief Convenience overload without incremental cache.
 template <typename index_t, typename WfnType, typename WfnIterator>
-double selected_ci_diag(
-    WfnIterator dets_begin, WfnIterator dets_end,
-    HamiltonianGenerator<WfnType>& ham_gen, double h_el_tol,
-    size_t davidson_max_m, double davidson_res_tol,
-    std::vector<double>& C_local
-    MACIS_MPI_CODE(, MPI_Comm comm = MPI_COMM_WORLD)) {
+double selected_ci_diag(WfnIterator dets_begin, WfnIterator dets_end,
+                        HamiltonianGenerator<WfnType>& ham_gen, double h_el_tol,
+                        size_t davidson_max_m, double davidson_res_tol,
+                        std::vector<double>& C_local
+                            MACIS_MPI_CODE(, MPI_Comm comm = MPI_COMM_WORLD)) {
   return selected_ci_diag<index_t>(
-      dets_begin, dets_end, ham_gen, h_el_tol, davidson_max_m,
-      davidson_res_tol, C_local,
-      static_cast<CachedHamiltonianState<WfnType, index_t>*>(nullptr), 0.3
-      MACIS_MPI_CODE(, comm));
+      dets_begin, dets_end, ham_gen, h_el_tol, davidson_max_m, davidson_res_tol,
+      C_local, static_cast<CachedHamiltonianState<WfnType, index_t>*>(nullptr),
+      0.3 MACIS_MPI_CODE(, comm));
 }
 
 }  // namespace macis
diff --git a/external/macis/src/macis/CMakeLists.txt b/external/macis/src/macis/CMakeLists.txt
index 10e90d4d1..e587237e0 100644
--- a/external/macis/src/macis/CMakeLists.txt
+++ b/external/macis/src/macis/CMakeLists.txt
@@ -74,7 +74,9 @@ target_link_libraries(macis PUBLIC mdspan)
 include(macis-ips4o)
 target_include_directories(macis SYSTEM PUBLIC
   $<BUILD_INTERFACE:${ips4o_SOURCE_DIR}>)
-target_link_libraries(macis PUBLIC atomic)
+if(NOT APPLE)
+  target_link_libraries(macis PUBLIC atomic)
+endif()
 
 
 # Installation
diff --git a/external/macis/src/macis/hamiltonian_generator/base.ipp b/external/macis/src/macis/hamiltonian_generator/base.ipp
index 6f363e933..20fce8cab 100644
--- a/external/macis/src/macis/hamiltonian_generator/base.ipp
+++ b/external/macis/src/macis/hamiltonian_generator/base.ipp
@@ -39,19 +39,9 @@ void HamiltonianGeneratorBase<Scalar>::generate_integral_intermediates_(
   size_t no = norb_;
   size_t no2 = no * no;
   size_t no3 = no2 * no;
-  size_t no4 = no3 * no;
-
-  // G(i,j,k,l) = V(i,j,k,l) - V(i,l,k,j)
-  G_pqrs_data_ = std::vector<Scalar>(V.data_handle(), V.data_handle() + no4);
-  G_pqrs_ = rank4_span_t(G_pqrs_data_.data(), no, no, no, no);
-  for (auto i = 0ul; i < no; ++i)
-    for (auto j = 0ul; j < no; ++j)
-      for (auto k = 0ul; k < no; ++k)
-        for (auto l = 0ul; l < no; ++l) {
-          G_pqrs_(i, j, k, l) -= V(i, l, k, j);
-        }
-
-  // G_red(i,j,k) = G(i,j,k,k) = G(k,k,i,j)
+  std::cout << "Generating integral intermediates for Hamiltonian generator with "
+            << no << " orbitals..." << std::endl;
+  // G_red(i,j,k) = G(i,j,k,k) = V(i,j,k,k) - V(i,k,k,j)
   // V_red(i,j,k) = V(i,j,k,k) = V(k,k,i,j)
   G_red_data_.resize(no3);
   V_red_data_.resize(no3);
@@ -60,11 +50,12 @@ void HamiltonianGeneratorBase<Scalar>::generate_integral_intermediates_(
   for (auto j = 0ul; j < no; ++j)
     for (auto i = 0ul; i < no; ++i)
       for (auto k = 0ul; k < no; ++k) {
-        G_red_(k, i, j) = G_pqrs_(k, k, i, j);
+        G_red_(k, i, j) = V(k, k, i, j) - V(k, j, i, k);
         V_red_(k, i, j) = V(k, k, i, j);
       }
-
+  std::cout << "Finished generating integral intermediates." << std::endl;
   // G2_red(i,j) = 0.5 * G(i,i,j,j)
+  //             = 0.5 * (V(i,i,j,j) - V(i,j,j,i))
   // V2_red(i,j) = V(i,i,j,j)
   G2_red_data_.resize(no2);
   V2_red_data_.resize(no2);
@@ -72,9 +63,10 @@ void HamiltonianGeneratorBase<Scalar>::generate_integral_intermediates_(
   V2_red_ = matrix_span<Scalar>(V2_red_data_.data(), no, no);
   for (auto j = 0ul; j < no; ++j)
     for (auto i = 0ul; i < no; ++i) {
-      G2_red_(i, j) = 0.5 * G_pqrs_(i, i, j, j);
+      G2_red_(i, j) = 0.5 * (V(i, i, j, j) - V(i, j, j, i));
       V2_red_(i, j) = V(i, i, j, j);
     }
+  std::cout << "Finished generating doubly reduced integral intermediates." << std::endl;
 }
 
 template <typename Scalar>
diff --git a/external/macis/tests/asci.cxx b/external/macis/tests/asci.cxx
index f6f2f0495..fe68e0711 100644
--- a/external/macis/tests/asci.cxx
+++ b/external/macis/tests/asci.cxx
@@ -561,7 +561,7 @@ TEST_CASE("ASCI") {
   // ASCI-PT2
   auto EPT2 = macis::asci_pt2_constraint(
       asci_settings, dets.begin(), dets.end(), E0, C, norb, ham_gen.T(),
-      ham_gen.G_red(), ham_gen.V_red(), ham_gen.G(), ham_gen.V(),
+      ham_gen.G_red(), ham_gen.V_red(), ham_gen.V(),
       ham_gen MACIS_MPI_CODE(, MPI_COMM_WORLD));
 
   // std::cout << std::scientific << std::setprecision(12);
@@ -808,7 +808,8 @@ TEST_CASE("CoreSelectionStrategy") {
           asci_settings.ntdets_max, std::max<size_t>(100, 8 * dets.size()));
       std::tie(E0, dets, C) = macis::asci_iter<64, int32_t>(
           asci_settings, mcscf_settings, ndets_new, E0, std::move(dets),
-          std::move(C), ham_gen, norb, nullptr MACIS_MPI_CODE(, MPI_COMM_WORLD));
+          std::move(C), ham_gen, norb,
+          nullptr MACIS_MPI_CODE(, MPI_COMM_WORLD));
     }
 
     // Verify we got some determinants and the wavefunction is normalized
@@ -871,14 +872,14 @@ TEST_CASE("CoreSelectionStrategy") {
     // Run one iteration with low threshold (70%)
     auto [E0_low, dets_low, C_low] = macis::asci_iter<64, int32_t>(
         asci_settings_low, mcscf_settings, asci_settings_low.ntdets_max,
-        E0_base, dets_base, C_base, ham_gen,
-        norb, nullptr MACIS_MPI_CODE(, MPI_COMM_WORLD));
+        E0_base, dets_base, C_base, ham_gen, norb,
+        nullptr MACIS_MPI_CODE(, MPI_COMM_WORLD));
 
     // Run one iteration with high threshold (99%)
     auto [E0_high, dets_high, C_high] = macis::asci_iter<64, int32_t>(
         asci_settings_high, mcscf_settings, asci_settings_high.ntdets_max,
-        E0_base, dets_base, C_base, ham_gen,
-        norb, nullptr MACIS_MPI_CODE(, MPI_COMM_WORLD));
+        E0_base, dets_base, C_base, ham_gen, norb,
+        nullptr MACIS_MPI_CODE(, MPI_COMM_WORLD));
 
     // Higher threshold should result in MORE determinants being kept in core
     // and thus potentially finding more new determinants
diff --git a/python/src/pybind11/data/hamiltonian.cpp b/python/src/pybind11/data/hamiltonian.cpp
index 80f1520c6..070bef1ab 100644
--- a/python/src/pybind11/data/hamiltonian.cpp
+++ b/python/src/pybind11/data/hamiltonian.cpp
@@ -243,25 +243,25 @@ Get the type of this container as a string.
   py::class_<CholeskyHamiltonianContainer, HamiltonianContainer,
              py::smart_holder>
       cholesky_container(data, "CholeskyHamiltonianContainer", R"(
-Represents a molecular Hamiltonian using Cholesky-decomposed two-electron integrals.
+Represents a molecular Hamiltonian using Cholesky-decomposed three-center integrals.
 This class stores molecular Hamiltonian data for quantum chemistry calculations,
 specifically designed for active space methods. It contains:
 
 * One-electron integrals (kinetic + nuclear attraction) in MO representation
-* Two-electron integrals (electron-electron repulsion) in MO representation
-* Cholesky vectors approximating the AO two-electron integrals
+* Three-center two-electron integrals (ij|Q) in MO representation
 * Molecular orbital information for the active space
 * Core energy contributions from inactive orbitals and nuclear repulsion
 
+Four-center integrals are lazily computed from three-center integrals on first access.
+
 Examples:
     >>> import numpy as np
     >>> # Create restricted Hamiltonian
     >>> one_body = np.random.rand(4, 4)  # 4 orbitals
-    >>> two_body = np.random.rand(256)   # 4^4 elements
-    >>> ao_cholesky_vectors = np.random.rand(4*4, 10)   # 10 Cholesky vectors
+    >>> three_center = np.random.rand(16, 10)  # (norb^2) x naux
     >>> inactive_fock_matrix = np.random.rand(4, 4)
     >>> container = CholeskyHamiltonianContainer(
-    ...     one_body, two_body, orbitals, 10.5, inactive_fock_matrix, ao_cholesky_vectors
+    ...     one_body, three_center, orbitals, 10.5, inactive_fock_matrix
     ... )
     >>> # Wrap in Hamiltonian interface
     >>> hamiltonian = Hamiltonian(container)
@@ -269,83 +269,133 @@ specifically designed for active space methods. It contains:
 
   // Restricted constructor
   cholesky_container.def(
-      py::init<const Eigen::MatrixXd&, const Eigen::VectorXd&,
+      py::init<const Eigen::MatrixXd&, const Eigen::MatrixXd&,
                std::shared_ptr<Orbitals>, double, const Eigen::MatrixXd&,
-               const Eigen::MatrixXd&, HamiltonianType>(),
+               std::optional<Eigen::MatrixXd>, HamiltonianType>(),
       R"(
 Constructor for restricted active space Hamiltonian with Cholesky-decomposed integrals.
 
 Args:
     one_body_integrals (numpy.ndarray): One-electron integrals matrix [norb x norb]
-    two_body_integrals (numpy.ndarray): Two-electron integrals vector [norb^4]
+    three_center_integrals (numpy.ndarray): Three-center two-electron integrals
+        in MO basis [(norb*norb) x naux]
     orbitals (Orbitals): Molecular orbital data
     core_energy (float): Core energy (nuclear repulsion + inactive orbitals)
     inactive_fock_matrix (numpy.ndarray): Inactive Fock matrix [norb x norb]
-    ao_cholesky_vectors (numpy.ndarray): AO basis Cholesky vectors [norb^2 x nvec]
+    ao_cholesky_vectors (numpy.ndarray or None, optional): AO Cholesky vectors
+        for potential reuse. Defaults to None.
     type (HamiltonianType, optional): Type of Hamiltonian (Hermitian by default)
 
 Examples:
     >>> import numpy as np
     >>> one_body = np.random.rand(4, 4)
-    >>> two_body = np.random.rand(256)  # 4^4 elements
-    >>> ao_cholesky_vecs = np.random.rand(16, 10)  # 16 = 4^2, 10 vectors
+    >>> three_center = np.random.rand(16, 10)  # 16 = 4^2, 10 auxiliary basis functions
     >>> inactive_fock_matrix = np.random.rand(4, 4)
     >>> container = CholeskyHamiltonianContainer(
-    ...     one_body, two_body, orbitals, 10.5, inactive_fock_matrix, ao_cholesky_vectors
+    ...     one_body, three_center, orbitals, 10.5, inactive_fock_matrix
     ... )
 )",
-      py::arg("one_body_integrals"), py::arg("two_body_integrals"),
+      py::arg("one_body_integrals"), py::arg("three_center_integrals"),
       py::arg("orbitals"), py::arg("core_energy"),
-      py::arg("inactive_fock_matrix"), py::arg("ao_cholesky_vectors"),
+      py::arg("inactive_fock_matrix"),
+      py::arg("ao_cholesky_vectors") = py::none(),
       py::arg("type") = HamiltonianType::Hermitian);
 
   // Unrestricted constructor
   cholesky_container.def(
       py::init<const Eigen::MatrixXd&, const Eigen::MatrixXd&,
-               const Eigen::VectorXd&, const Eigen::VectorXd&,
-               const Eigen::VectorXd&, std::shared_ptr<Orbitals>, double,
                const Eigen::MatrixXd&, const Eigen::MatrixXd&,
-               const Eigen::MatrixXd&, HamiltonianType>(),
+               std::shared_ptr<Orbitals>, double, const Eigen::MatrixXd&,
+               const Eigen::MatrixXd&, std::optional<Eigen::MatrixXd>,
+               HamiltonianType>(),
       R"(
 Constructor for unrestricted active space Hamiltonian with Cholesky-decomposed integrals.
 
 Args:
     one_body_integrals_alpha (numpy.ndarray): Alpha one-electron integrals [norb x norb]
     one_body_integrals_beta (numpy.ndarray): Beta one-electron integrals [norb x norb]
-    two_body_integrals_aaaa (numpy.ndarray): Alpha-alpha-alpha-alpha integrals [norb^4]
-    two_body_integrals_aabb (numpy.ndarray): Alpha-beta-alpha-beta integrals [norb^4]
-    two_body_integrals_bbbb (numpy.ndarray): Beta-beta-beta-beta integrals [norb^4]
+    three_center_integrals_aa (numpy.ndarray): Alpha-alpha three-center integrals
+        [(norb*norb) x naux], orbital pair index in row-major order
+    three_center_integrals_bb (numpy.ndarray): Beta-beta three-center integrals
+        [(norb*norb) x naux], orbital pair index in row-major order
     orbitals (Orbitals): Molecular orbital data
     core_energy (float): Core energy (nuclear repulsion + inactive orbitals)
     inactive_fock_matrix_alpha (numpy.ndarray): Alpha inactive Fock matrix [norb x norb]
     inactive_fock_matrix_beta (numpy.ndarray): Beta inactive Fock matrix [norb x norb]
-    ao_cholesky_vectors (numpy.ndarray): AO basis Cholesky vectors [norb^2 x nvec]
+    ao_cholesky_vectors (numpy.ndarray or None, optional): AO Cholesky vectors
+        for potential reuse. Defaults to None.
     type (HamiltonianType, optional): Type of Hamiltonian (Hermitian by default)
 
 Examples:
     >>> import numpy as np
     >>> one_body_a = np.random.rand(4, 4)
     >>> one_body_b = np.random.rand(4, 4)
-    >>> two_body_aaaa = np.random.rand(256)
-    >>> two_body_aabb = np.random.rand(256)
-    >>> two_body_bbbb = np.random.rand(256)
-    >>> cholesky_vecs = np.random.rand(16, 10)
+    >>> three_center_aa = np.random.rand(16, 10)  # 16 = 4^2, 10 aux
+    >>> three_center_bb = np.random.rand(16, 10)
     >>> fock_a = np.random.rand(4, 4)
     >>> fock_b = np.random.rand(4, 4)
     >>> container = CholeskyHamiltonianContainer(
     ...     one_body_a, one_body_b,
-    ...     two_body_aaaa, two_body_aabb, two_body_bbbb,
-    ...     orbitals, 10.5, fock_a, fock_b, cholesky_vecs
+    ...     three_center_aa, three_center_bb,
+    ...     orbitals, 10.5, fock_a, fock_b
     ... )
 )",
       py::arg("one_body_integrals_alpha"), py::arg("one_body_integrals_beta"),
-      py::arg("two_body_integrals_aaaa"), py::arg("two_body_integrals_aabb"),
-      py::arg("two_body_integrals_bbbb"), py::arg("orbitals"),
+      py::arg("three_center_integrals_aa"),
+      py::arg("three_center_integrals_bb"), py::arg("orbitals"),
       py::arg("core_energy"), py::arg("inactive_fock_matrix_alpha"),
-      py::arg("inactive_fock_matrix_beta"), py::arg("ao_cholesky_vectors"),
+      py::arg("inactive_fock_matrix_beta"),
+      py::arg("ao_cholesky_vectors") = py::none(),
       py::arg("type") = HamiltonianType::Hermitian);
 
-  // Two-body integral access
+  // Three-center integral access
+  bind_getter_as_property(
+      cholesky_container, "get_three_center_integrals",
+      &CholeskyHamiltonianContainer::get_three_center_integrals,
+      R"(
+Get three-center integrals in MO basis for all spin channels.
+
+Returns:
+    tuple[numpy.ndarray, numpy.ndarray]: Pair of (aa, bb) three-center
+    two-electron integral matrices, each of dimension [(norb*norb) x naux]
+    with the orbital pair index stored in row-major order.
+
+Raises:
+    RuntimeError: If three-center integrals are not set
+)",
+      py::return_value_policy::reference_internal);
+
+  // AO Cholesky vectors access
+  cholesky_container.def(
+      "get_ao_cholesky_vectors",
+      [](const CholeskyHamiltonianContainer& self) -> py::object {
+        const auto& opt = self.get_ao_cholesky_vectors();
+        if (!opt) return py::none();
+        const Eigen::MatrixXd& mat = *opt;
+        // Return a zero-copy view. We tie lifetime to self via
+        // py::return_value_policy semantics by passing a capsule that
+        // prevents GC.  The reference is valid as long as the container
+        // (and thus `self`) is alive; pybind11's prevent-gc mechanism
+        // handles that through the `self` capture in the keep-alive.
+        return py::array_t<double>(
+            {mat.rows(), mat.cols()},  // shape
+            {static_cast<py::ssize_t>(sizeof(double)),
+             static_cast<py::ssize_t>(mat.rows()) *
+                 static_cast<py::ssize_t>(sizeof(double))},  // strides
+                                                             // (col-major)
+            mat.data(),                                      // data pointer
+            py::cast(self));  // prevent GC of self while array is alive
+      },
+      R"(
+Get the optional AO Cholesky vectors (zero-copy view).
+
+Returns:
+    numpy.ndarray or None: AO Cholesky vectors matrix [nao^2 x nchol],
+    or None if not stored. The returned array shares memory with the
+    internal storage and should not be modified.
+)");
+
+  // Two-body integral access (lazily computed from three-center integrals)
   bind_getter_as_property(cholesky_container, "get_two_body_integrals",
                           &CholeskyHamiltonianContainer::get_two_body_integrals,
                           R"(
diff --git a/python/src/qdk_chemistry/plugins/qiskit/_interop/qir.py b/python/src/qdk_chemistry/plugins/qiskit/_interop/qir.py
index c30ddcfa6..26d0525e8 100644
--- a/python/src/qdk_chemistry/plugins/qiskit/_interop/qir.py
+++ b/python/src/qdk_chemistry/plugins/qiskit/_interop/qir.py
@@ -72,7 +72,11 @@ def _qubit(self, q: pyqir.Value) -> int:
             The index of the qubit in the Qiskit circuit.
 
         """
-        return pyqir.qubit_id(q)
+        # Use `qubit_id` on older pyqir instances
+        if hasattr(pyqir, "qubit_id"):
+            return pyqir.qubit_id(q)
+        # Newer pyqir versions use the more generic `ptr_id` for both qubits and results
+        return pyqir.ptr_id(q)
 
     def _clbit(self, r: pyqir.Value) -> int:
         """Get circuit classical bit index for a QIR result value.
@@ -84,7 +88,11 @@ def _clbit(self, r: pyqir.Value) -> int:
             The index of the classical bit in the Qiskit circuit.
 
         """
-        return pyqir.result_id(r)
+        # Use `result_id` on older pyqir instances
+        if hasattr(pyqir, "result_id"):
+            return pyqir.result_id(r)
+        # Newer pyqir versions use the more generic `ptr_id` for both qubits and results
+        return pyqir.ptr_id(r)
 
     def _angle(self, a: pyqir.Value) -> float:
         """Extract angle value from a QIR constant.
diff --git a/python/tests/test_hamiltonian.py b/python/tests/test_hamiltonian.py
index a9d7c6642..195414fd3 100644
--- a/python/tests/test_hamiltonian.py
+++ b/python/tests/test_hamiltonian.py
@@ -607,12 +607,11 @@ def test_default_constructor(self):
         """Test construction of Cholesky Hamiltonian."""
         one_body = np.eye(2)
         rng = np.random.default_rng(0)
-        two_body = rng.random(2**4)
         cholesky_vecs = rng.random((4, 10))  # 4 = 2^2 AOs, 10 vectors
         coeffs = np.array([[1.0, 0.0], [0.0, 1.0]])
         orbitals = Orbitals(coeffs, None, None, create_test_basis_set(2))
 
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 1.5, np.array([]), cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 1.5, np.array([])))
         assert isinstance(h, Hamiltonian)
         assert h.has_one_body_integrals()
         assert h.has_two_body_integrals()
@@ -622,23 +621,21 @@ def test_size_and_electron_counts(self):
         """Test Hamiltonian size properties."""
         one_body = np.eye(3)
         rng = np.random.default_rng(1)
-        two_body = rng.random(3**4)
         cholesky_vecs = rng.random((9, 15))  # 9 = 3^2 AOs, 15 vectors
         orbitals = create_test_orbitals(3)
 
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 0.0, np.array([]), cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 0.0, np.array([])))
         assert h.get_orbitals().get_num_molecular_orbitals() == 3
 
     def test_full_constructor(self):
         """Test full constructor with all parameters."""
         one_body = np.array([[1.0, 0.5], [0.5, 2.0]])
         rng = np.random.default_rng(0)
-        two_body = rng.random(2**4)
         cholesky_vecs = rng.random((4, 10))
         coeffs = np.array([[1.0, 0.0], [0.0, 1.0]])
         orbitals = Orbitals(coeffs, None, None, create_test_basis_set(2))
 
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 1.5, np.array([]), cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 1.5, np.array([])))
         assert h.has_one_body_integrals()
         assert h.has_two_body_integrals()
         assert h.has_orbitals()
@@ -649,18 +646,12 @@ def test_full_constructor(self):
         assert np.array_equal(aa, one_body)
         assert np.array_equal(bb, one_body)
 
-        aaaa, aabb, bbbb = h.get_two_body_integrals()
-        assert np.array_equal(aaaa, two_body)
-        assert np.array_equal(aabb, two_body)
-        assert np.array_equal(bbbb, two_body)
-
     def test_one_body_integrals(self):
         """Test one-body integral access."""
         one_body = np.array([[1.0, 0.2], [0.2, 1.5]])
-        two_body = np.zeros(2**4)
         cholesky_vecs = np.random.default_rng(2).random((4, 8))
         orbitals = create_test_orbitals(2)
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 0.0, np.array([]), cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 0.0, np.array([])))
         assert h.has_one_body_integrals()
         assert np.array_equal(h.get_one_body_integrals()[0], one_body)
 
@@ -668,47 +659,37 @@ def test_two_body_integrals(self):
         """Test two-body integral access."""
         one_body = np.eye(2)
         rng = np.random.default_rng(1)
-        two_body = rng.random(2**4)
         cholesky_vecs = rng.random((4, 10))
         orbitals = create_test_orbitals(2)
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 0.0, np.array([]), cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 0.0, np.array([])))
         assert h.has_two_body_integrals()
-        # get_two_body_integrals returns (aaaa, aabb, bbbb) tuple
-        aaaa, aabb, bbbb = h.get_two_body_integrals()
-        # For restricted case, all should be the same and equal to two_body
-        assert np.array_equal(aaaa, two_body)
-        assert np.array_equal(aabb, two_body)
-        assert np.array_equal(bbbb, two_body)
 
     def test_two_body_element_access(self):
         """Test individual two-body element access."""
         one_body = np.eye(2)
         rng = np.random.default_rng(3)
-        two_body = rng.random(2**4)
         cholesky_vecs = rng.random((4, 10))
         orbitals = create_test_orbitals(2)
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 0.0, np.array([]), cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 0.0, np.array([])))
         val = h.get_two_body_element(0, 1, 1, 0)
         assert isinstance(val, float)
 
     def test_active_space_management(self):
         """Test core energy handling."""
         one_body = np.eye(3)
-        two_body = np.zeros(3**4)
         cholesky_vecs = np.random.default_rng(4).random((9, 15))
         orbitals = create_test_orbitals(3)
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 2.5, np.array([]), cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 2.5, np.array([])))
         assert h.get_core_energy() == 2.5
 
     def test_json_serialization(self):
         """Test JSON serialization and deserialization."""
         one_body = np.array([[1.0, 0.5], [0.5, 2.0]])
         rng = np.random.default_rng(42)
-        two_body = rng.random(2**4)
         cholesky_vecs = rng.random((4, 10))
         coeffs = np.array([[1.0, 0.0], [0.0, 1.0]])
         orbitals = Orbitals(coeffs, None, None, create_test_basis_set(2))
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 1.5, np.array([]), cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 1.5, np.array([])))
 
         data = json.loads(h.to_json())
         assert isinstance(data, dict)
@@ -761,11 +742,10 @@ def test_json_file_io(self):
         """Test JSON file I/O."""
         one_body = np.array([[1.0, 0.5], [0.5, 2.0]])
         rng = np.random.default_rng(42)
-        two_body = rng.random(2**4)
         cholesky_vecs = rng.random((4, 10))
         coeffs = np.array([[1.0, 0.0], [0.0, 1.0]])
         orbitals = Orbitals(coeffs, None, None, create_test_basis_set(2))
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 1.5, np.array([]), cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 1.5, np.array([])))
 
         with tempfile.NamedTemporaryFile(suffix=".hamiltonian.json", delete=False) as f:
             filename = f.name
@@ -818,11 +798,10 @@ def test_hdf5_file_io(self):
         """Test HDF5 file I/O."""
         one_body = np.array([[1.0, 0.5], [0.5, 2.0]])
         rng = np.random.default_rng(42)
-        two_body = rng.random(2**4)
         cholesky_vecs = rng.random((4, 10))
         coeffs = np.array([[1.0, 0.0], [0.0, 1.0]])
         orbitals = Orbitals(coeffs, None, None, create_test_basis_set(2))
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 1.5, np.array([]), cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 1.5, np.array([])))
 
         with tempfile.NamedTemporaryFile(suffix=".hamiltonian.h5", delete=False) as f:
             filename = f.name
@@ -875,11 +854,10 @@ def test_generic_file_io(self):
         """Test generic file I/O with format specification."""
         one_body = np.array([[1.0, 0.5], [0.5, 2.0]])
         rng = np.random.default_rng(42)
-        two_body = rng.random(2**4)
         cholesky_vecs = rng.random((4, 10))
         coeffs = np.array([[1.0, 0.0], [0.0, 1.0]])
         orbitals = Orbitals(coeffs, None, None, create_test_basis_set(2))
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 1.5, np.array([]), cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 1.5, np.array([])))
 
         # Test JSON format
         with tempfile.NamedTemporaryFile(suffix=".hamiltonian.json", delete=False) as f:
@@ -919,10 +897,9 @@ def test_pickling_hamiltonian(self):
         """Test that Cholesky Hamiltonian can be pickled and unpickled correctly."""
         one_body = np.eye(3)
         rng = np.random.default_rng(5)
-        two_body = rng.random(3**4)
         cholesky_vecs = rng.random((9, 15))
         orbitals = create_test_orbitals(3)
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 0.0, np.array([]), cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 0.0, np.array([])))
 
         # Test pickling round-trip
         pickled_data = pickle.dumps(h)
@@ -967,11 +944,10 @@ def test_restricted_hamiltonian_construction(self):
         rng = np.random.default_rng(42)
         one_body = rng.random((3, 3))
         one_body = 0.5 * (one_body + one_body.T)  # Make symmetric
-        two_body = rng.random(3**4)
         cholesky_vecs = rng.random((9, 15))
         inactive_fock = rng.random((3, 3))
 
-        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, two_body, orbitals, 1.0, inactive_fock, cholesky_vecs))
+        h = Hamiltonian(CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals, 1.0, inactive_fock))
 
         # Verify Hamiltonian properties
         assert h.is_restricted()
@@ -985,11 +961,6 @@ def test_restricted_hamiltonian_construction(self):
         assert np.array_equal(h.get_one_body_integrals()[0], one_body)
         assert np.array_equal(h.get_one_body_integrals()[1], one_body)
 
-        aaaa, aabb, bbbb = h.get_two_body_integrals()
-        assert np.array_equal(aaaa, two_body)
-        assert np.array_equal(aabb, two_body)
-        assert np.array_equal(bbbb, two_body)
-
     def test_unrestricted_hamiltonian_construction(self):
         """Test unrestricted Cholesky Hamiltonian construction and properties."""
         # Create unrestricted orbitals with different alpha/beta coefficients
@@ -1006,10 +977,8 @@ def test_unrestricted_hamiltonian_construction(self):
         rng = np.random.default_rng(123)
         one_body_alpha = np.array([[1.0, 0.2], [0.2, 1.5]])
         one_body_beta = np.array([[1.1, 0.3], [0.3, 1.6]])
-        two_body_aaaa = rng.random(2**4)
-        two_body_aabb = rng.random(2**4)
-        two_body_bbbb = rng.random(2**4)
-        cholesky_vecs = rng.random((4, 10))
+        cholesky_vecs_aa = rng.random((4, 10))
+        cholesky_vecs_bb = rng.random((4, 10))
         inactive_fock_alpha = np.array([[0.5, 0.1], [0.1, 0.7]])
         inactive_fock_beta = np.array([[0.6, 0.2], [0.2, 0.8]])
 
@@ -1017,14 +986,12 @@ def test_unrestricted_hamiltonian_construction(self):
             CholeskyHamiltonianContainer(
                 one_body_alpha,
                 one_body_beta,
-                two_body_aaaa,
-                two_body_aabb,
-                two_body_bbbb,
+                cholesky_vecs_aa,
+                cholesky_vecs_bb,
                 orbitals,
                 2.0,
                 inactive_fock_alpha,
                 inactive_fock_beta,
-                cholesky_vecs,
             )
         )
 
@@ -1039,10 +1006,6 @@ def test_unrestricted_hamiltonian_construction(self):
         # Verify separate alpha/beta integral access
         assert np.array_equal(h.get_one_body_integrals()[0], one_body_alpha)
         assert np.array_equal(h.get_one_body_integrals()[1], one_body_beta)
-        aaaa, aabb, bbbb = h.get_two_body_integrals()
-        assert np.array_equal(aaaa, two_body_aaaa)
-        assert np.array_equal(aabb, two_body_aabb)
-        assert np.array_equal(bbbb, two_body_bbbb)
 
     def test_unrestricted_vs_restricted_serialization(self):
         """Test that restricted/unrestricted nature is preserved in serialization."""
@@ -1052,10 +1015,9 @@ def test_unrestricted_vs_restricted_serialization(self):
         orbitals_restricted = Orbitals(coeffs, None, None, basis_set)
 
         one_body = np.array([[1.0, 0.1], [0.1, 1.0]])
-        two_body = np.ones(16) * 0.5
         cholesky_vecs = np.random.default_rng(6).random((4, 8))
         h_restricted = Hamiltonian(
-            CholeskyHamiltonianContainer(one_body, two_body, orbitals_restricted, 1.0, np.eye(2), cholesky_vecs)
+            CholeskyHamiltonianContainer(one_body, cholesky_vecs, orbitals_restricted, 1.0, np.eye(2))
         )
 
         # Test unrestricted Hamiltonian
@@ -1065,22 +1027,18 @@ def test_unrestricted_vs_restricted_serialization(self):
 
         one_body_alpha = np.array([[1.0, 0.1], [0.1, 1.0]])
         one_body_beta = np.array([[1.1, 0.2], [0.2, 1.1]])
-        two_body_aaaa = np.ones(16) * 1.0
-        two_body_aabb = np.ones(16) * 2.0
-        two_body_bbbb = np.ones(16) * 3.0
-        cholesky_vecs_unres = np.random.default_rng(7).random((4, 8))
+        cholesky_vecs_aa = np.random.default_rng(7).random((4, 8))
+        cholesky_vecs_bb = np.random.default_rng(7).random((4, 8))
         h_unrestricted = Hamiltonian(
             CholeskyHamiltonianContainer(
                 one_body_alpha,
                 one_body_beta,
-                two_body_aaaa,
-                two_body_aabb,
-                two_body_bbbb,
+                cholesky_vecs_aa,
+                cholesky_vecs_bb,
                 orbitals_unrestricted,
                 2.0,
                 np.eye(2),
                 np.eye(2),
-                cholesky_vecs_unres,
             )
         )
 
@@ -1103,6 +1061,56 @@ def test_unrestricted_vs_restricted_serialization(self):
             h_unrestricted.get_one_body_integrals()[1], h_unrestricted_json.get_one_body_integrals()[1]
         )
 
+    def test_ao_cholesky_vectors_absent_by_default(self):
+        """Test that AO Cholesky vectors are None when not provided."""
+        one_body = np.eye(2)
+        rng = np.random.default_rng(20)
+        three_center = rng.random((4, 10))
+        orbitals = create_test_orbitals(2)
+
+        container = CholeskyHamiltonianContainer(one_body, three_center, orbitals, 1.0, np.array([]))
+        assert container.get_ao_cholesky_vectors() is None
+
+    def test_ao_cholesky_vectors_present(self):
+        """Test construction with AO Cholesky vectors and retrieval."""
+        one_body = np.eye(2)
+        rng = np.random.default_rng(21)
+        three_center = rng.random((4, 10))
+        ao_vecs = rng.random((9, 5))  # e.g. 3^2 AOs, 5 Cholesky vectors
+        orbitals = create_test_orbitals(2)
+
+        container = CholeskyHamiltonianContainer(
+            one_body, three_center, orbitals, 1.0, np.array([]), ao_cholesky_vectors=ao_vecs
+        )
+        result = container.get_ao_cholesky_vectors()
+        assert result is not None
+        np.testing.assert_array_almost_equal(result, ao_vecs)
+
+    def test_ao_cholesky_vectors_roundtrip_json(self):
+        """Test that AO Cholesky vectors survive JSON round-trip."""
+        one_body = np.eye(2)
+        rng = np.random.default_rng(22)
+        three_center = rng.random((4, 10))
+        ao_vecs = rng.random((9, 5))
+        orbitals = create_test_orbitals(2)
+
+        container = CholeskyHamiltonianContainer(
+            one_body, three_center, orbitals, 1.0, np.array([]), ao_cholesky_vectors=ao_vecs
+        )
+        h = Hamiltonian(container)
+
+        h_roundtrip = Hamiltonian.from_json(h.to_json())
+        assert h_roundtrip.get_container_type() == "cholesky"
+
+        roundtrip_data = json.loads(h_roundtrip.to_json())
+        assert "ao_cholesky_vectors" in roundtrip_data["container"]
+        assert np.allclose(
+            np.array(roundtrip_data["container"]["ao_cholesky_vectors"]),
+            ao_vecs,
+            rtol=float_comparison_relative_tolerance,
+            atol=float_comparison_absolute_tolerance,
+        )
+
 
 def test_hamiltonian_data_type_name():
     """Test that Hamiltonian has the correct _data_type_name class attribute."""