diff --git a/gmm/gmm.py b/gmm/gmm.py
index 2880152..8bb58cc 100644
--- a/gmm/gmm.py
+++ b/gmm/gmm.py
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
-from typing import Callable, Optional, Union, Literal
+from typing import Callable, Optional, Union, Literal, Dict, Tuple
import numpy as np
import pandas as pd
@@ -7,6 +7,10 @@
import torch
import torchmin
+import pyensmallen as pe
+from joblib import Parallel, delayed, parallel_backend
+import time
+
class GMMEstimator(ABC):
"""Abstract base class for GMM estimators."""
@@ -52,43 +56,115 @@ def fit(
verbose: bool = False,
fit_method: Optional[str] = None,
iid: bool = True,
+ bootstrap: bool = False,
+ n_bootstrap: int = 1000,
) -> None:
pass
+ @abstractmethod
+ def bootstrap_se(
+ self, n_bootstrap: int = 1000, seed: Optional[int] = None
+ ) -> np.ndarray:
+ """
+        Compute bootstrap standard errors. Subclasses may use the fast linear
+        approximation (scipy/torch backends) or full re-estimation on each
+        bootstrap sample (pyensmallen backend).
+
+ Args:
+ n_bootstrap: Number of bootstrap iterations
+ seed: Random seed for reproducibility
+
+ Returns:
+ Bootstrap standard errors for each parameter
+ """
+ pass
+
@abstractmethod
def jacobian_moment_cond(self) -> np.ndarray:
pass
- def summary(self, prec: int = 4, alpha: float = 0.05) -> pd.DataFrame:
- if not hasattr(self, "theta_") and not hasattr(self, "std_errors_"):
+ def summary(
+ self, prec: int = 4, alpha: float = 0.05, method: str = "asymptotic"
+ ) -> pd.DataFrame:
+ """
+ Generate summary statistics for the fitted model.
+
+ Args:
+ prec: Precision for rounding results
+ alpha: Significance level for confidence intervals
+ method: Method for calculating standard errors, either "asymptotic" or "bootstrap"
+
+ Returns:
+ DataFrame with model summary statistics
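+
+        Example (illustrative; ``est`` is any fitted estimator):
+            >>> est.summary()                    # asymptotic standard errors
+            >>> est.summary(method="bootstrap")  # requires bootstrap SEs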
+ """
+ if not hasattr(self, "theta_"):
raise ValueError(
"Estimator not fitted yet. Make sure you call `fit()` before `summary()`."
)
+
+ # Determine which standard errors to use
+ if method == "bootstrap":
+ if not hasattr(self, "bootstrap_std_errors_"):
+ raise ValueError(
+ "Bootstrap standard errors not available. Call fit() with bootstrap=True or "
+ "manually call bootstrap_se() before calling summary with method='bootstrap'."
+ )
+ std_errors = self.bootstrap_std_errors_
+ else: # Default to asymptotic
+ if not hasattr(self, "std_errors_"):
+ raise ValueError(
+ "Asymptotic standard errors not available. There might have been an error during estimation."
+ )
+ std_errors = self.std_errors_
+
return pd.DataFrame(
{
"coef": np.round(self.theta_, prec),
- "std err": np.round(self.std_errors_, prec),
- "t": np.round(self.theta_ / self.std_errors_, prec),
+ "std err": np.round(std_errors, prec),
+ "t": np.round(self.theta_ / std_errors, prec),
"p-value": np.round(
- 2
- * (
- 1 - scipy.stats.norm.cdf(np.abs(self.theta_ / self.std_errors_))
- ),
+ 2 * (1 - scipy.stats.norm.cdf(np.abs(self.theta_ / std_errors))),
prec,
),
- f"[{alpha/2}": np.round(
- self.theta_
- - scipy.stats.norm.ppf(1 - alpha / 2) * self.std_errors_,
+ f"[{alpha / 2}": np.round(
+ self.theta_ - scipy.stats.norm.ppf(1 - alpha / 2) * std_errors,
prec,
),
- f"{1 - alpha/2}]": np.round(
- self.theta_
- + scipy.stats.norm.ppf(1 - alpha / 2) * self.std_errors_,
+ f"{1 - alpha / 2}]": np.round(
+ self.theta_ + scipy.stats.norm.ppf(1 - alpha / 2) * std_errors,
prec,
),
}
)
+ def compare_se_methods(self, prec: int = 4) -> pd.DataFrame:
+ """
+ Compare asymptotic and bootstrap standard errors side by side.
+
+ Args:
+ prec: Precision for rounding results
+
+ Returns:
+ DataFrame comparing asymptotic and bootstrap standard errors
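+
+        Example (illustrative, assuming the model has been fitted):
+            >>> est.bootstrap_se(n_bootstrap=500, seed=0)
+            >>> est.compare_se_methods()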
+ """
+ if not hasattr(self, "theta_") or not hasattr(self, "std_errors_"):
+ raise ValueError("Estimator not fitted with asymptotic standard errors.")
+
+ if not hasattr(self, "bootstrap_std_errors_"):
+ raise ValueError(
+ "Bootstrap standard errors not available. Call bootstrap_se() first."
+ )
+
+ return pd.DataFrame(
+ {
+ "parameter": [f"beta_{i}" for i in range(len(self.theta_))],
+ "estimate": np.round(self.theta_, prec),
+ "asymptotic_se": np.round(self.std_errors_, prec),
+ "bootstrap_se": np.round(self.bootstrap_std_errors_, prec),
+ "se_ratio": np.round(
+ self.bootstrap_std_errors_ / self.std_errors_, prec
+ ),
+ }
+ )
+
class GMMEstimatorScipy(GMMEstimator):
"""Class to create GMM estimator using scipy"""
@@ -132,6 +208,8 @@ def fit(
verbose: bool = False,
fit_method: Optional[str] = None,
iid: bool = True,
+ bootstrap: bool = False,
+ n_bootstrap: int = 1000,
) -> None:
if fit_method is None:
fit_method = "L-BFGS-B"
@@ -144,6 +222,8 @@ def fit(
options={"disp": verbose},
)
self.theta_ = result.x
+
+ # Calculate asymptotic standard errors
try:
self.Gamma_ = self.jacobian_moment_cond()
if iid:
@@ -158,14 +238,85 @@ def fit(
@ self.Gamma_
@ np.linalg.inv(self.Gamma_.T @ self.W_ @ self.Gamma_)
)
-            self.std_errors_ = np.sqrt(self.n_ * np.diag(self.vtheta_))
-        except:
+            # Gamma_ here is the summed Jacobian (-z'x), so vtheta_ is O(1/n^2)
+            # and the variance of theta_hat is n * vtheta_, not vtheta_ / n
+            self.std_errors_ = np.sqrt(self.n_ * np.diag(self.vtheta_))
+        except Exception as e:
+ if verbose:
+ print(f"Error computing asymptotic standard errors: {e}")
self.std_errors_ = None
+ # Calculate bootstrap standard errors if requested
+ if bootstrap:
+ try:
+ self.bootstrap_std_errors_ = self.bootstrap_se(n_bootstrap=n_bootstrap)
+ except Exception as e:
+ if verbose:
+ print(f"Error computing bootstrap standard errors: {e}")
+ self.bootstrap_std_errors_ = None
+
def jacobian_moment_cond(self) -> np.ndarray:
self.jac_est_ = -self.z_.T @ self.x_
return self.jac_est_
+ def bootstrap_se(
+ self, n_bootstrap: int = 1000, seed: Optional[int] = None
+ ) -> np.ndarray:
+ """
+ Compute bootstrap standard errors using the linear approximation method.
+
+ Args:
+ n_bootstrap: Number of bootstrap iterations
+ seed: Random seed for reproducibility
+
+ Returns:
+ Bootstrap standard errors for each parameter
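+
+        Example (illustrative; assumes IV arrays ``z``, ``y``, ``x``):
+            >>> est = GMMEstimator(GMMEstimatorScipy.iv_moment, backend="scipy")
+            >>> est.fit(z, y, x)
+            >>> se = est.bootstrap_se(n_bootstrap=500, seed=42)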
+ """
+ if not hasattr(self, "theta_"):
+ raise ValueError("Model must be fitted first")
+
+ # Set random seed if provided
+ if seed is not None:
+ np.random.seed(seed)
+
+ n = self.n_ # Sample size
+
+        # Influence function: psi_i = -(G'WG)^{-1} G'W g(z_i, theta_0), i.e.
+        # phi(z, tau) = -D^{-1} m(z, tau) with D = G'WG and m(z, tau) = G'W g(z, theta_0)
+
+        # Step 1: Calculate the matrix M = -(G'WG)^(-1)G'W
+        # Use the mean-form Jacobian G = -z'x / n so that M is O(1) and the
+        # linearized draws below have the same Op(1/sqrt(n)) spread as theta_hat
+        G = self.jacobian_moment_cond() / self.n_
+        M = -np.linalg.inv(G.T @ self.W_ @ G) @ G.T @ self.W_
+
+ # Step 2: Calculate the individual moment conditions for each observation
+ # For the IV case: z_i * (y_i - x_i @ theta)
+ residuals = self.y_ - self.x_ @ self.theta_
+ individual_moments = self.z_ * residuals[:, np.newaxis]
+
+ # Mean of the moments
+ mean_moments = np.mean(individual_moments, axis=0)
+
+ # Step 3: Generate bootstrap samples and calculate theta*
+ bootstrap_thetas = np.zeros((n_bootstrap, len(self.theta_)))
+
+ for b in range(n_bootstrap):
+ # Draw a bootstrap sample with replacement
+ indices = np.random.choice(n, n, replace=True)
+
+ # Calculate the mean of the bootstrap sample moments
+ bootstrap_moments = np.mean(individual_moments[indices], axis=0)
+
+ # Center the bootstrap moments
+ centered_moments = bootstrap_moments - mean_moments
+
+            # Calculate theta* using the linear approximation:
+            # sqrt(n)(theta* - theta_hat) = M * sqrt(n) * (g_bar* - g_bar)
+            # Therefore: theta* = theta_hat + M @ (g_bar* - g_bar)
+ bootstrap_thetas[b] = self.theta_ + M @ centered_moments
+
+        # Step 4: Calculate bootstrap standard errors and store them so that
+        # summary(method="bootstrap") and compare_se_methods() can find them
+        bootstrap_se = np.std(bootstrap_thetas, axis=0)
+        self.bootstrap_thetas_ = bootstrap_thetas
+        self.bootstrap_std_errors_ = bootstrap_se
+
+        return bootstrap_se
+
@staticmethod
def iv_moment(
z: np.ndarray, y: np.ndarray, x: np.ndarray, beta: np.ndarray
@@ -218,6 +369,8 @@ def fit(
verbose: bool = False,
fit_method: Optional[str] = None,
iid: bool = True,
+ bootstrap: bool = False,
+ n_bootstrap: int = 1000,
) -> None:
if fit_method is None:
fit_method = "l-bfgs"
@@ -235,6 +388,8 @@ def fit(
)
self.W_ = self.W_.detach().numpy()
self.theta_ = result.x.detach().numpy()
+
+ # Calculate asymptotic standard errors
try:
self.Omega_ = np.linalg.inv(self.W_)
self.Gamma_ = self.jacobian_moment_cond()
@@ -250,10 +405,21 @@ def fit(
@ self.Gamma_
@ np.linalg.inv(self.Gamma_.T @ self.W_ @ self.Gamma_)
)
-            self.std_errors_ = np.sqrt(self.n_ * np.diag(self.vtheta_))
-        except:
+            # Gamma_ is the summed Jacobian here as well, so the variance of
+            # theta_hat is n * vtheta_, not vtheta_ / n
+            self.std_errors_ = np.sqrt(self.n_ * np.diag(self.vtheta_))
+        except Exception as e:
+ if verbose:
+ print(f"Error computing asymptotic standard errors: {e}")
self.std_errors_ = None
+ # Calculate bootstrap standard errors if requested
+ if bootstrap:
+ try:
+ self.bootstrap_std_errors_ = self.bootstrap_se(n_bootstrap=n_bootstrap)
+ except Exception as e:
+ if verbose:
+ print(f"Error computing bootstrap standard errors: {e}")
+ self.bootstrap_std_errors_ = None
+
def jacobian_moment_cond(self) -> np.ndarray:
self.jac_ = torch.func.jacfwd(self.moment_cond, argnums=3)
self.jac_est_ = (
@@ -264,6 +430,68 @@ def jacobian_moment_cond(self) -> np.ndarray:
)
return self.jac_est_
+ def bootstrap_se(
+ self, n_bootstrap: int = 1000, seed: Optional[int] = None
+ ) -> np.ndarray:
+ """
+ Compute bootstrap standard errors using the linear approximation method.
+
+ Args:
+ n_bootstrap: Number of bootstrap iterations
+ seed: Random seed for reproducibility
+
+ Returns:
+ Bootstrap standard errors for each parameter
+ """
+ if not hasattr(self, "theta_"):
+ raise ValueError("Model must be fitted first")
+
+ # Set random seed if provided
+ if seed is not None:
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+
+ n = self.n_ # Sample size
+
+        # Step 1: Calculate the matrix M = -(G'WG)^(-1)G'W
+        # As in the scipy backend, rescale the summed Jacobian to mean form
+        # so that M is O(1)
+        G = self.jacobian_moment_cond() / self.n_
+        M = -np.linalg.inv(G.T @ self.W_ @ G) @ G.T @ self.W_
+
+ # Step 2: Calculate the individual moment conditions for each observation
+ # Convert theta to tensor for moment calculation
+ theta_tensor = torch.tensor(self.theta_, dtype=torch.float64)
+
+ # For the IV case, calculate residuals
+ residuals = (self.y_ - self.x_ @ theta_tensor).detach().numpy()
+
+ # Calculate individual moments (converting torch tensors to numpy)
+ z_np = self.z_.detach().numpy()
+ individual_moments = z_np * residuals[:, np.newaxis]
+
+ # Mean of the moments
+ mean_moments = np.mean(individual_moments, axis=0)
+
+ # Step 3: Generate bootstrap samples and calculate theta*
+ bootstrap_thetas = np.zeros((n_bootstrap, len(self.theta_)))
+
+ for b in range(n_bootstrap):
+ # Draw a bootstrap sample with replacement
+ indices = np.random.choice(n, n, replace=True)
+
+ # Calculate the mean of the bootstrap sample moments
+ bootstrap_moments = np.mean(individual_moments[indices], axis=0)
+
+ # Center the bootstrap moments
+ centered_moments = bootstrap_moments - mean_moments
+
+ # Calculate theta* using the linear approximation
+ bootstrap_thetas[b] = self.theta_ + M @ centered_moments
+
+        # Step 4: Calculate bootstrap standard errors and store them so that
+        # summary(method="bootstrap") and compare_se_methods() can find them
+        bootstrap_se = np.std(bootstrap_thetas, axis=0)
+        self.bootstrap_thetas_ = bootstrap_thetas
+        self.bootstrap_std_errors_ = bootstrap_se
+
+        return bootstrap_se
+
@staticmethod
def iv_moment(
z: torch.Tensor, y: torch.Tensor, x: torch.Tensor, beta: torch.Tensor
@@ -271,7 +499,248 @@ def iv_moment(
return z * (y - x @ beta).unsqueeze(-1)
+class GMMEstimatorPyEnsmallen(GMMEstimator):
+ """Class to create GMM estimator using PyEnsmallen"""
+
+ def __init__(
+ self,
+ moment_cond: Callable,
+ weighting_matrix: Union[str, np.ndarray] = "optimal",
+ backend: str = "pyensmallen",
+ ):
+ super().__init__(moment_cond, weighting_matrix, backend)
+ self.z_: Optional[np.ndarray] = None
+ self.y_: Optional[np.ndarray] = None
+ self.x_: Optional[np.ndarray] = None
+ self.n_: Optional[int] = None
+ self.k_: Optional[int] = None
+ self.W_: Optional[np.ndarray] = None
+ self.theta_: Optional[np.ndarray] = None
+ self.Gamma_: Optional[np.ndarray] = None
+ self.vtheta_: Optional[np.ndarray] = None
+ self.std_errors_: Optional[np.ndarray] = None
+ self.Omega_: Optional[np.ndarray] = None
+ self.bootstrap_thetas_: Optional[np.ndarray] = None
+ self.bootstrap_std_errors_: Optional[np.ndarray] = None
+
+ def gmm_objective(self, beta: np.ndarray, gradient: np.ndarray) -> float:
+ """
+ Compute GMM objective function value and gradient.
+ This follows PyEnsmallen's expected interface where the gradient is filled in-place.
+
+ Args:
+ beta: Parameter vector
+ gradient: Will be filled with gradient values
+
+ Returns:
+ Objective function value
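+
+        Note:
+            ensmallen passes a pre-allocated ``gradient`` buffer and expects it
+            to be filled in place (as done element-wise below); returning a new
+            array instead would leave the optimizer without gradient information.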
+ """
+ moments = self.moment_cond(self.z_, self.y_, self.x_, beta)
+
+ if self.weighting_matrix == "optimal":
+ self.W_ = self.optimal_weighting_matrix(moments)
+ elif isinstance(self.weighting_matrix, str):
+ self.W_ = np.eye(moments.shape[1])
+ elif self.W_ is None:
+ self.W_ = np.asarray(self.weighting_matrix)
+
+ mavg = moments.mean(axis=0)
+ obj_value = float(mavg.T @ self.W_ @ mavg)
+
+ # Calculate the gradient
+ # For the generic case, we use numerical differentiation
+ # This could be replaced with analytical gradients for specific moment conditions
+ h = 1e-8 # Small step size
+ for i in range(len(beta)):
+ beta_plus = beta.copy()
+ beta_plus[i] += h
+ moments_plus = self.moment_cond(self.z_, self.y_, self.x_, beta_plus)
+ mavg_plus = moments_plus.mean(axis=0)
+ obj_plus = float(mavg_plus.T @ self.W_ @ mavg_plus)
+ gradient[i] = (obj_plus - obj_value) / h
+
+ return obj_value
+
+ def optimal_weighting_matrix(self, moments: np.ndarray) -> np.ndarray:
+ """Calculate optimal weighting matrix: (E[g_i g_i'])^(-1)"""
+ return np.linalg.inv((1 / self.n_) * (moments.T @ moments))
+
+ def fit(
+ self,
+ z: np.ndarray,
+ y: np.ndarray,
+ x: np.ndarray,
+ verbose: bool = False,
+ fit_method: Optional[str] = None,
+        iid: bool = True,
+        bootstrap: bool = False,
+        n_bootstrap: int = 1000,
+    ) -> None:
+ """
+ Fit the GMM model using PyEnsmallen optimizer.
+
+ Args:
+ z: Instrument matrix
+ y: Outcome vector
+ x: Covariate matrix (including intercept)
+ verbose: Whether to print optimization details
+            fit_method: Specific optimization method (ignored; this backend always uses L-BFGS)
+            iid: Whether to assume IID errors for standard error calculation
+            bootstrap: Whether to also compute bootstrap standard errors after fitting
+            n_bootstrap: Number of bootstrap iterations when bootstrap=True
+ """
+ # Store data
+ self.z_, self.y_, self.x_ = z, y, x
+ self.n_, self.k_ = x.shape
+
+ # Initialize optimizer (PyEnsmallen uses L-BFGS by default)
+ optimizer = pe.L_BFGS()
+
+        # Initial point (zeros)
+        initial_point = np.zeros(self.k_)
+
+        # Run optimization; gmm_objective already follows ensmallen's
+        # (params, gradient) callback signature, so pass it directly
+        self.theta_ = optimizer.optimize(self.gmm_objective, initial_point)
+
+ # Compute standard errors
+ try:
+ # Calculate Jacobian of moment conditions
+ self.Gamma_ = self.jacobian_moment_cond()
+
+ # Calculate variance-covariance matrix
+ if iid:
+ # Simpler formula for IID case
+ self.vtheta_ = np.linalg.inv(self.Gamma_.T @ self.W_ @ self.Gamma_)
+ else:
+ # Robust formula for non-IID case (HAC)
+ self.Omega_ = np.linalg.inv(self.W_)
+ self.vtheta_ = (
+ np.linalg.inv(self.Gamma_.T @ self.W_ @ self.Gamma_)
+ @ self.Gamma_.T
+ @ self.W_
+ @ self.Omega_
+ @ self.W_
+ @ self.Gamma_
+ @ np.linalg.inv(self.Gamma_.T @ self.W_ @ self.Gamma_)
+ )
+
+ # Standard errors (divided by sqrt(n))
+ self.std_errors_ = np.sqrt(np.diag(self.vtheta_) / self.n_)
+        except Exception as e:
+            if verbose:
+                print(f"Error computing standard errors: {e}")
+            self.std_errors_ = None
+
+        # Calculate bootstrap standard errors if requested (mirrors the other backends)
+        if bootstrap:
+            try:
+                self.bootstrap_std_errors_ = self.bootstrap_se(
+                    n_bootstrap=n_bootstrap, verbose=verbose
+                )
+            except Exception as e:
+                if verbose:
+                    print(f"Error computing bootstrap standard errors: {e}")
+                self.bootstrap_std_errors_ = None
+
+ def jacobian_moment_cond(self) -> np.ndarray:
+ """
+ Compute Jacobian of moment conditions.
+ For IV moment conditions, this is -E[z'x].
+ For other moment conditions, would need specific implementations.
+ """
+        # For standard IV moments we know the analytical form. Note that an
+        # isinstance check against type(self.iv_moment) would match *any* plain
+        # function, so dispatch on the function name instead.
+        if getattr(self.moment_cond, "__name__", "") == "iv_moment":
+ jac = -self.z_.T @ self.x_ / self.n_
+ else:
+            # Numerical approximation for other moment conditions
+            jac = np.zeros((self.z_.shape[1], self.k_))
+            h = 1e-8
+            beta = self.theta_
+            # The baseline moments depend only on beta, so compute them once
+            base_mean = self.moment_cond(self.z_, self.y_, self.x_, beta).mean(axis=0)
+
+            for i in range(self.k_):
+                beta_plus = beta.copy()
+                beta_plus[i] += h
+                moments_plus = self.moment_cond(self.z_, self.y_, self.x_, beta_plus)
+                jac[:, i] = (moments_plus.mean(axis=0) - base_mean) / h
+
+ return jac
+
+ @staticmethod
+ def iv_moment(
+ z: np.ndarray, y: np.ndarray, x: np.ndarray, beta: np.ndarray
+ ) -> np.ndarray:
+ """Standard IV moment condition: z_i * (y_i - x_i'β)"""
+ return z * (y - x @ beta)[:, np.newaxis]
+
+ def bootstrap_se(
+ self,
+ n_bootstrap: int = 1000,
+ seed: Optional[int] = None,
+ verbose: bool = False,
+ n_jobs: int = 1,
+ ) -> np.ndarray:
+ """
+ Compute bootstrap standard errors using full optimization for each sample.
+
+ Args:
+ n_bootstrap: Number of bootstrap iterations
+ seed: Random seed for reproducibility
+ verbose: Whether to print progress
+ n_jobs: Number of parallel jobs (-1 for all cores)
+
+ Returns:
+ Bootstrap standard errors
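+
+        Example (illustrative; each draw re-fits the model, so larger
+        n_bootstrap benefits from n_jobs=-1):
+            >>> se = est.bootstrap_se(n_bootstrap=200, seed=0, n_jobs=-1)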
+ """
+ if not hasattr(self, "theta_"):
+ raise ValueError("Model must be fitted first")
+
+ # Set random seed if provided
+ if seed is not None:
+ np.random.seed(seed)
+
+ # Create bootstrap indices
+ n = self.n_
+ bootstrap_indices = np.random.choice(n, size=(n_bootstrap, n), replace=True)
+
+ # Define the bootstrap function to be run in parallel
+ def run_bootstrap(bootstrap_index, indices):
+            # Give each worker a distinct, reproducible seed when one is provided
+            if seed is not None:
+                np.random.seed(seed + bootstrap_index)
+
+ # Sample data using the indices
+ z_boot = self.z_[indices]
+ y_boot = self.y_[indices]
+ x_boot = self.x_[indices]
+
+ # Create a new estimator and fit
+ estimator = GMMEstimatorPyEnsmallen(self.moment_cond, self.weighting_matrix)
+ estimator.fit(z_boot, y_boot, x_boot, verbose=False)
+
+ # Return the bootstrap estimates
+ return estimator.theta_
+
+ # Run bootstrap in parallel
+ if verbose:
+ print(f"Running {n_bootstrap} bootstrap iterations with {n_jobs} jobs...")
+ start_time = time.time()
+
+ # Use loky backend with controlled threading to avoid parallelism issues
+ with parallel_backend("loky", inner_max_num_threads=1):
+ bootstrap_results = Parallel(n_jobs=n_jobs)(
+ delayed(run_bootstrap)(i, indices)
+ for i, indices in enumerate(bootstrap_indices)
+ )
+
+ if verbose:
+ end_time = time.time()
+ print(f"Bootstrap completed in {end_time - start_time:.2f} seconds")
+
+ # Convert results to array
+ bootstrap_thetas = np.array(bootstrap_results)
+
+ # Calculate bootstrap standard errors
+ bootstrap_se = np.std(bootstrap_thetas, axis=0)
+
+ # Store the bootstrap results
+ self.bootstrap_thetas_ = bootstrap_thetas
+ self.bootstrap_std_errors_ = bootstrap_se
+
+ return bootstrap_se
+
+
_BACKENDS = {
"scipy": GMMEstimatorScipy,
"torch": GMMEstimatorTorch,
+ "pyensmallen": GMMEstimatorPyEnsmallen,
}
diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb
index 0f2872f..2cc75aa 100644
--- a/notebooks/example.ipynb
+++ b/notebooks/example.ipynb
@@ -9,10 +9,12 @@
"import numpy as np\n",
"import statsmodels.api as sm\n",
"import linearmodels as lm\n",
+ "import time\n",
+    "try:\n",
+    "    import pyensmallen as pe\n",
+    "    pyensmallen_available = True\n",
+    "except ImportError:\n",
+    "    pyensmallen_available = False\n",
"\n",
"np.random.seed(94305)\n",
"\n",
- "from gmm.gmm import GMMEstimator # noqa: E402"
+ "from gmm.gmm import GMMEstimator # noqa: E402\n"
]
},
{
@@ -28,7 +30,7 @@
"metadata": {},
"outputs": [],
"source": [
- "def dgp(n = 100_000,\n",
+ "def dgp(n = 1000,\n",
" beta = np.array([-0.5, 1.2]),\n",
" rho = 0.7,\n",
" pi = np.array([0.5, -0.1])):\n",
@@ -63,8 +65,8 @@
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
- "const -0.4994 0.004 -128.506 0.000 -0.507 -0.492\n",
- "x1 1.1953 0.004 308.588 0.000 1.188 1.203\n",
+ "const -0.4976 0.038 -13.249 0.000 -0.571 -0.424\n",
+ "x1 1.1822 0.036 32.849 0.000 1.112 1.253\n",
"==============================================================================\n"
]
}
@@ -90,8 +92,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 123 ms, sys: 12.7 ms, total: 136 ms\n",
- "Wall time: 103 ms\n"
+ "CPU times: user 21.5 ms, sys: 0 ns, total: 21.5 ms\n",
+ "Wall time: 7.89 ms\n"
]
},
{
@@ -126,30 +128,30 @@
"
\n",
" \n",
" 0 | \n",
- " -0.4994 | \n",
- " 0.0039 | \n",
- " -128.5068 | \n",
+ " -0.4976 | \n",
" 0.0 | \n",
- " -0.5070 | \n",
- " -0.4918 | \n",
+ " -13261.8743 | \n",
+ " 0.0 | \n",
+ " -0.4977 | \n",
+ " -0.4975 | \n",
"
\n",
" \n",
" 1 | \n",
- " 1.1953 | \n",
- " 0.0039 | \n",
- " 308.5945 | \n",
+ " 1.1822 | \n",
" 0.0 | \n",
- " 1.1877 | \n",
- " 1.2029 | \n",
+ " 32909.9285 | \n",
+ " 0.0 | \n",
+ " 1.1821 | \n",
+ " 1.1823 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " coef std err t p-value [0.025 0.975]\n",
- "0 -0.4994 0.0039 -128.5068 0.0 -0.5070 -0.4918\n",
- "1 1.1953 0.0039 308.5945 0.0 1.1877 1.2029"
+ " coef std err t p-value [0.025 0.975]\n",
+ "0 -0.4976 0.0 -13261.8743 0.0 -0.4977 -0.4975\n",
+ "1 1.1822 0.0 32909.9285 0.0 1.1821 1.1823"
]
},
"execution_count": 4,
@@ -169,7 +171,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### GMM using Torch Minimization"
+ "### GMM using `pyensmallen`"
]
},
{
@@ -181,8 +183,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 468 ms, sys: 70 ms, total: 538 ms\n",
- "Wall time: 413 ms\n"
+ "CPU times: user 1.66 ms, sys: 1.98 ms, total: 3.64 ms\n",
+ "Wall time: 3.3 ms\n"
]
},
{
@@ -217,30 +219,30 @@
" \n",
" \n",
" 0 | \n",
- " -0.4994 | \n",
- " 0.0039 | \n",
- " -128.5069 | \n",
+ " -0.4976 | \n",
" 0.0 | \n",
- " -0.5070 | \n",
- " -0.4918 | \n",
+ " -13261.9715 | \n",
+ " 0.0 | \n",
+ " -0.4977 | \n",
+ " -0.4975 | \n",
"
\n",
" \n",
" 1 | \n",
- " 1.1953 | \n",
- " 0.0039 | \n",
- " 308.5946 | \n",
+ " 1.1822 | \n",
" 0.0 | \n",
- " 1.1877 | \n",
- " 1.2029 | \n",
+ " 32909.8360 | \n",
+ " 0.0 | \n",
+ " 1.1821 | \n",
+ " 1.1823 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " coef std err t p-value [0.025 0.975]\n",
- "0 -0.4994 0.0039 -128.5069 0.0 -0.5070 -0.4918\n",
- "1 1.1953 0.0039 308.5946 0.0 1.1877 1.2029"
+ " coef std err t p-value [0.025 0.975]\n",
+ "0 -0.4976 0.0 -13261.9715 0.0 -0.4977 -0.4975\n",
+ "1 1.1822 0.0 32909.8360 0.0 1.1821 1.1823"
]
},
"execution_count": 5,
@@ -248,6 +250,181 @@
"output_type": "execute_result"
}
],
+ "source": [
+ "%%time\n",
+ "ψ = lambda z, y, x, beta: z * (y - x @ beta)[:, np.newaxis]\n",
+ "gmm_pe = GMMEstimator(ψ, backend = \"pyensmallen\")\n",
+    "gmm_pe.fit(np.c_[np.ones(z.shape[0]), z], y, X)\n",
+ "gmm_pe.summary()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### GMM using Torch Minimization"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 944 ms, sys: 53.8 ms, total: 998 ms\n",
+ "Wall time: 730 ms\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " coef | \n",
+ " std err | \n",
+ " t | \n",
+ " p-value | \n",
+ " [0.025 | \n",
+ " 0.975] | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " -0.4976 | \n",
+ " 0.0 | \n",
+ " -13261.6551 | \n",
+ " 0.0 | \n",
+ " -0.4977 | \n",
+ " -0.4975 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1.1822 | \n",
+ " 0.0 | \n",
+ " 32909.8149 | \n",
+ " 0.0 | \n",
+ " 1.1821 | \n",
+ " 1.1823 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " coef std err t p-value [0.025 0.975]\n",
+ "0 -0.4976 0.0 -13261.6551 0.0 -0.4977 -0.4975\n",
+ "1 1.1822 0.0 32909.8149 0.0 1.1821 1.1823"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"%%time\n",
"def moment_cond(z, y, x, beta):\n",
@@ -268,15 +445,15 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 243 ms, sys: 33 ms, total: 276 ms\n",
- "Wall time: 82.8 ms\n"
+ "CPU times: user 874 ms, sys: 89.3 ms, total: 963 ms\n",
+ "Wall time: 82.3 ms\n"
]
},
{
@@ -337,7 +514,7 @@
"1 1.1953 0.0039 308.5945 0.0 1.1877 1.2029"
]
},
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -353,13 +530,6 @@
"gmm.summary()"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Identical estimates and standard errors."
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -731,11 +901,176 @@
"source": [
"Identical estimates and standard errors."
]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### GMM using PyEnsmallen Minimization\n",
+ "\n",
+    "Now let's test the PyEnsmallen backend, which wraps the ensmallen C++ optimization library and should be faster than the scipy and torch backends on this problem."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "if pyensmallen_available:\n",
+ " ψ = lambda z, y, x, beta: z * (y - x @ beta)[:, np.newaxis]\n",
+ " gmm_pe = GMMEstimator(ψ, backend=\"pyensmallen\")\n",
+ " gmm_pe.fit(np.c_[np.ones(z.shape[0]), z], y, X)\n",
+ " gmm_pe.summary()\n",
+ "else:\n",
+ " print(\"PyEnsmallen is not installed. Install with: pip install pyensmallen\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Performance Comparison of Different Backends\n",
+ "\n",
+ "Let's compare the performance of all backends on the same problem."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def benchmark_backend(backend, n_runs=5):\n",
+ " \"\"\"Run GMM estimation with a specific backend multiple times and measure average execution time\"\"\"\n",
+ " times = []\n",
+ "\n",
+ " # Define moment condition function based on backend\n",
+ " if backend == \"torch\":\n",
+ " def moment_cond(z, y, x, beta):\n",
+ " residuals = (y - x @ beta).unsqueeze(-1)\n",
+ " return z * residuals\n",
+ " else:\n",
+ " moment_cond = lambda z, y, x, beta: z * (y - x @ beta)[:, np.newaxis]\n",
+ "\n",
+ " # Run estimation multiple times\n",
+ " for _ in range(n_runs):\n",
+ " gmm = GMMEstimator(moment_cond, backend=backend)\n",
+ "\n",
+ " start_time = time.time()\n",
+ " gmm.fit(np.c_[np.ones(z.shape[0]), z], y, X, verbose=False)\n",
+ " end_time = time.time()\n",
+ "\n",
+ " times.append(end_time - start_time)\n",
+ "\n",
+ " return np.mean(times)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# List of backends to benchmark\n",
+ "backends = [\"scipy\", \"torch\"]\n",
+ "if pyensmallen_available:\n",
+ " backends.append(\"pyensmallen\")\n",
+ "\n",
+ "# Run benchmarks\n",
+ "benchmark_results = {}\n",
+ "for backend in backends:\n",
+ " avg_time = benchmark_backend(backend)\n",
+ " benchmark_results[backend] = avg_time\n",
+ " print(f\"{backend.capitalize()} average time: {avg_time:.4f} seconds\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Plot benchmark results\n",
+ "plt.figure(figsize=(10, 5))\n",
+ "plt.bar(benchmark_results.keys(), benchmark_results.values())\n",
+ "plt.ylabel('Average Time (seconds)')\n",
+ "plt.title('Performance Comparison of GMM Backends')\n",
+ "plt.xticks(rotation=0)\n",
+ "plt.grid(axis='y', alpha=0.3)\n",
+ "\n",
+ "# Add labels on top of bars\n",
+ "for i, (k, v) in enumerate(benchmark_results.items()):\n",
+ " plt.text(i, v + 0.02, f\"{v:.4f}s\", ha='center')\n",
+ "\n",
+ "plt.tight_layout()\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Comparing Parameter Estimates Across Backends\n",
+ "\n",
+ "Let's verify that all backends produce comparable parameter estimates."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# Get parameter estimates from different backends\n",
+ "scipy_moment = lambda z, y, x, beta: z * (y - x @ beta)[:, np.newaxis]\n",
+ "gmm_scipy = GMMEstimator(scipy_moment, backend=\"scipy\")\n",
+ "gmm_scipy.fit(np.c_[np.ones(z.shape[0]), z], y, X, verbose=False)\n",
+ "\n",
+ "torch_moment = lambda z, y, x, beta: z * (y - x @ beta).unsqueeze(-1)\n",
+ "gmm_torch = GMMEstimator(torch_moment, backend=\"torch\")\n",
+ "gmm_torch.fit(np.c_[np.ones(z.shape[0]), z], y, X, verbose=False)\n",
+ "\n",
+ "# Compare parameters\n",
+ "results_dict = {\n",
+ " \"True Value\": np.array([-0.5, 1.2]), # True parameter values from DGP\n",
+ " \"SciPy\": gmm_scipy.theta_,\n",
+ " \"PyTorch\": gmm_torch.theta_,\n",
+ "}\n",
+ "\n",
+ "if pyensmallen_available:\n",
+ " gmm_pe = GMMEstimator(scipy_moment, backend=\"pyensmallen\")\n",
+ " gmm_pe.fit(np.c_[np.ones(z.shape[0]), z], y, X, verbose=False)\n",
+ " results_dict[\"PyEnsmallen\"] = gmm_pe.theta_\n",
+ "\n",
+ "# Create comparison dataframe\n",
+ "comparison_df = pd.DataFrame(results_dict, index=[\"Intercept\", \"Slope\"])\n",
+ "comparison_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Calculate absolute error relative to true values\n",
+ "for backend in list(results_dict.keys())[1:]:\n",
+ " comparison_df[f\"{backend} Error\"] = np.abs(comparison_df[backend] - comparison_df[\"True Value\"])\n",
+ "\n",
+ "# Display formatted results\n",
+ "comparison_df.round(4)"
+ ]
}
],
"metadata": {
"kernelspec": {
- "display_name": "metrics",
+ "display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -749,7 +1084,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.7"
+ "version": "3.12.5"
}
},
"nbformat": 4,
diff --git a/requirements.txt b/requirements.txt
index 5784538..5d4a204 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ pandas
scipy
torch
pytorch-minimize
+pyensmallen