diff --git a/demo/APO Sample Existing Demo.ipynb b/demo/APO Sample Existing Demo.ipynb
new file mode 100644
index 0000000..8e9df98
--- /dev/null
+++ b/demo/APO Sample Existing Demo.ipynb
@@ -0,0 +1,197 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "75f106cc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "sys.path.insert(0, '../')\n",
+    "\n",
+    "print(sys.path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "117a382f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import obsidian\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "print(f'obsidian version: {obsidian.__version__}')\n",
+    "\n",
+    "from obsidian.experiment import AdvExpDesigner\n",
+    "from obsidian.experiment.sampling import sample_with_bias, best_sample"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1cd0a6b0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Generate random data for this demo\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "n = 1000\n",
+    "demo_data = pd.DataFrame({\n",
+    "    'reagent_conc': np.round(np.random.uniform(0.1, 1.0, n), 2),\n",
+    "    'ionic_strength': np.round(np.random.uniform(10, 100, n), 2),\n",
+    "    'surfactant_conc': np.round(np.random.uniform(0.01, 0.2, n), 3),\n",
+    "    'compound_A': np.round(np.random.uniform(0, 50, n), 2),\n",
+    "    'compound_B': np.round(np.random.uniform(0, 50, n), 2),\n",
+    "    'sugar': np.random.choice(['glucose', 'fructose', 'sucrose'], n),\n",
+    "    'surfactant': np.random.choice(['SDS', 'Tween20', 'TritonX'], n),\n",
+    "    'buffer': np.random.choice(['PBS', 'Tris', 'HEPES'], n),\n",
+    "    'pH': np.round(np.random.uniform(5.5, 8.5, n), 2)\n",
+    "})\n",
+    "\n",
+    "demo_data.index.name = 'FormulationID'\n",
+    "demo_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "57ed0226",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize the existing experimental data as an AdvExpDesigner object\n",
+    "designer = AdvExpDesigner(design_df=demo_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9feae24b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"\n",
+    "You can sample an existing dataset with or without bias.\n",
+    "Bias dictionary format: {\"column\": [lower_bound, upper_bound, relative_weight]}\n",
+    "\n",
+    "- Weight >1 increases sampling probability for in-range rows.\n",
+    "- Weight <1 decreases it.\n",
+    "- Weight = 0 excludes those rows entirely.\n",
+    "\"\"\"\n",
+    "\n",
+    "bias = {\n",
+    "    \"ionic_strength\": [50, 60, 3.0],\n",
+    "}\n",
+    "\n",
+    "seed = np.random.randint(0, 1000)\n",
+    "print(f\"Random seed for reproducibility: {seed}\")\n",
+    "\n",
+    "# Draw a weighted random sample of n rows using built-in Pandas sampling.\n",
+    "# enforce=True restricts the sample to rows inside the bias bounds; the result may not be space-filling.\n",
+    "sample = sample_with_bias(designer.design, n=1000, replace=False, seed=seed, bias=bias, plot_weights=True, enforce=False)\n",
+    "\n",
+    "sample"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ab457ed8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One-hot encode the categorical columns so Euclidean distances can be computed\n",
+    "df_encoded = pd.get_dummies(designer.design, columns=[\"sugar\", \"surfactant\", \"buffer\"], dtype=int)"
+   ]
+  },
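+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b3a5c7e9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Quick sanity check (illustrative addition, not required): the encoding\n",
+    "# should add one indicator column per category level and keep the row count.\n",
+    "print(f'original: {designer.design.shape}, encoded: {df_encoded.shape}')\n",
+    "df_encoded.head()"
+   ]
+  },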
"perform random sampling n_trial times, select the best one via criteria metric:\n", + "metric:\n", + " - \"maximin\": maximize the minimum pairwise Euclidean distance\n", + " - \"mean_nn\": maximize the mean nearest-neighbor Euclidean distance\n", + " - \"hybrid\": 0.6*maximin + 0.4*mean_nn \n", + "\"\"\"\n", + "seed = np.random.randint(0,1000)\n", + "print(f\"Random seed for reproducibility: {seed}\")\n", + "\n", + "optimal_sample, info = best_sample(\n", + " df_encoded, 10, feature_cols=df_encoded.columns, n_trials=1000,\n", + " bias=bias, plot_weights=True, enforce=False, random_state=seed, metric=\"hybrid\"\n", + ")\n", + "\n", + "print(info)\n", + "optimal_sample\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ea1bcd8", + "metadata": {}, + "outputs": [], + "source": [ + "#decode from one-hot encoding\n", + "normal_cols = list(optimal_sample.columns)[0:6]\n", + "encoded_cols = list(optimal_sample.columns)[6:]\n", + "decoded = pd.from_dummies(optimal_sample[encoded_cols],sep=\"_\")\n", + "optimal_design_decoded = pd.concat([optimal_sample[normal_cols], decoded], axis=1)\n", + "optimal_design_decoded" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2137e315", + "metadata": {}, + "outputs": [], + "source": [ + "print(designer.plot_histograms(optimal_design_decoded))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv (3.13.5)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/demo/Advanced Experimental Design.ipynb b/demo/Advanced Experimental Design.ipynb index b635be5..45d208b 100644 --- a/demo/Advanced Experimental Design.ipynb +++ b/demo/Advanced Experimental Design.ipynb @@ -33,10 +33,16 @@ "# Define continuous parameters: key -> (low, high, step)\n", "\n", "continuous_params = {\n", - " 'temperature': (20, 80, 5), # Linear steps of 5 between 20 and 80\n", - " 'concentration': (0.1, 1.0, 0.1), # Linear steps of 0.1 between 0.1 and 1.0\n", - " 'pressure': (1, 16, 'geometric'), # Geometric steps doubling from 1 to 16 (1, 2, 4, 8, 16)\n", - " 'time': (10, 1000, 'logarithmic') # Logarithmic steps (powers of 10) between 10 and 1000\n", + " 'temperature': (20, 80, 5), # Linear steps of 5 between 20 and 80\n", + " 'concentration': (0.1, 1.0, 0.1), # Linear steps of 0.1 between 0.1 and 1.0\n", + " 'pressure': (1, 16, 'geometric'), # Geometric steps doubling from 1 to 16 (1, 2, 4, 8, 16)\n", + " 'time': (10, 1000, 'logarithmic'), # Logarithmic steps (powers of 10) between 10 and 1000\n", + " 'flow_rate': [0.5, 1.0, 2.0, 5.0, 10.0], # Custom discrete levels, equal biases\n", + " 'Reagent Concentration': {\n", + " 'levels': [1.0, 2.0, 3.0, 5.0, 10.0], # Custom levels with biased sampling\n", + " 'biases': [0.1, 0.2, 0.4, 0.2, 0.1] # Higher probability for middle values\n", + "}\n", + "\n", "}\n", "\n", "# Define conditional categorical parameters with subparameters and frequencies: key -> {subkey: {'freq': frequency, 'subparams': ([values], [frequencies])}}\n", @@ -170,7 +176,7 @@ ], "metadata": { "kernelspec": { - "display_name": "obsidian", + "display_name": ".venv (3.13.5)", "language": "python", "name": "python3" }, @@ -184,7 +190,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython3", - "version": "3.10.14" + "version": "3.13.5" } }, "nbformat": 4, diff --git a/obsidian/experiment/advanced_design.py b/obsidian/experiment/advanced_design.py index e7594af..11581cf 100644 --- a/obsidian/experiment/advanced_design.py +++ b/obsidian/experiment/advanced_design.py @@ -22,25 +22,41 @@ class AdvExpDesigner: """ def __init__( - self, continuous_params, conditional_subparameters, subparam_mapping=None + self, continuous_params=None, conditional_subparameters=None, subparam_mapping=None, design_df=None ): """ Initializes the AdvExpDesigner with experimental parameters and optional subparameter mappings. :param continuous_params: A dictionary containing the continuous parameters for the design. + Each parameter can be specified as: + - (low, high, step): Linear spacing with a fixed step size + - (low, high, "geometric"): Geometric spacing (doubling) + - (low, high, "logarithmic"): Logarithmic spacing (powers of 10) + - [value1, value2, ...]: Custom list of specific levels to sample from + - {'levels': [value1, value2, ...], 'biases': [weight1, weight2, ...]}: + Custom levels with optional biases/weights for non-uniform sampling. + Biases will be normalized to sum to 1.0 if they don't already. :param conditional_subparameters: A dictionary containing the conditional subparameters for the design. :param subparam_mapping: A dictionary for mapping, will be inferred if not provided. + :param design_df: A Pandas DataFrame of an existing experimental design, default None """ - self.continuous_params = continuous_params - self.conditional_subparameters = conditional_subparameters - self.subparam_mapping = subparam_mapping or infer_subparam_mapping( - self.conditional_subparameters - ) - self.continuous_keys = list(self.continuous_params.keys()) - self.categorical_keys = list(self.conditional_subparameters.keys()) - self.subparam_key = ( - list(self.subparam_mapping.values())[0] if self.subparam_mapping else None - ) + self.continuous_params = continuous_params if continuous_params else {} + self.conditional_subparameters = conditional_subparameters if conditional_subparameters else {} + + if design_df is not None and not design_df.empty: + self.design = design_df + self.categorical_keys = design_df.select_dtypes(exclude=['number']).columns.tolist() + if continuous_params: + self.continuous_keys = list(self.continuous_params.keys()) + else: + self.continuous_keys = design_df.select_dtypes(include=['number']).columns.tolist() + else: + self.continuous_keys = list(self.continuous_params.keys()) if continuous_params else [] + self.categorical_keys = list(self.conditional_subparameters.keys()) if conditional_subparameters else [] + + self.subparam_mapping = subparam_mapping or infer_subparam_mapping(self.conditional_subparameters) + self.subparam_keys = list(self.subparam_mapping.values()) if self.subparam_mapping else [] + self.subparam_key = self.subparam_keys[0] if self.subparam_keys else None def generate_design(self, seed, n_samples, optimize_categories=True): """ @@ -76,6 +92,7 @@ def evaluate_design(self, design, metrics_to_optimize=None): "Pairwise Distance CV", "Max Continuous Corr", "Max Categorical Corr", + "Max Mixed Corr", ] return evaluate_design( @@ -114,6 +131,7 @@ def optimize_design( "Pairwise Distance CV", "Max Continuous Corr", "Max Categorical Corr", + "Max Mixed Corr", ] if maximize_metrics is None: maximize_metrics = "D-optimality" @@ -269,34 +287,74 @@ def sample_continuous_lhs(continuous_params, n_samples, seed): cont_samples = {} keys = 
     for idx, key in enumerate(keys):
-        low, high, step = continuous_params[key]
-        if step == "geometric":
-            possible = []
-            value = low
-            while value <= high:
-                possible.append(value)
-                value *= 2
-            possible = np.array(possible)
-            indices = np.floor(sample_cont[:, idx] * len(possible)).astype(int)
-            indices = np.clip(indices, 0, len(possible) - 1)
-            cont_samples[key] = possible[indices]
-        elif step == "logarithmic":
-            if low <= 0 or high <= 0:
-                raise ValueError(
-                    f"Logarithmic step requires positive low and high for parameter '{key}'"
-                )
-            exp_low = int(np.floor(np.log10(low)))
-            exp_high = int(np.floor(np.log10(high)))
-            possible = 10.0 ** np.arange(exp_low, exp_high + 1)
+        param_spec = continuous_params[key]
+
+        # Check if parameter specification is a dictionary with 'levels' and optional 'biases'
+        if isinstance(param_spec, dict):
+            levels = param_spec.get('levels')
+            biases = param_spec.get('biases', None)
+
+            if levels is None:
+                raise ValueError(f"Dictionary specification for parameter '{key}' must include 'levels' key")
+
+            possible = np.array(levels)
+
+            if biases is not None:
+                # Use biased sampling with the inverse transform method
+                biases = np.array(biases)
+                if len(biases) != len(possible):
+                    raise ValueError(f"Bias length ({len(biases)}) must match levels length ({len(possible)}) for parameter '{key}'")
+                if not np.isclose(np.sum(biases), 1.0):
+                    biases = biases / np.sum(biases)
+
+                # Create CDF for inverse transform sampling
+                cdf = np.cumsum(biases)
+                uniform_samples = sample_cont[:, idx]
+                indices = np.searchsorted(cdf, uniform_samples)
+                indices = np.clip(indices, 0, len(possible) - 1)
+                cont_samples[key] = possible[indices]
+            else:
+                # Uniform sampling across custom levels
+                indices = np.floor(sample_cont[:, idx] * len(possible)).astype(int)
+                indices = np.clip(indices, 0, len(possible) - 1)
+                cont_samples[key] = possible[indices]
+
+        # Custom list of levels without biases. Note: a 3-element list of scalars falls through to the (low, high, step) branch below; use {'levels': [...]} to force exactly three custom levels.
+        elif isinstance(param_spec, (list, tuple)) and not (len(param_spec) == 3 and not isinstance(param_spec[0], (list, tuple))):
+            possible = np.array(param_spec)
             indices = np.floor(sample_cont[:, idx] * len(possible)).astype(int)
             indices = np.clip(indices, 0, len(possible) - 1)
             cont_samples[key] = possible[indices]
         else:
-            num_steps = int(round((high - low) / step)) + 1
-            possible = np.linspace(low, high, num_steps)
-            indices = np.floor(sample_cont[:, idx] * num_steps).astype(int)
-            indices = np.clip(indices, 0, num_steps - 1)
-            cont_samples[key] = possible[indices]
+            # Traditional (low, high, step) format
+            low, high, step = param_spec
+            if step == "geometric":
+                possible = []
+                value = low
+                while value <= high:
+                    possible.append(value)
+                    value *= 2
+                possible = np.array(possible)
+                indices = np.floor(sample_cont[:, idx] * len(possible)).astype(int)
+                indices = np.clip(indices, 0, len(possible) - 1)
+                cont_samples[key] = possible[indices]
+            elif step == "logarithmic":
+                if low <= 0 or high <= 0:
+                    raise ValueError(
+                        f"Logarithmic step requires positive low and high for parameter '{key}'"
+                    )
+                exp_low = int(np.floor(np.log10(low)))
+                exp_high = int(np.floor(np.log10(high)))
+                possible = 10.0 ** np.arange(exp_low, exp_high + 1)
+                indices = np.floor(sample_cont[:, idx] * len(possible)).astype(int)
+                indices = np.clip(indices, 0, len(possible) - 1)
+                cont_samples[key] = possible[indices]
+            else:
+                num_steps = int(round((high - low) / step)) + 1
+                possible = np.linspace(low, high, num_steps)
+                indices = np.floor(sample_cont[:, idx] * num_steps).astype(int)
+                indices = np.clip(indices, 0, num_steps - 1)
+                cont_samples[key] = possible[indices]

     return cont_samples
@@ -426,13 +484,16 @@ def assign_conditional_subparameter(


 def infer_subparam_mapping(conditional_subparameters):
     mapping = {}
-    for cat_param, levels in conditional_subparameters.items():
-        subparam_candidates = set()
-        for level_info in levels.values():
-            subparams = [k for k in level_info if k != "freq"]
-            subparam_candidates.update(subparams)
-        if len(subparam_candidates) == 1:
-            mapping[cat_param] = subparam_candidates.pop()
+    if len(conditional_subparameters) == 0:
+        return mapping
+    else:
+        for cat_param, levels in conditional_subparameters.items():
+            subparam_candidates = set()
+            for level_info in levels.values():
+                subparams = [k for k in level_info if k != "freq"]
+                subparam_candidates.update(subparams)
+            if len(subparam_candidates) == 1:
+                mapping[cat_param] = subparam_candidates.pop()
     return mapping

@@ -495,18 +556,57 @@ def sample_design(

     # Round and format continuous variables
     for key in continuous_params.keys():
-        step = continuous_params[key][2]
-        if step == "geometric":
-            design[key] = design[key].round(3)
-        elif step == "logarithmic":
-            design[key] = design[key].round(5)
-        elif isinstance(step, (int, float)):
-            decimals = max(0, -int(np.floor(np.log10(step)))) if step < 1 else 0
-            design[key] = design[key].round(decimals)
-            if isinstance(step, int) or (isinstance(step, float) and step.is_integer()):
+        param_spec = continuous_params[key]
+
+        # Check if parameter specification is a dictionary with 'levels' and optional 'biases'
+        if isinstance(param_spec, dict):
+            levels = param_spec.get('levels')
+            possible = np.array(levels)
+            # Check if all values are integers
+            if np.all(np.equal(np.mod(possible, 1), 0)):
                 design[key] = design[key].astype(int)
+            else:
+                # Find the maximum number of decimal places in the custom levels
+                max_decimals = 0
+                for val in possible:
+                    if not np.isnan(val):
+                        val_str = str(float(val))
+                        if '.' in val_str:
+                            decimals = len(val_str.split('.')[1].rstrip('0'))
+                            max_decimals = max(max_decimals, decimals)
+                design[key] = design[key].round(max_decimals)
+
+        # Check if the parameter specification is a list (custom levels)
+        elif isinstance(param_spec, (list, tuple)) and not (len(param_spec) == 3 and not isinstance(param_spec[0], (list, tuple))):
+            # Custom levels - determine appropriate rounding from the values
+            possible = np.array(param_spec)
+            # Check if all values are integers
+            if np.all(np.equal(np.mod(possible, 1), 0)):
+                design[key] = design[key].astype(int)
+            else:
+                # Find the maximum number of decimal places in the custom levels
+                max_decimals = 0
+                for val in possible:
+                    if not np.isnan(val):
+                        val_str = str(float(val))
+                        if '.' in val_str:
+                            decimals = len(val_str.split('.')[1].rstrip('0'))
+                            max_decimals = max(max_decimals, decimals)
+                design[key] = design[key].round(max_decimals)
         else:
-            design[key] = design[key].round(5)
+            # Traditional (low, high, step) format
+            step = param_spec[2]
+            if step == "geometric":
+                design[key] = design[key].round(3)
+            elif step == "logarithmic":
+                design[key] = design[key].round(5)
+            elif isinstance(step, (int, float)):
+                decimals = max(0, -int(np.floor(np.log10(step)))) if step < 1 else 0
+                design[key] = design[key].round(decimals)
+                if isinstance(step, int) or (isinstance(step, float) and step.is_integer()):
+                    design[key] = design[key].astype(int)
+            else:
+                design[key] = design[key].round(5)

     # Round subparameters like pH
     for subparam_key in subparam_mapping.values():
@@ -600,8 +700,14 @@ def calculate_mixed_correlation_matrix(df, categorical_vars=None):

 # --- Design Quality Metrics ---

-def calculate_d_optimality(design, continuous_params_keys, pH_key):
-    continuous_keys = continuous_params_keys + [pH_key]
+def calculate_d_optimality(design, continuous_params_keys, subparam_keys):
+    """Calculate D-optimality including all continuous parameters and subparameters."""
+    if subparam_keys is None:
+        subparam_keys = []
+    elif isinstance(subparam_keys, str):
+        subparam_keys = [subparam_keys]
+
+    continuous_keys = continuous_params_keys + subparam_keys
     X = design[continuous_keys].values
     X_std = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
     X_model = np.column_stack((np.ones(X_std.shape[0]), X_std))
@@ -609,8 +715,14 @@
     return linalg.det(XtX)


-def calculate_a_optimality(design, continuous_params_keys, pH_key):
-    continuous_keys = continuous_params_keys + [pH_key]
+def calculate_a_optimality(design, continuous_params_keys, subparam_keys):
+    """Calculate A-optimality including all continuous parameters and subparameters."""
+    if subparam_keys is None:
+        subparam_keys = []
+    elif isinstance(subparam_keys, str):
+        subparam_keys = [subparam_keys]
+
+    continuous_keys = continuous_params_keys + subparam_keys
     X = design[continuous_keys].values
     X_std = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
     X_model = np.column_stack((np.ones(X_std.shape[0]), X_std))
@@ -623,8 +735,14 @@
     return np.inf


-def calculate_condition_number(design, continuous_params_keys, pH_key):
-    continuous_keys = continuous_params_keys + [pH_key]
+def calculate_condition_number(design, continuous_params_keys, subparam_keys):
+    """Calculate condition number including all continuous parameters and subparameters."""
+    if subparam_keys is None:
+        subparam_keys = []
+    elif isinstance(subparam_keys, str):
+        subparam_keys = [subparam_keys]
+
+    continuous_keys = continuous_params_keys + subparam_keys
     X = design[continuous_keys].values
     X_std = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
     X_model = np.column_stack((np.ones(X_std.shape[0]), X_std))
@@ -632,8 +750,14 @@
     return np.linalg.cond(XtX)


-def calculate_pairwise_distance_uniformity(design, continuous_params_keys, pH_key):
-    continuous_keys = continuous_params_keys + [pH_key]
+def calculate_pairwise_distance_uniformity(design, continuous_params_keys, subparam_keys):
+    """Calculate pairwise distance uniformity including all continuous parameters and subparameters."""
+    if subparam_keys is None:
+        subparam_keys = []
+    elif isinstance(subparam_keys, str):
+        subparam_keys = [subparam_keys]
+
+    continuous_keys = continuous_params_keys + subparam_keys
     X = design[continuous_keys].values
     X_std = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

@@ -644,8 +768,14 @@
     return std_dist / mean_dist if mean_dist != 0 else np.inf


-def calculate_max_continuous_correlation(design, continuous_params_keys, pH_key):
-    continuous_keys = continuous_params_keys + [pH_key]
+def calculate_max_continuous_correlation(design, continuous_params_keys, subparam_keys):
+    """Calculate max continuous correlation including all continuous parameters and subparameters."""
+    if subparam_keys is None:
+        subparam_keys = []
+    elif isinstance(subparam_keys, str):
+        subparam_keys = [subparam_keys]
+
+    continuous_keys = continuous_params_keys + subparam_keys
     corr_matrix = design[continuous_keys].corr().abs()
     np.fill_diagonal(corr_matrix.values, 0)
     return corr_matrix.values.max()
@@ -659,12 +789,50 @@ def calculate_max_categorical_correlation(design, categorical_keys):
     return corr_matrix.abs().values.max()


+def calculate_max_mixed_correlation(design, continuous_keys, categorical_keys, subparam_mapping):
+    """
+    Calculate the maximum correlation between categorical and continuous/subparameter variables.
+    This captures mixed correlations that are neither purely continuous nor purely categorical.
+    Excludes parent-subparameter correlations (e.g., buffer ↔ pH) which are structural.
+
+    :param design: The design DataFrame
+    :param continuous_keys: List of continuous parameter names
+    :param categorical_keys: List of categorical parameter names
+    :param subparam_mapping: Dictionary mapping categorical variables to their subparameters
+    :return: Maximum absolute mixed correlation value
+    """
+    # Get all subparameter columns
+    subparam_keys = list(subparam_mapping.values()) if subparam_mapping else []
+
+    # Get all continuous columns (parameters + subparameters)
+    all_continuous = continuous_keys + subparam_keys
+
+    # Calculate full correlation matrix
+    full_corr = calculate_mixed_correlation_matrix(design, categorical_vars=categorical_keys)
+
+    # Extract mixed correlations: categorical ↔ continuous/subparameter
+    # Exclude parent-subparameter relationships (those are structural by design)
+    max_mixed = 0.0
+    for cat in categorical_keys:
+        for cont in all_continuous:
+            # Skip if this is a parent-subparameter relationship
+            if subparam_mapping and cat in subparam_mapping and subparam_mapping[cat] == cont:
+                continue  # Skip structural parent-subparameter correlation
+
+            if cat in full_corr.index and cont in full_corr.columns:
+                corr_val = abs(full_corr.loc[cat, cont])
+                max_mixed = max(max_mixed, corr_val)
+
+    return max_mixed
+
+
 # --- Dimensionality Reduction Plots ---


 def plot_pca(design, continuous_params_keys, subparam_mapping, hue=None):
-    pH_key = list(subparam_mapping.values())[0]
-    continuous_keys = continuous_params_keys + [pH_key]
+    """Plot PCA including all continuous parameters and subparameters."""
+    subparam_keys = list(subparam_mapping.values()) if subparam_mapping else []
+    continuous_keys = continuous_params_keys + subparam_keys
     X = design[continuous_keys].values
     X_std = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
     pca = PCA(n_components=2)
@@ -689,8 +857,9 @@
 def plot_mds(
     design, continuous_params_keys, subparam_mapping, hue=None, metric="euclidean"
 ):
-    pH_key = list(subparam_mapping.values())[0]
-    continuous_keys = continuous_params_keys + [pH_key]
+    """Plot MDS including all continuous parameters and subparameters."""
+    subparam_keys = list(subparam_mapping.values()) if subparam_mapping else []
+    continuous_keys = continuous_params_keys + subparam_keys
     X = design[continuous_keys].values
     X_std = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

@@ -722,9 +891,9 @@ def plot_umap(
     min_dist=0.1,
     metric="euclidean",
 ):
-
-    categorical_keys = list(subparam_mapping.values())[0]
-    continuous_keys = continuous_params_keys + [categorical_keys]
+    """Plot UMAP including all continuous parameters and subparameters."""
+    subparam_keys = list(subparam_mapping.values()) if subparam_mapping else []
+    continuous_keys = continuous_params_keys + subparam_keys
     X = design[continuous_keys].values
     X_std = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

@@ -780,27 +949,31 @@ def evaluate_design(
     :param design: The design DataFrame to evaluate.
     :param continuous_keys: List of continuous parameter names.
     :param categorical_keys: List of categorical parameter names.
-    :param subparam_mapping: Dictionary mapping categorical variable to its subparameter (e.g., {'buffer_type': 'pH'}).
+    :param subparam_mapping: Dictionary mapping each categorical variable to its subparameter (e.g., {'buffer': 'pH', 'sugar': '[Sugar] (%)'}).
     :param metrics_to_optimize: List of metric names to compute.
     :return: Dictionary of computed metrics.
     """
-    subparam_key = list(subparam_mapping.values())[0] if subparam_mapping else None
+    # Get all subparameter keys from the mapping
+    subparam_keys = list(subparam_mapping.values()) if subparam_mapping else []

     metrics = {
-        "D-optimality": calculate_d_optimality(design, continuous_keys, subparam_key),
-        "A-optimality": calculate_a_optimality(design, continuous_keys, subparam_key),
+        "D-optimality": calculate_d_optimality(design, continuous_keys, subparam_keys),
+        "A-optimality": calculate_a_optimality(design, continuous_keys, subparam_keys),
         "Condition Number": calculate_condition_number(
-            design, continuous_keys, subparam_key
+            design, continuous_keys, subparam_keys
         ),
         "Pairwise Distance CV": calculate_pairwise_distance_uniformity(
-            design, continuous_keys, subparam_key
+            design, continuous_keys, subparam_keys
         ),
         "Max Continuous Corr": calculate_max_continuous_correlation(
-            design, continuous_keys, subparam_key
+            design, continuous_keys, subparam_keys
         ),
         "Max Categorical Corr": calculate_max_categorical_correlation(
             design, categorical_keys
         ),
+        "Max Mixed Corr": calculate_max_mixed_correlation(
+            design, continuous_keys, categorical_keys, subparam_mapping
+        ),
     }

     return {k: v for k, v in metrics.items() if k in metrics_to_optimize}
@@ -859,6 +1032,7 @@ def find_best_design_parallel(
         "Pairwise Distance CV",
         "Max Continuous Corr",
         "Max Categorical Corr",
+        "Max Mixed Corr",
     ]
     if maximize_metrics is None:
         maximize_metrics = [True] + [False] * (len(metrics_to_optimize) - 1)
@@ -914,24 +1088,28 @@
 def plot_design_quality_evolution(metrics_df):
     metrics_df = metrics_df.sort_values("seed")

-    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
-    metrics = [
-        "D-optimality",
-        "A-optimality",
-        "Pairwise Distance CV",
-        "Max Continuous Corr",
-        "Max Categorical Corr",
-        "score",
-    ]
+    metrics = [c for c in metrics_df.columns if c != "seed"]  # plot every metric column; seed is the x-axis
+    n_metrics = len(metrics)
+
+    # Calculate grid dimensions
+    n_cols = 3
+    n_rows = (n_metrics + n_cols - 1) // n_cols
+
+    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows))
+    axes = axes.flatten()  # n_cols > 1, so subplots always returns an array of Axes

     for i, metric in enumerate(metrics):
-        ax = axes[i // 3, i % 3]
+        ax = axes[i]
         ax.bar(metrics_df["seed"].astype(str), metrics_df[metric])
         ax.set_title(f"{metric} vs Seed")
         ax.set_xlabel("Seed")
         ax.set_ylabel(metric)
         ax.tick_params(axis='x', rotation=45)
         ax.grid(axis="y")
+
+    # Hide unused subplots
+    for i in range(n_metrics, len(axes)):
+        axes[i].set_visible(False)

     plt.tight_layout()
     plt.show()
@@ -1055,6 +1233,7 @@ def evaluate_candidate(
        "Pairwise Distance CV": calculate_pairwise_distance_uniformity(combined_design, continuous_keys, subparam_key),
        "Max Continuous Corr": calculate_max_continuous_correlation(combined_design, continuous_keys, subparam_key),
        "Max Categorical Corr": calculate_max_categorical_correlation(combined_design, categorical_keys),
+       "Max Mixed Corr": calculate_max_mixed_correlation(combined_design, continuous_keys, categorical_keys, subparam_mapping),
     }

     return {
@@ -1087,6 +1266,7 @@ def extend_design(
         "Pairwise Distance CV",
         "Max Continuous Corr",
         "Max Categorical Corr",
+        "Max Mixed Corr",
     ]
     if maximize_metrics is None:
         maximize_metrics = [True] + [False] * (len(metrics_to_optimize) - 1)
diff --git a/obsidian/experiment/sampling.py b/obsidian/experiment/sampling.py
new file mode 100644
index 0000000..be4a48a
--- /dev/null
+++ b/obsidian/experiment/sampling.py
@@ -0,0 +1,144 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+
+def generate_weights(df, n, bias, plot_weights=False, enforce=False):
+    """
+    Generates a Pandas Series of weights for each datum given a particular bias.
+
+    df: DataFrame of candidates
+    n: size of the design to pick
+    bias: dictionary of biases in the format: {"column": [lower_bound, upper_bound, relative_weight]}
+        - Weight >1 increases sampling probability for in-range rows.
+        - Weight <1 decreases it.
+        - Weight = 0 excludes those rows entirely.
+    plot_weights: boolean, whether to plot the distribution of weights, default False
+    enforce: boolean, whether to restrict sampling to in-range rows, default False
+
+    Returns: Pandas Series of normalized row weights.
+    """
+    weights = pd.Series(1.0, index=df.index)
+    for col, params in bias.items():
+        lower, upper = params[0], params[1]
+        weight = params[2] if len(params) > 2 else 1.0
+        mask = df[col].between(lower, upper, inclusive="both")
+        if enforce:
+            weights *= mask.astype(float) * weight
+        else:
+            weights *= mask.astype(float) * weight + (~mask).astype(float) * 1.0
+    if enforce:
+        if (weights > 0).sum() < n:
+            raise ValueError(f"Not enough rows ({(weights > 0).sum()}) satisfy all enforce conditions for n={n}.")
+
+    weights = weights / weights.sum()
+
+    print("Weights min:", weights.min(), "max:", weights.max())
+
+    if plot_weights:
+        plt.figure(figsize=(8, 4))
+        plt.hist(weights, bins=50)
+        plt.title("Distribution of Sampling Weights")
+        plt.xlabel("Weight")
+        plt.ylabel("Count")
+        plt.show()
+
+    return weights
+
+
+def sample_with_bias(df, n, replace=False, seed=None, bias=None, enforce=False, plot_weights=False):
+    """
+    Returns a random Pandas DataFrame sample of data points from a population, with or without bias.
+
+    df: DataFrame of candidates
+    n: int, size of the design to pick
+    replace: boolean, allow or disallow sampling the same row more than once, default False
+    bias: dictionary of biases in the format: {"column": [lower_bound, upper_bound, relative_weight]}, default None
+        - Weight >1 increases sampling probability for in-range rows.
+        - Weight <1 decreases it.
+        - Weight = 0 excludes those rows entirely.
+    enforce: boolean, whether to restrict sampling to in-range rows, default False
+    plot_weights: boolean, whether to plot the distribution of weights, default False
+
+    Returns: Pandas DataFrame of sampled data points.
+    """
+    if bias:
+        w = generate_weights(df, n, bias, plot_weights, enforce)
+        return df.sample(n=n, replace=replace, random_state=seed, weights=w)
+    else:
+        return df.sample(n=n, replace=replace, random_state=seed)
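+
+
+# Illustrative example (a sketch, not part of the module API): with enforce=True,
+# out-of-range rows get weight 0 and can never be drawn.
+#
+#     toy = pd.DataFrame({"x": [0.5, 1.5, 2.5]})
+#     w = generate_weights(toy, n=1, bias={"x": [0, 1, 2.0]}, enforce=True)
+#     # raw weights [2.0, 0.0, 0.0] are normalized to [1.0, 0.0, 0.0]
+#     picked = sample_with_bias(toy, n=1, seed=0, bias={"x": [0, 1, 2.0]}, enforce=True)
+#     # always returns the x = 0.5 row; with enforce=False the other rows
+#     # would keep weight 1.0 and remain eligible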
+
+
+def _space_filling_score(Z, metric="hybrid"):
+    """
+    Z: (k, d) standardized features of the candidate sample
+    metric:
+        - "maximin": maximize the minimum pairwise distance
+        - "mean_nn": maximize the mean nearest-neighbor distance
+        - "hybrid": 0.6*maximin + 0.4*mean_nn (more stable in practice)
+    """
+    k = Z.shape[0]
+    D = np.sqrt(((Z[:, None, :] - Z[None, :, :])**2).sum(-1))
+    np.fill_diagonal(D, np.inf)
+    d_min = D[np.triu_indices(k, 1)].min()
+    d_mnn = D.min(axis=1).mean()
+    if metric == "maximin":
+        return d_min
+    if metric == "mean_nn":
+        return d_mnn
+    if metric == "hybrid":
+        return 0.6 * d_min + 0.4 * d_mnn
+    raise ValueError("Unknown metric")
+
+
+def best_sample(df, k, feature_cols, *, n_trials=500, bias=None, plot_weights=False, enforce=False,
+                random_state=None, standardize=True, dropna=True, metric="hybrid"):
+    """
+    Repeats random sampling n_trials times and returns the most space-filling sample.
+
+    df: DataFrame of candidates
+    k: size of the design to pick
+    feature_cols: columns that define “space” (numeric; one-hot encode categoricals if needed)
+    bias: optional dictionary in the format {"column": [lower_bound, upper_bound, relative_weight]},
+          used to weight the row draws (e.g., make pH in [6, 6.5] more likely)
+    """
+    base = df[feature_cols]
+    idx = base.dropna().index if dropna else base.index
+    dfv = df.loc[idx]
+    Xfull = base.loc[idx].to_numpy(dtype=float)
+
+    if bias:
+        weights = generate_weights(df, k, bias, plot_weights, enforce)
+    else:
+        weights = None
+
+    # standardize once using the FULL population (not per-trial) for fair geometry
+    if standardize:
+        mu = Xfull.mean(axis=0)
+        sig = Xfull.std(axis=0)
+        sig[sig == 0] = 1.0
+        def toZ(X): return (X - mu) / sig
+    else:
+        def toZ(X): return X
+
+    # align the bias weights (a Series on df.index) to the filtered candidates
+    w = None
+    if weights is not None:
+        w = weights.reindex(dfv.index).fillna(0.0)
+
+    rng = np.random.default_rng(random_state)  # reproducible stream
+    best_df = None
+    best_score = -np.inf
+
+    for _ in range(n_trials):
+        cand = dfv.sample(n=k, replace=False, weights=w, random_state=rng)
+        Z = toZ(cand[feature_cols].to_numpy(dtype=float))
+        s = _space_filling_score(Z, metric=metric)
+        if s > best_score:
+            best_score = s
+            best_df = cand
+
+    return best_df, {"score": best_score, "metric": metric, "n_trials": n_trials}
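+
+
+if __name__ == "__main__":
+    # Minimal usage sketch (illustrative only, not part of the library API):
+    # build a toy candidate pool with hypothetical column names, bias the
+    # draw toward ionic_strength in [50, 60], and keep the most space-filling
+    # subset of 8 rows over 100 trials.
+    gen = np.random.default_rng(0)
+    pool = pd.DataFrame({
+        "reagent_conc": gen.uniform(0.1, 1.0, 200),
+        "ionic_strength": gen.uniform(10, 100, 200),
+    })
+    toy_bias = {"ionic_strength": [50, 60, 3.0]}
+    subset, info = best_sample(
+        pool, 8, feature_cols=["reagent_conc", "ionic_strength"],
+        n_trials=100, bias=toy_bias, random_state=0,
+    )
+    print(info)
+    print(subset.sort_index())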