MSDLLCpapers · fima5 · Aug 13, 2025 · Aug 14, 2025 · Aug 14, 2025 · Aug 26, 2025
diff --git a/demo/APO Sample Existing Demo.ipynb b/demo/APO Sample Existing Demo.ipynb
@@ -0,0 +1,184 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "75f106cc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "\n",
+    "# Put the parent directory first on the import path so packages that live\n",
+    "# there (presumably the local `obsidian` checkout) can be imported.\n",
+    "# The membership check keeps this cell idempotent: re-running it does not\n",
+    "# stack duplicate '../' entries on sys.path.\n",
+    "if '../' not in sys.path:\n",
+    "    sys.path.insert(0, '../')\n",
+    "\n",
+    "print(sys.path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "117a382f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import obsidian\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "print(f'obsidian version: {obsidian.__version__}')\n",
+    "\n",
+    "from obsidian.experiment import AdvExpDesigner\n",
+    "from obsidian.experiment.sampling import sample_with_bias, best_sample"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1cd0a6b0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Synthetic formulation table used throughout this demo (fixed seed so it is repeatable)\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "n = 1000\n",
+    "\n",
+    "\n",
+    "def _uniform(low, high, decimals):\n",
+    "    \"\"\"Draw n uniform values between low and high, rounded to `decimals` places.\"\"\"\n",
+    "    return np.round(np.random.uniform(low, high, n), decimals)\n",
+    "\n",
+    "\n",
+    "def _choice(options):\n",
+    "    \"\"\"Draw n labels at random from `options`.\"\"\"\n",
+    "    return np.random.choice(options, n)\n",
+    "\n",
+    "\n",
+    "# Dict values are evaluated top to bottom, so the draws consume the random\n",
+    "# stream in column order.\n",
+    "demo_data = pd.DataFrame({\n",
+    "    'reagent_conc': _uniform(0.1, 1.0, 2),\n",
+    "    'ionic_strength': _uniform(10, 100, 2),\n",
+    "    'surfactant_conc': _uniform(0.01, 0.2, 3),\n",
+    "    'compound_A': _uniform(0, 50, 2),\n",
+    "    'compound_B': _uniform(0, 50, 2),\n",
+    "    'sugar': _choice(['glucose', 'fructose', 'sucrose']),\n",
+    "    'surfactant': _choice(['SDS', 'Tween20', 'TritonX']),\n",
+    "    'buffer': _choice(['PBS', 'Tris', 'HEPES']),\n",
+    "    'pH': _uniform(5.5, 8.5, 2),\n",
+    "}).rename_axis('FormulationID')\n",
+    "\n",
+    "demo_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "57ed0226",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Wrap the existing experimental data in an AdvExpDesigner object\n",
+    "designer = AdvExpDesigner(\n",
+    "    design_df=demo_data,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9feae24b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You can sample an existing dataset with or without bias.\n",
+    "# Bias dictionary format: {\"column\": [lower_bound, upper_bound, relative_weight]}\n",
+    "#\n",
+    "# - Weight > 1 increases sampling probability for in-range rows.\n",
+    "# - Weight < 1 decreases it.\n",
+    "# - Weight = 0 excludes those rows entirely.\n",
+    "bias = {\n",
+    "    \"ionic_strength\": [50, 60, 3.0],\n",
+    "}\n",
+    "\n",
+    "seed = np.random.randint(0, 1000)\n",
+    "print(f\"Random seed for reproducibility: {seed}\")\n",
+    "\n",
+    "# Draw fewer rows than the dataset holds. The demo data has 1000 rows, so\n",
+    "# asking for 1000 rows with replace=False would return every row and the\n",
+    "# bias could not change which rows are selected.\n",
+    "n_samples = 100\n",
+    "\n",
+    "# Weighted random sample of n_samples rows (built on Pandas sampling).\n",
+    "# enforce=True forces the bias bounds to hold; the resulting sample may not be space-filling.\n",
+    "sample = sample_with_bias(\n",
+    "    designer.design, n=n_samples, replace=False, seed=seed,\n",
+    "    bias=bias, plot_weights=True, enforce=False\n",
+    ")\n",
+    "\n",
+    "sample"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ab457ed8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One-hot encode the categorical columns so they can take part in Euclidean distance calculations\n",
+    "categorical_cols = [\"sugar\", \"surfactant\", \"buffer\"]\n",
+    "df_encoded = pd.get_dummies(\n",
+    "    designer.design,\n",
+    "    columns=categorical_cols,\n",
+    "    dtype=int,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9a46bcd0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Perform random sampling n_trials times and keep the best draw according to `metric`:\n",
+    "#   - \"maximin\": maximize the minimum pairwise Euclidean distance\n",
+    "#   - \"mean_nn\": maximize the mean nearest-neighbor Euclidean distance\n",
+    "#   - \"hybrid\":  0.6*maximin + 0.4*mean_nn\n",
+    "seed = np.random.randint(0, 1000)\n",
+    "print(f\"Random seed for reproducibility: {seed}\")\n",
+    "\n",
+    "optimal_sample, info = best_sample(\n",
+    "    df_encoded,\n",
+    "    10,\n",
+    "    feature_cols=df_encoded.columns,\n",
+    "    n_trials=1000,\n",
+    "    bias=bias,\n",
+    "    plot_weights=True,\n",
+    "    enforce=False,\n",
+    "    random_state=seed,\n",
+    "    metric=\"hybrid\",\n",
+    ")\n",
+    "\n",
+    "print(info)\n",
+    "optimal_sample\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3ea1bcd8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Decode from one-hot encoding.\n",
+    "# Split the columns by name rather than by position: a column that already\n",
+    "# exists in the pre-encoding design was passed through unchanged, and every\n",
+    "# other column is a dummy produced by pd.get_dummies. This keeps working if\n",
+    "# numeric columns are added to or removed from the design.\n",
+    "original_cols = set(designer.design.columns)\n",
+    "normal_cols = [col for col in optimal_sample.columns if col in original_cols]\n",
+    "encoded_cols = [col for col in optimal_sample.columns if col not in original_cols]\n",
+    "\n",
+    "# sep=\"_\" splits each dummy name into its source column and category label\n",
+    "decoded = pd.from_dummies(optimal_sample[encoded_cols], sep=\"_\")\n",
+    "optimal_design_decoded = pd.concat([optimal_sample[normal_cols], decoded], axis=1)\n",
+    "optimal_design_decoded"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2137e315",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot histograms for the decoded optimal design, then print what the call returns\n",
+    "histograms = designer.plot_histograms(optimal_design_decoded)\n",
+    "print(histograms)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv (3.13.5)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/demo/Advanced Experimental Design.ipynb b/demo/Advanced Experimental Design.ipynb
@@ -33,10 +33,16 @@
     "# Define continuous parameters: key -> (low, high, step)\n",
     "\n",
     "continuous_params = {\n",
-    "    'temperature': (20, 80, 5),          # Linear steps of 5 between 20 and 80\n",
-    "    'concentration': (0.1, 1.0, 0.1),    # Linear steps of 0.1 between 0.1 and 1.0\n",
-    "    'pressure': (1, 16, 'geometric'),    # Geometric steps doubling from 1 to 16 (1, 2, 4, 8, 16)\n",
-    "    'time': (10, 1000, 'logarithmic')    # Logarithmic steps (powers of 10) between 10 and 1000\n",
+    "    'temperature': (20, 80, 5),                     # Linear steps of 5 between 20 and 80\n",
+    "    'concentration': (0.1, 1.0, 0.1),               # Linear steps of 0.1 between 0.1 and 1.0\n",
+    "    'pressure': (1, 16, 'geometric'),               # Geometric steps doubling from 1 to 16 (1, 2, 4, 8, 16)\n",
+    "    'time': (10, 1000, 'logarithmic'),              # Logarithmic steps (powers of 10) between 10 and 1000\n",
+    "    'flow_rate': [0.5, 1.0, 2.0, 5.0, 10.0],        # Custom discrete levels, equal biases\n",
+    "    'Reagent Concentration': {\n",
+    "        'levels': [1.0, 2.0, 3.0, 5.0, 10.0],       # Custom levels with biased sampling\n",
+    "        'biases': [0.1, 0.2, 0.4, 0.2, 0.1]         # Higher probability for middle values\n",
+    "    }\n",
+    "\n",
     "}\n",
     "\n",
     "# Define conditional categorical parameters with subparameters and frequencies: key -> {subkey: {'freq': frequency, 'subparams': ([values], [frequencies])}}\n",
@@ -170,7 +176,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "obsidian",
+   "display_name": ".venv (3.13.5)",
    "language": "python",
    "name": "python3"
   },
@@ -184,7 +190,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.14"
+   "version": "3.13.5"
   }
  },
  "nbformat": 4,