Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions demo/APO Sample Existing Demo.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "75f106cc",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../')\n",
"\n",
"print(sys.path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "117a382f",
"metadata": {},
"outputs": [],
"source": [
"import obsidian\n",
"import pandas as pd\n",
"import numpy as np\n",
"print(f'obsidian version: ' + obsidian.__version__)\n",
"\n",
"from obsidian.experiment import AdvExpDesigner\n",
"from obsidian.experiment.sampling import sample_with_bias, best_sample"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1cd0a6b0",
"metadata": {},
"outputs": [],
"source": [
"#generate random data for this demo\n",
"np.random.seed(42)\n",
"\n",
"n = 1000\n",
"demo_data = pd.DataFrame({\n",
" 'reagent_conc': np.round(np.random.uniform(0.1, 1.0, n), 2),\n",
" 'ionic_strength': np.round(np.random.uniform(10, 100, n), 2),\n",
" 'surfactant_conc': np.round(np.random.uniform(0.01, 0.2, n), 3),\n",
" 'compound_A': np.round(np.random.uniform(0, 50, n), 2),\n",
" 'compound_B': np.round(np.random.uniform(0, 50, n), 2),\n",
" 'sugar': np.random.choice(['glucose', 'fructose', 'sucrose'], n),\n",
" 'surfactant': np.random.choice(['SDS', 'Tween20', 'TritonX'], n),\n",
" 'buffer': np.random.choice(['PBS', 'Tris', 'HEPES'], n),\n",
" 'pH': np.round(np.random.uniform(5.5, 8.5, n), 2)\n",
"})\n",
"\n",
"demo_data.index.name = 'FormulationID'\n",
"demo_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "57ed0226",
"metadata": {},
"outputs": [],
"source": [
"#Initialize existing experimental data as an AdvExpDesigner object\n",
"designer = AdvExpDesigner(design_df=demo_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9feae24b",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"You can sample an existing dataset with or without bias: \n",
"Bias dictionary format : {\"column\": [lower_bound, upper_bound, relative_weight]}\n",
"\n",
"- Weight >1 increases sampling probability for in-range rows.\n",
"- Weight <1 decreases it.\n",
"- Weight = 0 excludes those rows entirely.\n",
"\"\"\"\n",
"\n",
"bias = {\n",
" \"ionic_strength\": [50, 60, 3.0], \n",
"}\n",
"\n",
"seed = np.random.randint(0,1000)\n",
"print(f\"Random seed for reproducibility: {seed}\")\n",
"\n",
"#We can easily create a random sample of n samples with weights using built in Pandas functions\n",
"#enforce = True allows you to force the boundary to be true ; resultant sample may not be space-filling.\n",
"sample = sample_with_bias(designer.design, n=1000, replace=False, seed=seed, bias=bias, plot_weights=True, enforce=False)\n",
"\n",
"sample"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab457ed8",
"metadata": {},
"outputs": [],
"source": [
"#One-hot encode your categorical columns for easy handling in determining Euclidean distance\n",
"df_encoded = pd.get_dummies(designer.design, columns=[\"sugar\", \"surfactant\", \"buffer\"], dtype=int) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a46bcd0",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"perform random sampling n_trial times, select the best one via criteria metric:\n",
"metric:\n",
" - \"maximin\": maximize the minimum pairwise Euclidean distance\n",
" - \"mean_nn\": maximize the mean nearest-neighbor Euclidean distance\n",
" - \"hybrid\": 0.6*maximin + 0.4*mean_nn \n",
"\"\"\"\n",
"seed = np.random.randint(0,1000)\n",
"print(f\"Random seed for reproducibility: {seed}\")\n",
"\n",
"optimal_sample, info = best_sample(\n",
" df_encoded, 10, feature_cols=df_encoded.columns, n_trials=1000,\n",
" bias=bias, plot_weights=True, enforce=False, random_state=seed, metric=\"hybrid\"\n",
")\n",
"\n",
"print(info)\n",
"optimal_sample\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ea1bcd8",
"metadata": {},
"outputs": [],
"source": [
"#decode from one-hot encoding\n",
"normal_cols = list(optimal_sample.columns)[0:6]\n",
"encoded_cols = list(optimal_sample.columns)[6:]\n",
"decoded = pd.from_dummies(optimal_sample[encoded_cols],sep=\"_\")\n",
"optimal_design_decoded = pd.concat([optimal_sample[normal_cols], decoded], axis=1)\n",
"optimal_design_decoded"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2137e315",
"metadata": {},
"outputs": [],
"source": [
"print(designer.plot_histograms(optimal_design_decoded))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv (3.13.5)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
18 changes: 12 additions & 6 deletions demo/Advanced Experimental Design.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,16 @@
"# Define continuous parameters: key -> (low, high, step)\n",
"\n",
"continuous_params = {\n",
" 'temperature': (20, 80, 5), # Linear steps of 5 between 20 and 80\n",
" 'concentration': (0.1, 1.0, 0.1), # Linear steps of 0.1 between 0.1 and 1.0\n",
" 'pressure': (1, 16, 'geometric'), # Geometric steps doubling from 1 to 16 (1, 2, 4, 8, 16)\n",
" 'time': (10, 1000, 'logarithmic') # Logarithmic steps (powers of 10) between 10 and 1000\n",
" 'temperature': (20, 80, 5), # Linear steps of 5 between 20 and 80\n",
" 'concentration': (0.1, 1.0, 0.1), # Linear steps of 0.1 between 0.1 and 1.0\n",
" 'pressure': (1, 16, 'geometric'), # Geometric steps doubling from 1 to 16 (1, 2, 4, 8, 16)\n",
" 'time': (10, 1000, 'logarithmic'), # Logarithmic steps (powers of 10) between 10 and 1000\n",
" 'flow_rate': [0.5, 1.0, 2.0, 5.0, 10.0], # Custom discrete levels, equal biases\n",
" 'Reagent Concentration': {\n",
" 'levels': [1.0, 2.0, 3.0, 5.0, 10.0], # Custom levels with biased sampling\n",
" 'biases': [0.1, 0.2, 0.4, 0.2, 0.1] # Higher probability for middle values\n",
"}\n",
"\n",
"}\n",
"\n",
"# Define conditional categorical parameters with subparameters and frequencies: key -> {subkey: {'freq': frequency, 'subparams': ([values], [frequencies])}}\n",
Expand Down Expand Up @@ -170,7 +176,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "obsidian",
"display_name": ".venv (3.13.5)",
"language": "python",
"name": "python3"
},
Expand All @@ -184,7 +190,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
"version": "3.13.5"
}
},
"nbformat": 4,
Expand Down
Loading
Loading