Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion obsidian/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
"""obsidian: Automated experiment design and black-box optimization"""
__version__ = '0.8.6'

__version__ = "0.8.6"

# Import key objects
from obsidian.campaign import Campaign
from obsidian.optimizer import BayesianOptimizer
from obsidian.surrogates import SurrogateBoTorch
from obsidian.parameters import ParamSpace, Target
from obsidian.rng import get_global_rng, reset_global_rng, USE_OLD_RNG_CONTROL, is_global_rng_initialized

# Ensure that other subpackages are imported properly for documentation
from obsidian.objectives import Objective
Expand All @@ -14,3 +16,4 @@
import obsidian.exceptions as exceptions
import obsidian.acquisition as acquisition
import obsidian.plotting as plotting
import obsidian.rng as rng
29 changes: 22 additions & 7 deletions obsidian/campaign/campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from obsidian.constraints import Output_Constraint, const_class_dict
from obsidian.exceptions import IncompatibleObjectiveError
from obsidian.utils import tensordict_to_dict
from obsidian.rng import GlobalRNG
import obsidian

import pandas as pd
Expand Down Expand Up @@ -48,15 +49,29 @@ def __init__(self,
optimizer: Optimizer | None = None,
designer: ExpDesigner | None = None,
objective: Objective | None = None,
seed: int | None = None):
seed: int | None = None,
global_rng: GlobalRNG | None = None
):

self.set_X_space(X_space)
self.data = pd.DataFrame()

optimizer = BayesianOptimizer(X_space, seed=seed) if optimizer is None else optimizer
if obsidian.USE_OLD_RNG_CONTROL:
optimizer_seed = seed
designer_seed = seed
torch_rng = None
else:
if not global_rng:
global_rng = obsidian.get_global_rng(seed)
# optimizer_seed = global_rng.np_rng
# designer_seed = global_rng.np_rng
optimizer_seed = seed
designer_seed = seed
torch_rng = global_rng.torch_rng
self.global_rng = global_rng
optimizer = BayesianOptimizer(X_space, seed=optimizer_seed) if optimizer is None else optimizer
self.set_optimizer(optimizer)

designer = ExpDesigner(X_space, seed=seed) if designer is None else designer
designer = ExpDesigner(X_space, seed=designer_seed, torch_rng=torch_rng) if designer is None else designer
self.set_designer(designer)

self.set_target(target)
Expand Down Expand Up @@ -284,8 +299,8 @@ def initialize(self, **design_kwargs):
Maps ExpDesigner.initialize method
"""
return self.designer.initialize(**design_kwargs)
def fit(self):

def fit(self, fit_options: dict = {}):
"""
Maps Optimizer.fit method

Expand All @@ -296,7 +311,7 @@ def fit(self):
if self.m_exp <= 0:
raise ValueError('Must register data before fitting')

self.optimizer.fit(self.data, target=self.target)
self.optimizer.fit(self.data, target=self.target, fit_options=fit_options)

def suggest(self, **optim_kwargs):
"""
Expand Down
32 changes: 26 additions & 6 deletions obsidian/campaign/explainer.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Explainer Class: Surrogate Model Interpretation Methods"""

import obsidian
from obsidian.parameters import Param_Continuous, ParamSpace
from obsidian.optimizer import Optimizer
from obsidian.exceptions import UnfitError
from obsidian.rng import get_new_seed, with_tmp_seed

import shap
from shap import KernelExplainer, Explanation
Expand All @@ -13,6 +15,7 @@
import torch
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from contextlib import nullcontext


class Explainer():
Expand Down Expand Up @@ -102,15 +105,32 @@ def pred_func(X):
y_pred = self.optimizer.predict(X, return_f_inv=True)
mu = y_pred[y_name+' (pred)'].values
return mu



self.shap['pred_func'] = pred_func
self.shap['explainer'] = KernelExplainer(pred_func, X_ref)
self.shap['X_sample'] = self.X_space.unit_demap(
pd.DataFrame(torch.rand(size=(n, self.X_space.n_dim)).numpy(),
columns=X_ref.columns)
if obsidian.USE_OLD_RNG_CONTROL:
torch_rng = None
# dummy context manager
ctx_mgr = nullcontext()
else:
torch_rng = getattr(self.optimizer, "torch_rng", None)
# temporarily override global random states
tmp_seed: int = get_new_seed(1, torch_rng) # type: ignore
ctx_mgr = with_tmp_seed(tmp_seed)

self.shap["X_sample"] = self.X_space.unit_demap(
pd.DataFrame(
torch.rand(size=(n, self.X_space.n_dim), generator=torch_rng).numpy(),
columns=X_ref.columns,
)
)
with ctx_mgr:
self.shap["values"] = self.shap["explainer"].shap_values(
self.shap["X_sample"],
seed=seed,
l1_reg="aic",
)
self.shap['values'] = self.shap['explainer'].shap_values(self.shap['X_sample'],
seed=seed, l1_reg='aic')
self.shap['explanation'] = Explanation(self.shap['values'], feature_names=X_ref.columns)

return
Expand Down
15 changes: 11 additions & 4 deletions obsidian/experiment/benchmark/optithon.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
import pandas as pd
from scipy.special import gamma
from numpy.random import Generator


def Vm_func(X):
Expand Down Expand Up @@ -61,19 +62,21 @@ def response_2(X):
return E_curve


def OT_simulator(X, addNoise=False):
def OT_simulator(X, addNoise=False, generator=None):
if isinstance(X, pd.DataFrame):
X = X.to_numpy()
if isinstance(X, np.ndarray) and X.ndim == 1:
X = X[np.newaxis, :]

if X.shape[1] != 6:
if not addNoise:
if X.shape[1] != 5 and X.shape[1] != 6:
raise ValueError(f'Input to simulate must have 5 or 6 columns for no noise case, not {X.shape[1]}')
elif X.shape[1] != 6:
raise ValueError(f'Input to simulate must have 6 columns, not {X.shape[1]}')

# Break down the data into 3 parts for separate functions
X123 = X[:, 0:3]
X45 = X[:, 3:5]
X6 = X[:, 5]

# First three cols represented in an enzyme kinetics problem
y1 = response_1(X123)
Expand All @@ -87,8 +90,12 @@ def OT_simulator(X, addNoise=False):
# Final column has a mean 0 effect, but determines the scale of noise
# Note: The overall problem is slightly heteroskedastic
if addNoise:
X6 = X[:, 5]
sd = 0.01+0.02*X6
rel_error = np.random.normal(loc=1, scale=sd, size=y.shape[0])
if isinstance(generator, Generator):
rel_error = generator.normal(loc=1, scale=sd, size=y.shape[0])
else:
rel_error = np.random.normal(loc=1, scale=sd, size=y.shape[0])
y *= rel_error

return y
14 changes: 11 additions & 3 deletions obsidian/experiment/design.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from obsidian.exceptions import UnsupportedError

from botorch.utils.sampling import draw_sobol_samples
import numpy as np
from numpy.typing import ArrayLike
from scipy.stats import qmc

Expand All @@ -29,12 +30,18 @@ class ExpDesigner:

def __init__(self,
             X_space: ParamSpace,
             seed: np.random.Generator | int | None = None,
             torch_rng: torch.Generator | None = None
             ):
    """
    Initialize the experiment designer.

    Args:
        X_space (ParamSpace): The parameter space over which designs are
            generated.
        seed (np.random.Generator | int | None, optional): Randomization seed
            or numpy generator, forwarded to seed-based sampling methods.
            Default is ``None``.
        torch_rng (torch.Generator | None, optional): Torch generator used by
            torch-based sampling (e.g. the 'Random' method). Default is
            ``None``, which lets torch use its global RNG state.

    Raises:
        TypeError: If ``X_space`` is not an obsidian ParamSpace, or if
            ``torch_rng`` is provided but is not a ``torch.Generator``.
    """
    if not isinstance(X_space, ParamSpace):
        raise TypeError('X_space must be an obsidian ParamSpace object')

    self.X_space = X_space
    self.seed = seed

    # Validate explicitly against None (not truthiness) so a wrong-typed
    # argument is always rejected rather than silently ignored.
    if torch_rng is not None and not isinstance(torch_rng, torch.Generator):
        raise TypeError('torch_rng must be a torch.Generator object')
    # Always define the attribute: initialize() reads self.torch_rng
    # unconditionally (generator=self.torch_rng), so leaving it unset on the
    # default path would raise AttributeError later.
    self.torch_rng = torch_rng

def __repr__(self):
"""String representation of object"""
Expand Down Expand Up @@ -69,9 +76,10 @@ def initialize(self,
seed = self.seed

method_dict = {
# TODO: botorch is moving to SPEC-0007, `seed` will be phased out in favor of `rng`
'LHS': lambda d, m: torch.tensor(
qmc.LatinHypercube(d=d, scramble=False, seed=seed, strength=1, optimization='random-cd').random(n=m)),
'Random': lambda d, m: torch.rand(size=(m, d)),
'Random': lambda d, m: torch.rand(size=(m, d), generator=self.torch_rng),
'Sobol': lambda d, m: draw_sobol_samples(
bounds=torch.tensor([0.0, 1.0]).reshape(2, 1).repeat(1, d), n=m, q=1).squeeze(1),
'Custom': lambda d, m: torch.tensor(sample_custom),
Expand Down Expand Up @@ -109,7 +117,7 @@ def initialize(self,
print(f'The number of initialization experiments ({m}) exceeds the required \
number for the requested design ({m_required}). Filling with randomized experiments.')
excess = m - m_required
sample_add = torch.rand(size=(excess, d))
sample_add = torch.rand(size=(excess, d), generator=self.torch_rng)
sample = torch.vstack((sample, sample_add))

sample = pd.DataFrame(sample.numpy(), columns=self.X_space.X_names)
Expand Down
38 changes: 32 additions & 6 deletions obsidian/experiment/simulator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
"""Simulate virtual experimental data"""

from types import ModuleType, MethodType
from obsidian.parameters import ParamSpace

from typing import Callable
import pandas as pd
import numpy as np
import warnings

from numpy.random import Generator
import obsidian

class Simulator:
"""
Expand Down Expand Up @@ -34,6 +37,8 @@ def __init__(self,
response_function: Callable,
name: str | list[str] = 'Response',
eps: float | list[float] = 0.0,
rng: Generator | int | None = None,
error_function: Callable | None = None,
**kwargs):

if not callable(response_function):
Expand All @@ -45,14 +50,32 @@ def __init__(self,
self.response_function = response_function
self.name = name
self.eps = eps if isinstance(eps, list) else [eps]
if isinstance(rng, Generator):
# use provided numpy generator
self.rng: Generator | ModuleType = rng
else:
self._seed = rng
if obsidian.USE_OLD_RNG_CONTROL:
# no random state control here
self.rng = np.random
else:
# use new global RNG control
# note that if the global RNG is already initialized
# and the seed (rng) is different, a ValueError will be raised
global_rng = obsidian.get_global_rng(rng)
self.rng = global_rng.np_rng
if not error_function:
self.error_function = self._default_gaussian_error
else:
self.error_function = MethodType(error_function, self)
self.kwargs = kwargs

def __repr__(self):
"""String representation of object"""
return f" obsidian Simulator(response_function={self.response_function.__name__}, eps={self.eps})"

def simulate(self,
X_prop: pd.DataFrame) -> np.ndarray:
X_prop: pd.DataFrame) -> pd.DataFrame:
"""
Generates a response to a set of experiments.

Expand All @@ -70,15 +93,14 @@ def simulate(self,

y_sim = self.response_function(X)

# Apply error
# Expand length of eps to match number of outputs
if len(self.eps) == 1:
self.eps *= y_sim.ndim
if y_sim.ndim == 1:
y_sim = y_sim.reshape(-1, 1)
for i in range(y_sim.shape[1]):
rel_error = np.random.normal(loc=1, scale=self.eps[i], size=y_sim.shape[0])
y_sim[:, i] *= rel_error

# Apply error
y_sim = self.error_function(X, y_sim)

# Handle naming conventions
y_dims = y_sim.shape[1]
Expand All @@ -95,3 +117,7 @@ def simulate(self,
df_sim = pd.DataFrame(y_sim, columns=self.name)

return df_sim

def _default_gaussian_error(self, X, y_sim: np.ndarray) -> np.ndarray:
rel_error = 1 + self.eps * self.rng.normal(size=y_sim.size).reshape(y_sim.shape)
return y_sim * rel_error
19 changes: 14 additions & 5 deletions obsidian/experiment/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
from obsidian.exceptions import UnsupportedError

import numpy as np
from numpy.random import Generator
from itertools import product
from types import ModuleType


def factorial_DOE(d: int,
n_CP: int = 3,
shuffle: bool = True,
seed: int | None = None,
seed: Generator | int | None = None,
full: bool = False):
"""
Creates a statistically designed factorial experiment (DOE).
Expand All @@ -22,7 +24,7 @@ def factorial_DOE(d: int,
uncertainty and curvature. Default is ``3``.
shuffle (bool, optional): Whether or not to shuffle the design or leave them in the default run
order. Default is ``True``.
seed (int, optional): Randomization seed. Default is ``None``.
seed (Generator | int | None, optional): Randomization seed or generator for numpy. Default is ``None``.
full (bool, optional): Whether or not to run the full DOE. Default is ``False``, which
will lead to an efficient Res4+ design.

Expand Down Expand Up @@ -72,10 +74,17 @@ def factorial_DOE(d: int,

# Add centerpoints then shuffle
X = np.vstack((X, CP))
if seed is not None:
np.random.seed(seed)
if isinstance(seed, Generator):
# if it is actually a generator
rng: Generator | ModuleType = seed
else:
# with or without a seed, we use np.random for now
# in the future, we should instantiate a new random generator from entropy
rng = np.random
if seed is not None:
np.random.seed(seed)
if shuffle:
np.random.shuffle(X)
rng.shuffle(X)
# Rescale from (-1,1) to (0,0.999999)
X = (X+1)/2 - 1e-6

Expand Down
Loading
Loading