diff --git a/.github/workflows/continuous_delivery.yml b/.github/workflows/continuous_delivery.yml index 7c2bc7b..389c90f 100644 --- a/.github/workflows/continuous_delivery.yml +++ b/.github/workflows/continuous_delivery.yml @@ -25,8 +25,13 @@ jobs: - name: Install Poetry run: | - curl -sSL https://install.python-poetry.org | python3 - - export PATH="$HOME/.poetry/bin:$PATH" + if [[ "${{ matrix.python-version }}" == "3.8" ]]; then + curl -sSL https://install.python-poetry.org | python3 - --version 1.8.0 + export PATH="$HOME/.poetry/bin:$PATH" + else + curl -sSL https://install.python-poetry.org | python3 - + export PATH="$HOME/.poetry/bin:$PATH" + fi - name: Backup pyproject.toml run: cp pyproject.toml pyproject.toml.bak - name: Install project dependencies with Poetry diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index 6c3cdac..8311113 100644 --- a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -33,8 +33,13 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install Poetry run: | - curl -sSL https://install.python-poetry.org | python3 - - export PATH="$HOME/.poetry/bin:$PATH" + if [[ "${{ matrix.python-version }}" == "3.8" ]]; then + curl -sSL https://install.python-poetry.org | python3 - --version 1.8.0 + export PATH="$HOME/.poetry/bin:$PATH" + else + curl -sSL https://install.python-poetry.org | python3 - + export PATH="$HOME/.poetry/bin:$PATH" + fi - name: Install project dependencies with Poetry run: | poetry add pydantic@${{ matrix.pydantic-version }} diff --git a/.gitignore b/.gitignore index c206dc6..81e30ec 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,6 @@ MANIFEST # poetry poetry.lock + +# qodo gen +.qodo diff --git a/pybandits/base.py b/pybandits/base.py index 4cae4ad..145c6f1 100644 --- a/pybandits/base.py +++ b/pybandits/base.py @@ -21,7 +21,9 @@ # SOFTWARE. -from typing import Any, Dict, List, NewType, Tuple, Union +from typing import Any, Dict, List, Mapping, NewType, Optional, Tuple, Union + +from typing_extensions import Self from pybandits.pydantic_version_compatibility import ( PYDANTIC_VERSION_1, @@ -34,24 +36,52 @@ ) ActionId = NewType("ActionId", constr(min_length=1)) +QuantitativeActionId = Tuple[ActionId, Tuple[float, ...]] +UnifiedActionId = Union[ActionId, QuantitativeActionId] Float01 = NewType("Float_0_1", confloat(ge=0, le=1)) Probability = NewType("Probability", Float01) +ProbabilityWeight = Tuple[Probability, float] +MOProbability = List[Probability] +MOProbabilityWeight = List[ProbabilityWeight] +# QuantitativeProbability generalizes probability to include both action quantities and their associated probability +QuantitativeProbability = Tuple[Tuple[Tuple[Float01, ...], Probability], ...] +QuantitativeProbabilityWeight = Tuple[Tuple[Tuple[Float01, ...], ProbabilityWeight], ...] +QuantitativeMOProbability = Tuple[Tuple[Tuple[Float01, ...], List[Probability]], ...] +QuantitativeMOProbabilityWeight = Tuple[Tuple[Tuple[Float01, ...], List[ProbabilityWeight]], ...] 
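The new aliases are easiest to read by example. The short standalone sketch below is not part of the patch; the action names and numbers are invented, and it only builds plain Python values with the shapes these aliases describe:

```python
# Illustrative values matching the new type aliases in pybandits/base.py.

# ActionId: a non-empty string; QuantitativeActionId pairs it with a tuple of quantities in [0, 1].
action = "a1"
quantitative_action = ("a1", (0.25, 0.75))

# Probability: a float in [0, 1]; ProbabilityWeight adds the weighted sum used by contextual models.
probability = 0.42
probability_weight = (0.42, 187.3)

# MOProbability: one probability per objective.
mo_probability = [0.42, 0.11]

# QuantitativeProbability: one (quantity tuple, probability) pair per sampled quantity.
quantitative_probability = (
    ((0.25,), 0.40),
    ((0.50,), 0.55),
    ((0.75,), 0.35),
)

# QuantitativeProbabilityWeight: same structure, but each quantity maps to (probability, weighted sum).
quantitative_probability_weight = (((0.25,), (0.40, 120.0)), ((0.75,), (0.35, 95.5)))
```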
+UnifiedProbability = Union[Probability, QuantitativeProbability] +UnifiedProbabilityWeight = Union[ProbabilityWeight, QuantitativeProbabilityWeight] +UnifiedMOProbability = Union[MOProbability, QuantitativeMOProbability] +UnifiedMOProbabilityWeight = Union[MOProbabilityWeight, QuantitativeMOProbabilityWeight] # SmabPredictions is a tuple of two lists: the first list contains the selected action ids, # and the second list contains their associated probabilities -SmabPredictions = NewType("SmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]]) +SmabPredictions = NewType( + "SmabPredictions", + Tuple[ + List[UnifiedActionId], + Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, MOProbability]]], + ], +) # CmabPredictions is a tuple of three lists: the first list contains the selected action ids, # the second list contains their associated probabilities, # and the third list contains their associated weighted sums CmabPredictions = NewType( - "CmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]], List[Dict[ActionId, float]]] + "CmabPredictions", + Union[ + Tuple[List[UnifiedActionId], List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, float]]], + Tuple[ + List[UnifiedActionId], List[Dict[UnifiedActionId, MOProbability]], List[Dict[UnifiedActionId, List[float]]] + ], + ], ) Predictions = NewType("Predictions", Union[SmabPredictions, CmabPredictions]) BinaryReward = NewType("BinaryReward", conint(ge=0, le=1)) ActionRewardLikelihood = NewType( "ActionRewardLikelihood", - Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]], + Union[Dict[UnifiedActionId, float], Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]], ) +Serializable = Union[str, int, float, bool, None, List["Serializable"], Dict[str, "Serializable"]] ACTION_IDS_PREFIX = "action_ids_" +QUANTITATIVE_ACTION_IDS_PREFIX = f"quantitative_{ACTION_IDS_PREFIX}" class _classproperty(property): @@ -74,6 +104,18 @@ def __init__(self, **data): def model_post_init(self, __context: Any) -> None: pass + def _validate_params_lengths( + self, + **kwargs, + ): + """ + Verify that the given keyword arguments have the same length. + """ + reference = len(next(iter(kwargs.values()))) + for k, v in kwargs.items(): + if v is not None and len(v) != reference: + raise AttributeError(f"Shape mismatch: {k} should have the same length as the other parameters.") + def _apply_version_adjusted_method(self, v2_method_name: str, v1_method_name: str, **kwargs) -> Any: """ Apply the method with the given name, adjusting for the pydantic version. @@ -109,3 +151,46 @@ def model_fields(cls) -> Dict[str, Any]: The model fields. """ return cls.__fields__ + + def model_copy(self, *, update: Optional[Mapping[str, Any]] = None, deep: bool = False) -> Self: + """ + Create a new instance of the model with the same field values. + + Parameters + ---------- + update : Mapping[str, Any], optional + The field values to update, by default None + + deep : bool, optional + Whether to copy the field values deeply, by default False + + Returns + ------- + Self + The new instance of the model. + """ + return self.copy(update=update, deep=deep) + + @classmethod + def model_validate( + cls, + obj: Any, + ) -> Self: + """ + Validate a PyBandits model instance. + + Parameters + ---------- + obj : Any + The object to validate. Use a state dictionary to generate a model from state.
+ + Raises + ------ + ValidationError: If the object could not be validated. + + Returns + ------- + Self + The validated model instance. + """ + return cls.parse_obj(obj) diff --git a/pybandits/base_model.py b/pybandits/base_model.py new file mode 100644 index 0000000..030e8c5 --- /dev/null +++ b/pybandits/base_model.py @@ -0,0 +1,128 @@ +from abc import ABC, abstractmethod +from typing import Callable, List, Union + +import numpy as np + +from pybandits.base import ( + BinaryReward, + MOProbability, + Probability, + ProbabilityWeight, + PyBanditsBaseModel, + QuantitativeMOProbability, + QuantitativeProbability, + QuantitativeProbabilityWeight, +) +from pybandits.pydantic_version_compatibility import NonNegativeFloat + + +class BaseModel(PyBanditsBaseModel, ABC): + """ + Class to model the prior distributions of standard actions and quantitative actions. + """ + + @abstractmethod + def sample_proba( + self, **kwargs + ) -> Union[ + List[Probability], + List[MOProbability], + List[ProbabilityWeight], + List[QuantitativeProbability], + List[QuantitativeMOProbability], + List[QuantitativeProbabilityWeight], + ]: + """ + Sample the probability of getting a positive reward. + """ + + @abstractmethod + def update(self, rewards: Union[List[BinaryReward], List[List[BinaryReward]]], **kwargs): + """ + Update the model parameters. + + Parameters + ---------- + rewards : Union[List[BinaryReward], List[List[BinaryReward]]], + if nested list, len() should follow shape of (n_samples, n_objectives) + The binary reward for each sample. + If strategy is not MultiObjectiveBandit, rewards should be a list, e.g. + rewards = [1, 0, 1, 1, 1, ...] + If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2): + rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...] + """ + + +class BaseModelSO(BaseModel, ABC): + """ + Class to model the prior distributions of standard actions and quantitative actions for single objective. + """ + + @abstractmethod + def sample_proba( + self, **kwargs + ) -> Union[ + List[Probability], List[ProbabilityWeight], List[QuantitativeProbability], List[QuantitativeProbabilityWeight] + ]: + """ + Sample the probability of getting a positive reward. + """ + + @abstractmethod + def update(self, rewards: List[BinaryReward], **kwargs): + """ + Update the model parameters. + + Parameters + ---------- + rewards : List[BinaryReward], + The binary reward for each sample. + """ + + +class BaseModelMO(BaseModel, ABC): + """ + Class to model the prior distributions of standard actions and quantitative actions for multi-objective. + + Parameters + ---------- + models : List[BaseModelSO] + The list of models for each objective. + """ + + models: List[BaseModelSO] + + @abstractmethod + def sample_proba(self, **kwargs) -> Union[List[MOProbability], List[QuantitativeMOProbability]]: + """ + Sample the probability of getting a positive reward. + """ + + @abstractmethod + def update(self, rewards: List[List[BinaryReward]], **kwargs): + """ + Update the model parameters. + + Parameters + ---------- + rewards : List[List[BinaryReward]], + if nested list, len() should follow shape of (n_samples, n_objectives) + The binary rewards for each sample. + If strategy is not MultiObjectiveBandit, rewards should be a list, e.g. + rewards = [1, 0, 1, 1, 1, ...] + If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2): + rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...] 
+ """ + + +class BaseModelCC(PyBanditsBaseModel, ABC): + """ + Class to model action cost. + + Parameters + ---------- + cost: Union[NonNegativeFloat, Callable[[Union[float, NonNegativeFloat]], NonNegativeFloat]] + Cost associated to the Beta distribution. + """ + + cost: Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]] diff --git a/pybandits/cmab.py b/pybandits/cmab.py index 9b405a1..aae5d1b 100644 --- a/pybandits/cmab.py +++ b/pybandits/cmab.py @@ -19,17 +19,22 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - +from collections import defaultdict from typing import Dict, List, Optional, Set, Union from numpy import array -from numpy.random import choice from numpy.typing import ArrayLike -from pybandits.base import ActionId, BinaryReward, CmabPredictions +from pybandits.base import ( + ActionId, + BinaryReward, + CmabPredictions, + UnifiedActionId, +) from pybandits.mab import BaseMab -from pybandits.model import BayesianLogisticRegression, BayesianLogisticRegressionCC +from pybandits.model import BaseBayesianLogisticRegression, BayesianLogisticRegression, BayesianLogisticRegressionCC from pybandits.pydantic_version_compatibility import field_validator, validate_call +from pybandits.quantitative_model import BaseCmabZoomingModel, CmabZoomingModel, CmabZoomingModelCC from pybandits.strategy import ( BestActionIdentificationBandit, ClassicBandit, @@ -43,35 +48,32 @@ class BaseCmabBernoulli(BaseMab): Parameters ---------- - actions: Dict[ActionId, BayesianLogisticRegression] + actions : Dict[ActionId, Union[BaseBayesianLogisticRegression, BaseCmabZoomingModel]] The list of possible actions, and their associated Model. - strategy: Strategy + strategy : Strategy The strategy used to select actions. - predict_with_proba: bool - If True predict with sampled probabilities, else predict with weighted sums. - predict_actions_randomly: bool - If True predict actions randomly (where each action has equal probability to be selected), else predict with the - bandit strategy. 
""" - actions: Dict[ActionId, BayesianLogisticRegression] - predict_with_proba: bool - predict_actions_randomly: bool + actions: Dict[ActionId, Union[BaseBayesianLogisticRegression, BaseCmabZoomingModel]] + _predict_with_proba: bool + + @staticmethod + def _maybe_crawl_model(model: Union[BaseBayesianLogisticRegression, BaseCmabZoomingModel]): + return list(model.sub_actions.values())[0] if isinstance(model, BaseCmabZoomingModel) else model @field_validator("actions", mode="after") @classmethod - def check_bayesian_logistic_regression_models(cls, v): + def check_models(cls, v): action_models = list(v.values()) first_action = action_models[0] - first_action_type = type(first_action) + test_first_action = cls._maybe_crawl_model(first_action) for action in action_models[1:]: - if not isinstance(action, first_action_type): - raise AttributeError("All actions should follow the same type.") - if not len(action.betas) == len(first_action.betas): + test_action = cls._maybe_crawl_model(action) + if not len(test_action.betas) == len(test_first_action.betas): raise AttributeError("All actions should have the same number of betas.") - if not action.update_method == first_action.update_method: + if not test_action.update_method == test_first_action.update_method: raise AttributeError("All actions should have the same update method.") - if not action.update_kwargs == first_action.update_kwargs: + if not test_action.update_kwargs == test_first_action.update_kwargs: raise AttributeError("All actions should have the same update kwargs.") return v @@ -95,14 +97,13 @@ def predict( Returns ------- - actions: List[ActionId] of shape (n_samples,) + actions: List[ActionId] The actions selected by the multi-armed bandit model. - probs: List[Dict[ActionId, Probability]] of shape (n_samples,) + probs: Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, MOProbability]]] The probabilities of getting a positive reward for each action. - ws : List[Dict[ActionId, float]] + ws : Union[List[Dict[UnifiedActionId, float]], List[Dict[UnifiedActionId, List[float]]]] The weighted sum of logistic regression logits. """ - valid_actions = self._get_valid_actions(forbidden_actions) # cast inputs to numpy arrays to facilitate their manipulation context = array(context) @@ -110,47 +111,35 @@ def predict( if len(context) < 1: raise AttributeError("Context must have at least one row") - if self.predict_actions_randomly: - # check that context has the expected number of columns - if context.shape[1] != len(list(self.actions.values())[0].betas): - raise AttributeError("Context must have {n_betas} columns") + # p is a dict of the sampled probability "prob" and weighted_sum "ws", e.g. + # + # p = {'a1': ([0.5, 0.2, 0.3], [200, 100, 130]), 'a2': ([0.4, 0.5, 0.6], [180, 200, 230]), ...} + # | | | | + # prob ws prob ws + probs_weights = self._get_action_probabilities(forbidden_actions=forbidden_actions, context=context) - selected_actions = choice(list(valid_actions), size=len(context)).tolist() # predict actions randomly - probs = len(context) * [{k: 0.5 for k in valid_actions}] # all probs are set to 0.5 - weighted_sums = len(context) * [{k: 0 for k in valid_actions}] # all weighted sum are set to 1 - else: - # p is a dict of the sampled probability "prob" and weighted_sum "ws", e.g. 
- # - # p = {'a1': ([0.5, 0.2, 0.3], [200, 100, 130]), 'a2': ([0.4, 0.5, 0.6], [180, 200, 230]), ...} - # | | | | - # prob ws prob ws - p = { - action: model.sample_proba(context=context) # sample probabilities for the entire context matrix - for action, model in self.actions.items() - if action in valid_actions - } - - prob = {a: x[0] for a, x in p.items()} # e.g. prob = {'a1': [0.5, 0.4, ...], 'a2': [0.4, 0.3, ...], ...} - ws = {a: x[1] for a, x in p.items()} # e.g. ws = {'a1': [200, 100, ...], 'a2': [100, 50, ...], ...} - - # select either "prob" or "ws" to use as input argument in select_actions() - p_to_select_action = prob if self.predict_with_proba else ws - - # predict actions, probs, weighted_sums - selected_actions = [ - self._select_epsilon_greedy_action( - p={a: p_to_select_action[a][i] for a in p_to_select_action}, actions=self.actions - ) - for i in range(len(context)) - ] - probs = [{a: prob[a][i] for a in prob} for i in range(len(context))] - weighted_sums = [{a: ws[a][i] for a in ws} for i in range(len(context))] + probs = [ + {a: x[0] for a, x in prob_weight.items()} for prob_weight in probs_weights + ] # e.g. prob = {'a1': [0.5, 0.4, ...], 'a2': [0.4, 0.3, ...], ...} + weighted_sums = [ + {a: x[1] for a, x in prob_weight.items()} for prob_weight in probs_weights + ] # e.g. ws = {'a1': [200, 100, ...], 'a2': [100, 50, ...], ...} + + # select either "prob" or "ws" to use as input argument in select_actions() + p_to_select_action = probs if self._predict_with_proba else weighted_sums + + # predict actions, probs, weighted_sums + selected_actions = [self._select_epsilon_greedy_action(p=p, actions=self.actions) for p in p_to_select_action] return selected_actions, probs, weighted_sums @validate_call(config=dict(arbitrary_types_allowed=True)) - def update( - self, context: ArrayLike, actions: List[ActionId], rewards: List[Union[BinaryReward, List[BinaryReward]]] + def _update( + self, + actions: List[UnifiedActionId], + rewards: List[Union[BinaryReward, List[BinaryReward]]], + quantities: Optional[List[Union[float, List[float], None]]], + context: ArrayLike, ): """ Update the contextual Bernoulli bandit given the list of selected actions and their corresponding binary @@ -158,9 +147,8 @@ def update( Parameters ---------- - context: ArrayLike of shape (n_samples, n_features) - Matrix of contextual features. - actions : List[ActionId] of shape (n_samples,), e.g. ['a1', 'a2', 'a3', 'a4', 'a5'] + + actions : List[UnifiedActionId] of shape (n_samples,), e.g. ['a1', 'a2', 'a3', 'a4', 'a5'] The selected action for each sample. rewards : List[Union[BinaryReward, List[BinaryReward]]] of shape (n_samples, n_objectives) The binary reward for each sample. @@ -168,50 +156,55 @@ def update( rewards = [1, 0, 1, 1, 1, ...] If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2): rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...] + quantities : Optional[List[Union[float, List[float], None]]] + The value associated with each action. If none, the value is not used, i.e. non-quantitative action. + context: ArrayLike of shape (n_samples, n_features) + Matrix of contextual features. 
""" - self._validate_update_params(actions=actions, rewards=rewards) - if len(context) != len(rewards): - raise AttributeError(f"Shape mismatch: actions and rewards should have the same length {len(actions)}.") + context = array(context) # cast inputs to numpy arrays to facilitate their manipulation - # cast inputs to numpy arrays to facilitate their manipulation - context, actions, rewards = array(context), array(actions), array(rewards) - - for a in set(actions): - # get context and rewards of the samples associated to action a - context_of_a = context[actions == a] - rewards_of_a = rewards[actions == a].tolist() + rewards_dict = defaultdict(list) - # update model associated to action a - self.actions[a].update(context=context_of_a, rewards=rewards_of_a) - - # always set predict_actions_randomly after update - self.predict_actions_randomly = False + if quantities is None: + for a, r in zip(actions, rewards): + rewards_dict[a].append(r) + for a in set(actions): + mask = [action == a for action in actions] + self.actions[a].update(context=context[mask], rewards=rewards_dict[a]) + else: + quantities_dict = defaultdict(list) + for a, v, r in zip(actions, quantities, rewards): + if v is not None: + quantities_dict[a].append(v) + rewards_dict[a].append(r) + for a in set(actions): + mask = [action == a for action in actions] + if quantities_dict[a]: # quantitative action + self.actions[a].update( + context=context[mask], rewards=rewards_dict[a], quantities=quantities_dict[a] + ) + else: # non-quantitative action + self.actions[a].update(context=context[mask], rewards=rewards_dict[a]) class CmabBernoulli(BaseCmabBernoulli): """ - Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling. + Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling. Reference: Thompson Sampling for Contextual Bandits with Linear Payoffs (Agrawal and Goyal, 2014) https://arxiv.org/pdf/1209.3352.pdf Parameters ---------- - actions: Dict[ActionId, BayesianLogisticRegression] + actions: Dict[ActionId, Union[BayesianLogisticRegression, CmabZoomingModel]] The list of possible actions, and their associated Model. strategy: ClassicBandit The strategy used to select actions. - predict_with_proba: bool - If True predict with sampled probabilities, else predict with weighted sums - predict_actions_randomly: bool - If True predict actions randomly (where each action has equal probability to be selected), else predict with the - bandit strategy. """ - actions: Dict[ActionId, BayesianLogisticRegression] + actions: Dict[ActionId, Union[BayesianLogisticRegression, CmabZoomingModel]] strategy: ClassicBandit - predict_with_proba: bool = False - predict_actions_randomly: bool = False + _predict_with_proba: bool = False class CmabBernoulliBAI(BaseCmabBernoulli): @@ -223,21 +216,15 @@ class CmabBernoulliBAI(BaseCmabBernoulli): Parameters ---------- - actions: Dict[ActionId, BayesianLogisticRegression] + actions: Dict[ActionId, Union[BayesianLogisticRegression, CmabZoomingModel]] The list of possible actions, and their associated Model. strategy: BestActionIdentificationBandit The strategy used to select actions. - predict_with_proba: bool - If True predict with sampled probabilities, else predict with weighted sums - predict_actions_randomly: bool - If True predict actions randomly (where each action has equal probability to be selected), else predict with the - bandit strategy. 
""" - actions: Dict[ActionId, BayesianLogisticRegression] + actions: Dict[ActionId, Union[BayesianLogisticRegression, CmabZoomingModel]] strategy: BestActionIdentificationBandit - predict_with_proba: bool = False - predict_actions_randomly: bool = False + _predict_with_proba: bool = False class CmabBernoulliCC(BaseCmabBernoulli): @@ -257,18 +244,12 @@ class CmabBernoulliCC(BaseCmabBernoulli): Parameters ---------- - actions: Dict[ActionId, BayesianLogisticRegressionCC] + actions: Dict[ActionId, Union[BayesianLogisticRegressionCC, CmabZoomingModelCC]] The list of possible actions, and their associated Model. strategy: CostControlBandit The strategy used to select actions. - predict_with_proba: bool - If True predict with sampled probabilities, else predict with weighted sums - predict_actions_randomly: bool - If True predict actions randomly (where each action has equal probability to be selected), else predict with the - bandit strategy. """ - actions: Dict[ActionId, BayesianLogisticRegressionCC] + actions: Dict[ActionId, Union[BayesianLogisticRegressionCC, CmabZoomingModelCC]] strategy: CostControlBandit - predict_with_proba: bool = True - predict_actions_randomly: bool = False + _predict_with_proba: bool = True diff --git a/pybandits/cmab_simulator.py b/pybandits/cmab_simulator.py index be3fb4a..60b17a1 100644 --- a/pybandits/cmab_simulator.py +++ b/pybandits/cmab_simulator.py @@ -21,15 +21,23 @@ # SOFTWARE. import random -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, Union import numpy as np import pandas as pd -from pybandits.base import ActionId, BinaryReward +from pybandits.base import ActionId, BinaryReward, Probability, UnifiedActionId from pybandits.cmab import BaseCmabBernoulli from pybandits.pydantic_version_compatibility import Field, model_validator -from pybandits.simulator import Simulator +from pybandits.quantitative_model import QuantitativeModel +from pybandits.simulator import ( + DoubleParametricActionProbability, + ParametricActionProbability, + Simulator, +) + +CmabProbabilityValue = Union[ParametricActionProbability, DoubleParametricActionProbability] +CmabActionProbabilityGroundTruth = Dict[ActionId, CmabProbabilityValue] class CmabSimulator(Simulator): @@ -52,6 +60,7 @@ class CmabSimulator(Simulator): If not supplied, all samples are assigned to the group. 
""" + probs_reward: Optional[Union[CmabActionProbabilityGroundTruth, Dict[str, CmabActionProbabilityGroundTruth]]] = None mab: BaseCmabBernoulli = Field(validation_alias="cmab") context: np.ndarray group: Optional[List] = None @@ -74,32 +83,51 @@ def replace_nulls_and_validate_sizes_and_dtypes(cls, values): if len(context) != len(group): raise ValueError("Mismatch between context length and group length") values["group"] = [str(g) for g in group] - mab_action_ids = list(values["mab"].actions.keys()) - index = list(set(group)) probs_reward = cls._get_value_with_default("probs_reward", values) if probs_reward is None: - probs_reward = pd.DataFrame(0.5, index=index, columns=mab_action_ids) + probs_reward = { + g: { + action: cls._generate_prob_reward(values["context"].shape[1], model.dimension) + if isinstance(model, QuantitativeModel) + else cls._generate_prob_reward(values["context"].shape[1]) + for action, model in values["mab"].actions.items() + } + for g in set(group) + } values["probs_reward"] = probs_reward else: - if probs_reward.shape[0] != len(index): + if probs_reward.shape[0] != len(set(group)): raise ValueError("number of probs_reward rows must match the number of groups.") return values + @model_validator(mode="before") + @classmethod + def validate_probs_reward_columns(cls, values): + if "probs_reward" in values and values["probs_reward"] is not None: + groups = set(values["group"]) + if set(values["probs_reward"].keys()) != groups: + raise ValueError("probs_reward keys must match groups.") + for v in values["probs_reward"].values(): + cls._validate_probs_reward_dict(v, values["mab"].actions) + return values + def _initialize_results(self): """ Initialize the results DataFrame. The results DataFrame is used to store the raw simulation results. """ self._results = pd.DataFrame( - columns=["action", "reward", "group", "selected_prob_reward", "max_prob_reward"], + columns=["action", "reward", "quantities", "group", "selected_prob_reward", "max_prob_reward"], ) - def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> List[BinaryReward]: + def _draw_rewards( + self, actions: List[UnifiedActionId], metadata: Dict[str, List], update_kwargs: Dict[str, np.ndarray] + ) -> List[BinaryReward]: """ Draw rewards for the selected actions based on metadata according to probs_reward Parameters ---------- - actions : List[ActionId] + actions : List[UnifiedActionId] The actions selected by the multi-armed bandit model. metadata : Dict[str, List] The metadata for the selected actions; should contain the batch groups association. @@ -109,9 +137,38 @@ def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> L reward : List[BinaryReward] A list of binary rewards. """ - rewards = [int(random.random() < self.probs_reward.loc[g, a]) for g, a in zip(metadata["group"], actions)] + rewards = [ + int(random.random() < self._extract_ground_truth(a, g, c)) + for g, a, c in zip(metadata["group"], actions, update_kwargs["context"]) + ] return rewards + def _extract_ground_truth(self, action: UnifiedActionId, group: str, context: np.ndarray) -> Probability: + """ + Extract the ground truth probability for the action. + + Parameters + ---------- + action : UnifiedActionId + The action for which the ground truth probability is extracted. + group : str + The group to which the action was applied. + context : np.ndarray + The context for the action. + + Returns + ------- + Probability + The ground truth probability for the action. 
+ """ + return ( + self.probs_reward[group][action[0]](context, np.array(action[1])) + if isinstance(action, tuple) and action[1] is not None + else self.probs_reward[group][action[0]](context) + if isinstance(action, tuple) + else self.probs_reward[group][action](context) + ) + def _get_batch_step_kwargs_and_metadata( self, batch_index ) -> Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray], Dict[str, List]]: @@ -139,35 +196,38 @@ def _get_batch_step_kwargs_and_metadata( metadata = {"group": self.group[idx_batch_min:idx_batch_max]} return predict_and_update_kwargs, predict_and_update_kwargs, metadata - def _finalize_step(self, batch_results: pd.DataFrame): + def _finalize_step(self, batch_results: pd.DataFrame, update_kwargs: Dict[str, np.ndarray]): """ Finalize the step by adding additional information to the batch results. Parameters ---------- batch_results : pd.DataFrame - raw batch results + Raw batch results + update_kwargs : Dict[str, np.ndarray] + Context for the batch Returns ------- batch_results : pd.DataFrame - batch results with added reward probability for selected action and most rewarding action + Batch results with added reward probability for selected action and most rewarding action """ group_id = batch_results.loc[:, "group"] action_id = batch_results.loc[:, "action"] - selected_prob_reward = [self.probs_reward.loc[g, a] for g, a in zip(group_id, action_id)] + quantity = batch_results.loc[:, "quantities"] + selected_prob_reward = [ + self._extract_ground_truth((a, q), g, c) + for a, q, g, c in zip(action_id, quantity, group_id, update_kwargs["context"]) + ] batch_results.loc[:, "selected_prob_reward"] = selected_prob_reward - max_prob_reward = self.probs_reward.loc[group_id].max(axis=1) - batch_results.loc[:, "max_prob_reward"] = max_prob_reward.tolist() + max_prob_reward = [ + max( + self._maximize_prob_reward((lambda q: self.probs_reward[g][a](c, q)), m.dimension) + if isinstance(m, QuantitativeModel) + else self.probs_reward[g][a](c) + for a, m in self.mab.actions.items() + ) + for g, c in zip(group_id, update_kwargs["context"]) + ] + batch_results.loc[:, "max_prob_reward"] = max_prob_reward return batch_results - - def _finalize_results(self): - """ - Finalize the simulation process. 
Used to add regret and cumulative regret - - Returns - ------- - None - """ - self._results["regret"] = self._results["max_prob_reward"] - self._results["selected_prob_reward"] - self._results["cum_regret"] = self._results["regret"].cumsum() diff --git a/pybandits/mab.py b/pybandits/mab.py index 38c83b5..48eb7de 100644 --- a/pybandits/mab.py +++ b/pybandits/mab.py @@ -23,20 +23,28 @@ import warnings from abc import ABC, abstractmethod from collections import defaultdict -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, get_args +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, get_args, get_origin import numpy as np from pybandits.base import ( ACTION_IDS_PREFIX, + QUANTITATIVE_ACTION_IDS_PREFIX, ActionId, ActionRewardLikelihood, BinaryReward, Float01, + MOProbability, + MOProbabilityWeight, Predictions, + Probability, + ProbabilityWeight, PyBanditsBaseModel, + Serializable, + UnifiedActionId, ) -from pybandits.model import Model +from pybandits.base_model import BaseModel +from pybandits.model import Model, ModelMO from pybandits.pydantic_version_compatibility import ( PYDANTIC_VERSION_1, PYDANTIC_VERSION_2, @@ -45,8 +53,9 @@ pydantic_version, validate_call, ) +from pybandits.quantitative_model import QuantitativeModel from pybandits.strategy import Strategy -from pybandits.utils import extract_argument_names_from_function +from pybandits.utils import extract_argument_names class BaseMab(PyBanditsBaseModel, ABC): @@ -69,14 +78,14 @@ class BaseMab(PyBanditsBaseModel, ABC): which in turn will be used to instantiate the strategy. """ - actions: Dict[ActionId, Model] + actions: Dict[ActionId, BaseModel] strategy: Strategy epsilon: Optional[Float01] = None - default_action: Optional[ActionId] = None + default_action: Optional[UnifiedActionId] = None def __init__( self, - actions: Dict[ActionId, Model], + actions: Dict[ActionId, BaseModel], epsilon: Optional[Float01] = None, default_action: Optional[ActionId] = None, **strategy_kwargs, @@ -101,12 +110,6 @@ def at_least_one_action_is_defined(cls, v): raise AttributeError("At least one action should be defined.") elif len(v) == 1: warnings.warn("Only a single action was supplied. 
This MAB will be deterministic.") - # validate that all actions are of the same configuration - action_models = list(v.values()) - first_action = action_models[0] - first_action_type = type(first_action) - if any(not isinstance(action, first_action_type) for action in action_models[1:]): - raise AttributeError("All actions should follow the same type.") return v if pydantic_version == PYDANTIC_VERSION_1: @@ -120,6 +123,18 @@ def check_default_action(cls, values): raise AttributeError("A default action should only be defined when epsilon is defined.") if default_action and default_action not in values["actions"]: raise AttributeError("The default action must be valid action defined in the actions set.") + if ( + default_action + and isinstance(default_action, tuple) + and not isinstance(values["actions"][default_action[0]], QuantitativeModel) + ): + raise AttributeError("Quantitative default action requires a quantitative action model.") + if ( + default_action + and isinstance(default_action, str) + and not isinstance(values["actions"][default_action], (Model, ModelMO)) + ): + raise AttributeError("Standard default action requires a standard action model.") return values elif pydantic_version == PYDANTIC_VERSION_2: @@ -164,32 +179,52 @@ def _get_valid_actions(self, forbidden_actions: Optional[Set[ActionId]]) -> Set[ return valid_actions - def _validate_update_params( - self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]] + #################################################################################################################### + + @validate_call(config=dict(arbitrary_types_allowed=True)) + def update( + self, + actions: List[ActionId], + rewards: Union[List[BinaryReward], List[List[BinaryReward]]], + quantities: Optional[List[Union[float, List[float], None]]] = None, + **kwargs, ): """ - Verify that the given list of action IDs is a subset of the currently defined actions and that - the rewards type matches the strategy type. + Update the multi-armed bandit model. - Parameters - ---------- - actions : List[ActionId] + actions: List[ActionId] The selected action for each sample. rewards: List[Union[BinaryReward, List[BinaryReward]]] The reward for each sample. + quantities: Optional[List[Union[float, List[float], None]]] + The value associated with each action. If none, the value is not used, i.e. non-quantitative action. + context: Optional[ArrayLike] + The context for each sample. 
""" invalid = set(actions) - set(self.actions.keys()) if invalid: raise AttributeError(f"The following invalid action(s) were specified: {invalid}.") - if len(actions) != len(rewards): - raise AttributeError(f"Shape mismatch: actions and rewards should have the same length {len(actions)}.") - - #################################################################################################################### + self._validate_params_lengths(actions=actions, rewards=rewards, quantities=quantities, **kwargs) + if quantities is None: + if not all(isinstance(self.actions[action], (Model, ModelMO)) for action in actions): + raise ValueError("Quantitative actions require defined quantities.") + else: + if not all( + q is not None for a, q in zip(actions, quantities) if isinstance(self.actions[a], QuantitativeModel) + ): + raise ValueError("Quantitative actions require defined quantities.") + if not all(q is None for a, q in zip(actions, quantities) if isinstance(self.actions[a], (Model, ModelMO))): + raise ValueError("Standard actions should not have defined quantities.") + self._update(actions, rewards, quantities, **kwargs) @abstractmethod - @validate_call - def update( - self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]], *args, **kwargs + @validate_call(config=dict(arbitrary_types_allowed=True)) + def _update( + self, + actions: List[ActionId], + rewards: Union[List[BinaryReward], List[List[BinaryReward]]], + quantities: Optional[List[Union[float, List[float], None]]], + **kwargs, ): """ Update the multi-armed bandit model. @@ -198,11 +233,92 @@ def update( The selected action for each sample. rewards: List[Union[BinaryReward, List[BinaryReward]]] The reward for each sample. + quantities: Optional[List[Union[float, List[float], None]]] + The value associated with each action. If none, the value is not used, i.e. non-quantitative action. """ + @staticmethod + def _transform_nested_list(lst: List[List[Dict]]): + return [{k: v for d in single_action_dicts for k, v in d.items()} for single_action_dicts in zip(*lst)] + + @staticmethod + def _is_so_standard_action(value: Any) -> bool: + # Probability ProbabilityWeight + return isinstance(value, float) or (isinstance(value, tuple) and isinstance(value[0], float)) + + @staticmethod + def _is_so_quantitative_action(value: Any) -> bool: + return isinstance(value, tuple) and isinstance(value[0], tuple) + + @classmethod + def _is_standard_action(cls, value: Any) -> bool: + return cls._is_so_standard_action(value) or (isinstance(value, list) and cls._is_so_standard_action(value[0])) + + @classmethod + def _is_quantitative_action(cls, value: Any) -> bool: + return cls._is_so_quantitative_action(value) or ( + isinstance(value, list) and cls._is_so_quantitative_action(value[0]) + ) + + def _get_action_probabilities( + self, forbidden_actions: Optional[Set[ActionId]] = None, **kwargs + ) -> Union[ + List[Dict[UnifiedActionId, Probability]], + List[Dict[UnifiedActionId, ProbabilityWeight]], + List[Dict[UnifiedActionId, MOProbability]], + List[Dict[UnifiedActionId, MOProbabilityWeight]], + ]: + """ + Get the probability of getting a positive reward for each action. + + Parameters + ---------- + forbidden_actions : Optional[Set[ActionId]], default=None + Set of forbidden actions. If specified, the model will discard the forbidden_actions and it will only + consider the remaining allowed_actions. By default, the model considers all actions as allowed_actions. + Note that: actions = allowed_actions U forbidden_actions. 
+ + Returns + ------- + action_probabilities: Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, ProbabilityWeight]], List[Dict[UnifiedActionId, MOProbability]], List[Dict[UnifiedActionId, MOProbabilityWeight]]] + The probability of getting a positive reward for each action. + """ + + valid_actions = self._get_valid_actions(forbidden_actions) + action_probabilities = { + action: model.sample_proba(**kwargs) for action, model in self.actions.items() if action in valid_actions + } + # Handle standard actions for which the value is a (probability, weight) tuple + list_transformations = [ + [{key: proba} for proba in value] + for key, value in action_probabilities.items() + if self._is_standard_action(value[0]) + ] + list_transformations = self._transform_nested_list(list_transformations) + # Handle quantitative actions, for which the value is a tuple of + # tuples of (quantity, (probability, weight) or probability) + tuple_transformations = [ + [{(key, quantity): proba for quantity, proba in sample} for sample in value] + for key, value in action_probabilities.items() + if self._is_quantitative_action(value[0]) + ] + tuple_transformations = self._transform_nested_list(tuple_transformations) + if not list_transformations and not tuple_transformations: + return [] + if not list_transformations: # No standard actions + list_transformations = [dict() for _ in range(len(tuple_transformations))] + if not tuple_transformations: # No quantitative actions + tuple_transformations = [dict() for _ in range(len(list_transformations))] + if not len(list_transformations) == len(tuple_transformations): + raise ValueError("The number of standard and quantitative actions should be the same.") + action_probabilities = [ + {**list_dict, **dict_dict} for list_dict, dict_dict in zip(list_transformations, tuple_transformations) + ] + return action_probabilities + @abstractmethod @validate_call - def predict(self, forbidden_actions: Optional[Set[ActionId]] = None) -> Predictions: + def predict(self, forbidden_actions: Optional[Set[ActionId]] = None, **kwargs) -> Predictions: """ Predict actions. @@ -241,7 +357,7 @@ def get_state(self) -> (str, dict): def _select_epsilon_greedy_action( self, p: ActionRewardLikelihood, - actions: Optional[Dict[ActionId, Model]] = None, + actions: Optional[Dict[ActionId, BaseModel]] = None, ) -> ActionId: """ Wraps self.strategy.select_action function with epsilon-greedy strategy, @@ -275,7 +391,15 @@ def _select_epsilon_greedy_action( if self.default_action and self.default_action not in p.keys(): raise KeyError(f"Default action {self.default_action} not in actions.") if np.random.binomial(1, self.epsilon): - selected_action = self.default_action or np.random.choice(list(p.keys())) + if self.default_action: + selected_action = self.default_action + else: + selected_action = np.random.choice(list(set(a[0] if isinstance(a, tuple) else a for a in p.keys()))) + if isinstance(self.actions[selected_action], QuantitativeModel): + selected_action = ( + selected_action, + tuple(np.random.random(self.actions[selected_action].dimension)), + ) else: selected_action = self.strategy.select_action(p=p, actions=actions) else: @@ -283,7 +407,7 @@ def _select_epsilon_greedy_action( return selected_action @classmethod - def from_state(cls, state: dict) -> "BaseMab": + def from_state(cls, state: Dict[str, Serializable]) -> "BaseMab": """ Create a new instance of the class from a given model state. The state can be obtained by applying get_state() to a model. 
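A minimal round-trip sketch of `get_state` and `from_state` under this patch; it assumes the patched package is installed, and the action ids and feature count are arbitrary:

```python
# Illustrative only: serialize a bandit and rebuild it through from_state(), which now
# delegates to model_validate() instead of re-mapping constructor arguments.
from pybandits.cmab import CmabBernoulli

mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=3)

model_name, state = mab.get_state()       # state is a plain serializable dict
restored = CmabBernoulli.from_state(state)

assert restored.actions.keys() == mab.actions.keys()
```

Because `from_state` now defers to pydantic parsing, the nested "strategy" entry of the state is validated directly rather than being lifted into constructor keyword arguments.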
@@ -299,16 +423,13 @@ def from_state(cls, state: dict) -> "BaseMab": The new model instance. """ - model_attributes = extract_argument_names_from_function(cls.__init__, True) - strategy_attributes = list(state["strategy"].keys()) - attributes_mapping = {k: state[k] for k in model_attributes if k not in strategy_attributes and k in state} - attributes_mapping.update({k: state["strategy"][k] for k in strategy_attributes}) - return cls(**attributes_mapping) + return cls.model_validate(state) @classmethod def cold_start( cls, action_ids: Optional[Set[ActionId]] = None, + quantitative_action_ids: Optional[Set[ActionId]] = None, epsilon: Optional[Float01] = None, default_action: Optional[ActionId] = None, **kwargs, @@ -319,14 +440,16 @@ def cold_start( Parameters ---------- - action_ids: Optional[Set[ActionId]] + action_ids : Optional[Set[ActionId]] The list of possible actions. - epsilon: Optional[Float01] + quantitative_action_ids : Optional[Set[ActionId]] + The list of quantitative actions. + epsilon : Optional[Float01] epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] + default_action : Optional[ActionId] The default action to select with a probability of epsilon when using the epsilon-greedy approach. If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - kwargs: Dict[str, Any] + kwargs : Dict[str, Any] Additional parameters for the mab and for the action model. Returns @@ -334,35 +457,43 @@ def cold_start( mab: BaseMab Multi-Armed Bandit """ - action_specific_kwargs, kwargs = cls._extract_action_specific_kwargs(**kwargs) + action_specific_kwargs, quantitative_action_specific_kwargs, kwargs = cls._extract_action_specific_kwargs( + **kwargs + ) # Extract inner_action_ids - inner_action_ids = action_ids or set(action_specific_kwargs.keys()) - if not inner_action_ids: - raise ValueError( - "inner_action_ids should be provided either directly or via keyword argument in the form of " - "action_id_{model argument name} = {action_id: value}." 
- ) + inner_action_ids = action_ids or set(action_specific_kwargs) + inner_quantitative_action_ids = quantitative_action_ids or set(quantitative_action_specific_kwargs) + if not inner_action_ids and not inner_quantitative_action_ids: + raise ValueError("At least one action should be defined.") # Assign model for each action - action_model_cold_start, action_general_kwargs = cls._extract_action_model_class_and_attributes(**kwargs) - actions = {} - for a in inner_action_ids: - actions[a] = action_model_cold_start(**action_general_kwargs, **action_specific_kwargs.get(a, {})) + ( + model_cold_start, + quantitative_model_cold_start, + action_general_kwargs, + quantitative_action_general_kwargs, + ) = cls._extract_action_model_class_and_attributes(kwargs) + + # Instantiate the actions + all_actions = {} + for action_ids, cold_start, general_kwargs, specific_kwargs in zip( + [inner_action_ids, inner_quantitative_action_ids], + [model_cold_start, quantitative_model_cold_start], + [action_general_kwargs, quantitative_action_general_kwargs], + [action_specific_kwargs, quantitative_action_specific_kwargs], + ): + for a in action_ids: + all_actions[a] = cold_start(**general_kwargs, **specific_kwargs.get(a, {})) # Instantiate the MAB - strategy_kwargs = {k: kwargs[k] for k in kwargs.keys() if k not in action_general_kwargs.keys()} strategy_class = cls.model_fields["strategy"].annotation - strategy = strategy_class(**strategy_kwargs) - mab = cls(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) - # For contextual multi-armed bandit, until the very first update the model will predict actions randomly, - # where each action has equal probability to be selected. - if hasattr(mab, "predict_actions_randomly"): - mab.predict_actions_randomly = True + strategy = strategy_class(**kwargs) + mab = cls(actions=all_actions, strategy=strategy, epsilon=epsilon, default_action=default_action) return mab @staticmethod - def _extract_action_specific_kwargs(**kwargs) -> Tuple[Dict[str, Dict], Dict[str, Any]]: + def _extract_action_specific_kwargs(**kwargs) -> Tuple[Dict[str, Dict], Dict[str, Dict], Dict[str, Any]]: """ Utility function to extract kwargs that are specific for each action when constructing the action model. @@ -375,21 +506,30 @@ def _extract_action_specific_kwargs(**kwargs) -> Tuple[Dict[str, Dict], Dict[str ------- action_specific_kwargs : Dict[str, Dict] Dictionary of actions and the parameters of their associated model. + quantitative_action_specific_kwargs : Dict[str, Dict] + Dictionary of quantitative actions and the parameters of their associated model. kwargs : Dict[str, Any] - Dictionary of parameters and their values, without the action_specific_kwargs. + Dictionary of parameters and their quantities, without the action_specific_kwargs. 
""" action_specific_kwargs = defaultdict(dict) + quantitative_action_specific_kwargs = defaultdict(dict) for keyword in list(kwargs): argument = kwargs[keyword] - if keyword.startswith(ACTION_IDS_PREFIX) and type(argument) is dict: - kwargs.pop(keyword) - inner_keyword = keyword.split(ACTION_IDS_PREFIX)[1] - for action_id, value in argument.items(): - action_specific_kwargs[action_id][inner_keyword] = value - return dict(action_specific_kwargs), kwargs + for prefix, target_kwargs in zip( + [ACTION_IDS_PREFIX, QUANTITATIVE_ACTION_IDS_PREFIX], + [action_specific_kwargs, quantitative_action_specific_kwargs], + ): + if keyword.startswith(prefix) and type(argument) is dict: + kwargs.pop(keyword) + inner_keyword = keyword.split(prefix)[1] + for action_id, value in argument.items(): + target_kwargs[action_id][inner_keyword] = value + return dict(action_specific_kwargs), dict(quantitative_action_specific_kwargs), kwargs @classmethod - def _extract_action_model_class_and_attributes(cls, **kwargs) -> Tuple[Callable, Dict[str, Dict]]: + def _extract_action_model_class_and_attributes( + cls, kwargs + ) -> Tuple[Callable, Callable, Dict[str, Dict], Dict[str, Dict]]: """ Utility function to extract kwargs that are specific for each action when constructing the action model. @@ -402,17 +542,44 @@ def _extract_action_model_class_and_attributes(cls, **kwargs) -> Tuple[Callable, ------- action_model_cold_start : Callable Function handle for factoring the required action model. + quantitative_action_model_cold_start : Callable + Function handle for factoring the required quantitative action model. action_general_kwargs : Dict[str, any] Dictionary of parameters and their values for the action model. + quantitative_action_general_kwargs : Dict[str, any] + Dictionary of parameters and their values for the quantitative action model. 
""" - action_model_class = get_args(cls.model_fields["actions"].annotation)[1] - if hasattr(action_model_class, "cold_start"): - action_model_cold_start_init = action_model_cold_start = action_model_class.cold_start - else: - action_model_cold_start_init = action_model_class.__init__ - action_model_cold_start = action_model_class - - action_model_attributes = extract_argument_names_from_function(action_model_cold_start_init, True) - - action_general_kwargs = {k: kwargs[k] for k in action_model_attributes if k in kwargs.keys()} - return action_model_cold_start, action_general_kwargs + action_model_type = get_args(cls.model_fields["actions"].annotation)[1] + action_model_classes = ( + get_args(action_model_type) if get_origin(action_model_type) is Union else (action_model_type,) + ) + if len(action_model_classes) > 2: + raise ValueError("Only up to two types of action models are supported.") + quantitative_model_cold_start = model_cold_start = lambda **kwargs: None # dummy callable + action_general_kwargs = quantitative_action_general_kwargs = None + for action_model_class in action_model_classes: + if hasattr(action_model_class, "cold_start"): + action_model_cold_start = action_model_class.cold_start + action_model_attributes = extract_argument_names(action_model_cold_start) + # cover for cold_start kwargs + action_model_attributes = action_model_attributes + extract_argument_names(action_model_class) + else: + action_model_cold_start = action_model_class + action_model_attributes = extract_argument_names(action_model_cold_start) + general_kwargs = {k: kwargs.pop(k) for k in action_model_attributes if k in kwargs.keys()} + + if issubclass(action_model_class, (Model, ModelMO)): + model_cold_start = action_model_cold_start + action_general_kwargs = general_kwargs + elif issubclass(action_model_class, QuantitativeModel): + quantitative_model_cold_start = action_model_cold_start + quantitative_action_general_kwargs = general_kwargs + else: + raise ValueError(f"Unsupported action model class: {action_model_class}") + + return ( + model_cold_start, + quantitative_model_cold_start, + action_general_kwargs, + quantitative_action_general_kwargs, + ) diff --git a/pybandits/model.py b/pybandits/model.py index 2993645..bbfd81c 100644 --- a/pybandits/model.py +++ b/pybandits/model.py @@ -22,7 +22,7 @@ import warnings from abc import ABC, abstractmethod from random import betavariate -from typing import Any, List, Literal, Optional, Tuple, Union +from typing import List, Literal, Optional, Union import numpy as np import pymc.math as pmath @@ -33,8 +33,10 @@ from pymc import StudentT as PymcStudentT from pytensor.tensor import TensorVariable, dot from scipy.stats import t +from typing_extensions import Self -from pybandits.base import BinaryReward, Probability, PyBanditsBaseModel +from pybandits.base import BinaryReward, MOProbability, Probability, ProbabilityWeight, PyBanditsBaseModel +from pybandits.base_model import BaseModelCC, BaseModelMO, BaseModelSO from pybandits.pydantic_version_compatibility import ( PYDANTIC_VERSION_1, PYDANTIC_VERSION_2, @@ -47,26 +49,95 @@ validate_call, ) -UpdateMethods = Literal["MCMC", "VI"] +UpdateMethods = Literal["VI", "MCMC"] -class Model(PyBanditsBaseModel, ABC): +class Model(BaseModelSO, ABC): """ - Class to model the prior distributions. + Class to model the prior distributions for single objective. + + Parameters + ---------- + n_successes: PositiveInt = 1 + Counter of the number of successes. + n_failures: PositiveInt = 1 + Counter of the number of failures. 
""" + n_successes: PositiveInt = 1 + n_failures: PositiveInt = 1 + @abstractmethod - def sample_proba(self) -> Probability: + def sample_proba(self, **kwargs) -> Union[List[Probability], List[MOProbability], List[ProbabilityWeight]]: """ Sample the probability of getting a positive reward. """ + @validate_call(config=dict(arbitrary_types_allowed=True)) + def update(self, rewards: List[BinaryReward], **kwargs): + """ + Update n_successes and n_failures. + + Parameters + ---------- + rewards: List[BinaryReward] + A list of binary rewards. + """ + self._update(rewards=rewards, **kwargs) + self.n_successes += sum(rewards) + self.n_failures += len(rewards) - sum(rewards) + @abstractmethod - def update(self, rewards: List[Any]): + def _update(self, rewards: List[BinaryReward], **kwargs): """ Update the model parameters. + + Parameters + ---------- + rewards: List[BinaryReward] + A list of binary rewards. """ + @property + def count(self) -> int: + """ + The total amount of successes and failures collected. + """ + return self.n_successes + self.n_failures + + @property + def mean(self) -> float: + """ + The success rate i.e. n_successes / (n_successes + n_failures). + """ + return self.n_successes / self.count + + +class ModelCC(BaseModelCC, ABC): + """ + Class to model action cost. + + Parameters + ---------- + cost: NonNegativeFloat + Cost associated to the Beta distribution. + """ + + cost: NonNegativeFloat + + +class ModelMO(BaseModelMO, ABC): + """ + Class to model the prior distributions for multi-objective. + + Parameters + ---------- + models : List[Model] + The list of models for each objective. + """ + + models: List[Model] + class BaseBeta(Model): """ @@ -80,30 +151,13 @@ class BaseBeta(Model): Counter of the number of failures. """ - n_successes: PositiveInt = 1 - n_failures: PositiveInt = 1 - @model_validator(mode="before") @classmethod - def both_or_neither_counters_are_defined(cls, values): + def both_or_neither_models_are_defined(cls, values): if hasattr(values, "n_successes") != hasattr(values, "n_failures"): raise ValueError("Either both or neither n_successes and n_failures should be specified.") return values - @property - def count(self) -> int: - """ - The total amount of successes and failures collected. - """ - return self.n_successes + self.n_failures - - @property - def mean(self) -> float: - """ - The success rate i.e. n_successes / (n_successes + n_failures). - """ - return self.n_successes / self.count - @property def std(self) -> float: """ @@ -112,72 +166,90 @@ def std(self) -> float: return sqrt((self.n_successes * self.n_failures) / (self.count * (self.count - 1))) @validate_call - def update(self, rewards: List[BinaryReward]): + def _update(self, rewards: List[BinaryReward]): """ - Update n_successes and and n_failures. + Update n_successes and n_failures. Parameters ---------- rewards: List[BinaryReward] A list of binary rewards. """ - self.n_successes += sum(rewards) - self.n_failures += len(rewards) - sum(rewards) + pass - def sample_proba(self) -> Probability: + def sample_proba(self, n_samples: PositiveInt) -> List[Probability]: """ Sample the probability of getting a positive reward. + Parameters + ---------- + n_samples : PositiveInt + Number of samples to draw. + Returns ------- - prob: Probability - Probability of getting a positive reward. + prob: List[Probability] + Probability of getting a positive reward for each sample. 
""" - return betavariate(self.n_successes, self.n_failures) # type: ignore + return [betavariate(self.n_successes, self.n_failures) for _ in range(n_samples)] class Beta(BaseBeta): """ Beta Distribution model for Bernoulli multi-armed bandits. + + Parameters + ---------- + n_successes: PositiveInt = 1 + Counter of the number of successes. + n_failures: PositiveInt = 1 + Counter of the number of failures. """ -class BetaCC(BaseBeta): +class BetaCC(BaseBeta, ModelCC): """ Beta Distribution model for Bernoulli multi-armed bandits with cost control. Parameters ---------- - cost: NonNegativeFloat + n_successes : PositiveInt = 1 + Counter of the number of successes. + n_failures : PositiveInt = 1 + Counter of the number of failures. + cost : NonNegativeFloat Cost associated to the Beta distribution. """ - cost: NonNegativeFloat - -class BetaMO(Model): +class BetaMO(ModelMO): """ Beta Distribution model for Bernoulli multi-armed bandits with multi-objectives. Parameters ---------- - counters: List[Beta] of shape (n_objectives,) + models: List[Beta] of length (n_objectives,) List of Beta distributions. """ - counters: List[Beta] + models: List[Beta] @validate_call - def sample_proba(self) -> List[Probability]: + def sample_proba(self, n_samples: PositiveInt) -> List[MOProbability]: """ Sample the probability of getting a positive reward. + Parameters + ---------- + n_samples : PositiveInt + Number of samples to draw. + Returns ------- - prob: List[Probability] - Probabilities of getting a positive reward for each objective. + prob: List[MOProbability] + Probabilities of getting a positive reward for each sample and objective. """ - return [x.sample_proba() for x in self.counters] + return [list(p) for p in zip(*[model.sample_proba(n_samples=n_samples) for model in self.models])] @validate_call def update(self, rewards: List[List[BinaryReward]]): @@ -191,11 +263,11 @@ def update(self, rewards: List[List[BinaryReward]]): associated to each objective. For example, `[[1, 1], [1, 0], [1, 1], [1, 0], [1, 1]]`. """ - if any(len(x) != len(self.counters) for x in rewards): + if any(len(x) != len(self.models) for x in rewards): raise AttributeError("The shape of rewards is incorrect") - for i, counter in enumerate(self.counters): - counter.update([r[i] for r in rewards]) + for i, model in enumerate(self.models): + model.update([r[i] for r in rewards]) @classmethod def cold_start(cls, n_objectives: PositiveInt, **kwargs) -> "BetaMO": @@ -222,25 +294,21 @@ def cold_start(cls, n_objectives: PositiveInt, **kwargs) -> "BetaMO": blr: BayesianLogisticRegrssion The Bayesian Logistic Regression model. """ - counters = n_objectives * [Beta()] - blr = cls(counters=counters, **kwargs) + models = n_objectives * [Beta()] + blr = cls(models=models, **kwargs) return blr -class BetaMOCC(BetaMO): +class BetaMOCC(BetaMO, ModelCC): """ Beta Distribution model for Bernoulli multi-armed bandits with multi-objectives and cost control. Parameters ---------- - counters: List[BetaCC] of shape (n_objectives,) + models: List[BetaCC] of shape (n_objectives,) List of Beta distributions. - cost: NonNegativeFloat - Cost associated to the Beta distribution. """ - cost: NonNegativeFloat - class StudentT(PyBanditsBaseModel): """ @@ -261,7 +329,7 @@ class StudentT(PyBanditsBaseModel): nu: confloat(allow_inf_nan=False) = 5.0 -class BayesianLogisticRegression(Model): +class BaseBayesianLogisticRegression(Model): """ Base Bayesian Logistic Regression model. 
@@ -281,7 +349,7 @@ class BayesianLogisticRegression(Model):
         The strategy for computing posterior quantities of the Bayesian models in the update function. Such as Markov
         chain Monte Carlo ("MCMC") or Variational Inference ("VI"). Check UpdateMethods in pybandits.model for the
         full list.
     update_kwargs : Optional[dict], uses default values if not specified
         Additional arguments to pass to the update method.
     """
 
@@ -352,12 +420,12 @@ def _stable_sigmoid(cls, x: Union[np.ndarray, TensorVariable]) -> Union[np.ndarr
         Parameters
         ----------
         x : Union[np.ndarray, TensorVariable]
             Input values.
 
         Returns
         -------
         prob : Union[np.ndarray, TensorVariable]
             Sigmoid function applied to the input values.
         """
         backend = np if isinstance(x, np.ndarray) else pmath
         with warnings.catch_warnings():
@@ -388,7 +456,7 @@ def check_context_matrix(self, context: ArrayLike):
             raise AttributeError(f"Shape mismatch: context must have {len(self.betas)} columns.")
 
     @validate_call(config=dict(arbitrary_types_allowed=True))
-    def sample_proba(self, context: ArrayLike) -> Tuple[Probability, float]:
+    def sample_proba(self, context: ArrayLike) -> List[ProbabilityWeight]:
         """
         Compute the probability of getting a positive reward from the sampled regression coefficients and the context.
 
@@ -402,7 +470,7 @@ def sample_proba(self, context: ArrayLike) -> Tuple[Probability, float]:
         prob: ndarray of shape (n_samples)
             Probability of getting a positive reward.
         weighted_sum: ndarray of shape (n_samples)
             Weighted sums between contextual feature values and sampled coefficients.
         """
 
         # check input args
@@ -411,7 +479,7 @@ def sample_proba(self, context: ArrayLike) -> Tuple[Probability, float]:
 
         # extend context with a column of 1 to handle the dot product with the intercept
         context_ext = c_[ones((len(context), 1)), context]
 
         # sample alpha and beta coefficient values from student-t distributions once for each sample
         alpha = t.rvs(df=self.alpha.nu, loc=self.alpha.mu, scale=self.alpha.sigma, size=len(context_ext))
         betas = array(
             [
@@ -429,25 +497,25 @@ def sample_proba(self, context: ArrayLike) -> Tuple[Probability, float]:
 
         # compute the probability with the sigmoid function
         prob = self._stable_sigmoid(weighted_sum)
 
-        return prob, weighted_sum
+        return list(zip(prob, weighted_sum))
 
     @validate_call(config=dict(arbitrary_types_allowed=True))
-    def update(self, context: ArrayLike, rewards: List[BinaryReward]):
+    def _update(self, rewards: List[BinaryReward], context: ArrayLike):
         """
         Update the model parameters.
 
         Parameters
         ----------
-        context : ArrayLike
-            Context matrix of shape (n_samples, n_features).
         rewards: List[BinaryReward]
             A list of binary rewards.
+        context : ArrayLike
+            Context matrix of shape (n_samples, n_features).
""" # check input args self.check_context_matrix(context=context) if len(context) != len(rewards): - AttributeError("Shape mismatch: context and rewards must have the same length.") + raise ValueError("Shape mismatch: context and rewards must have the same length.") with PymcModel() as _: # update intercept (alpha) and coefficients (betas) @@ -468,7 +536,6 @@ def update(self, context: ArrayLike, rewards: List[BinaryReward]): # Bernoulli random vector with probability of success given by sigmoid function and actual data as observed _ = Bernoulli("likelihood", p=p, observed=rewards) - # update traces object by sampling from posterior distribution if self.update_method == "VI": # variational inference @@ -482,22 +549,26 @@ def update(self, context: ArrayLike, rewards: List[BinaryReward]): raise ValueError("Invalid update method.") # compute mean and std of the coefficients distributions - self.alpha.mu = mean(trace["alpha"]) - self.alpha.sigma = std(trace["alpha"], ddof=1) - betas_mu = mean(trace["betas"], axis=0) - betas_std = std(trace["betas"], axis=0, ddof=1) - self.betas = [ - StudentT(mu=mu, sigma=sigma, nu=beta.nu) for mu, sigma, beta in zip(betas_mu, betas_std, self.betas) - ] + if hasattr(trace, "alpha") and hasattr(trace, "betas"): + self.alpha.mu = mean(trace["alpha"]) + self.alpha.sigma = std(trace["alpha"], ddof=1) + betas_mu = mean(trace["betas"], axis=0) + betas_std = std(trace["betas"], axis=0, ddof=1) + self.betas = [ + StudentT(mu=mu, sigma=sigma, nu=beta.nu) for mu, sigma, beta in zip(betas_mu, betas_std, self.betas) + ] + else: + warnings.warn("Trace object missing vital keys. Model was not updated.") @classmethod + @validate_call def cold_start( cls, n_features: PositiveInt, update_method: UpdateMethods = "MCMC", update_kwargs: Optional[dict] = None, **kwargs, - ) -> "BayesianLogisticRegression": + ) -> Self: """ Utility function to create a Bayesian Logistic Regression model or child model with cost control, with default parameters. @@ -517,14 +588,14 @@ def cold_start( The strategy for computing posterior quantities of the Bayesian models in the update function. Such as Markov chain Monte Carlo ("MCMC") or Variational Inference ("VI"). Check UpdateMethods in pybandits.model for the full list. - update_kwargs : Optional[dict], uses default values if not specified + update_kwargs : Optional[dict], uses default quantities if not specified Additional arguments to pass to the update method. kwargs: Dict[str, Any] Additional arguments for the Bayesian Logistic Regression child model. Returns ------- - blr: BayesianLogisticRegrssion + blr: BayesianLogisticRegression The Bayesian Logistic Regression model. """ return cls( @@ -536,7 +607,32 @@ def cold_start( ) -class BayesianLogisticRegressionCC(BayesianLogisticRegression): +class BayesianLogisticRegression(BaseBayesianLogisticRegression): + """ + Base Bayesian Logistic Regression model. + + It is modeled as: + + y = sigmoid(alpha + beta1 * x1 + beta2 * x2 + ... + betaN * xN) + + where the alpha and betas coefficients are Student's t-distributions. + + Parameters + ---------- + alpha : StudentT + Student's t-distribution of the alpha coefficient. + betas : StudentT + Student's t-distributions of the betas coefficients. + update_method : UpdateMethods, defaults to "MCMC" + The strategy for computing posterior quantities of the Bayesian models in the update function. Such as Markov + chain Monte Carlo ("MCMC") or Variational Inference ("VI"). Check UpdateMethods in pybandits.model for the + full list. 
+ update_kwargs : Optional[dict], uses default quantities if not specified + Additional arguments to pass to the update method. + """ + + +class BayesianLogisticRegressionCC(BaseBayesianLogisticRegression, ModelCC): """ Bayesian Logistic Regression model with cost control. @@ -556,10 +652,8 @@ class BayesianLogisticRegressionCC(BayesianLogisticRegression): The strategy for computing posterior quantities of the Bayesian models in the update function. Such as Markov chain Monte Carlo ("MCMC") or Variational Inference ("VI"). Check UpdateMethods in pybandits.model for the full list. - update_kwargs : Optional[dict], uses default values if not specified + update_kwargs : Optional[dict], uses default quantities if not specified Additional arguments to pass to the update method. cost: NonNegativeFloat Cost associated to the Bayesian Logistic Regression model. """ - - cost: NonNegativeFloat diff --git a/pybandits/quantitative_model.py b/pybandits/quantitative_model.py new file mode 100644 index 0000000..cdd0e16 --- /dev/null +++ b/pybandits/quantitative_model.py @@ -0,0 +1,679 @@ +from abc import ABC, abstractmethod +from itertools import product +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +import numpy as np +from numpy.typing import ArrayLike +from scipy.spatial.distance import jensenshannon +from scipy.stats import beta +from typing_extensions import Self + +from pybandits.base import BinaryReward, Probability, PyBanditsBaseModel, QuantitativeProbability +from pybandits.base_model import BaseModel, BaseModelCC +from pybandits.model import BayesianLogisticRegression, Beta, Model +from pybandits.pydantic_version_compatibility import ( + NonNegativeFloat, + PositiveInt, + PrivateAttr, + field_validator, + validate_call, +) + + +class QuantitativeModel(BaseModel, ABC): + """ + Base class for quantitative models. + + Parameters + ---------- + dimension: PositiveInt + Number of parameters of the model. + """ + + dimension: PositiveInt + + @abstractmethod + def sample_proba(self) -> float: + """ + Sample the model. + """ + + @validate_call(config=dict(arbitrary_types_allowed=True)) + def update( + self, + quantities: List[Union[float, List[float]]], + rewards: Union[List[BinaryReward], List[List[BinaryReward]]], + **kwargs, + ): + """ + Update the model parameters. + + Parameters + ---------- + quantities : Optional[List[Union[float, List[float], None]] + The value associated with each action. If none, the value is not used, i.e. non-quantitative action. + rewards: Union[List[BinaryReward], List[List[BinaryReward]]] + The reward for each sample. + context : Optional[ArrayLike] + Context for each sample. + """ + + self._validate_params_lengths(quantities=quantities, rewards=rewards, **kwargs) + if quantities: + self._update(quantities, rewards, **kwargs) + + @abstractmethod + def _update( + self, + quantities: Optional[List[Union[float, List[float], None]]], + rewards: Union[List[BinaryReward], List[List[BinaryReward]]], + **kwargs, + ): + """ + Update the model parameters. + + Parameters + ---------- + quantities : Optional[List[Union[float, List[float], None]] + The value associated with each action. If none, the value is not used, i.e. non-quantitative action. + rewards: Union[List[BinaryReward], List[List[BinaryReward]]] + The reward for each sample. + """ + + +class QuantitativeModelCC(BaseModelCC, ABC): + """ + Class to model quantitative action cost. 
+ + Parameters + ---------- + cost: Callable[[Union[float, NonNegativeFloat]], NonNegativeFloat] + Cost associated to the Beta distribution. + """ + + cost: Callable[[Union[float, NonNegativeFloat]], NonNegativeFloat] + + +class Segment(PyBanditsBaseModel): + """ + Segment class. + + Parameters + ---------- + intervals: Tuple[Tuple[Probability, Probability], ...] + Intervals of the segment. + """ + + intervals: Tuple[Tuple[Probability, Probability], ...] + + @property + def mins(self) -> np.ndarray: + return self.intervals_array[:, 0] + + @property + def maxs(self) -> np.ndarray: + return self.intervals_array[:, 1] + + @property + def intervals_array(self) -> np.ndarray: + array_form = np.array(self.intervals) + if array_form.size == 0: + return np.array([[], []]).T + return np.array(self.intervals) + + @field_validator("intervals", mode="before") + @classmethod + def segment_intervals_to_tuple(cls, value): + if isinstance(value, np.ndarray): + if value.shape[1] != 2: + raise ValueError("Intervals must have shape (n, 2).") + return tuple(tuple(v) for v in value) + return value + + def split(self) -> Tuple["Segment", "Segment"]: + middles = (self.mins + self.maxs) / 2 + left_intervals = np.concatenate(np.atleast_2d(self.mins, middles), axis=1) + right_intervals = np.concatenate(np.atleast_2d(middles, self.maxs), axis=1) + return Segment(intervals=left_intervals), Segment(intervals=right_intervals) + + def __add__(self, other: "Segment") -> "Segment": + """ + Add two adjacent segments. + + Parameters + ---------- + other : Segment + Segment to add. + + Returns + ------- + Segment + The merged segment. + The merged segment. + """ + if not self.is_adjacent(other): + raise ValueError("Segments must be adjacent.") + to_concatenate = (self.mins, other.maxs) if self.maxs == other.mins else (other.mins, self.maxs) + new_intervals = np.concatenate(np.atleast_2d(*to_concatenate), axis=1) + return Segment(intervals=new_intervals) + + def __hash__(self) -> int: + return tuple(self.intervals_array.flatten()).__hash__() + + def __contains__(self, value: Union[float, np.ndarray]) -> bool: + """ + Check if a value is contained in segment. + + Parameters + ---------- + value : Union[float, np.ndarray] + Value to check. + + Returns + ------- + bool + Whether the value is contained in the segment. + """ + if (isinstance(value, np.ndarray) and value.shape != self.intervals_array.shape[1]) or ( + isinstance(value, float) and len(self.intervals_array) != 1 + ): + raise ValueError("Tested value must have the same shape as the intervals.") + return bool( + np.all( + np.logical_and( + (self.mins <= value), + np.logical_or((value < self.maxs), np.logical_and(value == self.maxs, self.maxs == 1)), + ) + ) + ) + + def __eq__(self, other) -> bool: + return np.all(self.intervals_array == other.intervals_array) + + def is_adjacent(self, other: "Segment") -> bool: + """ + Check if two segments are adjacent. + + Parameters + ---------- + other : Segment + Segment to check. + + Returns + ------- + bool + Whether the segments are adjacent. + """ + if self.intervals_array.shape[0] != other.intervals_array.shape[0]: + raise ValueError("Segments must have the same shape.") + return np.all(self.maxs == other.mins) or np.all(self.mins == other.maxs) + + +class ZoomingModel(QuantitativeModel, ABC): + """ + Base class for zooming models. + + Parameters + ---------- + dimension: PositiveInt + Number of parameters of the model. + comparison_threshold: Probability + Comparison threshold. 
+ n_comparison_points: PositiveInt + Number of comparison points. + n_max_segments: PositiveInt + Maximum number of segments. + sub_actions: Dict[Segment, Optional[Model]] + Mapping of segments to models. + """ + + dimension: PositiveInt + comparison_threshold: Probability = 0.1 + n_comparison_points: PositiveInt = 1000 + n_max_segments: Optional[PositiveInt] = 32 + sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[Model]] + _base_model: Model = PrivateAttr() + + def _validate_segments(self): + if self.n_max_segments is not None and len(self.sub_actions) > self.n_max_segments: + raise ValueError("Number of segments must be less than the maximum number of segments.") + dimensions = {len(segment) for segment in self.sub_actions.keys()} + if dimensions != {self.dimension}: + raise ValueError(f"All segments must have the same dimension {self.dimension}.") + + def model_post_init(self, __context: Any) -> None: + self._validate_segments() + self._init_base_model() + segment_models_types = set(type(model) if model is not None else None for model in self.sub_actions.values()) + if None in segment_models_types: + if len(segment_models_types) > 1: + raise ValueError("All segments must either have a model or miss a model.") + self.sub_actions = dict(zip(self.sub_actions, [self._base_model.model_copy()] * len(self.sub_actions))) + + @property + def segmented_actions(self) -> Dict[Segment, Optional[Model]]: + return {Segment(intervals=segment): model for segment, model in self.sub_actions.items()} + + @abstractmethod + def _init_base_model(self): + """ + Initialize the base model. + """ + + @classmethod + @validate_call + def cold_start( + cls, + dimension: PositiveInt = 1, + n_1d_segments: PositiveInt = 2, + comparison_threshold: Probability = 0.1, + n_comparison_points: PositiveInt = 1000, + n_max_segments: Optional[PositiveInt] = 32, + **kwargs, + ) -> Self: + """ + Create a cold start model. + + Returns + ------- + ZoomingModel + Cold start model. + """ + interval_points = np.linspace(0, 1, n_1d_segments + 1) + intervals = [(interval_points[i], interval_points[i + 1]) for i in range(n_1d_segments)] + sub_actions = {tuple(segment): None for segment in product(intervals, repeat=dimension)} + return cls( + dimension=dimension, + comparison_threshold=comparison_threshold, + n_comparison_points=n_comparison_points, + n_max_segments=n_max_segments, + sub_actions=sub_actions, + **kwargs, + ) + + def sample_proba(self, **kwargs) -> List[QuantitativeProbability]: + """ + Sample an action value from each of the intervals. + """ + result = [] + for segment, model in self.segmented_actions.items(): + sampled_proba = model.sample_proba(**kwargs) + random_point = np.random.random((len(sampled_proba), len(segment.intervals))) + scaled_quantity = segment.mins.T + random_point * (segment.maxs.T - segment.mins.T) + + result.append(tuple((tuple(quantity), prob) for quantity, prob in zip(scaled_quantity, sampled_proba))) + result = list(zip(*result)) + return result + + def _update(self, quantities: List[Union[float, np.ndarray]], rewards: List[BinaryReward], **kwargs): + """ + Update the model parameters. + + Parameters + ---------- + quantities : List[Union[float, np.ndarray]] + The value associated with each action. + rewards: List[BinaryReward] + The reward for each sample. + context : Optional[ArrayLike] + Context for each sample. 
+ """ + + segments = self._map_and_update_segment_models(quantities, rewards, **kwargs) + self._update_segmentation(quantities, segments, rewards, **kwargs) + + def _map_and_update_segment_models( + self, quantities: List[Union[float, np.ndarray]], rewards: List[BinaryReward], **kwargs + ) -> List[Segment]: + """ + Map and update the segment models. + + Parameters + ---------- + quantities : List[Union[float, np.ndarray]] + The value associated with each action. + rewards: List[BinaryReward] + The reward for each sample. + + Returns + ------- + List[Segment] + Segments to update. + """ + segments = self._map_values_to_segments(quantities) + self._inner_update(segments, rewards, **kwargs) + return segments + + @abstractmethod + def _inner_update(self, segments: List[Segment], rewards: List[BinaryReward], **kwargs): + """ + Update the segments models. + + Parameters + ---------- + segments : List[Segment] + Segments to update. + rewards : List[BinaryReward] + Rewards for update. + context : Optional[ArrayLike] + Context for update. + """ + + def _map_values_to_segments(self, quantities: List[Union[float, np.ndarray]]) -> List[Segment]: + segments = [segment for value in quantities for segment in self.segmented_actions.keys() if value in segment] + return segments + + def _update_segmentation( + self, + quantities: List[Union[float, np.ndarray]], + segments: List[Segment], + rewards: List[BinaryReward], + **kwargs, + ): + segment_scores = {segment: model.mean for segment, model in self.segmented_actions.items()} + ordered_segments = sorted(segment_scores, key=segment_scores.get) + best_segment = ordered_segments[-1] + del self.sub_actions[best_segment.intervals] + + # Consider merging adjacent segments + worst_segments = ordered_segments[:-1] + i = 0 + while i < len(worst_segments) - 1: + segment = worst_segments[i] + j = i + 1 + while j < len(worst_segments): + other_segment = worst_segments[j] + if segment.is_adjacent(other_segment) and self.is_similar_performance(segment, other_segment): + del self.sub_actions[segment.intervals] + del self.sub_actions[other_segment.intervals] + worst_segments.remove(segment) + worst_segments.remove(other_segment) + merged_segment = segment + other_segment + self.sub_actions[merged_segment.intervals] = self._base_model.model_copy() + filtered_quantities, filtered_rewards, filtered_kwargs = self._filter_by_segment( + merged_segment, quantities, segments, rewards, **kwargs + ) + self._map_and_update_segment_models(filtered_quantities, filtered_rewards, **filtered_kwargs) + break + j += 1 + i += 1 + + # Split best segment if possible + if self.n_max_segments is None or len(self.sub_actions) < self.n_max_segments: + sub_best_segments = best_segment.split() + self.sub_actions[sub_best_segments[0].intervals] = self._base_model.model_copy() + self.sub_actions[sub_best_segments[1].intervals] = self._base_model.model_copy() + filtered_quantities, filtered_rewards, filtered_kwargs = self._filter_by_segment( + best_segment, quantities, segments, rewards, **kwargs + ) + self._map_and_update_segment_models(filtered_quantities, filtered_rewards, **filtered_kwargs) + + def is_similar_performance(self, segment1: Segment, segment2: Segment) -> bool: + """ + Check if two segments have similar performance. + + Parameters + ---------- + segment1 : Segment + First segment. + segment2 : Segment + Second segment. + + Returns + ------- + bool + Whether the segments have similar performance. 
+ """ + x = np.linspace(0, 1, self.n_comparison_points) + model1 = self.sub_actions[segment1.intervals] + model2 = self.sub_actions[segment2.intervals] + p1 = beta.pdf(x, model1.n_successes, model1.n_failures) + p2 = beta.pdf(x, model2.n_successes, model2.n_failures) + return jensenshannon(p1, p2) < self.comparison_threshold + + def _filter_by_segment( + self, + reference_segment: Segment, + quantities: List[Union[float, np.ndarray]], + segments: List[Segment], + rewards: List[BinaryReward], + **kwargs, + ) -> Tuple[List[Union[float, np.ndarray]], List[BinaryReward], Dict[str, Any]]: + """ + Filter and update the segments models. + + Parameters + ---------- + reference_segment : Segment + Reference segment to filter upon. + segments : List[Segment] + Segments to filter. + quantities : List[Union[float, np.ndarray]] + Values to filter. + rewards : List[BinaryReward] + Rewards to filter. + + Returns + ------- + filtered_values : List[Union[float, np.ndarray]] + Filtered quantities. + filtered_rewards : List[BinaryReward] + Filtered rewards. + filtered_kwargs : Dict[str, Any] + Filtered context. + """ + filtered_values_rewards_kwargs = [ + (value, reward, *[kwarg[i] for kwarg in kwargs.values()]) + for i, (value, reward, segment) in enumerate(zip(quantities, rewards, segments)) + if segment == reference_segment + ] + if filtered_values_rewards_kwargs: + filtered_values, filtered_rewards, *filtered_kwargs = zip(*filtered_values_rewards_kwargs) + filtered_kwargs = dict(zip(kwargs.keys(), filtered_kwargs)) + else: + filtered_values, filtered_rewards, filtered_kwargs = [], [], {k: [] for k in kwargs.keys()} + filtered_kwargs = { + k: np.array(v) if isinstance(kwargs[k], np.ndarray) else v for k, v in filtered_kwargs.items() + } + return filtered_values, filtered_rewards, filtered_kwargs + + +class BaseSmabZoomingModel(ZoomingModel, ABC): + """ + Zooming model for sMAB. + + Parameters + ---------- + dimension: PositiveInt + Number of parameters of the model. + comparison_threshold: Probability + Comparison threshold. + n_comparison_points: PositiveInt + Number of comparison points. + n_max_segments: PositiveInt + Maximum number of segments. + sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[Beta]] + Mapping of segments to Beta models. + """ + + sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[Beta]] + + def _init_base_model(self): + """ + Initialize the base model. + """ + self._base_model = Beta() + + @validate_call(config=dict(arbitrary_types_allowed=True)) + def update( + self, + quantities: Optional[List[Union[float, List[float], None]]], + rewards: Union[List[BinaryReward], List[List[BinaryReward]]], + ): + """ + Update the model parameters. + + Parameters + ---------- + quantities : Optional[List[Union[float, List[float], None]] + The value associated with each action. If none, the value is not used, i.e. non-quantitative action. + rewards: Union[List[BinaryReward], List[List[BinaryReward]]] + The reward for each sample. + context : Optional + Placeholder for context. + """ + super().update(quantities, rewards) + + @validate_call(config=dict(arbitrary_types_allowed=True)) + def _inner_update(self, segments: List[Segment], rewards: List[BinaryReward]): + """ + Update the segments models. + + Parameters + ---------- + segments : List[Segment] + Segments to update. + rewards : List[BinaryReward] + Rewards for update. 
+ """ + segments = np.array(segments) + rewards = np.array(rewards) + for segment in set(segments): + rewards_of_segment = rewards[segments == segment].tolist() + self.sub_actions[segment.intervals].update(rewards=rewards_of_segment) + + +class SmabZoomingModel(BaseSmabZoomingModel): + """ + Zooming model for sMAB. + + Parameters + ---------- + dimension: PositiveInt + Number of parameters of the model. + comparison_threshold: Probability + Comparison threshold. + n_comparison_points: PositiveInt + Number of comparison points. + n_max_segments: PositiveInt + Maximum number of segments. + sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[Beta]] + Mapping of segments to Beta models. + """ + + +class SmabZoomingModelCC(BaseSmabZoomingModel, QuantitativeModelCC): + """ + Zooming model for sMAB with cost control. + + Parameters + ---------- + comparison_threshold: Probability + Comparison threshold. + n_comparison_points: PositiveInt + Number of comparison points. + n_max_segments: PositiveInt + Maximum number of segments. + cost: Callable[[Union[float, NonNegativeFloat]], NonNegativeFloat] + Cost associated to the Beta distribution. + """ + + +class BaseCmabZoomingModel(ZoomingModel, ABC): + """ + Zooming model for CMAB. + + Parameters + ---------- + dimension: PositiveInt + Number of parameters of the model. + comparison_threshold: Probability + Comparison threshold. + n_comparison_points: PositiveInt + Number of comparison points. + n_max_segments: PositiveInt + Maximum number of segments. + sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[BayesianLogisticRegression]] + Mapping of segments to Bayesian Logistic Regression models. + base_model_cold_start_kwargs: Dict[str, Any] + Keyword arguments for the base model cold start. + """ + + sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[BayesianLogisticRegression]] + base_model_cold_start_kwargs: Dict[str, Any] + + @field_validator("base_model_cold_start_kwargs", mode="before") + @classmethod + def validate_n_features(cls, value): + if "n_features" not in value: + raise KeyError("n_features must be in base_model_cold_start_kwargs.") + return value + + def _init_base_model(self): + """ + Initialize the base model. + """ + self._base_model = BayesianLogisticRegression.cold_start(**self.base_model_cold_start_kwargs) + + @validate_call(config=dict(arbitrary_types_allowed=True)) + def _inner_update(self, segments: List[Segment], rewards: List[BinaryReward], context: ArrayLike): + """ + Update the segments models. + + Parameters + ---------- + segments : List[Segment] + Segments to update. + rewards : List[BinaryReward] + Rewards for update. + context : Optional[ArrayLike] + Context for update. + """ + segments = np.array(segments) + rewards = np.array(rewards) + context = np.array(context) + for segment in set(segments): + rewards_of_segment = rewards[segments == segment].tolist() + context_of_segment = context[segments == segment] + if rewards_of_segment: + self.sub_actions[segment.intervals].update(rewards=rewards_of_segment, context=context_of_segment) + + +class CmabZoomingModel(BaseCmabZoomingModel): + """ + Zooming model for CMAB. + + Parameters + ---------- + dimension: PositiveInt + Number of parameters of the model. + comparison_threshold: Probability + Comparison threshold. + n_comparison_points: PositiveInt + Number of comparison points. + n_max_segments: PositiveInt + Maximum number of segments. 
+ sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[BayesianLogisticRegression]] + Mapping of segments to Bayesian Logistic Regression models. + base_model_cold_start_kwargs: Dict[str, Any] + Keyword arguments for the base model cold start. + """ + + +class CmabZoomingModelCC(BaseCmabZoomingModel, QuantitativeModelCC): + """ + Zooming model for CMAB with cost control. + + Parameters + ---------- + comparison_threshold: Probability + Comparison threshold. + n_comparison_points: PositiveInt + Number of comparison points. + n_max_segments: PositiveInt + Maximum number of segments. + base_model_cold_start_kwargs: Dict[str, Any] + Keyword arguments for the base model cold start. + cost: Callable[[Union[float, NonNegativeFloat]], NonNegativeFloat] + Cost associated to the Beta distribution. + """ diff --git a/pybandits/simulator.py b/pybandits/simulator.py index c9f143a..8b7618c 100644 --- a/pybandits/simulator.py +++ b/pybandits/simulator.py @@ -23,10 +23,11 @@ import os.path import random from abc import ABC, abstractmethod -from functools import cached_property -from typing import Any, Dict, List, Optional, Tuple, Union +from functools import cached_property, lru_cache +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import numpy as np +import optuna import pandas as pd from bokeh.core.enums import Palette from bokeh.layouts import layout @@ -34,8 +35,16 @@ from bokeh.palettes import Category10, Turbo256 from bokeh.plotting import figure from loguru import logger - -from pybandits.base import ActionId, BinaryReward, PyBanditsBaseModel +from scipy.interpolate import make_interp_spline + +from pybandits.base import ( + ActionId, + BinaryReward, + Probability, + PyBanditsBaseModel, + UnifiedActionId, +) +from pybandits.base_model import BaseModelSO from pybandits.mab import BaseMab from pybandits.pydantic_version_compatibility import ( PYDANTIC_VERSION_1, @@ -47,8 +56,16 @@ model_validator, pydantic_version, ) +from pybandits.quantitative_model import QuantitativeModel from pybandits.utils import in_jupyter_notebook, visualize_via_bokeh +# context quantity +DoubleParametricActionProbability = Callable[[np.ndarray, np.ndarray], Probability] +# one of: quantity or context +ParametricActionProbability = Callable[[np.ndarray], Probability] +ProbabilityValue = Union[Probability, ParametricActionProbability, DoubleParametricActionProbability] +ActionProbabilityGroundTruth = Dict[ActionId, ProbabilityValue] + class Simulator(PyBanditsBaseModel, ABC): """ @@ -64,12 +81,12 @@ class Simulator(PyBanditsBaseModel, ABC): MAB model. n_updates : PositiveInt, defaults to 10 The number of updates (i.e. batches of samples) in the simulation. - batch_size: PositiveInt, defaults to 100 + batch_size : PositiveInt, defaults to 100 The number of samples per batch. - probs_reward : Optional[pd.DataFrame], default=None + probs_reward : Optional[Union[ActionProbabilityGroundTruth, Dict[str, ActionProbabilityGroundTruth]]] The reward probability for the different actions. If None probabilities are set to 0.5. - The keys of the dict must match the mab actions_ids, and the values are float in the interval [0, 1]. - e.g. probs_reward=pd.DataFrame({"a1 A": [0.6], "a2 B": [0.5], "a3": [0.8]}). + The keys of the dict must match the mab actions_ids, and the quantities are float in the interval [0, 1]. + e.g. probs_reward={"a1 A": [0.6], "a2 B": [0.5], "a3": [0.8]}. Note that currently only single-objective reward is supported. 
save : bool, defaults to False Boolean flag to save the results. @@ -88,7 +105,7 @@ class Simulator(PyBanditsBaseModel, ABC): mab: BaseMab n_updates: PositiveInt = 10 batch_size: PositiveInt = 100 - probs_reward: Optional[pd.DataFrame] = None + probs_reward: Optional[Union[ActionProbabilityGroundTruth, Dict[str, ActionProbabilityGroundTruth]]] = None save: bool = False path: str = "" file_prefix: str = "" @@ -115,31 +132,27 @@ class Config: else: raise ValueError(f"Unsupported pydantic version: {pydantic_version}") - @field_validator("probs_reward", mode="before") @classmethod - def validate_probs_reward_values(cls, value): - if value is not None: - if not all(value.dtypes.apply(lambda x: x.kind == "f")): - raise ValueError("probs_reward values must be float.") - if not value.applymap(lambda x: 0 <= x <= 1).all().all(): - raise ValueError("probs_reward values must be in the interval [0, 1].") - return value + def _validate_probs_reward_dict( + cls, action_probability_ground_truth: ActionProbabilityGroundTruth, actions: Dict[ActionId, BaseModelSO] + ): + if set(action_probability_ground_truth.keys()) != set(actions.keys()): + raise ValueError("The keys of the action probability ground truth dictionary must match the actions.") + for action_id, probability in action_probability_ground_truth.items(): + is_quantitative_action = isinstance(actions[action_id], QuantitativeModel) + if not is_quantitative_action: + if not isinstance(probability, float): + raise ValueError("The probability must be a float.") + if not 0 <= probability <= 1: + raise ValueError("The probability must be in the interval [0, 1].") + else: + if not callable(probability): + raise ValueError("The probability must be a callable function.") @field_validator("file_prefix", mode="before") def maybe_alter_file_prefix(cls, value): return f"{value}_" if value else "" - @model_validator(mode="before") - @classmethod - def validate_probs_reward_columns(cls, values): - if "probs_reward" in values and values["probs_reward"] is not None: - mab_action_ids = list(values["mab"].actions.keys()) - if set(values["probs_reward"].columns) != set(mab_action_ids): - raise ValueError("probs_reward columns must match mab actions ids.") - if values["probs_reward"].shape[1] != len(mab_action_ids): - raise ValueError("probs_reward columns must be the same as the number of MAB actions.") - return values - @model_validator(mode="before") @classmethod def validate_visualize_without_save(cls, values): @@ -157,6 +170,64 @@ def model_post_init(self, __context: Any) -> None: np.random.default_rng(self.random_seed) self._initialize_results() + @classmethod + def _generate_prob_reward( + cls, + first_dimension: PositiveInt, + second_dimension: NonNegativeInt = 0, + n_points: PositiveInt = 10, + spline_degree: PositiveInt = 3, + ) -> Union[ParametricActionProbability, DoubleParametricActionProbability]: + """ + Generate a spline for the given dimensions. + + Parameters + ---------- + first_dimension : PositiveInt + The first dimension. + second_dimension : NonNegativeInt, defaults to 0 + The second dimension. + n_points : PositiveInt, defaults to 10 + The number of points to sample + spline_degree : PositiveInt, defaults to 3 + The degree of the spline. + + Returns + ------- + Union[ParametricActionProbability, DoubleParametricActionProbability] + The spline function. 
+ """ + if spline_degree >= n_points: + raise ValueError(f"spline_degree ({spline_degree}) must be less than n_points ({n_points})") + + def sigmoid(s: np.ndarray) -> np.ndarray: + return np.where(s >= 0, 1 / (1 + np.exp(-s)), np.exp(s) / (1 + np.exp(s))).item() + + # Create the spline once + + splines = [ + make_interp_spline(np.linspace(0, 1, n_points), np.random.uniform(-1, 1, n_points), k=spline_degree) + for _ in range(first_dimension + second_dimension) + ] + weights = np.random.uniform(0, 1, first_dimension + second_dimension) + weights = weights / weights.sum() # Normalize to sum to 1 + + if second_dimension: + + def spline_function(input1: np.ndarray, input2: np.ndarray) -> Probability: + combined_input = np.concatenate((input1, input2)) + logit = (weights * np.array([spline(x) for spline, x in zip(splines, combined_input)])).sum() + return sigmoid(logit) + + return spline_function + else: + + def spline_function(input1: np.ndarray) -> Probability: + logit = (weights * np.array([spline(x) for spline, x in zip(splines, input1)])).sum() + return sigmoid(logit) + + return spline_function + @abstractmethod def _initialize_results(self): """ @@ -224,27 +295,82 @@ def _step( """ # select actions for batch #index predictions = self.mab.predict(**predict_kwargs) - actions = predictions[0] # location 0 is the actions for both SmabPredictions and CmabPredictions - rewards = self._draw_rewards(actions, metadata) + actions_quantities = predictions[0] # location 0 is the actions for both SmabPredictions and CmabPredictions + actions = [x[0] if isinstance(x, tuple) else x for x in actions_quantities] + quantities = [x[1] if isinstance(x, tuple) else None for x in actions_quantities] + if all(q is None for q in quantities): + quantities = None + rewards = self._draw_rewards(actions_quantities, metadata, update_kwargs) # write the selected actions for batch #i in the results matrix - batch_results = pd.DataFrame({"action": actions, "reward": rewards, "batch": batch_index, **metadata}) - batch_results = self._finalize_step(batch_results) + batch_results = pd.DataFrame( + {"action": actions, "reward": rewards, "quantities": quantities, "batch": batch_index, **metadata} + ) + batch_results = self._finalize_step(batch_results, update_kwargs) if not all(col in batch_results.columns for col in self._base_columns): raise ValueError(f"The batch results must contain the {self._base_columns} columns") self._results = pd.concat((self._results, batch_results), ignore_index=True) - self.mab.update(actions=actions, rewards=rewards, **update_kwargs) + self.mab.update(actions=actions, rewards=rewards, quantities=quantities, **update_kwargs) + + @staticmethod + @lru_cache + def _maximize_prob_reward( + prob_reward_func: Callable[[np.ndarray], Probability], input_dimension: PositiveInt, n_trials: PositiveInt = 100 + ) -> Probability: + """ + Maximize the probability of reward for the given function. + + Parameters + ---------- + prob_reward_func : Callable[[np.ndarray], Probability] + The probability of reward function. + input_dimension : PositiveInt + The input dimension. + n_trials : PositiveInt, defaults to 100 + The number of otimization trials. + + Returns + ------- + Probability + The global maxima of prob_reward_func. 
+ """ + + def objective(trial): + # Sample points from [0,1] for each dimension + points = [trial.suggest_float(f"x{i}", 0, 1) for i in range(input_dimension)] + return prob_reward_func(np.array(points)) + + # Configure TPE sampler with multivariate optimization + sampler = optuna.samplers.TPESampler( + multivariate=True, # Enable multivariate optimization + group=True, # Sample joint distribution of parameters + constant_liar=True, # Better parallel optimization handling + ) + + # Create and configure the study + study = optuna.create_study(sampler=sampler, direction="maximize") + + # Run optimization + study.optimize(objective, n_jobs=-1, n_trials=n_trials) # Use all available cores + best_value = study.best_value + if (not isinstance(best_value, float)) or (best_value < 0) or (best_value > 1): + raise ValueError("The best value must be a float in the interval [0, 1].") + return best_value @abstractmethod - def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> List[BinaryReward]: + def _draw_rewards( + self, actions: List[UnifiedActionId], metadata: Dict[str, List], update_kwargs: Dict[str, np.ndarray] + ) -> List[BinaryReward]: """ Draw rewards for the selected actions based on metadata according to probs_reward. Parameters ---------- - actions : List[ActionId] + actions : List[UnifiedActionId] The actions selected by the multi-armed bandit model. metadata : Dict[str, List] The metadata for the selected actions. + update_kwargs : Dict[str, np.ndarray] + Update keyword arguments. Returns ------- @@ -253,6 +379,18 @@ def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> L """ pass + @abstractmethod + def _extract_ground_truth(self, *args, **kwargs) -> Probability: + """ + Extract the ground truth probability for the action. + + Returns + ------- + Probability + The ground truth probability for the action. + """ + pass + @abstractmethod def _get_batch_step_kwargs_and_metadata( self, batch_index: int @@ -278,7 +416,7 @@ def _get_batch_step_kwargs_and_metadata( pass @abstractmethod - def _finalize_step(self, batch_results: pd.DataFrame) -> pd.DataFrame: + def _finalize_step(self, batch_results: pd.DataFrame, update_kwargs: Dict[str, np.ndarray]) -> pd.DataFrame: """ Finalize the step by adding additional information to the batch results. @@ -286,6 +424,8 @@ def _finalize_step(self, batch_results: pd.DataFrame) -> pd.DataFrame: ---------- batch_results : pd.DataFrame raw batch results + update_kwargs : Dict[str, np.ndarray] + Update keyword arguments Returns ------- @@ -294,16 +434,16 @@ def _finalize_step(self, batch_results: pd.DataFrame) -> pd.DataFrame: """ pass - @abstractmethod def _finalize_results(self): """ - Finalize the simulation process. It can be used to add additional information to the results. + Finalize the simulation process. Used to add regret and cumulative regret Returns ------- None """ - pass + self._results["regret"] = self._results["max_prob_reward"] - self._results["selected_prob_reward"] + self._results["cum_regret"] = self._results["regret"].cumsum() @cached_property def _action_ids(self) -> List[ActionId]: @@ -344,7 +484,7 @@ def _colors(self) -> Palette: return Category10[max(n_actions, min(category10_keys))] if n_actions <= max(category10_keys) else Turbo256 @classmethod - def _impute_missing_counts(cls, df, action_ids): + def _impute_missing_counts(cls, df: pd.DataFrame, action_ids: List[ActionId]) -> pd.DataFrame: """ Impute missing counts for actions in the data frame. 
@@ -375,7 +515,7 @@ def selected_actions_count(self) -> pd.DataFrame: ------- counts_df : pd.DataFrame Data frame with batch serial number as index (or total for all batches), actions as columns, - and count of recommended actions as values + and count of recommended actions as quantities """ groupby_cols = [col for col in self._base_columns if col not in ["reward", "action"]] counts_df = self._results.groupby(groupby_cols)["action"].value_counts().unstack(fill_value=0).reset_index() @@ -415,7 +555,7 @@ def positive_reward_proportion(self) -> pd.DataFrame: Returns ------- proportion_df : pd.DataFrame - Data frame with actions as index, and proportion of positive rewards as values + Data frame with actions as index, and proportion of positive rewards as quantities """ groupby_cols = [col for col in self._base_columns if col not in ["reward", "batch"]] proportion_df = self._results.groupby(groupby_cols)["reward"].mean().to_frame(name="proportion") diff --git a/pybandits/smab.py b/pybandits/smab.py index 614434c..9ffde29 100644 --- a/pybandits/smab.py +++ b/pybandits/smab.py @@ -27,12 +27,13 @@ from pybandits.base import ( ActionId, BinaryReward, - Probability, SmabPredictions, + UnifiedActionId, ) from pybandits.mab import BaseMab from pybandits.model import BaseBeta, Beta, BetaCC, BetaMO, BetaMOCC from pybandits.pydantic_version_compatibility import PositiveInt, field_validator, validate_call +from pybandits.quantitative_model import BaseSmabZoomingModel, SmabZoomingModel, SmabZoomingModelCC from pybandits.strategy import ( BestActionIdentificationBandit, ClassicBandit, @@ -49,13 +50,13 @@ class BaseSmabBernoulli(BaseMab): Parameters ---------- - actions: Dict[ActionId, BaseBeta] + actions: Dict[ActionId, Union[BaseBeta, BaseSmabZoomingModel]] The list of possible actions, and their associated Model. strategy: Strategy The strategy used to select actions. """ - actions: Dict[ActionId, BaseBeta] + actions: Dict[ActionId, Union[BaseBeta, BaseSmabZoomingModel]] @validate_call def predict( @@ -68,7 +69,7 @@ def predict( Parameters ---------- - n_samples : int > 0, default=1 + n_samples : PositiveInt, default=1 Number of samples to predict. forbidden_actions : Optional[Set[ActionId]], default=None Set of forbidden actions. If specified, the model will discard the forbidden_actions and it will only @@ -77,25 +78,24 @@ def predict( Returns ------- - actions: List[ActionId] of shape (n_samples,) + actions: List[UnifiedActionId] The actions selected by the multi-armed bandit model. - probs: List[Dict[ActionId, Probability]] of shape (n_samples,) + probs: Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, MOProbability]]] The probabilities of getting a positive reward for each action. 
""" - valid_actions = self._get_valid_actions(forbidden_actions) - selected_actions: List[ActionId] = [] - probs: List[Dict[ActionId, Probability]] = [] - - for _ in range(n_samples): - p = {action: model.sample_proba() for action, model in self.actions.items() if action in valid_actions} - selected_actions.append(self._select_epsilon_greedy_action(p=p, actions=self.actions)) - probs.append(p) + probs = self._get_action_probabilities(forbidden_actions=forbidden_actions, n_samples=n_samples) + selected_actions = [self._select_epsilon_greedy_action(p=prob, actions=self.actions) for prob in probs] return selected_actions, probs @validate_call - def update(self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]]): + def _update( + self, + actions: List[UnifiedActionId], + rewards: Union[List[BinaryReward], List[List[BinaryReward]]], + quantities: Optional[List[Union[float, List[float], None]]], + ): """ Update the stochastic Bernoulli bandit given the list of selected actions and their corresponding binary rewards. @@ -104,23 +104,35 @@ def update(self, actions: List[ActionId], rewards: Union[List[BinaryReward], Lis ---------- actions : List[ActionId] of shape (n_samples,), e.g. ['a1', 'a2', 'a3', 'a4', 'a5'] The selected action for each sample. - rewards : List[Union[BinaryReward, List[BinaryReward]]] of shape (n_samples, n_objectives) + rewards : Union[List[BinaryReward], List[List[BinaryReward]]], + if nested list, len() should follow shape of (n_samples, n_objectives) The binary reward for each sample. If strategy is not MultiObjectiveBandit, rewards should be a list, e.g. rewards = [1, 0, 1, 1, 1, ...] If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2): rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...] + quantities : Optional[List[Union[float, List[float], None]]] + The value associated with each action. If none, the value is not used, i.e. non-quantitative action. """ - self._validate_update_params(actions=actions, rewards=rewards) - rewards_dict = defaultdict(list) - for a, r in zip(actions, rewards): - rewards_dict[a].append(r) - - for a in set(actions): - self.actions[a].update(rewards=rewards_dict[a]) + if quantities is None: + for a, r in zip(actions, rewards): + rewards_dict[a].append(r) + for a in set(actions): + self.actions[a].update(rewards=rewards_dict[a]) + else: + quantities_dict = defaultdict(list) + for a, v, r in zip(actions, quantities, rewards): + if v is not None: + quantities_dict[a].append(v) + rewards_dict[a].append(r) + for a in set(actions): + if quantities_dict[a]: # quantitative action + self.actions[a].update(rewards=rewards_dict[a], quantities=quantities_dict[a]) + else: # non-quantitative action + self.actions[a].update(rewards=rewards_dict[a]) class SmabBernoulli(BaseSmabBernoulli): @@ -132,13 +144,13 @@ class SmabBernoulli(BaseSmabBernoulli): Parameters ---------- - actions: Dict[ActionId, Beta] + actions: Dict[UnifiedActionId, Union[Beta, SmabZoomingModel]] The list of possible actions, and their associated Model. strategy: ClassicBandit The strategy used to select actions. """ - actions: Dict[ActionId, Beta] + actions: Dict[ActionId, Union[Beta, SmabZoomingModel]] strategy: ClassicBandit @@ -151,13 +163,13 @@ class SmabBernoulliBAI(BaseSmabBernoulli): Parameters ---------- - actions: Dict[ActionId, Beta] + actions: Dict[ActionId, Union[Beta, SmabZoomingModel]] The list of possible actions, and their associated Model. 
strategy: BestActionIdentificationBandit The strategy used to select actions. """ - actions: Dict[ActionId, Beta] + actions: Dict[ActionId, Union[Beta, SmabZoomingModel]] strategy: BestActionIdentificationBandit @@ -178,13 +190,13 @@ class SmabBernoulliCC(BaseSmabBernoulli): Parameters ---------- - actions: Dict[ActionId, BetaCC] + actions: Dict[ActionId, Union[BetaCC, SmabZoomingModelCC]] The list of possible actions, and their associated Model. strategy: CostControlBandit The strategy used to select actions. """ - actions: Dict[ActionId, BetaCC] + actions: Dict[ActionId, Union[BetaCC, SmabZoomingModelCC]] strategy: CostControlBandit @@ -207,7 +219,7 @@ class BaseSmabBernoulliMO(BaseSmabBernoulli): @field_validator("actions", mode="after") @classmethod def all_actions_have_same_number_of_objectives(cls, actions: Dict[ActionId, BetaMO]): - n_objs_per_action = [len(beta.counters) for beta in actions.values()] + n_objs_per_action = [len(beta.models) for beta in actions.values()] if len(set(n_objs_per_action)) != 1: raise ValueError("All actions should have the same number of objectives") return actions diff --git a/pybandits/smab_simulator.py b/pybandits/smab_simulator.py index 400fd1b..726284b 100644 --- a/pybandits/smab_simulator.py +++ b/pybandits/smab_simulator.py @@ -21,16 +21,22 @@ # SOFTWARE. import random -from typing import Dict, List, Tuple +from typing import Callable, Dict, List, Optional, Tuple, Union import numpy as np import pandas as pd -from pybandits.base import ActionId, BinaryReward +from pybandits.base import ActionId, BinaryReward, Probability, UnifiedActionId from pybandits.pydantic_version_compatibility import Field, model_validator +from pybandits.quantitative_model import QuantitativeModel from pybandits.simulator import Simulator from pybandits.smab import BaseSmabBernoulli +# quantity +ParametricActionProbability = Callable[[np.ndarray], Probability] +SmabProbabilityValue = Union[Probability, ParametricActionProbability] +SmabActionProbabilityGroundTruth = Dict[ActionId, SmabProbabilityValue] + class SmabSimulator(Simulator): """ @@ -46,35 +52,51 @@ class SmabSimulator(Simulator): sMAB model. """ + probs_reward: Optional[Union[SmabActionProbabilityGroundTruth, Dict[str, SmabActionProbabilityGroundTruth]]] = None mab: BaseSmabBernoulli = Field(validation_alias="smab") _base_columns: List[str] = ["batch", "action", "reward"] @model_validator(mode="before") @classmethod def replace_null_and_validate_probs_reward(cls, values): - mab_action_ids = list(values["mab"].actions.keys()) probs_reward = cls._get_value_with_default("probs_reward", values) if probs_reward is None: - probs_reward = pd.DataFrame(0.5, index=[0], columns=mab_action_ids) + probs_reward = { + action: cls._generate_prob_reward(model.dimension) + if isinstance(model, QuantitativeModel) + else np.random.random() + for action, model in values["mab"].actions.items() + } values["probs_reward"] = probs_reward else: if len(probs_reward) != 1: raise ValueError("probs_reward must have exactly one row.") return values + @model_validator(mode="before") + @classmethod + def validate_probs_reward_columns(cls, values): + if "probs_reward" in values and values["probs_reward"] is not None: + cls._validate_probs_reward_dict(values["probs_reward"], values["mab"].actions) + return values + def _initialize_results(self): """ Initialize the results DataFrame. The results DataFrame is used to store the raw simulation results. 
""" - self._results = pd.DataFrame(columns=["batch", "action", "reward"]) + self._results = pd.DataFrame( + columns=["batch", "action", "reward", "quantities", "selected_prob_reward", "max_prob_reward"] + ) - def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> List[BinaryReward]: + def _draw_rewards( + self, actions: List[UnifiedActionId], metadata: Dict[str, List], update_kwargs: Dict[str, np.ndarray] + ) -> List[BinaryReward]: """ Draw rewards for the selected actions according to probs_reward. Parameters ---------- - actions : List[ActionId] + actions : List[UnifiedActionId] The actions selected by the multi-armed bandit model. metadata : Dict[str, List] The metadata for the selected actions. Not used in this implementation. @@ -84,9 +106,31 @@ def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> L reward : List[BinaryReward] A list of binary rewards. """ - rewards = [int(random.random() < self.probs_reward.loc[0, a]) for a in actions] + rewards = [int(random.random() < self._extract_ground_truth(a)) for a in actions] return rewards + def _extract_ground_truth(self, action: UnifiedActionId) -> Probability: + """ + Extract the ground truth probability for the action. + + Parameters + ---------- + action : UnifiedActionId + The action for which the ground truth probability is extracted. + + Returns + ------- + Probability + The ground truth probability for the action. + """ + return ( + self.probs_reward[action[0]](np.array(action[1])) + if isinstance(action, tuple) and action[1] is not None + else self.probs_reward[action[0]] + if isinstance(action, tuple) + else self.probs_reward[action] + ) + def _get_batch_step_kwargs_and_metadata( self, batch_index ) -> Tuple[Dict[str, int], Dict[str, np.ndarray], Dict[str, List]]: @@ -113,28 +157,33 @@ def _get_batch_step_kwargs_and_metadata( metadata = {} return predict_kwargs, update_kwargs, metadata - def _finalize_step(self, batch_results: pd.DataFrame) -> pd.DataFrame: + def _finalize_step(self, batch_results: pd.DataFrame, update_kwargs: Dict[str, np.ndarray]) -> pd.DataFrame: """ Finalize the step by adding additional information to the batch results. Parameters ---------- batch_results : pd.DataFrame - raw batch results + Raw batch results + update_kwargs : Dict[str, np.ndarray] + Placeholder for interface compatability Returns ------- batch_results : pd.DataFrame - same raw batch results + Same raw batch results """ + action_id = batch_results.loc[:, "action"] + quantity = batch_results.loc[:, "quantities"] + selected_prob_reward = [self._extract_ground_truth((a, q)) for a, q in zip(action_id, quantity)] + batch_results.loc[:, "selected_prob_reward"] = selected_prob_reward + max_prob_reward = [ + max( + self._maximize_prob_reward((lambda q: self.probs_reward[a](q)), m.dimension) + if isinstance(m, QuantitativeModel) + else self.probs_reward[a] + for a, m in self.mab.actions.items() + ) + ] * len(batch_results) + batch_results.loc[:, "max_prob_reward"] = max_prob_reward return batch_results - - def _finalize_results(self): - """ - Finalize the simulation process. It can be used to add additional information to the results. 
- - Returns - ------- - None - """ - pass diff --git a/pybandits/strategy.py b/pybandits/strategy.py index a67be09..5887462 100644 --- a/pybandits/strategy.py +++ b/pybandits/strategy.py @@ -28,8 +28,9 @@ from scipy.stats import ttest_ind_from_stats from typing_extensions import Self -from pybandits.base import ActionId, Float01, Probability, PyBanditsBaseModel -from pybandits.model import Beta, BetaMOCC, Model +from pybandits.base import ActionId, Float01, Probability, PyBanditsBaseModel, UnifiedActionId +from pybandits.base_model import BaseModel +from pybandits.model import Beta, BetaMOCC from pybandits.pydantic_version_compatibility import field_validator, validate_call @@ -60,7 +61,9 @@ def _with_argument(self, argument_name: str, argument_value: Any) -> Self: return mutated_strategy @abstractmethod - def select_action(self, p: Dict[ActionId, Probability], actions: Optional[Dict[ActionId, Model]]) -> ActionId: + def select_action( + self, p: Dict[UnifiedActionId, Probability], actions: Optional[Dict[ActionId, BaseModel]] + ) -> UnifiedActionId: """ Select the action. """ @@ -90,22 +93,22 @@ class ClassicBandit(Strategy): @validate_call def select_action( self, - p: Dict[ActionId, float], - actions: Optional[Dict[ActionId, Model]] = None, - ) -> ActionId: + p: Dict[UnifiedActionId, float], + actions: Optional[Dict[UnifiedActionId, BaseModel]] = None, + ) -> UnifiedActionId: """ Select the action with the highest probability of getting a positive reward. Parameters ---------- - p : Dict[ActionId, Probability] + p : Dict[UnifiedActionId, Probability] The dictionary of actions and their sampled probability of getting a positive reward. - actions : Optional[Dict[ActionId, Model]] + actions : Optional[Dict[UnifiedActionId, BaseModel]] The dictionary of actions and their associated model. Returns ------- - selected_action: ActionId + selected_action: UnifiedActionId The selected action. """ return max(p, key=p.get) @@ -162,9 +165,9 @@ def with_exploit_p(self, exploit_p: Optional[Float01]) -> Self: @validate_call def select_action( self, - p: Dict[ActionId, float], - actions: Optional[Dict[ActionId, Model]] = None, - ) -> ActionId: + p: Dict[UnifiedActionId, float], + actions: Optional[Dict[UnifiedActionId, BaseModel]] = None, + ) -> UnifiedActionId: """ Select with probability self.exploit_p the best action (i.e. the action with the highest probability of getting a positive reward), and with probability 1-self.exploit_p it returns the second best action (i.e. the action @@ -172,14 +175,14 @@ def select_action( Parameters ---------- - p : Dict[ActionId, Probability] + p : Dict[UnifiedActionId, Probability] The dictionary of actions and their sampled probability of getting a positive reward. - actions : Optional[Dict[ActionId, Model]] + actions : Optional[Dict[UnifiedActionId, BaseModel]] The dictionary of actions and their associated model. Returns ------- - selected_action: ActionId + selected_action: UnifiedActionId The selected action. """ p = p.copy() @@ -198,13 +201,13 @@ def select_action( return selected_action # TODO: WIP this is valid only for SmabBernoulli - def compare_best_actions(self, actions: Dict[ActionId, Beta]) -> float: + def compare_best_actions(self, actions: Dict[UnifiedActionId, Beta]) -> float: """ Compare the 2 best actions, hence the 2 actions with the highest expected means of getting a positive reward. 
Parameters ---------- - actions: Dict[ActionId, Beta] + actions: Dict[UnifiedActionId, Beta] Returns ---------- @@ -244,31 +247,34 @@ def _average(cls, p_of_action: Union[Probability, List[Probability]]): @validate_call def _evaluate_and_select( cls, - p: Union[Dict[ActionId, Probability], Dict[ActionId, List[Probability]]], - actions: Dict[ActionId, Model], - feasible_actions: List[ActionId], - ) -> ActionId: + p: Union[Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]], + actions: Dict[UnifiedActionId, BaseModel], + feasible_actions: List[UnifiedActionId], + ) -> UnifiedActionId: """ Evaluate the feasible actions and select the one with the minimum cost. Parameters ---------- - p: Union[Dict[ActionId, Probability], Dict[ActionId, List[Probability]]] + p: Union[Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]] The dictionary of actions and their sampled probability of getting a positive reward. - actions: Dict[ActionId, Model] + actions: Dict[UnifiedActionId, BaseModel] The dictionary of actions and their associated model. - feasible_actions: List[ActionId] + feasible_actions: List[UnifiedActionId] The list of feasible actions. Returns ------- - selected_action: ActionId + selected_action: UnifiedActionId The selected action. """ # feasible actions enriched with their characteristics (cost, np.mean(probabilities), action_id) - # the negative probability ensures that if we order the actions based on their minimum values the one with + # the negative probability ensures that if we order the actions based on their minimum quantities the one with # higher probability will be selected - sortable_actions = [(actions[a].cost, -cls._average(p[a]), a) for a in feasible_actions] + sortable_actions = [ + (actions[a[0]].cost(*a[1]) if isinstance(a, tuple) else actions[a].cost, -cls._average(p[a]), str(a)) + for a in feasible_actions + ] # select the action with the min cost (and the highest mean of probabilities in case of cost equality) _, _, selected_action = sorted(sortable_actions)[0] @@ -331,7 +337,9 @@ def with_subsidy_factor(self, subsidy_factor: Optional[Float01]) -> Self: return mutated_cost_control_bandit @validate_call - def select_action(self, p: Dict[ActionId, Probability], actions: Dict[ActionId, Model]) -> ActionId: + def select_action( + self, p: Dict[UnifiedActionId, Probability], actions: Dict[UnifiedActionId, BaseModel] + ) -> UnifiedActionId: """ Select the action with the minimum cost among the set of feasible actions (the actions whose expected rewards are above a certain lower bound defined as [(1-subsidy_factor)*max_p, max_p], where max_p is the highest @@ -339,14 +347,14 @@ def select_action(self, p: Dict[ActionId, Probability], actions: Dict[ActionId, Parameters ---------- - p: Dict[ActionId, Probability] + p: Dict[UnifiedActionId, Probability] The dictionary or actions and their sampled probability of getting a positive reward. - actions: Dict[ActionId, BetaCC] + actions: Dict[UnifiedActionId, BetaCC] The dictionary or actions and their cost. Returns ------- - selected_action: ActionId + selected_action: UnifiedActionId The selected action. 
""" # get the highest expected reward sampled value @@ -366,14 +374,14 @@ class MultiObjectiveStrategy(Strategy, ABC): @classmethod @validate_call - def get_pareto_front(cls, p: Dict[ActionId, List[Probability]]) -> List[ActionId]: + def get_pareto_front(cls, p: Dict[UnifiedActionId, List[Probability]]) -> List[UnifiedActionId]: """ Create Pareto optimal set of actions (Pareto front) A* identified as actions that are not dominated by any action out of the set A*. Parameters: ----------- - p: Dict[ActionId, Probability] + p: Dict[UnifiedActionId, Probability] The dictionary or actions and their sampled probability of getting a positive reward for each objective. Return @@ -425,7 +433,7 @@ class MultiObjectiveBandit(MultiObjectiveStrategy): """ @validate_call - def select_action(self, p: Dict[ActionId, List[Probability]], **kwargs) -> ActionId: + def select_action(self, p: Dict[UnifiedActionId, List[Probability]], **kwargs) -> UnifiedActionId: """ Select an action at random from the Pareto optimal set of action. The Pareto optimal action set (Pareto front) A* is the set of actions not dominated by any other actions not in A*. Dominance relation is established based @@ -453,7 +461,9 @@ class MultiObjectiveCostControlBandit(MultiObjectiveStrategy, CostControlStrateg """ @validate_call - def select_action(self, p: Dict[ActionId, List[Probability]], actions: Dict[ActionId, BetaMOCC]) -> ActionId: + def select_action( + self, p: Dict[UnifiedActionId, List[Probability]], actions: Dict[UnifiedActionId, BetaMOCC] + ) -> UnifiedActionId: """ Select the action with the minimum cost among the Pareto optimal set of action. The Pareto optimal action set (Pareto front) A* is the set of actions not dominated by any other actions not in A*. Dominance @@ -461,12 +471,12 @@ def select_action(self, p: Dict[ActionId, List[Probability]], actions: Dict[Acti Parameters ---------- - p: Dict[ActionId, List[Probability]] + p: Dict[UnifiedActionId, List[Probability]] The dictionary of actions and their sampled probability of getting a positive reward for each objective. Returns ------- - selected_action: ActionId + selected_action: UnifiedActionId The selected action. """ pareto_set = self.get_pareto_front(p=p) diff --git a/pybandits/utils.py b/pybandits/utils.py index 3d8a62f..35b0685 100644 --- a/pybandits/utils.py +++ b/pybandits/utils.py @@ -19,9 +19,8 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-
-import json
-from typing import Any, Callable, Dict, List, Optional, Union
+import inspect
+from typing import Callable, List, Optional, Tuple

 from bokeh.io import curdoc, output_file, output_notebook, save, show
 from bokeh.models import InlineStyleSheet, TabPanel, Tabs
@@ -29,44 +28,31 @@

 from pybandits.pydantic_version_compatibility import validate_call

-JSONSerializable = Union[str, int, float, bool, None, List["JSONSerializable"], Dict[str, "JSONSerializable"]]
-

 @validate_call
-def to_serializable_dict(d: Dict[str, Any]) -> Dict[str, JSONSerializable]:
-    """
-    Convert a dictionary to a dictionary whose values are JSONSerializable Parameters
-
-    ----------
-    d: dictionary to convert
-
-    Returns
-    -------
-
-    """
-    return json.loads(json.dumps(d, default=dict))
-
-
-@validate_call
-def extract_argument_names_from_function(function_handle: Callable, is_class_method: bool = False) -> List[str]:
+def extract_argument_names(handle: Callable, ignore_arguments: Tuple = ("args", "kwargs")) -> List[str]:
     """
     Extract the argument names from a function handle.

     Parameters
     ----------
-    function_handle : Callable
-        Handle of a function to extract the argument names from
-
-    is_class_method : bool, defaults to False
-        Whether the function is a class method
+    handle : Callable
+        Handle of a function or class to extract the argument names from
+    ignore_arguments : Tuple
+        Tuple of argument names to ignore

     Returns
     -------
     argument_names : List[str]
         List of argument names
     """
-    start_index = int(is_class_method)
-    argument_names = function_handle.__code__.co_varnames[start_index : function_handle.__code__.co_argcount]
+
+    argument_names = list(
+        handle.model_fields.keys() if hasattr(handle, "model_fields") else inspect.signature(handle).parameters
+    )
+    for argument_name in ignore_arguments:
+        if argument_name in argument_names:
+            argument_names.remove(argument_name)
     return argument_names

diff --git a/pyproject.toml b/pyproject.toml
index fb3e0e8..d1e649e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pybandits"
-version = "1.1.0"
+version = "3.0.0"
 description = "Python Multi-Armed Bandit Library"
 authors = [
     "Dario d'Andrea ",
@@ -16,10 +16,11 @@ readme = "README.md"
 python = ">=3.8.1,<3.12"
 loguru = "^0.6"
 numpy = "^1.23"
-pydantic = "1.10.*"
+pydantic = ">=1.10,<3"
 scipy = "^1.9"
 pymc = "^5.3"
 scikit-learn = "^1.1"
+optuna = "^3.6"
 bokeh = "^3.1"

diff --git a/tests/test_cmab.py b/tests/test_cmab.py
index 208f381..b8860f4 100644
--- a/tests/test_cmab.py
+++ b/tests/test_cmab.py
@@ -19,205 +19,590 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
- -from typing import get_args +from copy import deepcopy +from typing import Any, Dict, List, Optional, Tuple, Type, Union import numpy as np import pandas as pd import pytest from hypothesis import given, settings from hypothesis import strategies as st - -from pybandits.base import Float01 -from pybandits.cmab import CmabBernoulli, CmabBernoulliBAI, CmabBernoulliCC -from pybandits.model import BayesianLogisticRegression, BayesianLogisticRegressionCC, StudentT, UpdateMethods +from pydantic.dataclasses import dataclass + +import pybandits +from pybandits.base import ActionId, Float01 +from pybandits.base_model import BaseModel +from pybandits.cmab import BaseCmabBernoulli, CmabBernoulli, CmabBernoulliBAI, CmabBernoulliCC +from pybandits.model import ( + BaseBayesianLogisticRegression, + BayesianLogisticRegression, + BayesianLogisticRegressionCC, + StudentT, + UpdateMethods, +) from pybandits.pydantic_version_compatibility import ( - PYDANTIC_VERSION_1, - PYDANTIC_VERSION_2, - NonNegativeFloat, + PositiveInt, ValidationError, - pydantic_version, ) +from pybandits.quantitative_model import BaseCmabZoomingModel, CmabZoomingModel, CmabZoomingModelCC, QuantitativeModel from pybandits.strategy import BestActionIdentificationBandit, ClassicBandit, CostControlBandit -from pybandits.utils import to_serializable_dict -from tests.test_utils import is_serializable +from tests.test_utils import FakeApproximation, literal_update_methods + + +@st.composite +def diff_strategy(draw): + return draw(st.floats(min_value=0.001, max_value=0.5)) + + +@st.composite +def cost_strategy(draw, n_actions): + return draw(st.lists(st.floats(min_value=0, max_value=2), min_size=n_actions, max_size=n_actions)) + + +@pytest.fixture(scope="module") +def monkeymodule(): + with pytest.MonkeyPatch.context() as mp: + yield mp + + +def mock_student_t( + field_value: StudentT, + diff: Any, + monkeymodule: Any, + label: Union[int, str], +) -> int: + """ + Update the mu and sigma fields of a StudentT object. 
+ + Args: + field_value: StudentT object to update + diff: Hypothesis diff object for drawing random values + monkeymodule: Module for monkey patching + label: Label for the diff draw + + Returns: + Updated label value + """ + for sub_field in ("mu", "sigma"): + try: + new_value = getattr(field_value, sub_field) + diff.draw(diff_strategy(), label=f"{label}") + monkeymodule.setattr(field_value, sub_field, new_value) + label = int(label) + 1 if isinstance(label, (int, str)) else label + 1 + except AttributeError as e: + raise ValueError(f"Invalid StudentT field: {sub_field}") from e + return label + + +def mock_update(models: Union[List[BaseModel], BaseModel], diff, monkeymodule, label=0): + model_list = [models] if isinstance(models, BaseModel) else models + for model in model_list: + for field in model.model_fields: + field_value = getattr(model, field) + + # Handle StudentT field + if isinstance(field_value, StudentT): + label = mock_student_t(field_value, diff, monkeymodule, label) + + # Handle list of StudentT objects + elif isinstance(field_value, list) and field_value and isinstance(field_value[0], StudentT): + for item in field_value: + label = mock_student_t(item, diff, monkeymodule, label) + + # Handle list of BaseModel objects + elif isinstance(field_value, list) and field_value and isinstance(field_value[0], BaseModel): + mock_update(field_value, diff, monkeymodule, label) + + +@dataclass +class ModelTestConfig: + cmab_class: Type + strategy_class: Type + model_types: List[Type[BaseModel]] + + def _create_actions( + self, + action_ids: List[str], + costs: Optional[st.SearchStrategy], + n_features: PositiveInt, + update_method: UpdateMethods, + update_kwargs: Optional[Dict[str, Any]], + ) -> Tuple[Dict[str, Any], Dict[str, Any]]: + if len(self.model_types) < len(action_ids): + indices = np.random.randint(0, len(self.model_types), len(action_ids)) + self.model_types = [self.model_types[i] for i in indices] + if all(model in [BayesianLogisticRegressionCC, CmabZoomingModelCC] for model in self.model_types): + # Generate random costs + costs = costs.draw(cost_strategy(n_actions=len(action_ids))) + costs = [ + cost if model_type in [BayesianLogisticRegressionCC] else lambda x: x**cost + for cost, model_type in zip(costs, self.model_types) + ] + else: + costs = None + + model_cold_start_kwargs = dict(update_method=update_method, update_kwargs=update_kwargs) + base_model_cold_start_kwargs = dict(n_features=n_features, **model_cold_start_kwargs) + if costs is not None: + return { + action_id: model_type( + alpha=StudentT(), + betas=[StudentT() for _ in range(n_features)], + **model_cold_start_kwargs, + cost=cost, + ) + if issubclass(model_type, BayesianLogisticRegressionCC) + else model_type.cold_start( + dimension=1, + base_model_cold_start_kwargs=base_model_cold_start_kwargs, + cost=cost, + ) # CmabZoomingModelCC + for action_id, model_type, cost in zip(action_ids, self.model_types, costs) + }, base_model_cold_start_kwargs + else: + return { + action_id: model_type( + alpha=StudentT(), betas=[StudentT() for _ in range(n_features)], **model_cold_start_kwargs + ) + if issubclass(model_type, BayesianLogisticRegression) + else model_type.cold_start( + dimension=1, + base_model_cold_start_kwargs=base_model_cold_start_kwargs, + ) # CmabZoomingModel + for action_id, model_type in zip(action_ids, self.model_types) + }, base_model_cold_start_kwargs + + def create_cmab_and_actions( + self, + action_ids: List[str], + epsilon: Optional[Float01], + costs: st.SearchStrategy, + exploit_p: 
Union[st.SearchStrategy[Optional[Float01]], Optional[float]], + subsidy_factor: Union[st.SearchStrategy[Optional[Float01]], Optional[float]], + n_features: PositiveInt, + update_method: UpdateMethods, + update_kwargs: Optional[Dict[str, Any]], + ) -> Tuple[BaseCmabBernoulli, Dict[ActionId, BaseModel], Dict[str, Any]]: + actions, base_model_cold_start_kwargs = self._create_actions( + action_ids, costs, n_features, update_method, update_kwargs + ) + default_action = action_ids[0] if epsilon else None + + kwargs = { + k: v + for k, v in { + "epsilon": epsilon, + "default_action": default_action, + }.items() + if v is not None + } + for param, classes in zip(["subsidy_factor", "exploit_p"], [[CmabBernoulliCC], [CmabBernoulliBAI]]): + if self.cmab_class in classes: + actual_param = eval(param) + if isinstance(actual_param, float) or actual_param is None: + kwargs[param] = actual_param + else: + kwargs[param] = actual_param.draw(st.floats(min_value=0, max_value=1)) + + cmab = self.cmab_class(actions=actions, **kwargs) + if any(isinstance(model, BaseCmabZoomingModel) for model in actions.values()): + kwargs["base_model_cold_start_kwargs"] = base_model_cold_start_kwargs + if any(isinstance(model, BaseBayesianLogisticRegression) for model in actions.values()): + kwargs.update(base_model_cold_start_kwargs) + + return cmab, actions, kwargs + + +TEST_CONFIGS = { + "cmab": ModelTestConfig(CmabBernoulli, ClassicBandit, [BayesianLogisticRegression, CmabZoomingModel]), + "cmab_bai": ModelTestConfig( + CmabBernoulliBAI, BestActionIdentificationBandit, [BayesianLogisticRegression, CmabZoomingModel] + ), + "cmab_cc": ModelTestConfig( + CmabBernoulliCC, + CostControlBandit, + [BayesianLogisticRegressionCC, CmabZoomingModelCC], + ), +} -literal_update_methods = get_args(UpdateMethods) +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) +@given( + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_features=st.integers(min_value=1, max_value=5), + subsidy_factor=st.data(), + exploit_p=st.data(), + update_method=st.sampled_from(literal_update_methods), + update_kwargs=st.sampled_from([None, {"draws": 500}]), +) +def test_cold_start( + config: ModelTestConfig, + action_ids: List[str], + epsilon: Optional[float], + costs, + n_features, + exploit_p, + subsidy_factor, + update_method, + update_kwargs, +): + # Create CMAB instance + cmab, actions, kwargs = config.create_cmab_and_actions( + action_ids, epsilon, costs, exploit_p, subsidy_factor, n_features, update_method, update_kwargs + ) -def _apply_update_method_to_state(state, update_method): - for action in state["actions"]: - state["actions"][action]["update_method"] = update_method + # Cold start comparison logic (modified for different model types) + cold_start_kwargs = { + "action_ids": { + action + for action, model in zip(action_ids, config.model_types) + if issubclass(model, (BayesianLogisticRegression)) + }, + "quantitative_action_ids": { + action for action, model in zip(action_ids, config.model_types) if issubclass(model, QuantitativeModel) + }, + } + if all(model in [BayesianLogisticRegressionCC, CmabZoomingModelCC] for model in config.model_types): + cold_start_kwargs["action_ids_cost"] = { + action: model.cost for action, model in actions.items() if isinstance(model, (BayesianLogisticRegressionCC)) + } + cold_start_kwargs["quantitative_action_ids_cost"] = { 
+ action: model.cost for action, model in actions.items() if isinstance(model, CmabZoomingModelCC) + } + cold_start_kwargs.update(kwargs) # Add exploit_p or subsidy_factor if needed + cold_start_kwargs = {k: v for k, v in cold_start_kwargs.items() if v is not None} + assert config.cmab_class.cold_start(**cold_start_kwargs) == cmab -######################################################################################################################## +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) +@given( + action_ids=st.lists(st.text(min_size=1), min_size=2, max_size=5, unique=True), + n_features=st.integers(min_value=1, max_value=5), + costs=st.data(), + subsidy_factor=st.data(), + exploit_p=st.data(), + update_method=st.sampled_from(literal_update_methods), + update_kwargs=st.sampled_from([None, {"draws": 500}]), +) +def test_bad_initialization( + config: ModelTestConfig, + action_ids: List[str], + n_features: int, + costs, + exploit_p, + subsidy_factor, + update_method, + update_kwargs, +): + """Test various invalid initialization scenarios for CMAB models""" + kwargs = {"cost": 1} if config.cmab_class == CmabBernoulliCC else {} + # Test empty actions + with pytest.raises(AttributeError): + config.cmab_class(actions={}) + # Test single action (should warn) + single_action = {action_ids[0]: config.model_types[0].cold_start(n_features=n_features, **kwargs)} + with pytest.warns(UserWarning): + config.cmab_class(actions=single_action) -# CmabBernoulli with strategy=ClassicBandit() + # Test mismatched feature dimensions + actions_wrong_dims = { + action_ids[0]: config.model_types[0].cold_start(n_features=n_features, **kwargs), + action_ids[1]: config.model_types[0].cold_start(n_features=n_features + 1, **kwargs), + } + with pytest.raises(AttributeError): + config.cmab_class(actions=actions_wrong_dims) + # Test mismatched update methods + actions_wrong_update = { + action_ids[0]: config.model_types[0].cold_start(n_features=n_features, update_method="VI", **kwargs), + action_ids[1]: config.model_types[0].cold_start(n_features=n_features, update_method="MCMC", **kwargs), + } + with pytest.raises(AttributeError): + config.cmab_class(actions=actions_wrong_update) -@settings(deadline=500) -@given(st.integers(max_value=100)) -def test_create_cmab_bernoulli_cold_start(a_int): - # n_features must be > 0 - if a_int <= 0: - with pytest.raises(ValidationError): - CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=a_int) - else: - mab1 = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=a_int) - mab2 = CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=a_int), - "a2": BayesianLogisticRegression.cold_start(n_features=a_int), - } - ) - mab2.predict_actions_randomly = True - assert mab1 == mab2 + # Test mismatched update kwargs + base_kwargs = {"draws": 500} if update_kwargs else {"draws": 1000} + actions_wrong_kwargs = { + action_ids[0]: config.model_types[0].cold_start( + n_features=n_features, update_method=update_method, update_kwargs=base_kwargs, **kwargs + ), + action_ids[1]: config.model_types[0].cold_start( + n_features=n_features, + update_method=update_method, + update_kwargs={"draws": base_kwargs["draws"] // 2}, + **kwargs, + ), + } + with pytest.raises(AttributeError): + config.cmab_class(actions=actions_wrong_kwargs) + # Test invalid model types + actions_wrong_type = { + action_ids[0]: BayesianLogisticRegression.cold_start(n_features=n_features), + action_ids[1]: 
BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=1.0), + } + with pytest.raises(ValidationError): + config.cmab_class(actions=actions_wrong_type) -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=10)) -def test_cmab_can_instantiate(n_features): - with pytest.raises(TypeError): - CmabBernoulli() - with pytest.raises(AttributeError): - CmabBernoulli(actions={}) - with pytest.warns(UserWarning): - CmabBernoulli(actions={"a1": BayesianLogisticRegression.cold_start(n_features=n_features)}) - with pytest.raises(ValidationError): # predict_with_proba is not an argument of init - CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - predict_with_proba=True, - ) + # Test None actions with pytest.raises(ValidationError): - CmabBernoulli( - actions={ - "a1": None, - "a2": None, - }, - ) - CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - strategy=ClassicBandit(), + config.cmab_class(actions={aid: None for aid in action_ids}) + + # Test invalid strategy parameters + if config.cmab_class == CmabBernoulliBAI: + with pytest.raises(ValidationError): + config.create_cmab_and_actions( + action_ids, + None, + costs, + exploit_p.draw(st.sampled_from([-0.1, 1.1])), + subsidy_factor, + n_features, + update_method, + update_kwargs, + ) + elif config.cmab_class == CmabBernoulliCC: + with pytest.raises(ValidationError): + config.create_cmab_and_actions( + action_ids, + None, + costs, + exploit_p, + subsidy_factor.draw(st.sampled_from([-0.1, 1.1])), + n_features, + update_method, + update_kwargs, + ) + + +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) +@given( + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + n_samples=st.integers(min_value=1, max_value=5), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_features=st.integers(min_value=1, max_value=3), + subsidy_factor=st.data(), + exploit_p=st.data(), + update_method=st.sampled_from(literal_update_methods), + update_kwargs=st.sampled_from([None, {"draws": 10}]), +) +def test_update( + config: ModelTestConfig, + action_ids: List[str], + n_samples: int, + epsilon: Optional[float], + costs, + n_features, + exploit_p, + subsidy_factor, + update_method, + update_kwargs, + monkeymodule, +): + monkeymodule.setattr( + pybandits.model, + "fit", + lambda *args, **kwargs: FakeApproximation(n_features=n_features), ) - mab = CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - } + monkeymodule.setattr( + pybandits.model, + "sample", + FakeApproximation(n_features=n_features).sample, + ) + # Create CMAB instance + cmab, _, kwargs = config.create_cmab_and_actions( + action_ids, epsilon, costs, exploit_p, subsidy_factor, n_features, update_method, update_kwargs ) + context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) + # Generate random rewards + reward_data = np.random.choice([0, 1], size=n_samples).tolist() + # Test updates with generated data + actions_to_update = np.random.choice(np.array(action_ids, dtype=np.object_), size=n_samples, replace=True).tolist() + # Generate quantities only if there are any 
QuantitativeModel actions + for_update_kwargs = {"actions": actions_to_update, "rewards": reward_data} + if any(isinstance(model, BaseCmabZoomingModel) for model in cmab.actions.values()): + quantity_data = np.random.random(size=n_samples).tolist() + quantity_data = [ + q if isinstance(cmab.actions[action], QuantitativeModel) else None + for q, action in zip(quantity_data, actions_to_update) + ] + for_update_kwargs["quantities"] = quantity_data - assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_features) - assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_features) - assert not mab.predict_actions_randomly - assert not mab.predict_with_proba - mab.predict_with_proba = True - mab.predict_actions_randomly = True - assert mab.predict_actions_randomly - assert mab.predict_with_proba + old_cmab = deepcopy(cmab) + for transform in [lambda x: x, list, pd.DataFrame]: + cmab.update(context=transform(context), **for_update_kwargs) + assert cmab != old_cmab -@settings(deadline=500) +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) @given( - st.integers(min_value=1, max_value=5), - st.integers(min_value=6, max_value=10), - st.integers(min_value=0, max_value=1), - st.just("draws"), - st.just(2), + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + n_samples=st.integers(min_value=1, max_value=100), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_features=st.integers(min_value=1, max_value=5), + subsidy_factor=st.data(), + exploit_p=st.data(), + update_method=st.sampled_from(literal_update_methods), + update_kwargs=st.sampled_from([None, {"draws": 500}]), + diff=st.data(), ) -def test_cmab_init_with_wrong_blr_models(n_features, other_n_features, update_method_index, kwarg_to_alter, factor): - with pytest.raises(AttributeError): - CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - "a3": BayesianLogisticRegression.cold_start(n_features=other_n_features), - } - ) - update_method = literal_update_methods[update_method_index] - other_update_method = literal_update_methods[1 - update_method_index] - with pytest.raises(AttributeError): - CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features, update_method=update_method), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features, update_method=other_update_method), - } - ) - model = BayesianLogisticRegression.cold_start(n_features=n_features, update_method=update_method) - altered_kwarg = model.update_kwargs[kwarg_to_alter] // factor - with pytest.raises(AttributeError): - CmabBernoulli( - actions={ - "a1": model, - "a2": BayesianLogisticRegression.cold_start( - n_features=n_features, - update_method=update_method, - update_kwargs={kwarg_to_alter: altered_kwarg}, - ), - } - ) - +def test_predict( + config: ModelTestConfig, + action_ids: List[str], + n_samples: int, + epsilon: Optional[float], + costs, + n_features, + exploit_p, + subsidy_factor, + update_method, + update_kwargs, + diff, + monkeymodule, +): + # Create CMAB instance + cmab = config.create_cmab_and_actions( + action_ids, epsilon, costs, exploit_p, subsidy_factor, n_features, update_method, update_kwargs + )[0] + context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) + # Test predictions with random 
forbidden actions
+    forbidden = (
+        set(np.random.choice(np.array(action_ids, dtype=np.object_), size=len(action_ids) // 2, replace=False))
+        if len(action_ids) > 2
+        else None
+    )
+    if cmab.default_action is not None and forbidden is not None and cmab.default_action in forbidden:
+        forbidden.remove(cmab.default_action)

-@settings(deadline=60000)
-@given(st.just(100), st.just(3), st.sampled_from(literal_update_methods))
-def test_cmab_update(n_samples, n_features, update_method):
-    actions = np.random.choice(["a1", "a2"], size=n_samples).tolist()
-    rewards = np.random.choice([0, 1], size=n_samples).tolist()
+    mock_update(list(cmab.actions.values()), diff, monkeymodule)
+    best_actions, probs, weights = cmab.predict(context=context, forbidden_actions=forbidden)
+    assert len(best_actions) == n_samples
+    assert len(probs) == n_samples
+    assert len(weights) == n_samples

-    def run_update(context):
-        mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features, update_method=update_method)
+    if forbidden:
         assert all(
-            [
-                mab.actions[a]
-                == BayesianLogisticRegression.cold_start(n_features=n_features, update_method=update_method)
-                for a in set(actions)
-            ]
+            len({action[0] if isinstance(action, tuple) else action for action in prob})
+            == len(action_ids) - len(forbidden)
+            for prob in probs
         )
-        mab.update(context=context, actions=actions, rewards=rewards)
+        assert all((action[0] if isinstance(action, tuple) else action) not in forbidden for action in best_actions)
         assert all(
-            [
-                mab.actions[a]
-                != BayesianLogisticRegression.cold_start(n_features=n_features, update_method=update_method)
-                for a in set(actions)
-            ]
+            (action[0] if isinstance(action, tuple) else action) not in forbidden
+            for prob in probs
+            for action in prob.keys()
         )
-        assert not mab.predict_actions_randomly
-
-    # context is numpy array
-    context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features))
-    assert type(context) is np.ndarray
-    run_update(context=context)
+        assert all(
+            (action[0] if isinstance(action, tuple) else action) not in forbidden
+            for weight in weights
+            for action in weight.keys()
+        )
+    else:
+        assert all(
+            len({action[0] if isinstance(action, tuple) else action for action in prob}) == len(action_ids)
+            for prob in probs
+        )
+        if isinstance(cmab, CmabBernoulli) and not epsilon:
+            assert all(prob[best_action] == max(prob.values()) for best_action, prob in zip(best_actions, probs))

-    # context is python list
-    context = context.tolist()
-    assert type(context) is list
-    run_update(context=context)

-    # context is pandas DataFrame
-    context = pd.DataFrame(context)
-    assert type(context) is pd.DataFrame
-    run_update(context=context)
+@settings(deadline=None)
+@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys())
+@given(
+    action_ids=st.lists(
+        st.text(
+            min_size=1,
+        ),
+        min_size=2,
+        max_size=5,
+        unique=True,
+    ),
+    epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)),
+    costs=st.data(),
+    n_features=st.integers(min_value=1, max_value=5),
+    subsidy_factor=st.data(),
+    exploit_p=st.data(),
+    update_method=st.sampled_from(literal_update_methods),
+    update_kwargs=st.sampled_from([None, {"draws": 500}]),
+    diff=st.data(),
+)
+def test_serialization(
+    config: ModelTestConfig,
+    action_ids: List[str],
+    epsilon: Optional[float],
+    costs,
+    n_features,
+    exploit_p,
+    subsidy_factor,
+    update_method,
+    update_kwargs,
+    diff,
+    monkeymodule,
+):
+    # Create CMAB instance
+    cmab = config.create_cmab_and_actions(
+        action_ids, epsilon, costs,
exploit_p, subsidy_factor, n_features, update_method, update_kwargs + )[0] + + pre_update_state = cmab.get_state() + mock_update(list(cmab.actions.values()), diff, monkeymodule) + post_update_state = cmab.get_state() + # Verify model updates + assert pre_update_state != post_update_state + + # Test serialization + restored_cmab = config.cmab_class.from_state(post_update_state[1]) + assert restored_cmab == cmab + + +def test_cmab_actions_matching(n_features=2): + blr_model = BayesianLogisticRegression.cold_start(n_features=n_features) + blr_model2 = BayesianLogisticRegression.cold_start(n_features=n_features + 1) + zoom_model = CmabZoomingModel.cold_start(base_model_cold_start_kwargs={"n_features": n_features}) + zoom_model2 = CmabZoomingModel.cold_start(base_model_cold_start_kwargs={"n_features": n_features + 1}) + actions = {"a1": blr_model, "a2": blr_model2} + with pytest.raises(AttributeError): + BaseCmabBernoulli.check_models(actions) + actions = {"a1": zoom_model, "a2": zoom_model2} + with pytest.raises(AttributeError): + BaseCmabBernoulli.check_models(actions) -@settings(deadline=10000) -@given(st.just(100), st.just(3), st.sampled_from(literal_update_methods)) -def test_cmab_update_not_all_actions(n_samples, n_feat, update_method): - actions = np.random.choice(["a3", "a4"], size=n_samples).tolist() - rewards = np.random.choice([0, 1], size=n_samples).tolist() - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_feat)) - mab = CmabBernoulli.cold_start(action_ids={"a1", "a2", "a3", "a4"}, n_features=n_feat, update_method=update_method) + actions = {"a1": blr_model, "a2": zoom_model2} + with pytest.raises(AttributeError): + BaseCmabBernoulli.check_models(actions) - mab.update(context=context, actions=actions, rewards=rewards) - assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_feat, update_method=update_method) - assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_feat, update_method=update_method) - assert mab.actions["a3"] != BayesianLogisticRegression.cold_start(n_features=n_feat, update_method=update_method) - assert mab.actions["a4"] != BayesianLogisticRegression.cold_start(n_features=n_feat, update_method=update_method) + actions = {"a1": blr_model, "a2": zoom_model} + BaseCmabBernoulli.check_models(actions) @settings(deadline=500) @@ -244,64 +629,6 @@ def test_cmab_update_shape_mismatch(n_samples, n_features, update_method): mab.update(context=[], actions=actions, rewards=rewards) -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100)) -def test_cmab_predict_cold_start(n_samples, n_features): - def run_predict(context): - mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features) - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert mab.predict_actions_randomly - assert all([a in ["a1", "a2"] for a in selected_actions]) - assert len(selected_actions) == n_samples - assert probs == n_samples * [{"a1": 0.5, "a2": 0.5}] - assert weighted_sums == n_samples * [{"a1": 0, "a2": 0}] - - # context is numpy array - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - assert type(context) is np.ndarray - run_predict(context=context) - - # context is python list - context = context.tolist() - assert type(context) is list - run_predict(context=context) - - # context is pandas DataFrame - context = pd.DataFrame(context) - assert type(context) is pd.DataFrame - run_predict(context=context) - 
- -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=100), st.integers(min_value=1, max_value=3)) -def test_cmab_predict_not_cold_start(n_samples, n_features): - def run_predict(context): - mab = CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression(alpha=StudentT(mu=1, sigma=2), betas=n_features * [StudentT()]), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - ) - assert not mab.predict_actions_randomly - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples - - # context is numpy array - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - assert type(context) is np.ndarray - run_predict(context=context) - - # context is python list - context = context.tolist() - assert type(context) is list - run_predict(context=context) - - # context is pandas DataFrame - context = pd.DataFrame(context) - assert type(context) is pd.DataFrame - run_predict(context=context) - - @settings(deadline=500) @given(st.integers(min_value=1, max_value=10)) def test_cmab_predict_shape_mismatch(a_int): @@ -311,653 +638,3 @@ def test_cmab_predict_shape_mismatch(a_int): mab.predict(context=context) with pytest.raises(AttributeError): mab.predict(context=[]) - - -def test_cmab_predict_with_forbidden_actions(n_features=3): - def run_predict(mab): - context = np.random.uniform(low=-1.0, high=1.0, size=(1000, n_features)) - assert set(mab.predict(context=context, forbidden_actions={"a2", "a3", "a4", "a5"})[0]) == {"a1"} - assert set(mab.predict(context=context, forbidden_actions={"a1", "a3"})[0]) == {"a2", "a4", "a5"} - assert set(mab.predict(context=context, forbidden_actions={"a1"})[0]) == {"a2", "a3", "a4", "a5"} - assert set(mab.predict(context=context, forbidden_actions=set())[0]) == {"a1", "a2", "a3", "a4", "a5"} - - if pydantic_version == PYDANTIC_VERSION_1: - expected_error_type = ValueError - elif pydantic_version == PYDANTIC_VERSION_2: - expected_error_type = ValidationError - else: - raise ValueError(f"Unsupported Pydantic version: {pydantic_version}") - with pytest.raises(expected_error_type): # not a set - assert set(mab.predict(context=context, forbidden_actions={1})[0]) - with pytest.raises(ValueError): # invalid action_ids - assert set(mab.predict(context=context, forbidden_actions={"a1", "a9999", "a", 5})[0]) - with pytest.raises(ValueError): # all actions forbidden - assert set(mab.predict(context=context, forbidden_actions={"a1", "a2", "a3", "a4", "a5"})[0]) - with pytest.raises(ValueError): # all actions forbidden (unordered) - assert set(mab.predict(n_samples=1000, forbidden_actions={"a5", "a4", "a2", "a3", "a1"})[0]) - - # cold start mab - mab = CmabBernoulli.cold_start(action_ids={"a1", "a2", "a3", "a4", "a5"}, n_features=n_features) - run_predict(mab=mab) - - # not cold start mab - mab = CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression(alpha=StudentT(mu=1, sigma=2), betas=[StudentT(), StudentT(), StudentT()]), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - "a3": BayesianLogisticRegression.cold_start(n_features=n_features), - "a4": BayesianLogisticRegression(alpha=StudentT(mu=4, sigma=5), betas=[StudentT(), StudentT(), StudentT()]), - "a5": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - ) - assert mab != CmabBernoulli.cold_start(action_ids={"a1", "a2", "a3", "a4", "a5"}, n_features=n_features) - run_predict(mab=mab) - - -@settings(deadline=500) 
-@given(st.integers(min_value=1), st.integers(min_value=1), st.integers(min_value=2, max_value=100)) -def test_cmab_get_state(mu, sigma, n_features): - actions: dict = { - "a1": BayesianLogisticRegression(alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()]), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - } - - cmab = CmabBernoulli(actions=actions) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {}, - "predict_with_proba": False, - "predict_actions_randomly": False, - "epsilon": None, - "default_action": None, - } - ) - - class_name, cmab_state = cmab.get_state() - assert class_name == "CmabBernoulli" - assert cmab_state == expected_state - - assert is_serializable(cmab_state), "Internal state is not serializable" - - -@settings(deadline=500) -@given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "alpha": st.fixed_dictionaries( - { - "mu": st.floats(min_value=-100, max_value=100), - "nu": st.floats(min_value=0, max_value=100), - "sigma": st.floats(min_value=0, max_value=100), - } - ), - "betas": st.lists( - st.fixed_dictionaries( - { - "mu": st.floats(min_value=-100, max_value=100), - "nu": st.floats(min_value=0, max_value=100), - "sigma": st.floats(min_value=0, max_value=100), - } - ), - min_size=3, - max_size=3, - ), - }, - ), - min_size=2, - ), - "strategy": st.fixed_dictionaries({}), - } - ), - update_method=st.sampled_from(literal_update_methods), -) -def test_cmab_from_state(state, update_method): - _apply_update_method_to_state(state, update_method) - cmab = CmabBernoulli.from_state(state) - assert isinstance(cmab, CmabBernoulli) - - actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict - expected_actions = {k: {**v, **state["actions"][k]} for k, v in actual_actions.items()} - assert expected_actions == actual_actions - - # Ensure get_state and from_state compatibility - new_cmab = globals()[cmab.get_state()[0]].from_state(state=cmab.get_state()[1]) - assert new_cmab == cmab - - -######################################################################################################################## - - -# CmabBernoulli with strategy=BestActionIdentificationBandit() - - -@settings(deadline=500) -@given(st.integers(max_value=100)) -def test_create_cmab_bernoulli_bai_cold_start(a_int): - # n_features must be > 0 - if a_int <= 0: - with pytest.raises(ValidationError): - CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=a_int) - else: - # default exploit_p - mab1 = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=a_int) - mab2 = CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=a_int), - "a2": BayesianLogisticRegression.cold_start(n_features=a_int), - } - ) - mab2.predict_actions_randomly = True - assert mab1 == mab2 - - # set exploit_p - mab1 = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=a_int, exploit_p=0.42) - mab2 = CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=a_int), - "a2": BayesianLogisticRegression.cold_start(n_features=a_int), - }, - exploit_p=0.42, - ) - mab2.predict_actions_randomly = True - assert mab1 == mab2 - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=10)) -def test_cmab_bai_can_instantiate(n_features): - with pytest.raises(TypeError): - CmabBernoulliBAI() - with pytest.raises(AttributeError): - CmabBernoulliBAI(actions={}) - with 
pytest.warns(UserWarning): - CmabBernoulliBAI(actions={"a1": BayesianLogisticRegression.cold_start(n_features=2)}) - with pytest.raises(ValidationError): # predict_with_proba is not an argument of init - CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - predict_with_proba=True, - ) - with pytest.raises(ValidationError): - CmabBernoulliBAI( - actions={ - "a1": None, - "a2": None, - }, - ) - CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - strategy=BestActionIdentificationBandit(), - ) - mab = CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - } - ) - assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_features) - assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_features) - assert not mab.predict_actions_randomly - assert not mab.predict_with_proba - assert mab.strategy == BestActionIdentificationBandit() - - mab = CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - exploit_p=0.42, - ) - assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_features) - assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_features) - assert not mab.predict_actions_randomly - assert not mab.predict_with_proba - assert mab.strategy == BestActionIdentificationBandit(exploit_p=0.42) - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=100), st.integers(min_value=1, max_value=3)) -def test_cmab_bai_predict(n_samples, n_features): - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - - # cold start - mab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=n_features) - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert mab.predict_actions_randomly - assert all([a in ["a1", "a2"] for a in selected_actions]) - assert len(selected_actions) == n_samples - assert probs == n_samples * [{"a1": 0.5, "a2": 0.5}] - assert weighted_sums == n_samples * [{"a1": 0, "a2": 0}] - - # not cold start - mab = CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - exploit_p=0.42, - ) - assert not mab.predict_actions_randomly - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples - - -@settings(deadline=10000) -@given(st.just(100), st.just(3), st.sampled_from(literal_update_methods)) -def test_cmab_bai_update(n_samples, n_features, update_method): - actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() - rewards = np.random.choice([0, 1], size=n_samples).tolist() - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=n_features, update_method=update_method) - assert mab.predict_actions_randomly - assert all( - [ - mab.actions[a] == BayesianLogisticRegression.cold_start(n_features=n_features, update_method=update_method) - for a in 
set(actions) - ] - ) - mab.update(context=context, actions=actions, rewards=rewards) - assert all( - [ - mab.actions[a] != BayesianLogisticRegression.cold_start(n_features=n_features, update_method=update_method) - for a in set(actions) - ] - ) - assert not mab.predict_actions_randomly - - -@settings(deadline=500) -@given( - st.integers(min_value=1), - st.integers(min_value=1), - st.integers(min_value=2, max_value=100), - st.floats(min_value=0, max_value=1), -) -def test_cmab_bai_get_state(mu, sigma, n_features, exploit_p: Float01): - actions: dict = { - "a1": BayesianLogisticRegression(alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()]), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - } - - cmab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {"exploit_p": exploit_p}, - "predict_with_proba": False, - "predict_actions_randomly": False, - "epsilon": None, - "default_action": None, - } - ) - - class_name, cmab_state = cmab.get_state() - assert class_name == "CmabBernoulliBAI" - assert cmab_state == expected_state - - assert is_serializable(cmab_state), "Internal state is not serializable" - - -@settings(deadline=500) -@given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "alpha": st.fixed_dictionaries( - { - "mu": st.floats(min_value=-100, max_value=100), - "nu": st.floats(min_value=0, max_value=100), - "sigma": st.floats(min_value=0, max_value=100), - } - ), - "betas": st.lists( - st.fixed_dictionaries( - { - "mu": st.floats(min_value=-100, max_value=100), - "nu": st.floats(min_value=0, max_value=100), - "sigma": st.floats(min_value=0, max_value=100), - } - ), - min_size=3, - max_size=3, - ), - }, - ), - min_size=2, - ), - "strategy": st.one_of( - st.just({}), - st.just({"exploit_p": None}), - st.builds(lambda x: {"exploit_p": x}, st.floats(min_value=0, max_value=1)), - ), - } - ), - update_method=st.sampled_from(literal_update_methods), -) -def test_cmab_bai_from_state(state, update_method): - _apply_update_method_to_state(state, update_method) - cmab = CmabBernoulliBAI.from_state(state) - assert isinstance(cmab, CmabBernoulliBAI) - - actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict - expected_actions = {k: {**v, **state["actions"][k]} for k, v in actual_actions.items()} - assert expected_actions == actual_actions - - expected_exploit_p = cmab.strategy.get_expected_value_from_state(state, "exploit_p") - actual_exploit_p = cmab.strategy.exploit_p - assert expected_exploit_p == actual_exploit_p - - # Ensure get_state and from_state compatibility - new_cmab = globals()[cmab.get_state()[0]].from_state(state=cmab.get_state()[1]) - assert new_cmab == cmab - - -######################################################################################################################## - - -# CmabBernoulli with strategy=CostControlBandit() - - -@settings(deadline=500) -@given(st.integers(max_value=100)) -def test_create_cmab_bernoulli_cc_cold_start(a_int): - action_ids_cost = {"a1": 10, "a2": 20.5} - # n_features must be > 0 - if a_int <= 0: - with pytest.raises(ValidationError): - CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=a_int) - else: - # default subsidy_factor - mab1 = CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=a_int) - mab2 = CmabBernoulliCC( - actions={ - "a1": 
BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a1"]), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a2"]), - } - ) - mab2.predict_actions_randomly = True - assert mab1 == mab2 - - # set subsidy_factor - mab1 = CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=a_int, subsidy_factor=0.42) - mab2 = CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a1"]), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a2"]), - }, - subsidy_factor=0.42, - ) - mab2.predict_actions_randomly = True - assert mab1 == mab2 - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=10)) -def test_cmab_cc_can_instantiate(n_features): - with pytest.raises(TypeError): - CmabBernoulliCC() - with pytest.raises(AttributeError): - CmabBernoulliCC(actions={}) - with pytest.warns(UserWarning): - CmabBernoulliCC(actions={"a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10)}) - with pytest.raises(ValidationError): # predict_with_proba is not an argument of init - CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - }, - predict_with_proba=True, - ) - with pytest.raises(ValidationError): - CmabBernoulliCC( - actions={ - "a1": None, - "a2": None, - }, - ) - CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - }, - strategy=CostControlBandit(), - ) - mab = CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - } - ) - assert mab.actions["a1"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) - assert mab.actions["a2"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) - assert not mab.predict_actions_randomly - assert mab.predict_with_proba - assert mab.strategy == CostControlBandit() - - mab = CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - }, - subsidy_factor=0.42, - ) - assert mab.actions["a1"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) - assert mab.actions["a2"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) - assert not mab.predict_actions_randomly - assert mab.predict_with_proba - assert mab.strategy == CostControlBandit(subsidy_factor=0.42) - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=100), st.integers(min_value=1, max_value=3)) -def test_cmab_cc_predict(n_samples, n_features): - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - - # cold start - mab = CmabBernoulliCC.cold_start(action_ids_cost={"a1": 10, "a2": 20.5}, n_features=n_features) - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert mab.predict_actions_randomly - assert all([a in ["a1", "a2"] for a in selected_actions]) - assert len(selected_actions) == n_samples - assert probs == n_samples * [{"a1": 0.5, "a2": 0.5}] - assert weighted_sums == n_samples * [{"a1": 0, "a2": 0}] - - # not 
cold start - mab = CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=20.5), - }, - subsidy_factor=0.42, - ) - assert not mab.predict_actions_randomly - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples - - -@settings(deadline=None) -@given(st.just(100), st.just(3), st.sampled_from(literal_update_methods)) -def test_cmab_cc_update(n_samples, n_features, update_method): - actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() - rewards = np.random.choice([0, 1], size=n_samples).tolist() - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = CmabBernoulliCC.cold_start( - action_ids_cost={"a1": 10, "a2": 10}, n_features=n_features, update_method=update_method - ) - assert mab.predict_actions_randomly - assert all( - [ - mab.actions[a] - == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10, update_method=update_method) - for a in set(actions) - ] - ) - mab.update(context=context, actions=actions, rewards=rewards) - assert all( - [ - mab.actions[a] - != BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10, update_method=update_method) - for a in set(actions) - ] - ) - assert not mab.predict_actions_randomly - - -@settings(deadline=500) -@given( - st.integers(min_value=1), - st.integers(min_value=1), - st.integers(min_value=2, max_value=100), - st.floats(min_value=0), - st.floats(min_value=0), - st.floats(min_value=0, max_value=1), -) -def test_cmab_cc_get_state( - mu, sigma, n_features, cost_1: NonNegativeFloat, cost_2: NonNegativeFloat, subsidy_factor: Float01 -): - actions: dict = { - "a1": BayesianLogisticRegressionCC( - alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()], cost=cost_1 - ), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=cost_2), - } - - cmab = CmabBernoulliCC(actions=actions, subsidy_factor=subsidy_factor) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {"subsidy_factor": subsidy_factor}, - "predict_with_proba": True, - "predict_actions_randomly": False, - "epsilon": None, - "default_action": None, - } - ) - - class_name, cmab_state = cmab.get_state() - assert class_name == "CmabBernoulliCC" - assert cmab_state == expected_state - - assert is_serializable(cmab_state), "Internal state is not serializable" - - -@settings(deadline=500) -@given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "alpha": st.fixed_dictionaries( - { - "mu": st.floats(min_value=-100, max_value=100), - "nu": st.floats(min_value=0, max_value=100), - "sigma": st.floats(min_value=0, max_value=100), - } - ), - "betas": st.lists( - st.fixed_dictionaries( - { - "mu": st.floats(min_value=-100, max_value=100), - "nu": st.floats(min_value=0, max_value=100), - "sigma": st.floats(min_value=0, max_value=100), - } - ), - min_size=3, - max_size=3, - ), - "cost": st.floats(min_value=0), - }, - ), - min_size=2, - ), - "strategy": st.one_of( - st.just({}), - st.just({"subsidy_factor": None}), - st.builds(lambda x: {"subsidy_factor": x}, st.floats(min_value=0, max_value=1)), - ), - } - ), - update_method=st.sampled_from(literal_update_methods), -) -def test_cmab_cc_from_state(state, update_method): - _apply_update_method_to_state(state, 
update_method) - cmab = CmabBernoulliCC.from_state(state) - assert isinstance(cmab, CmabBernoulliCC) - - actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict - expected_actions = {k: {**v, **state["actions"][k]} for k, v in actual_actions.items()} - assert expected_actions == actual_actions - - expected_subsidy_factor = cmab.strategy.get_expected_value_from_state(state, "subsidy_factor") - actual_subsidy_factor = cmab.strategy.subsidy_factor - assert expected_subsidy_factor == actual_subsidy_factor - - # Ensure get_state and from_state compatibility - new_cmab = globals()[cmab.get_state()[0]].from_state(state=cmab.get_state()[1]) - assert new_cmab == cmab - - -######################################################################################################################## - - -# Cmab with epsilon-greedy super strategy - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100)) -def test_epsilon_greedy_cmab_predict_cold_start(n_samples, n_features): - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - - mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features, epsilon=0.1, default_action="a1") - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert mab.predict_actions_randomly - assert all([a in ["a1", "a2"] for a in selected_actions]) - assert len(selected_actions) == n_samples - assert probs == n_samples * [{"a1": 0.5, "a2": 0.5}] - assert weighted_sums == n_samples * [{"a1": 0, "a2": 0}] - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=100), st.integers(min_value=1, max_value=3)) -def test_epsilon_greedy_cmab_bai_predict(n_samples, n_features): - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - - mab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=n_features, epsilon=0.1, default_action="a1") - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert mab.predict_actions_randomly - assert all([a in ["a1", "a2"] for a in selected_actions]) - assert len(selected_actions) == n_samples - assert probs == n_samples * [{"a1": 0.5, "a2": 0.5}] - assert weighted_sums == n_samples * [{"a1": 0, "a2": 0}] - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=100), st.integers(min_value=1, max_value=3)) -def test_epsilon_greedy_cmab_cc_predict(n_samples, n_features): - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - - # cold start - mab = CmabBernoulliCC.cold_start( - action_ids_cost={"a1": 10, "a2": 20.5}, n_features=n_features, epsilon=0.1, default_action="a1" - ) - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert mab.predict_actions_randomly - assert all([a in ["a1", "a2"] for a in selected_actions]) - assert len(selected_actions) == n_samples - assert probs == n_samples * [{"a1": 0.5, "a2": 0.5}] - assert weighted_sums == n_samples * [{"a1": 0, "a2": 0}] diff --git a/tests/test_cmab_simulator.py b/tests/test_cmab_simulator.py index cfb8633..4e9629e 100644 --- a/tests/test_cmab_simulator.py +++ b/tests/test_cmab_simulator.py @@ -30,16 +30,26 @@ from hypothesis import strategies as st from pytest_mock import MockerFixture +import pybandits from pybandits.cmab import CmabBernoulli from pybandits.cmab_simulator import CmabSimulator +from pybandits.model import BayesianLogisticRegression +from pybandits.quantitative_model import CmabZoomingModel +from tests.test_utils 
import FakeApproximation -def test_mismatched_probs_reward_columns(mocker: MockerFixture, groups=[0, 1]): +@pytest.fixture(scope="module") +def monkeymodule(): + with pytest.MonkeyPatch.context() as mp: + yield mp + + +def test_mismatched_probs_reward_columns(mocker: MockerFixture, group=[0, 1]): def check_value_error(probs_reward, context): with pytest.raises(ValueError): - CmabSimulator(mab=cmab, probs_reward=probs_reward, groups=groups, context=context) + CmabSimulator(mab=cmab, probs_reward=probs_reward, group=group, context=context) - num_groups = len(groups) + num_groups = len(group) cmab = mocker.Mock(spec=CmabBernoulli) cmab.actions = {"a1": mocker.Mock(), "a2": mocker.Mock()} cmab.epsilon = 0.0 @@ -51,11 +61,42 @@ def check_value_error(probs_reward, context): check_value_error(probs_reward, context[:1]) -def test_cmab_e2e_simulation_with_default_arguments( - action_ids=["a1", "a2"], n_features=3, n_updates=2, batch_size=10, num_groups=2 -): - mab = CmabBernoulli.cold_start(action_ids=action_ids, n_features=n_features) +@settings(deadline=None) +@given( + st.just(["a1", "a2"]), + st.lists( + st.sampled_from( + [ + BayesianLogisticRegression.cold_start(n_features=3, update_method="VI"), + CmabZoomingModel.cold_start(base_model_cold_start_kwargs={"n_features": 3, "update_method": "VI"}), + ] + ), + min_size=2, + max_size=2, + ), + st.just(3), + st.just(2), +) +def test_cmab_e2e_simulation_with_default_arguments(monkeymodule, action_ids, models, n_features, num_groups): + monkeymodule.setattr( + pybandits.model, + "fit", + lambda *args, **kwargs: FakeApproximation(n_features=n_features), + ) + monkeymodule.setattr( + pybandits.model, + "sample", + FakeApproximation(n_features=n_features).sample, + ) + monkeymodule.setattr( + CmabSimulator, + "_maximize_prob_reward", + lambda *args, **kwargs: np.random.random(), + ) + mab = CmabBernoulli(actions=dict(zip(action_ids, models))) base_groups = list(range(num_groups)) + n_updates = CmabSimulator.model_fields["n_updates"].default + batch_size = CmabSimulator.model_fields["batch_size"].default group = base_groups * (n_updates * batch_size // num_groups) + base_groups[: (n_updates * batch_size % num_groups)] context = ( np.repeat(np.arange(3).reshape(1, -1), n_updates * batch_size, axis=0).T * (np.array(group) - np.mean(group)) @@ -82,19 +123,30 @@ def test_cmab_e2e_simulation_with_default_arguments( @settings(deadline=None) @given( - st.just(["a1", "a2"]), - st.just(3), - st.integers(min_value=1, max_value=3), - st.integers(min_value=1, max_value=10), - st.booleans(), - st.sampled_from([None, 0, 42]), - st.booleans(), - st.booleans(), - st.sampled_from(["", "unit_test"]), - st.integers(min_value=1, max_value=3), + action_ids=st.just(["a1", "a2"]), + models=st.lists( + st.sampled_from( + [ + BayesianLogisticRegression.cold_start(n_features=3, update_method="VI"), + CmabZoomingModel.cold_start(base_model_cold_start_kwargs={"n_features": 3, "update_method": "VI"}), + ] + ), + min_size=2, + max_size=2, + ), + n_features=st.just(3), + n_updates=st.integers(min_value=1, max_value=3), + batch_size=st.integers(min_value=1, max_value=10), + save=st.booleans(), + random_seed=st.sampled_from([None, 0, 42]), + verbose=st.booleans(), + visualize=st.booleans(), + file_prefix=st.sampled_from(["", "unit_test"]), + num_groups=st.integers(min_value=1, max_value=3), ) def test_cmab_e2e_simulation_with_non_default_args( action_ids, + models, n_features, n_updates, batch_size, @@ -104,20 +156,30 @@ def test_cmab_e2e_simulation_with_non_default_args( visualize, 
file_prefix, num_groups, + monkeymodule, ): + monkeymodule.setattr( + pybandits.model, + "fit", + lambda *args, **kwargs: FakeApproximation(n_features=n_features), + ) + monkeymodule.setattr( + pybandits.model, + "sample", + FakeApproximation(n_features=n_features).sample, + ) + monkeymodule.setattr( + CmabSimulator, + "_maximize_prob_reward", + lambda *args, **kwargs: np.random.random(), + ) base_groups = list(range(num_groups)) group = base_groups * (n_updates * batch_size // num_groups) + base_groups[: (n_updates * batch_size % num_groups)] - effective_base_groups = sorted(set(group)) context = ( np.repeat(np.arange(n_features).reshape(1, -1), n_updates * batch_size, axis=0).T * (np.array(group) - np.mean(group)) ).T - probs_reward = pd.DataFrame( - np.random.uniform(0, 1, (len(effective_base_groups), len(action_ids))), - columns=action_ids, - index=[str(g) for g in effective_base_groups], - ) - mab = CmabBernoulli.cold_start(action_ids=action_ids, n_features=n_features, update_method="VI") + mab = CmabBernoulli(actions=dict(zip(action_ids, models))) if visualize and not save: with pytest.raises(ValueError): CmabSimulator( @@ -128,7 +190,7 @@ def test_cmab_e2e_simulation_with_non_default_args( n_updates=n_updates, batch_size=batch_size, random_seed=random_seed, - probs_reward=probs_reward, + probs_reward=None, verbose=verbose, file_prefix=file_prefix, context=context, @@ -144,7 +206,7 @@ def test_cmab_e2e_simulation_with_non_default_args( n_updates=n_updates, batch_size=batch_size, random_seed=random_seed, - probs_reward=probs_reward, + probs_reward=None, verbose=verbose, file_prefix=file_prefix, context=context, diff --git a/tests/test_mab.py b/tests/test_mab.py index 3668347..26a5a21 100644 --- a/tests/test_mab.py +++ b/tests/test_mab.py @@ -28,7 +28,14 @@ from hypothesis import given from pytest_mock import MockerFixture -from pybandits.base import ACTION_IDS_PREFIX, ActionId, BinaryReward, Float01, Probability +from pybandits.base import ( + ACTION_IDS_PREFIX, + ActionId, + BinaryReward, + Float01, + Probability, + PyBanditsBaseModel, +) from pybandits.mab import BaseMab from pybandits.model import Beta, BetaCC from pybandits.pydantic_version_compatibility import ValidationError @@ -36,11 +43,18 @@ class DummyMab(BaseMab): + actions: Dict[ActionId, Beta] epsilon: Optional[Float01] = None default_action: Optional[ActionId] = None - def update(self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]]): - self._validate_update_params(actions=actions, rewards=rewards) + def _update( + self, + actions: List[ActionId], + rewards: Union[List[BinaryReward], List[List[BinaryReward]]], + quantities: Optional[List[Union[float, List[float], None]]], + **kwargs, + ): + pass def predict( self, @@ -69,7 +83,7 @@ def test_base_mab_raise_on_bad_actions(cost=0.0): DummyMab(actions={"a1": None, "a2": None}, strategy=ClassicBandit()) with pytest.warns(UserWarning): DummyMab(actions={"a1": Beta()}, strategy=ClassicBandit()) - with pytest.raises(AttributeError): + with pytest.raises(ValidationError): DummyMab(actions={"a1": Beta(), "a2": BetaCC(cost=cost)}, strategy=ClassicBandit()) @@ -77,19 +91,27 @@ def test_base_mab_check_update_params(): dummy_mab = DummyMab(actions={"a1": Beta(), "a2": Beta()}, strategy=ClassicBandit()) with pytest.raises(AttributeError): # actionId doesn't exist - dummy_mab._validate_update_params(actions=["a1", "a3"], rewards=[1, 1]) - with pytest.raises(AttributeError): + dummy_mab.update(actions=["a1", "a3"], rewards=[1, 1], quantities=None) + 
with pytest.raises(ValidationError): # actionId cannot be empty - dummy_mab._validate_update_params(actions=[""], rewards=[1]) + dummy_mab.update(actions=[""], rewards=[1], quantities=None) + with pytest.raises(AttributeError): + dummy_mab._validate_params_lengths(actions=["a1", "a2"], rewards=[1], quantities=None) + with pytest.raises(AttributeError): - dummy_mab._validate_update_params(actions=["a1", "a2"], rewards=[1]) + # quantities of different length + dummy_mab._validate_params_lengths(actions=["a1", "a2"], rewards=[1, 1], quantities=[1]) + + with pytest.raises(AttributeError): + # context of different length + dummy_mab._validate_params_lengths(actions=["a1", "a2"], rewards=[1, 1], quantities=None, context=[1]) @given(r1=st.integers(min_value=0, max_value=1), r2=st.integers(min_value=0, max_value=1)) def test_base_mab_update_ok(r1, r2): dummy_mab = DummyMab(actions={"a1": Beta(), "a2": Beta()}, strategy=ClassicBandit()) - dummy_mab.update(actions=["a1", "a2"], rewards=[r1, r2]) - dummy_mab.update(actions=["a1", "a1"], rewards=[r1, r2]) + dummy_mab.update(actions=["a1", "a2"], rewards=[r1, r2], quantities=None) + dummy_mab.update(actions=["a1", "a1"], rewards=[r1, r2], quantities=None) ######################################################################################################################## @@ -100,7 +122,7 @@ def test_base_mab_update_ok(r1, r2): def test_returns_empty_dict_when_no_action_specific_kwargs(): kwargs = {"param1": 1, "param2": 2} - result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + result, _, _ = BaseMab._extract_action_specific_kwargs(**kwargs) assert result == {} @@ -108,13 +130,13 @@ def test_processes_kwargs_with_non_dict_values(): kwargs = { f"{ACTION_IDS_PREFIX}param1": "not_a_dict", } - result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + result, _, _ = BaseMab._extract_action_specific_kwargs(**kwargs) assert result == {} def test_manages_kwargs_with_empty_dicts(): kwargs = {f"{ACTION_IDS_PREFIX}param1": {}, f"{ACTION_IDS_PREFIX}param2": {}} - result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + result, _, _ = BaseMab._extract_action_specific_kwargs(**kwargs) assert result == {} @@ -124,7 +146,7 @@ def test_extracts_action_specific_kwargs_with_valid_keys(): f"{ACTION_IDS_PREFIX}param2": {"action1": 3, "action2": 4}, } expected_output = {"action1": {"param1": 1, "param2": 3}, "action2": {"param1": 2, "param2": 4}} - result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + result, _, _ = BaseMab._extract_action_specific_kwargs(**kwargs) assert result == expected_output @@ -135,18 +157,25 @@ def test_extracts_action_specific_kwargs_with_valid_keys(): def test_extracts_action_model_class_and_attributes_with_valid_kwargs(mocker: MockerFixture): - class MockActionModel: - def __init__(self, param1, param2): - pass + class MockActionModel(PyBanditsBaseModel): + param1: int + param2: int mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) - mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=["param1", "param2"]) + mocker.patch("pybandits.mab.extract_argument_names", return_value=["param1", "param2"]) + mocker.patch("pybandits.mab.issubclass", return_value=True) kwargs = {"param1": 1, "param2": 2} - action_model_cold_start, action_general_kwargs = BaseMab._extract_action_model_class_and_attributes(**kwargs) + kwargs_backup = kwargs.copy() + ( + model_cold_start, + _, + action_general_kwargs, + _, + ) = BaseMab._extract_action_model_class_and_attributes(kwargs) - 
assert action_model_cold_start == MockActionModel - assert action_general_kwargs == {"param1": 1, "param2": 2} + assert model_cold_start == MockActionModel + assert action_general_kwargs == kwargs_backup def test_returns_callable_for_action_model_cold_start_instantiation(mocker: MockerFixture): @@ -156,12 +185,17 @@ def cold_start(cls): pass mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) - mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=[]) - + mocker.patch("pybandits.mab.extract_argument_names", return_value=[]) + mocker.patch("pybandits.mab.issubclass", return_value=True) kwargs = {} - action_model_cold_start, _ = BaseMab._extract_action_model_class_and_attributes(**kwargs) + ( + model_cold_start, + _, + action_general_kwargs, + _, + ) = BaseMab._extract_action_model_class_and_attributes(kwargs) - assert callable(action_model_cold_start) + assert callable(model_cold_start) def test_handles_empty_kwargs_gracefully(mocker: MockerFixture): @@ -170,13 +204,18 @@ def __init__(self): pass mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) - mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=[]) - + mocker.patch("pybandits.mab.extract_argument_names", return_value=[]) + mocker.patch("pybandits.mab.issubclass", return_value=True) kwargs = {} - action_model_cold_start, action_general_kwargs = BaseMab._extract_action_model_class_and_attributes(**kwargs) + ( + model_cold_start, + quantitative_model_cold_start, + action_general_kwargs, + quantitative_action_general_kwargs, + ) = BaseMab._extract_action_model_class_and_attributes(kwargs) - assert action_model_cold_start == MockActionModel - assert action_general_kwargs == {} + assert model_cold_start == MockActionModel + assert action_general_kwargs == kwargs def test_handles_kwargs_with_no_matching_action_model_attributes(mocker: MockerFixture): @@ -185,13 +224,10 @@ def __init__(self): pass mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) - mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=[]) - + mocker.patch("pybandits.mab.extract_argument_names", return_value=[]) kwargs = {"irrelevant_param": 1} - action_model_cold_start, action_general_kwargs = BaseMab._extract_action_model_class_and_attributes(**kwargs) - - assert action_model_cold_start == MockActionModel - assert action_general_kwargs == {} + with pytest.raises(ValueError): + BaseMab._extract_action_model_class_and_attributes(kwargs) ######################################################################################################################## diff --git a/tests/test_model.py b/tests/test_model.py index b5ade18..e2b153d 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -19,11 +19,12 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+from copy import deepcopy import numpy as np import pandas as pd import pytest -from hypothesis import given +from hypothesis import given, settings from hypothesis import strategies as st from pybandits.model import ( @@ -36,6 +37,7 @@ StudentT, ) from pybandits.pydantic_version_compatibility import ValidationError +from tests.test_utils import literal_update_methods ######################################################################################################################## @@ -55,7 +57,7 @@ def test_can_init_beta(success_counter, failure_counter): assert (b.n_successes, b.n_failures) == (1, 1) -def test_both_or_neither_counters_are_defined(): +def test_both_or_neither_models_are_defined(): with pytest.raises(ValidationError): Beta(n_successes=0) with pytest.raises(ValidationError): @@ -80,12 +82,11 @@ def test_beta_get_stats_is_working(e: Beta): assert e.count >= 2, "Count too low" -def test_beta_sample_proba(): +def test_beta_sample_proba(n_samples=100): b = Beta(n_successes=1, n_failures=2) - - for _ in range(1000): - prob = b.sample_proba() - assert prob >= 0 and prob <= 1 + prob = b.sample_proba(n_samples=n_samples) + assert len(prob) == n_samples + assert all([p >= 0 and p <= 1 for p in prob]) ######################################################################################################################## @@ -112,22 +113,22 @@ def test_can_init_betaCC(a_float): def test_can_init_base_beta_mo(): # init with default params - b = BetaMO(counters=[Beta(), Beta()]) - assert b.counters[0].n_successes == 1 and b.counters[0].n_failures == 1 - assert b.counters[1].n_successes == 1 and b.counters[1].n_failures == 1 + b = BetaMO(models=[Beta(), Beta()]) + assert b.models[0].n_successes == 1 and b.models[0].n_failures == 1 + assert b.models[1].n_successes == 1 and b.models[1].n_failures == 1 # init with empty dict - b = BetaMO(counters=[{}, {}]) - assert b.counters[0] == Beta() + b = BetaMO(models=[{}, {}]) + assert b.models[0] == Beta() # invalid init with BetaCC instead of Beta with pytest.raises(ValidationError): - BetaMO(counters=[BetaCC(cost=1), BetaCC(cost=1)]) + BetaMO(models=[BetaCC(cost=1), BetaCC(cost=1)]) -def test_calculate_proba_beta_mo(): - b = BetaMO(counters=[Beta(), Beta()]) - b.sample_proba() +def test_calculate_proba_beta_mo(n_samples=100): + b = BetaMO(models=[Beta(), Beta()]) + b.sample_proba(n_samples=n_samples) @given( @@ -139,12 +140,12 @@ def test_beta_update_mo(rewards1, rewards2): rewards1, rewards2 = rewards1[:min_len], rewards2[:min_len] rewards = [[a, b] for a, b in zip(rewards1, rewards2)] - b = BetaMO(counters=[Beta(n_successes=11, n_failures=22), Beta(n_successes=33, n_failures=44)]) + b = BetaMO(models=[Beta(n_successes=11, n_failures=22), Beta(n_successes=33, n_failures=44)]) b.update(rewards=rewards) assert b == BetaMO( - counters=[ + models=[ Beta(n_successes=11 + sum(rewards1), n_failures=22 + len(rewards1) - sum(rewards1)), Beta(n_successes=33 + sum(rewards2), n_failures=44 + len(rewards2) - sum(rewards2)), ] @@ -162,16 +163,16 @@ def test_beta_update_mo(rewards1, rewards2): def test_can_init_beta_mo(): # init with default params - b = BetaMO(counters=[Beta(), Beta()]) - assert b.counters == [Beta(), Beta()] + b = BetaMO(models=[Beta(), Beta()]) + assert b.models == [Beta(), Beta()] # init with empty dict - b = BetaMO(counters=[{}, {}]) - assert b.counters == [Beta(), Beta()] + b = BetaMO(models=[{}, {}]) + assert b.models == [Beta(), Beta()] # invalid init with BetaCC instead of Beta with pytest.raises(ValidationError): - 
BetaMO(counters=[BetaCC(cost=1), BetaCC(cost=1)]) + BetaMO(models=[BetaCC(cost=1), BetaCC(cost=1)]) ######################################################################################################################## @@ -184,21 +185,21 @@ def test_can_init_beta_mo(): def test_can_init_beta_mo_cc(a_float): if a_float < 0 or np.isnan(a_float): with pytest.raises(ValidationError): - BetaMOCC(counters=[Beta(), Beta()], cost=a_float) + BetaMOCC(models=[Beta(), Beta()], cost=a_float) else: # init with default params - b = BetaMOCC(counters=[Beta(), Beta()], cost=a_float) - assert b.counters == [Beta(), Beta()] + b = BetaMOCC(models=[Beta(), Beta()], cost=a_float) + assert b.models == [Beta(), Beta()] assert b.cost == a_float # init with empty dict - b = BetaMOCC(counters=[{}, {}], cost=a_float) - assert b.counters == [Beta(), Beta()] + b = BetaMOCC(models=[{}, {}], cost=a_float) + assert b.models == [Beta(), Beta()] assert b.cost == a_float # invalid init with BetaCC instead of Beta with pytest.raises(ValidationError): - BetaMOCC(counters=[BetaCC(cost=1), BetaCC(cost=1)], cost=a_float) + BetaMOCC(models=[BetaCC(cost=1), BetaCC(cost=1)], cost=a_float) ######################################################################################################################## @@ -285,12 +286,15 @@ def test_check_context_matrix(n_samples, n_features): blr.check_context_matrix(context=[1.0]) # context is a 1-dim list -@given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100)) +@given( + st.integers(min_value=1, max_value=10), + st.integers(min_value=1, max_value=10), +) def test_blr_sample_proba(n_samples, n_features): def sample_proba(context): - prob, weighted_sum = blr.sample_proba(context=context) - - assert type(prob) is type(weighted_sum) is np.ndarray # type of the returns must be np.ndarray + prob_weighted_sum = blr.sample_proba(context=context) + prob, weighted_sum = list(zip(*prob_weighted_sum)) + assert type(prob) is type(weighted_sum) is tuple # type of the returns must be tuple assert len(prob) == len(weighted_sum) == n_samples # return 1 sampled probability and ws per each sample assert all([0 <= p <= 1 for p in prob]) # probs must be in the interval [0, 1] @@ -312,33 +316,33 @@ def sample_proba(context): sample_proba(context=context) -def test_blr_update(n_samples=100, n_features=3): +@settings(deadline=None) +@given( + st.integers(min_value=1, max_value=5), + st.integers(min_value=1, max_value=3), + st.sampled_from(literal_update_methods), + st.just({"draws": 10}), +) +def test_blr_update(n_samples, n_features, update_method, update_kwargs): def update(context, rewards): - blr = BayesianLogisticRegression.cold_start(n_features=n_features) + blr = BayesianLogisticRegression.cold_start( + n_features=n_features, update_method=update_method, update_kwargs=update_kwargs + ) assert blr.alpha == StudentT(mu=0.0, sigma=10.0, nu=5.0) - assert blr.betas == [ - StudentT(mu=0.0, sigma=10.0, nu=5.0), - StudentT(mu=0.0, sigma=10.0, nu=5.0), - StudentT(mu=0.0, sigma=10.0, nu=5.0), - ] - + assert blr.betas == [StudentT(mu=0.0, sigma=10.0, nu=5.0)] * n_features + old_blr = deepcopy(blr) blr.update(context=context, rewards=rewards) - assert blr.alpha != StudentT(mu=0.0, sigma=10.0, nu=5.0) - assert blr.betas != [ - StudentT(mu=0.0, sigma=10.0, nu=5.0), - StudentT(mu=0.0, sigma=10.0, nu=5.0), - StudentT(mu=0.0, sigma=10.0, nu=5.0), - ] + assert old_blr != blr rewards = np.random.choice([0, 1], size=n_samples).tolist() # context is numpy array - context = 
np.random.uniform(low=-100.0, high=100.0, size=(n_samples, n_features)) + context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) assert type(context) is np.ndarray update(context=context, rewards=rewards) - - # context is python list + + # context is python list context = context.tolist() assert type(context) is list update(context=context, rewards=rewards) @@ -349,9 +353,11 @@ def update(context, rewards): update(context=context, rewards=rewards) # raise an error if len(context) != len(rewards) + blr = BayesianLogisticRegression.cold_start( + n_features=n_features, update_method=update_method, update_kwargs=update_kwargs + ) with pytest.raises(ValueError): - blr = BayesianLogisticRegression.cold_start(n_features=n_features) - blr.update(context=context, rewards=rewards[1:]) + blr.update(context=context, rewards=rewards + [rewards[-1]]) ######################################################################################################################## diff --git a/tests/test_quantitative_model.py b/tests/test_quantitative_model.py new file mode 100644 index 0000000..952885d --- /dev/null +++ b/tests/test_quantitative_model.py @@ -0,0 +1,228 @@ +from typing import List + +import numpy as np + +# Generated by Qodo Gen +import pytest +from hypothesis import given +from hypothesis import strategies as st +from hypothesis.extra.numpy import arrays + +from pybandits.base import BinaryReward +from pybandits.model import Beta +from pybandits.quantitative_model import CmabZoomingModel, Segment, SmabZoomingModel, ZoomingModel + + +def tuple_of_tuples_strategy(n, m, elements=st.floats(min_value=0, max_value=1)): + return st.tuples(*[st.tuples(*[elements for _ in range(m)]) for _ in range(n)]) + + +# Create segment with valid intervals array of shape (n,2) +@given(tuple_of_tuples_strategy(2, 2)) +def test_create_valid_segment(intervals): + segment = Segment(intervals=intervals) + assert isinstance(segment, Segment) + assert len(segment.intervals) == 2 + assert all(len(interval) == 2 for interval in segment.intervals) + + +# Access minimum and maximum quantities via mins and maxs properties +@given(tuple_of_tuples_strategy(3, 2)) +def test_mins_maxs_properties(intervals): + segment = Segment(intervals=intervals) + assert np.all(segment.mins == np.array([interval[0] for interval in intervals])) + assert np.all(segment.maxs == np.array([interval[1] for interval in intervals])) + + +# Create segment with empty intervals array +def test_create_empty_segment(): + empty_intervals = np.empty((0, 2)) + segment = Segment(intervals=empty_intervals) + assert len(segment.intervals) == 0 + assert len(segment.mins) == 0 + assert len(segment.maxs) == 0 + + +# Create segment with invalid interval shape +@given(arrays(np.float64, shape=(2, 3), elements=st.floats(min_value=0, max_value=100))) +def test_invalid_interval_shape(intervals): + with pytest.raises(ValueError, match="Intervals must have shape .n, 2."): + Segment(intervals=intervals) + + +# Add non-adjacent segments +def test_add_nonadjacent_segments(): + seg1 = Segment(intervals=np.array([[0, 0.1], [0, 0.1]])) + seg2 = Segment(intervals=np.array([[0.2, 0.5], [0.1, 0.2]])) + with pytest.raises(ValueError, match="Segments must be adjacent."): + seg1 + seg2 + + +class DummyZoomingModel(ZoomingModel): + def _init_base_model(self): + self._base_model = Beta() + + def _inner_update(self, segments: List[Segment], rewards: List[BinaryReward], **kwargs): + pass + + +# Model initialization with valid parameters creates correct number of 
segments +@given(dimension=st.integers(min_value=1, max_value=3), n_1d_segments=st.integers(min_value=2, max_value=8)) +def test_init_creates_correct_segments(dimension, n_1d_segments): + model = DummyZoomingModel.cold_start(dimension=dimension, n_1d_segments=n_1d_segments, n_max_segments=None) + expected_segments = n_1d_segments**dimension + assert len(model.sub_actions) == expected_segments + + +# Update method correctly processes rewards and quantities for existing segments +@given( + st.integers(min_value=1, max_value=5).flatmap( + lambda size: st.tuples( + st.lists(st.integers(min_value=0, max_value=1), min_size=size, max_size=size), + st.lists(st.floats(min_value=0, max_value=1), min_size=size, max_size=size), + ) + ) +) +def test_update_processes_rewards_correctly(data): + rewards, quantities = data + model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=4) + initial_segments = len(model.sub_actions) + model.update(quantities=quantities, rewards=rewards) + assert len(model.sub_actions) <= initial_segments + + +# Best performing segment gets split when below max segments limit +def test_best_segment_splits(): + model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=2, n_max_segments=4) + quantities = [0.25, 0.75] + rewards = [1, 0] + model.update(quantities=quantities, rewards=rewards) + assert len(model.sub_actions) == 3 + + +# Adjacent segments with similar performance get merged correctly +def test_similar_segments_merge(): + model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=4, comparison_threshold=0.5) + initial_segmented_actions = model.sub_actions.copy() + quantities = [0.25, 0.75] + rewards = [1, 1] + model.update(quantities=quantities, rewards=rewards) + assert initial_segmented_actions.keys() != model.sub_actions + assert len(model.sub_actions) == 4 + + +# Sample_proba returns valid probability for each segment +def test_sample_proba_returns_valid_probabilities(n_samples=100): + model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=2) + probs = model.sample_proba(n_samples=n_samples) + assert all(len(prob) == len(model.sub_actions) for prob in probs) + assert len(probs) == n_samples + assert all(0 <= prob[1] <= 1 for sample in probs for prob in sample) + assert all(0 <= v <= 1 for sample in probs for prob in sample for v in prob[0]) + + +# Update with empty rewards/quantities list +def test_update_with_empty_lists(): + model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=2) + initial_segments = len(model.sub_actions) + model.update(quantities=[], rewards=[]) + assert len(model.sub_actions) == initial_segments + + +# Update when at maximum number of segments +def test_update_at_max_segments(): + model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=4, n_max_segments=4) + quantities = [0.5] + rewards = [1] + model.update(quantities=quantities, rewards=rewards) + assert len(model.sub_actions) <= model.n_max_segments + + +# Merging segments when only 2 segments remain +def test_merge_with_two_segments(): + model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=2, comparison_threshold=1.0) + quantities = [0.25, 0.75] + rewards = [1, 1] + model.update(quantities=quantities, rewards=rewards) + assert len(model.sub_actions) >= 1 + + +# Comparing non-adjacent segments for merging +def test_non_adjacent_segments_comparison(): + model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=4) + segments = list(model.segmented_actions.keys()) + non_adjacent = [segments[0], segments[2]] + assert not 
non_adjacent[0].is_adjacent(non_adjacent[1]) + + +# Values that fall on segment boundaries +@given(st.integers(min_value=2, max_value=4)) +def test_boundary_values(n_segments): + model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=n_segments) + boundary = 1.0 / n_segments + quantities = [boundary] + rewards = [1] + model.update(quantities=quantities, rewards=rewards) + mapped_segments = model._map_values_to_segments(quantities) + assert len(mapped_segments) >= 1 + + +# Test SmabZoomingModel initialization with valid parameters +@given(dimension=st.integers(min_value=1, max_value=3), n_1d_segments=st.integers(min_value=2, max_value=8)) +def initializes_smab_zooming_model_correctly(dimension, n_1d_segments): + model = SmabZoomingModel(dimension=dimension, n_1d_segments=n_1d_segments) + expected_segments = n_1d_segments**dimension + assert len(model.segmented_actions) == expected_segments + + +# Test SmabZoomingModel update with valid rewards and quantities +@given( + rewards=st.lists(st.integers(min_value=0, max_value=1), min_size=1, max_size=5), + quantities=st.lists(st.floats(min_value=0, max_value=1), min_size=1, max_size=5), +) +def updates_smab_zooming_model_correctly(rewards, quantities): + model = SmabZoomingModel(dimension=1, n_1d_segments=4) + initial_segments = len(model.segmented_actions) + model.update(quantities=quantities, rewards=rewards) + assert len(model.segmented_actions) <= initial_segments + + +# Test SmabZoomingModel sample_proba returns valid probabilities +def sample_proba_returns_valid_probabilities_smab(): + model = SmabZoomingModel(dimension=1, n_1d_segments=2) + probas = model.sample_proba() + for value, prob in probas.items(): + assert 0 <= prob <= 1 + assert 0 <= value <= 1 + + +# Test CmabZoomingModel initialization with valid parameters +@given(dimension=st.integers(min_value=1, max_value=3), n_1d_segments=st.integers(min_value=2, max_value=8)) +def initializes_cmab_zooming_model_correctly(dimension, n_1d_segments): + model = CmabZoomingModel( + dimension=dimension, n_1d_segments=n_1d_segments, base_model_cold_start_kwargs={"n_features": 1} + ) + expected_segments = n_1d_segments**dimension + assert len(model.segmented_actions) == expected_segments + + +# Test CmabZoomingModel update with valid rewards, quantities, and context +@given( + rewards=st.lists(st.integers(min_value=0, max_value=1), min_size=1, max_size=5), + quantities=st.lists(st.floats(min_value=0, max_value=1), min_size=1, max_size=5), + context=arrays(np.float64, shape=(5, 1), elements=st.floats(min_value=0, max_value=1)), +) +def updates_cmab_zooming_model_correctly(rewards, quantities, context): + model = CmabZoomingModel(dimension=1, n_1d_segments=4, base_model_cold_start_kwargs={"n_features": 1}) + initial_segments = len(model.segmented_actions) + model.update(quantities=quantities, rewards=rewards, context=context) + assert len(model.segmented_actions) <= initial_segments + + +# Test CmabZoomingModel sample_proba returns valid probabilities +def sample_proba_returns_valid_probabilities_cmab(): + model = CmabZoomingModel(dimension=1, n_1d_segments=2, base_model_cold_start_kwargs={"n_features": 1}) + probas = model.sample_proba() + for value, prob in probas.items(): + assert 0 <= prob <= 1 + assert 0 <= value <= 1 diff --git a/tests/test_simulator.py b/tests/test_simulator.py index eee7b5e..b47f341 100644 --- a/tests/test_simulator.py +++ b/tests/test_simulator.py @@ -24,11 +24,14 @@ from typing import Dict, List, Tuple import numpy as np +import optuna import pandas as pd import 
pytest +from hypothesis import given, settings +from hypothesis import strategies as st from pytest_mock import MockerFixture -from pybandits.base import ActionId, BinaryReward +from pybandits.base import ActionId, BinaryReward, Probability from pybandits.mab import BaseMab from pybandits.simulator import Simulator @@ -43,11 +46,11 @@ def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> L def _get_batch_step_kwargs_and_metadata(self, batch_index: int) -> Tuple[Dict[str, np.ndarray], Dict[str, List]]: return {}, {} - def _finalize_step(self, batch_results: pd.DataFrame) -> pd.DataFrame: + def _finalize_step(self, batch_results: pd.DataFrame, update_kwargs: Dict[str, np.ndarray]) -> pd.DataFrame: return batch_results - def _finalize_results(self): - pass + def _extract_ground_truth(self, *args, **kwargs) -> Probability: + return np.random.random() def test_mismatched_probs_reward_columns(mocker: MockerFixture): @@ -65,3 +68,155 @@ def check_value_error(probs_reward): check_value_error(probs_reward) probs_reward = pd.DataFrame({"a1": [0.5], "a2": [0.5], "a3": [0.5]}) check_value_error(probs_reward) + + +# Test _maximize_prob_reward + + +# Returns maximum probability value from optimization study +def test_returns_maximum_probability(mocker): + mock_study = mocker.Mock() + mock_study.best_value = 0.8 + mocker.patch("optuna.create_study", return_value=mock_study) + + def prob_func(x): + return 0.8 + + result = Simulator._maximize_prob_reward(prob_func, 1) + + assert result == 0.8 + + +# Correctly samples points from [0,1] range +@given(st.integers(min_value=1, max_value=2)) +@settings(deadline=None, max_examples=10) +def test_samples_points_in_valid_range(dimension): + def prob_func(x): + assert all(0 <= xi <= 1 for xi in x) + return 0.5 + + Simulator._maximize_prob_reward(prob_func, dimension) + + +def test_maximization_result(): + maximum = Simulator._maximize_prob_reward(lambda x: 1 - x**2, 1) + assert np.isclose(maximum, 1.0, atol=1e-3) + maximum = Simulator._maximize_prob_reward(lambda x: x**2, 1) + assert np.isclose(maximum, 1.0, atol=1e-3) + + +# Uses TPE sampler with multivariate optimization +def test_uses_tpe_sampler_config(mocker): + sampler_spy = mocker.spy(optuna.samplers, "TPESampler") + + def prob_func(x): + return 0.5 + + Simulator._maximize_prob_reward(prob_func, 1) + + assert sampler_spy.call_args.kwargs["multivariate"] + assert sampler_spy.call_args.kwargs["group"] + + +# Optimizes using all available CPU cores +def test_uses_all_cpu_cores(mocker): + mock_study = mocker.Mock() + mocker.patch("optuna.create_study", return_value=mock_study) + + def prob_func(x): + return 0.5 + + Simulator._maximize_prob_reward(prob_func, 1) + mock_study.optimize.assert_called_once() + assert mock_study.optimize.call_args.kwargs["n_jobs"] == -1 + + +# Function is decorated with lru_cache +def test_lru_cache_memoization(): + def prob_func(x): + return 0.5 + + result1 = Simulator._maximize_prob_reward(prob_func, 1) + result2 = Simulator._maximize_prob_reward(prob_func, 1) + + assert result1 == result2 + assert hasattr(Simulator._maximize_prob_reward, "cache_info") + + +# Probability reward function raises exceptions +def test_probability_function_exceptions(): + def failing_prob_func(x): + raise RuntimeError("Function failed") + + with pytest.raises(RuntimeError): + Simulator._maximize_prob_reward(failing_prob_func, 1) + + +# Input dimension is very large +def test_large_input_dimension(mocker): + mock_study = mocker.Mock() + mocker.patch("optuna.create_study", 
return_value=mock_study) + + def prob_func(x): + return 0.5 + + Simulator._maximize_prob_reward(prob_func, 200) + mock_study.optimize.assert_called_once() + + +# Optimization fails to converge +def test_optimization_convergence_failure(mocker): + mock_study = mocker.Mock() + mock_study.best_value = None + mocker.patch("optuna.create_study", return_value=mock_study) + + def prob_func(x): + return 0.5 + + with pytest.raises(ValueError): + Simulator._maximize_prob_reward(prob_func, 1) + + +# Test _generate_prob_reward + + +# Returns spline function for single dimension input when second_dimension=0 +@given(first_dimension=st.integers(min_value=1, max_value=10)) +def test_single_dimension_spline(first_dimension): + spline_fn = Simulator._generate_prob_reward(first_dimension=first_dimension) + test_input = np.random.random(first_dimension) + result = spline_fn(test_input) + assert isinstance(result, float) + assert 0 <= result <= 1 + + +# Returns spline function for two dimension inputs when second_dimension>0 +@given(first_dim=st.integers(min_value=1, max_value=5), second_dim=st.integers(min_value=1, max_value=5)) +def test_two_dimension_spline(first_dim, second_dim): + spline_fn = Simulator._generate_prob_reward(first_dimension=first_dim, second_dimension=second_dim) + input1 = np.random.random(first_dim) + input2 = np.random.random(second_dim) + result = spline_fn(input1, input2) + assert isinstance(result, float) + assert 0 <= result <= 1 + + +# Generates n_points random uniform values between -1 and 1 for spline interpolation +def test_random_points_generation(mocker): + random_mock = mocker.patch("numpy.random.uniform") + with pytest.raises(ValueError): + Simulator._generate_prob_reward(first_dimension=1, n_points=5) + random_mock.assert_called_with(-1, 1, 5) + + +# Raises ValueError if spline_degree >= n_points +def test_invalid_spline_degree(): + with pytest.raises(ValueError): + Simulator._generate_prob_reward(first_dimension=1, n_points=3, spline_degree=3) + + +# Validates that n_points is positive integer +@given(n_points=st.integers(max_value=0)) +def test_n_points_validation(n_points): + with pytest.raises(ValueError): + Simulator._generate_prob_reward(first_dimension=1, n_points=n_points) diff --git a/tests/test_smab.py b/tests/test_smab.py index 2c8a34e..71490e4 100644 --- a/tests/test_smab.py +++ b/tests/test_smab.py @@ -20,918 +20,520 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-import json from copy import deepcopy -from typing import List +from typing import Any, Dict, List, Optional, Tuple, Type, Union +import numpy as np import pytest -from hypothesis import given +from hypothesis import given, settings from hypothesis import strategies as st +from pydantic.dataclasses import dataclass -from pybandits.base import BinaryReward, Float01 +from pybandits.base import ActionId, Float01 +from pybandits.base_model import BaseModel from pybandits.model import Beta, BetaCC, BetaMO, BetaMOCC -from pybandits.pydantic_version_compatibility import NonNegativeFloat, ValidationError -from pybandits.smab import SmabBernoulli, SmabBernoulliBAI, SmabBernoulliCC, SmabBernoulliMO, SmabBernoulliMOCC +from pybandits.pydantic_version_compatibility import PositiveInt, ValidationError +from pybandits.quantitative_model import QuantitativeModel, SmabZoomingModel, SmabZoomingModelCC +from pybandits.smab import ( + BaseSmabBernoulli, + SmabBernoulli, + SmabBernoulliBAI, + SmabBernoulliCC, + SmabBernoulliMO, + SmabBernoulliMOCC, +) from pybandits.strategy import ( + BestActionIdentificationBandit, ClassicBandit, CostControlBandit, MultiObjectiveBandit, MultiObjectiveCostControlBandit, ) -from pybandits.utils import to_serializable_dict -from tests.test_utils import is_serializable - - -@pytest.fixture(scope="session") -def n_samples() -> int: - return 1000 - - -######################################################################################################################## - - -# SmabBernoulli with strategy=ClassicBandit() - - -def test_create_smab_bernoulli_cold_start(): - assert SmabBernoulli.cold_start(action_ids={"a1", "a2"}) == SmabBernoulli( - actions={"a1": Beta(), "a2": Beta()}, - ) - - -@given(st.integers(min_value=0, max_value=1), st.integers(min_value=0, max_value=1)) -def test_base_smab_update_ok(r1, r2): - mab = SmabBernoulli(actions={"a1": Beta(), "a2": Beta()}) - mab.update(actions=["a1", "a2"], rewards=[r1, r2]) - mab.update(actions=["a1", "a1"], rewards=[r1, r2]) - - -def test_can_instantiate_smab(): - with pytest.raises(TypeError): - SmabBernoulli() - with pytest.raises(AttributeError): - SmabBernoulli(actions={}) - with pytest.warns(UserWarning): - SmabBernoulli(actions={"action1": Beta()}) - with pytest.raises(ValidationError): - SmabBernoulli( - actions={ - "action1": None, - "action2": None, - }, - ) - SmabBernoulli( - actions={ - "action1": Beta(), - "action2": Beta(), - }, - strategy=ClassicBandit(), - ) - smab = SmabBernoulli( - actions={ - "action1": Beta(), - "action2": Beta(), - } - ) - - assert smab.actions["action1"] == Beta() - assert smab.actions["action2"] == Beta() - - -@given( - st.integers(min_value=1), - st.integers(min_value=1), -) -def test_can_instantiate_smab_with_params(a, b): - s = SmabBernoulli( - actions={ - "action1": Beta(n_successes=a, n_failures=b), - "action2": Beta(n_successes=a, n_failures=b), - }, - ) - assert (s.actions["action1"].n_successes == a) and (s.actions["action1"].n_failures == b) - assert s.actions["action1"] == s.actions["action2"] - -@given(st.integers(max_value=0)) -def test_smab_predict_raise_when_samples_low(n_samples): - s = SmabBernoulli(actions={"a1": Beta(), "a2": Beta()}) - with pytest.raises(ValidationError): - s.predict(n_samples=n_samples) +@st.composite +def diff_strategy(draw): + return draw(st.integers(min_value=1, max_value=10)) -def test_smab_predict_raise_when_all_actions_forbidden(): - s = SmabBernoulli(actions={"a1": Beta(), "a2": Beta()}) - with pytest.raises(ValueError): - 
s.predict(n_samples=10, forbidden_actions=["a1", "a2"]) +@st.composite +def cost_strategy(draw, n_actions): + return draw(st.lists(st.floats(min_value=0, max_value=2), min_size=n_actions, max_size=n_actions)) -def test_smab_predict(n_samples: int): - s = SmabBernoulli( - actions={ - "a0": Beta(), - "a1": Beta(n_successes=5, n_failures=5), - "forb_1": Beta(n_successes=10, n_failures=1), - "best": Beta(n_successes=10, n_failures=5), - "forb_2": Beta(n_successes=100, n_failures=4), - "a5": Beta(), - }, - ) - forbidden_actions = set(["forb_1", "forb_2"]) - - best_actions, probs = s.predict(n_samples=n_samples, forbidden_actions=forbidden_actions) - assert ["forb1" not in p.keys() for p in probs], "forbidden actions weren't removed from the output" - - valid_actions = set(s.actions.keys()) - forbidden_actions - for probas, best_action in zip(probs, best_actions): - assert set(probas.keys()) == valid_actions, "restituted actions don't match valid actions" - - best_proba = probas[best_action] - assert best_proba == max(probas.values()), "best action hasn't the best probability" +@pytest.fixture(scope="module") +def monkeymodule(): + with pytest.MonkeyPatch.context() as mp: + yield mp -@given( - st.lists(st.integers(min_value=0, max_value=1), min_size=1), - st.lists(st.integers(min_value=0, max_value=1), min_size=1), -) -def test_smab_update(rewards: List[BinaryReward], rewards_1: List[BinaryReward]): - updated = SmabBernoulli( - actions={ - "a0": Beta(), - "a1": Beta(), - }, - ) - batch_updated = deepcopy(updated) - # update the model sequentially - [updated.update(actions=["a0"], rewards=[reward]) for reward in rewards] - [updated.update(actions=["a1"], rewards=[reward]) for reward in rewards_1] +def mock_update(models: List[BaseModel], diff, monkeymodule, label=0): + for model in models: + for field in model.model_fields: + if field in ("n_successes", "n_failures"): + monkeymodule.setattr(model, field, getattr(model, field) + diff.draw(diff_strategy(), label=f"{label}")) + label += 1 + elif isinstance(sub_models := getattr(model, field), list) and isinstance(sub_models[0], BaseModel): + mock_update(sub_models, diff, monkeymodule, label) - # update the model in batch - batch_updated.update(actions=["a0"] * len(rewards) + ["a1"] * len(rewards_1), rewards=rewards + rewards_1) - assert updated == batch_updated, "update() has different result when each item is applied separately" +@dataclass +class ModelTestConfig: + smab_class: Type + strategy_class: Type + model_types: List[Type[BaseModel]] - sum_failures = sum([1 - x for x in rewards]) - assert updated.actions["a0"] == Beta( - n_successes=1 + sum(rewards), n_failures=1 + sum_failures - ), "Unexpected results in counter" - - sum_failures_1 = sum([1 - x for x in rewards_1]) - assert updated.actions["a1"] == Beta( - n_successes=1 + sum(rewards_1), n_failures=1 + sum_failures_1 - ), "Unexpected results in counter" - - -@given(st.text()) -def test_smab_accepts_only_valid_actions(s): - if s == "": - with pytest.raises(ValidationError): - SmabBernoulli( - actions={ - s: Beta(), - s + "_": Beta(), + def _create_actions( + self, action_ids: List[str], costs: Optional[st.SearchStrategy], n_objectives: Optional[PositiveInt] + ) -> Dict[str, Any]: + if len(self.model_types) < len(action_ids): + indices = np.random.randint(0, len(self.model_types), len(action_ids)) + self.model_types = [self.model_types[i] for i in indices] + if all(model in [BetaCC, SmabZoomingModelCC, BetaMOCC] for model in self.model_types): + # Generate random costs + costs = 
costs.draw(cost_strategy(n_actions=len(action_ids))) + costs = [ + cost if model_type in [BetaCC, BetaMOCC] else lambda x: x**cost + for cost, model_type in zip(costs, self.model_types) + ] + else: + costs = None + + if n_objectives is None: + if costs is not None: + return { + action_id: model_type(cost=cost) + if issubclass(model_type, BetaCC) + else model_type.cold_start(dimension=1, cost=cost) # SmabZoomingModelCC + for action_id, model_type, cost in zip(action_ids, self.model_types, costs) + } + else: + return { + action_id: model_type() + if issubclass(model_type, Beta) + else model_type.cold_start(dimension=1) # SmabZoomingModel + for action_id, model_type in zip(action_ids, self.model_types) + } + else: + if costs is not None: + return { + action_id: model_type(models=[Beta()] * n_objectives, cost=cost) + for action_id, model_type, cost in zip(action_ids, self.model_types, costs) + } + else: + return { + action_id: model_type(models=[Beta()] * n_objectives) + for action_id, model_type in zip(action_ids, self.model_types) } - ) - else: - SmabBernoulli(actions={s: Beta(), s + "_": Beta()}) - - -@given(st.integers(min_value=1), st.integers(min_value=1), st.integers(min_value=1), st.integers(min_value=1)) -def test_smab_get_state(a, b, c, d): - actions = {"action1": Beta(n_successes=a, n_failures=b), "action2": Beta(n_successes=c, n_failures=d)} - smab = SmabBernoulli(actions=actions) - - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {}, - "epsilon": None, - "default_action": None, - } - ) - - class_name, smab_state = smab.get_state() - assert class_name == "SmabBernoulli" - assert smab_state == expected_state - - -@given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "n_successes": st.integers(min_value=1, max_value=100), - "n_failures": st.integers(min_value=1, max_value=100), - }, - ), - min_size=2, - ), - "strategy": st.fixed_dictionaries({}), - } - ) -) -def test_smab_from_state(state): - smab = SmabBernoulli.from_state(state) - assert isinstance(smab, SmabBernoulli) - - expected_actions = state["actions"] - actual_actions = to_serializable_dict(smab.actions) # Normalize the dict - assert expected_actions == actual_actions - - # Ensure get_state and from_state compatibility - new_smab = globals()[smab.get_state()[0]].from_state(state=smab.get_state()[1]) - assert new_smab == smab - - -######################################################################################################################## - - -# SmabBernoulli with strategy=BestActionIdentificationBandit() - - -def test_create_smab_bernoulli_bai(): - # default exploit_p - assert SmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}) == SmabBernoulliBAI( - actions={"a1": Beta(), "a2": Beta()}, - ) - # set exploit_p - assert SmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, exploit_p=0.2) == SmabBernoulliBAI( - actions={"a1": Beta(), "a2": Beta()}, - exploit_p=0.2, - ) - - -def test_can_init_smabbai(): - # init default params - s = SmabBernoulliBAI( - actions={ - "a1": Beta(), - "a2": Beta(), - }, - ) - - assert s.actions["a1"] == Beta() - assert s.actions["a2"] == Beta() - assert s.strategy.exploit_p == 0.5 - - # init input params - s = SmabBernoulliBAI( - actions={ - "a1": Beta(n_successes=1, n_failures=2), - "a2": Beta(n_successes=3, n_failures=4), - }, - exploit_p=0.3, - ) - assert s.actions["a1"] == Beta(n_successes=1, n_failures=2) - assert s.actions["a2"] == Beta(n_successes=3, 
n_failures=4) - assert s.strategy.exploit_p == 0.3 - - -def test_smabbai_predict(n_samples: int): - s = SmabBernoulliBAI(actions={"a1": Beta(), "a2": Beta()}) - _, _ = s.predict(n_samples=n_samples) - - -def test_smabbai_update(): - s = SmabBernoulliBAI(actions={"a1": Beta(), "a2": Beta()}) - s.update(actions=["a1", "a1"], rewards=[1, 0]) - -def test_smabbai_with_betacc(): - # Fails because smab bernoulli with BAI shouldn't support BetaCC - with pytest.raises(ValidationError): - SmabBernoulliBAI( - actions={ - "a1": BetaCC(cost=10), - "a2": BetaCC(cost=20), - }, + def create_smab_and_actions( + self, + action_ids: List[str], + epsilon: Optional[Float01], + costs: st.SearchStrategy, + n_objectives: st.SearchStrategy[PositiveInt], + exploit_p: Union[st.SearchStrategy[Optional[Float01]], Optional[float]], + subsidy_factor: Union[st.SearchStrategy[Optional[Float01]], Optional[float]], + ) -> Tuple[BaseSmabBernoulli, Dict[ActionId, BaseModel], Dict[str, Any]]: + n_objectives = ( + n_objectives.draw(st.integers(min_value=1, max_value=10)) + if self.smab_class in [SmabBernoulliMO, SmabBernoulliMOCC] + else None ) - - -@given( - st.integers(min_value=1), - st.integers(min_value=1), - st.integers(min_value=1), - st.integers(min_value=1), - st.floats(min_value=0, max_value=1), -) -def test_smab_bai_get_state(a, b, c, d, exploit_p: Float01): - actions = {"action1": Beta(n_successes=a, n_failures=b), "action2": Beta(n_successes=c, n_failures=d)} - smab = SmabBernoulliBAI(actions=actions, exploit_p=exploit_p) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {"exploit_p": exploit_p}, - "epsilon": None, - "default_action": None, + actions = self._create_actions(action_ids, costs, n_objectives) + default_action = action_ids[0] if epsilon else None + + kwargs = { + k: v + for k, v in { + "epsilon": epsilon, + "default_action": default_action, + }.items() + if v is not None } - ) - - class_name, smab_state = smab.get_state() - assert class_name == "SmabBernoulliBAI" - assert smab_state == expected_state - - assert is_serializable(smab_state), "Internal state is not serializable" - - + for param, classes in zip(["subsidy_factor", "exploit_p"], [[SmabBernoulliCC], [SmabBernoulliBAI]]): + if self.smab_class in classes: + actual_param = eval(param) + if isinstance(actual_param, float) or actual_param is None: + kwargs[param] = actual_param + else: + kwargs[param] = actual_param.draw(st.floats(min_value=0, max_value=1)) + + smab = self.smab_class(actions=actions, **kwargs) + + # For cold start test + if self.smab_class in [SmabBernoulliMO, SmabBernoulliMOCC]: + kwargs["n_objectives"] = n_objectives + return smab, actions, kwargs + + +TEST_CONFIGS = { + "smab": ModelTestConfig(SmabBernoulli, ClassicBandit, [Beta, SmabZoomingModel]), + "smab_bai": ModelTestConfig(SmabBernoulliBAI, BestActionIdentificationBandit, [Beta, SmabZoomingModel]), + "smab_cc": ModelTestConfig( + SmabBernoulliCC, + CostControlBandit, + [BetaCC, SmabZoomingModelCC], + ), + "smab_mo": ModelTestConfig(SmabBernoulliMO, MultiObjectiveBandit, [BetaMO]), + "smab_mocc": ModelTestConfig(SmabBernoulliMOCC, MultiObjectiveCostControlBandit, [BetaMOCC]), +} + + +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) @given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "n_successes": st.integers(min_value=1, max_value=100), - "n_failures": st.integers(min_value=1, 
max_value=100), - }, - ), - min_size=2, - ), - "strategy": st.one_of( - st.just({}), - st.just({"exploit_p": None}), - st.builds(lambda x: {"exploit_p": x}, st.floats(min_value=0, max_value=1)), - ), - } - ) + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_objectives=st.data(), + subsidy_factor=st.data(), + exploit_p=st.data(), ) -def test_smab_bai_from_state(state): - smab = SmabBernoulliBAI.from_state(state) - assert isinstance(smab, SmabBernoulliBAI) - - expected_actions = state["actions"] - actual_actions = to_serializable_dict(smab.actions) # Normalize the dict - assert expected_actions == actual_actions - expected_exploit_p = smab.strategy.get_expected_value_from_state(state, "exploit_p") - actual_exploit_p = smab.strategy.exploit_p - assert expected_exploit_p == actual_exploit_p - - # Ensure get_state and from_state compatibility - new_smab = globals()[smab.get_state()[0]].from_state(state=smab.get_state()[1]) - assert new_smab == smab - - -######################################################################################################################## - - -# SmabBernoulli with strategy=CostControlBandit() - - -def test_create_smab_bernoulli_cc(): - assert SmabBernoulliCC.cold_start( - action_ids_cost={"a1": 10, "a2": 20}, - subsidy_factor=0.2, - ) == SmabBernoulliCC( - actions={"a1": BetaCC(cost=10), "a2": BetaCC(cost=20)}, - subsidy_factor=0.2, - ) - - assert SmabBernoulliCC.cold_start(action_ids_cost={"a1": 10, "a2": 20}) == SmabBernoulliCC( - actions={"a1": BetaCC(cost=10), "a2": BetaCC(cost=20)}, - ) - - -def test_can_init_smabcc(): - # init default arguments - s = SmabBernoulliCC( - actions={ - "a1": BetaCC(cost=10), - "a2": BetaCC(cost=20), +def test_cold_start( + config: ModelTestConfig, + action_ids: List[str], + epsilon: Optional[float], + costs, + n_objectives, + exploit_p, + subsidy_factor, +): + # Create SMAB instance + smab, actions, kwargs = config.create_smab_and_actions( + action_ids, epsilon, costs, n_objectives, exploit_p, subsidy_factor + ) + + # Cold start comparison logic (modified for different model types) + cold_start_kwargs = { + "action_ids": { + action for action, model in zip(action_ids, config.model_types) if issubclass(model, (Beta, BetaMO)) }, - ) - assert s.actions["a1"] == BetaCC(cost=10) - assert s.actions["a2"] == BetaCC(cost=20) - assert s.strategy.subsidy_factor == 0.5 - - # init with input args - s = SmabBernoulliCC( - actions={ - "a1": BetaCC(n_successes=1, n_failures=2, cost=10), - "a2": BetaCC(n_successes=3, n_failures=4, cost=20), - }, - subsidy_factor=0.7, - ) - assert s.actions["a1"] == BetaCC(n_successes=1, n_failures=2, cost=10) - assert s.actions["a2"] == BetaCC(n_successes=3, n_failures=4, cost=20) - assert s.strategy == CostControlBandit(subsidy_factor=0.7) - assert s.strategy.subsidy_factor == 0.7 - - -def test_smabcc_predict(n_samples: int): - s = SmabBernoulliCC( - actions={ - "a1": BetaCC(n_successes=1, n_failures=2, cost=10), - "a2": BetaCC(n_successes=3, n_failures=4, cost=20), + "quantitative_action_ids": { + action for action, model in zip(action_ids, config.model_types) if issubclass(model, QuantitativeModel) }, - subsidy_factor=0.7, - ) - _, _ = s.predict(n_samples=n_samples) - - -def test_smabcc_update(): - s = SmabBernoulliCC(actions={"a1": BetaCC(cost=10), "a2": BetaCC(cost=10)}) - s.update(actions=["a1", "a1"], rewards=[1, 0]) - - -@given( - st.integers(min_value=1), - 
st.integers(min_value=1), - st.integers(min_value=1), - st.integers(min_value=1), - st.floats(min_value=0), - st.floats(min_value=0), - st.floats(min_value=0, max_value=1), -) -def test_smab_cc_get_state(a, b, c, d, cost1: NonNegativeFloat, cost2: NonNegativeFloat, subsidy_factor: Float01): - actions = { - "action1": BetaCC(n_successes=a, n_failures=b, cost=cost1), - "action2": BetaCC(n_successes=c, n_failures=d, cost=cost2), } - smab = SmabBernoulliCC(actions=actions, subsidy_factor=subsidy_factor) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": { - "subsidy_factor": subsidy_factor, - }, - "epsilon": None, - "default_action": None, + if all(model in [BetaCC, SmabZoomingModelCC, BetaMOCC] for model in config.model_types): + cold_start_kwargs["action_ids_cost"] = { + action: model.cost for action, model in actions.items() if isinstance(model, (BetaCC, BetaMOCC)) } - ) - - class_name, smab_state = smab.get_state() - assert class_name == "SmabBernoulliCC" - assert smab_state == expected_state - - assert is_serializable(smab_state), "Internal state is not serializable" + cold_start_kwargs["quantitative_action_ids_cost"] = { + action: model.cost for action, model in actions.items() if isinstance(model, SmabZoomingModelCC) + } + cold_start_kwargs.update(kwargs) # Add exploit_p or subsidy_factor if needed + cold_start_kwargs = {k: v for k, v in cold_start_kwargs.items() if v is not None} + assert config.smab_class.cold_start(**cold_start_kwargs) == smab +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) @given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "n_successes": st.integers(min_value=1, max_value=100), - "n_failures": st.integers(min_value=1, max_value=100), - "cost": st.floats(min_value=0), - }, - ), - min_size=2, - ), - "strategy": st.one_of( - st.just({}), - st.just({"subsidy_factor": None}), - st.builds(lambda x: {"subsidy_factor": x}, st.floats(min_value=0, max_value=1)), - ), - } - ) + action_ids=st.lists(st.text(min_size=1), min_size=2, max_size=5, unique=True), + n_objectives=st.data(), + costs=st.data(), + subsidy_factor=st.data(), + exploit_p=st.data(), ) -def test_smab_cc_from_state(state): - smab = SmabBernoulliCC.from_state(state) - assert isinstance(smab, SmabBernoulliCC) - - expected_actions = state["actions"] - actual_actions = json.loads(json.dumps(smab.actions, default=dict)) # Normalize the dict - assert expected_actions == actual_actions - expected_subsidy_factor = smab.strategy.get_expected_value_from_state(state, "subsidy_factor") - actual_subsidy_factor = smab.strategy.subsidy_factor - assert expected_subsidy_factor == actual_subsidy_factor - - # Ensure get_state and from_state compatibility - new_smab = globals()[smab.get_state()[0]].from_state(state=smab.get_state()[1]) - assert new_smab == smab - - -######################################################################################################################## - - -# SmabBernoulli with strategy=MultiObjectiveBandit() - - -@given(st.lists(st.integers(min_value=1), min_size=6, max_size=6)) -def test_can_init_smab_mo(a_list): - a, b, c, d, e, f = a_list - - s = SmabBernoulliMO( - actions={ - "a1": BetaMO( - counters=[ - Beta(n_successes=a, n_failures=b), - Beta(n_successes=c, n_failures=d), - Beta(n_successes=e, n_failures=f), - ] - ), - "a2": BetaMO( - counters=[ - Beta(n_successes=d, n_failures=a), - 
Beta(n_successes=e, n_failures=b), - Beta(n_successes=f, n_failures=c), - ] - ), - }, - ) - assert s.actions["a1"] == BetaMO( - counters=[ - Beta(n_successes=a, n_failures=b), - Beta(n_successes=c, n_failures=d), - Beta(n_successes=e, n_failures=f), - ] - ) - assert s.actions["a2"] == BetaMO( - counters=[ - Beta(n_successes=d, n_failures=a), - Beta(n_successes=e, n_failures=b), - Beta(n_successes=f, n_failures=c), - ] - ) - assert s.strategy == MultiObjectiveBandit() - - -def test_all_actions_must_have_same_number_of_objectives_smab_mo(): - with pytest.raises(ValueError): - SmabBernoulliMO( - actions={ - "a1": BetaMO(counters=[Beta(), Beta()]), - "a2": BetaMO(counters=[Beta(), Beta()]), - "a3": BetaMO(counters=[Beta(), Beta(), Beta()]), - }, - ) - - -def test_smab_mo_predict(n_samples: int, n_objectives=3): - s = SmabBernoulliMO.cold_start(action_ids={"a1", "a2"}, n_objectives=n_objectives) - - forbidden = None - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - forbidden = ["a1"] - predicted_actions, _ = s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - assert "a1" not in predicted_actions - - forbidden = ["a1", "a2"] - with pytest.raises(ValueError): - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - forbidden = ["a1", "a2", "a3"] - with pytest.raises(ValueError): - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - forbidden = ["a1", "a3"] - with pytest.raises(ValueError): - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - +def test_bad_initialization( + config: ModelTestConfig, + action_ids: List[str], + n_objectives, + costs, + exploit_p, + subsidy_factor, +): + real_n_objectives = n_objectives.draw(st.integers(min_value=1, max_value=10)) + kwargs = {"cost": 1.0} if config.smab_class in (SmabBernoulliCC, SmabBernoulliMOCC) else {} + if config.smab_class in [SmabBernoulliMO, SmabBernoulliMOCC]: + kwargs["models"] = [Beta() for _ in range(real_n_objectives)] + + # Test empty actions + with pytest.raises(AttributeError): + config.smab_class(actions={}) -def test_smab_mo_update(n_objectives=3): - action_ids = {"a1", "a2"} - mab = SmabBernoulliMO.cold_start(action_ids=action_ids, n_objectives=n_objectives) - assert all([mab.actions[a] == BetaMO.cold_start(n_objectives=n_objectives) for a in action_ids]) + # Test single action (should warn) + single_action = {action_ids[0]: config.model_types[0](**kwargs)} + with pytest.warns(UserWarning): + config.smab_class(actions=single_action) - mab.update(actions=["a1", "a2"], rewards=[[1, 0, 1], [1, 1, 0]]) - assert all([mab.actions[a] != BetaMO.cold_start(n_objectives=n_objectives) for a in set(action_ids)]) + # Test mismatched model types + actions_wrong_type = { + action_ids[0]: Beta(), + action_ids[1]: BetaCC(cost=1.0), + } + with pytest.raises(ValidationError): + config.smab_class(actions=actions_wrong_type) + # Test None actions + with pytest.raises(ValidationError): + config.smab_class(actions={aid: None for aid in action_ids}) -@given(st.lists(st.integers(min_value=1), min_size=6, max_size=6)) -def test_smab_mo_get_state(a_list): - a, b, c, d, e, f = a_list + # Test invalid strategy parameters + if config.smab_class == SmabBernoulliBAI: + with pytest.raises(ValidationError): + config.create_smab_and_actions( + action_ids, + None, + costs, + n_objectives, + exploit_p.draw(st.sampled_from([-0.1, 1.1])), + subsidy_factor, + ) + elif config.smab_class == SmabBernoulliCC: + with pytest.raises(ValidationError): + config.create_smab_and_actions( + action_ids, + None, + costs, + 
n_objectives, + exploit_p, + subsidy_factor.draw(st.sampled_from([-0.1, 1.1])), + ) - actions = { - "a1": BetaMO( - counters=[ - Beta(n_successes=a, n_failures=b), - Beta(n_successes=c, n_failures=d), - Beta(n_successes=e, n_failures=f), - ] - ), - "a2": BetaMO( - counters=[ - Beta(n_successes=d, n_failures=a), - Beta(n_successes=e, n_failures=b), - Beta(n_successes=f, n_failures=c), - ] - ), - } - smab = SmabBernoulliMO(actions=actions) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {}, - "epsilon": None, - "default_action": None, + # Test multi-objective specific cases + if hasattr(config.model_types[0], "models"): + # Test mismatched number of objectives + mo_actions_wrong = { + action_ids[0]: BetaMO(models=[Beta() for _ in range(real_n_objectives)]), + action_ids[1]: BetaMO(models=[Beta() for _ in range(real_n_objectives + 1)]), } - ) - - class_name, smab_state = smab.get_state() - assert class_name == "SmabBernoulliMO" - assert smab_state == expected_state - - assert is_serializable(smab_state), "Internal state is not serializable" + with pytest.raises(AttributeError): + config.smab_class(actions=mo_actions_wrong) +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) @given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "counters": st.lists( - st.fixed_dictionaries( - { - "n_successes": st.integers(min_value=1, max_value=100), - "n_failures": st.integers(min_value=1, max_value=100), - }, - ), - min_size=3, - max_size=3, - ) - } - ), - min_size=2, - ), - "strategy": st.fixed_dictionaries({}), - } - ) + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + n_samples=st.integers(min_value=1, max_value=100), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_objectives=st.data(), + subsidy_factor=st.data(), + exploit_p=st.data(), ) -def test_smab_mo_from_state(state): - smab = SmabBernoulliMO.from_state(state) - assert isinstance(smab, SmabBernoulliMO) - - expected_actions = state["actions"] - actual_actions = json.loads(json.dumps(smab.actions, default=dict)) # Normalize the dict - assert expected_actions == actual_actions - - # Ensure get_state and from_state compatibility - new_smab = globals()[smab.get_state()[0]].from_state(state=smab.get_state()[1]) - assert new_smab == smab - - -######################################################################################################################## - - -# SmabBernoulli with strategy=MultiObjectiveCostControlBandit() - - -@given(st.lists(st.integers(min_value=1), min_size=8, max_size=8)) -def test_can_init_smab_mo_cc(a_list): - a, b, c, d, e, f, g, h = a_list - - s = SmabBernoulliMOCC( - actions={ - "a1": BetaMOCC( - counters=[ - Beta(n_successes=a, n_failures=b), - Beta(n_successes=c, n_failures=d), - Beta(n_successes=e, n_failures=f), - ], - cost=g, - ), - "a2": BetaMOCC( - counters=[ - Beta(n_successes=d, n_failures=a), - Beta(n_successes=e, n_failures=b), - Beta(n_successes=f, n_failures=c), - ], - cost=h, - ), - }, - ) - assert s.actions["a1"] == BetaMOCC( - counters=[ - Beta(n_successes=a, n_failures=b), - Beta(n_successes=c, n_failures=d), - Beta(n_successes=e, n_failures=f), - ], - cost=g, - ) - assert s.actions["a2"] == BetaMOCC( - counters=[ - Beta(n_successes=d, n_failures=a), - Beta(n_successes=e, n_failures=b), - Beta(n_successes=f, n_failures=c), - 
], - cost=h, - ) - assert s.strategy == MultiObjectiveCostControlBandit() - - -def test_all_actions_must_have_same_number_of_objectives_smab_mo_cc(): - with pytest.raises(ValueError): - SmabBernoulliMOCC( - actions={ - "action 1": BetaMOCC(counters=[Beta(), Beta()], cost=1), - "action 2": BetaMOCC(counters=[Beta(), Beta()], cost=1), - "action 3": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=1), - }, - ) - - -def test_smab_mo_cc_predict(n_samples: int): - n_samples = 1000 - - s = SmabBernoulliMOCC.cold_start(action_ids_cost={"a1": 1, "a2": 2}, n_objectives=2) - - forbidden = None - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - forbidden = ["a1"] - predicted_actions, _ = s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - assert "a1" not in predicted_actions - - forbidden = ["a1", "a2"] - with pytest.raises(ValueError): - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - forbidden = ["a1", "a2", "a3"] - with pytest.raises(ValueError): - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - forbidden = ["a1", "a3"] - with pytest.raises(ValueError): - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - -def test_smab_mo_cc_update(n_objectives=3): - action_ids_cost = {"a1": 1, "a2": 2} - mab = SmabBernoulliMOCC.cold_start(action_ids_cost=action_ids_cost, n_objectives=n_objectives) - assert all( - [ - mab.actions[a] == BetaMOCC.cold_start(n_objectives=n_objectives, cost=action_ids_cost[a]) - for a in action_ids_cost.keys() +def test_update( + config: ModelTestConfig, + action_ids: List[str], + n_samples: int, + epsilon: Optional[float], + costs, + n_objectives, + exploit_p, + subsidy_factor, +): + # Create SMAB instance + smab, _, kwargs = config.create_smab_and_actions( + action_ids, epsilon, costs, n_objectives, exploit_p, subsidy_factor + ) + batched_smab = deepcopy(smab) + n_objectives = kwargs.get("n_objectives") + # Generate random rewards + reward_data = ( + np.random.choice([0, 1], size=(n_samples, n_objectives), replace=True) + if n_objectives + else np.random.choice([0, 1], size=n_samples, replace=True) + ) + reward_data = reward_data.tolist() + # Test updates with generated data + actions_to_update = np.random.choice(np.array(action_ids, dtype=np.object_), size=n_samples, replace=True).tolist() + # Generate quantities only if there are any QuantitativeModel actions + if any(isinstance(model, QuantitativeModel) for model in smab.actions.values()): + quantity_data = np.random.random(size=n_samples).tolist() + quantity_data = [ + q if isinstance(smab.actions[action], QuantitativeModel) else None + for q, action in zip(quantity_data, actions_to_update) ] - ) - - mab.update(actions=["a1", "a2"], rewards=[[1, 0, 1], [1, 1, 0]]) - assert all( [ - mab.actions[a] != BetaMOCC.cold_start(n_objectives=n_objectives, cost=action_ids_cost[a]) - for a in action_ids_cost.keys() + smab.update(actions=[action], rewards=[reward], quantities=[quantity]) + for action, reward, quantity in zip(actions_to_update, reward_data, quantity_data) ] - ) - + else: + quantity_data = None + [smab.update(actions=[action], rewards=[reward]) for action, reward in zip(actions_to_update, reward_data)] + + batched_smab.update(actions=actions_to_update, rewards=reward_data, quantities=quantity_data) + + for action in smab.actions: + if isinstance(smab.actions[action], Beta): + assert smab.actions[action] == batched_smab.actions[action] + relevant_rewards = np.array(reward_data)[[a == action for a in actions_to_update]] + if hasattr(smab.actions[action], 
"n_successes"): + assert ( + smab.actions[action].n_successes + == batched_smab.actions[action].n_successes + == sum(relevant_rewards) + 1 + ) + assert ( + smab.actions[action].n_failures + == batched_smab.actions[action].n_failures + == sum(1 - relevant_rewards) + 1 + ) -@given(st.lists(st.integers(min_value=1), min_size=8, max_size=8)) -def test_smab_mo_cc_get_state(a_list): - a, b, c, d, e, f, g, h = a_list - actions = { - "a1": BetaMOCC( - counters=[ - Beta(n_successes=a, n_failures=b), - Beta(n_successes=c, n_failures=d), - Beta(n_successes=e, n_failures=f), - ], - cost=g, - ), - "a2": BetaMOCC( - counters=[ - Beta(n_successes=d, n_failures=a), - Beta(n_successes=e, n_failures=b), - Beta(n_successes=f, n_failures=c), - ], - cost=h, +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) +@given( + action_ids=st.lists( + st.text( + min_size=1, ), - } - smab = SmabBernoulliMOCC(actions=actions) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {}, - "epsilon": None, - "default_action": None, - } - ) - - class_name, smab_state = smab.get_state() - assert class_name == "SmabBernoulliMOCC" - assert smab_state == expected_state - - assert is_serializable(smab_state), "Internal state is not serializable" + min_size=2, + max_size=5, + unique=True, + ), + n_samples=st.integers(min_value=1, max_value=100), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_objectives=st.data(), + subsidy_factor=st.data(), + exploit_p=st.data(), + diff=st.data(), +) +def test_predict( + config: ModelTestConfig, + action_ids: List[str], + n_samples: int, + epsilon: Optional[float], + costs, + n_objectives, + exploit_p, + subsidy_factor, + diff, + monkeymodule, +): + # Create SMAB instance + smab = config.create_smab_and_actions(action_ids, epsilon, costs, n_objectives, exploit_p, subsidy_factor)[0] + + # Test predictions with random forbidden actions + forbidden = ( + set(np.random.choice(np.array(action_ids, dtype=np.object_), size=len(action_ids) // 2, replace=False)) + if len(action_ids) > 2 + else None + ) + if smab.default_action is not None and forbidden is not None and smab.default_action in forbidden: + forbidden.remove(smab.default_action) + + mock_update(list(smab.actions.values()), diff, monkeymodule) + best_actions, probs = smab.predict(n_samples=n_samples, forbidden_actions=forbidden) + assert len(best_actions) == n_samples + assert len(probs) == n_samples + + if forbidden: + assert all( + len({action[0] if isinstance(action, tuple) else action for action in prob}) + == len(action_ids) - len(forbidden) + for prob in probs + ) + assert all(action[0] if isinstance(action, tuple) else action not in forbidden for action in best_actions) + assert all( + action[0] if isinstance(action, tuple) else action not in forbidden + for prob in probs + for action in prob.keys() + ) + else: + assert all( + len({action[0] if isinstance(action, tuple) else action for action in prob}) == len(action_ids) + for prob in probs + ) + if isinstance(smab, SmabBernoulli) and not epsilon: + assert all(prob[best_action] == max(prob.values()) for best_action, prob in zip(best_actions, probs)) +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) @given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "counters": st.lists( - st.fixed_dictionaries( - { - 
"n_successes": st.integers(min_value=1, max_value=100), - "n_failures": st.integers(min_value=1, max_value=100), - }, - ), - min_size=3, - max_size=3, - ), - "cost": st.floats(min_value=0), - } - ), - min_size=2, - ), - "strategy": st.fixed_dictionaries({}), - } - ) + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_objectives=st.data(), + subsidy_factor=st.data(), + exploit_p=st.data(), + diff=st.data(), ) -def test_smab_mo_cc_from_state(state): - smab = SmabBernoulliMOCC.from_state(state) - assert isinstance(smab, SmabBernoulliMOCC) - - expected_actions = state["actions"] - actual_actions = to_serializable_dict(smab.actions) # Normalize the dict - assert expected_actions == actual_actions - - # Ensure get_state and from_state compatibility - new_smab = globals()[smab.get_state()[0]].from_state(state=smab.get_state()[1]) - assert new_smab == smab - - -######################################################################################################################## - - -# Smab with epsilon-greedy super strategy +def test_serialization( + config: ModelTestConfig, + action_ids: List[str], + epsilon: Optional[float], + costs, + n_objectives, + exploit_p, + subsidy_factor, + diff, + monkeymodule, +): + # Create SMAB instance + smab = config.create_smab_and_actions(action_ids, epsilon, costs, n_objectives, exploit_p, subsidy_factor)[0] + + pre_update_state = smab.get_state() + mock_update(list(smab.actions.values()), diff, monkeymodule) + post_update_state = smab.get_state() + # Verify model updates + assert pre_update_state != post_update_state + + # Test serialization + restored_smab = config.smab_class.from_state(post_update_state[1]) + assert restored_smab == smab @given( st.integers(min_value=1), st.integers(min_value=1), ) -def test_can_instantiate_epsilon_greddy_smab_with_params(a, b): +def test_can_instantiate_smab_with_params(a, b): s = SmabBernoulli( actions={ "action1": Beta(n_successes=a, n_failures=b), "action2": Beta(n_successes=a, n_failures=b), }, - epsilon=0.1, - default_action="action1", ) assert (s.actions["action1"].n_successes == a) and (s.actions["action1"].n_failures == b) assert s.actions["action1"] == s.actions["action2"] -def test_epsilon_greedy_smab_predict(n_samples: int): - n_samples = 1000 - - s = SmabBernoulli( - actions={ - "a0": Beta(), - "a1": Beta(n_successes=5, n_failures=5), - "forb_1": Beta(n_successes=10, n_failures=1), - "best": Beta(n_successes=10, n_failures=5), - "forb_2": Beta(n_successes=100, n_failures=4), - "a5": Beta(), - }, - epsilon=0.1, - default_action="a1", - ) - forbidden_actions = set(["forb_1", "forb_2"]) - - _, _ = s.predict(n_samples=n_samples, forbidden_actions=forbidden_actions) - - -def test_epsilon_greddy_smabbai_predict(n_samples: int): - n_samples = 1000 - s = SmabBernoulliBAI(actions={"a1": Beta(), "a2": Beta()}, epsilon=0.1, default_action="a1") - _, _ = s.predict(n_samples=n_samples) - - -def test_epsilon_greddy_smabcc_predict(n_samples: int): - n_samples = 1000 - s = SmabBernoulliCC( - actions={ - "a1": BetaCC(n_successes=1, n_failures=2, cost=10), - "a2": BetaCC(n_successes=3, n_failures=4, cost=20), - }, - subsidy_factor=0.7, - epsilon=0.1, - default_action="a1", - ) - _, _ = s.predict(n_samples=n_samples) - - -def test_epsilon_greddy_smab_mo_predict(n_samples: int): - n_samples = 1000 - - s = SmabBernoulliMO.cold_start(action_ids={"a1", "a2"}, n_objectives=3, epsilon=0.1, 
default_action="a1") - - forbidden = None - s.predict(n_samples=n_samples, forbidden_actions=forbidden) +@given(st.integers(max_value=0)) +def test_smab_predict_raise_when_samples_low(n_samples): + s = SmabBernoulli(actions={"a1": Beta(), "a2": Beta()}) + with pytest.raises(ValidationError): + s.predict(n_samples=n_samples) -def test_epsilon_greddy_smab_mo_cc_predict(n_samples: int): - n_samples = 1000 +def test_smab_predict_raise_when_all_actions_forbidden(): + s = SmabBernoulli(actions={"a1": Beta(), "a2": Beta()}) + with pytest.raises(ValueError): + s.predict(n_samples=10, forbidden_actions=["a1", "a2"]) - s = SmabBernoulliMOCC.cold_start( - action_ids_cost={"a1": 1, "a2": 2}, n_objectives=2, epsilon=0.1, default_action="a1" - ) - forbidden = None - s.predict(n_samples=n_samples, forbidden_actions=forbidden) +@given(st.text()) +def test_smab_accepts_only_valid_actions(s): + if s == "": + with pytest.raises(ValidationError): + SmabBernoulli( + actions={ + s: Beta(), + s + "_": Beta(), + } + ) + else: + SmabBernoulli(actions={s: Beta(), s + "_": Beta()}) diff --git a/tests/test_smab_simulator.py b/tests/test_smab_simulator.py index 912051d..23ab584 100644 --- a/tests/test_smab_simulator.py +++ b/tests/test_smab_simulator.py @@ -24,29 +24,41 @@ from tempfile import TemporaryDirectory import numpy as np -import pandas as pd import pytest from hypothesis import given, settings from hypothesis import strategies as st from pytest_mock import MockerFixture from pybandits.model import Beta +from pybandits.quantitative_model import SmabZoomingModel from pybandits.smab import SmabBernoulli from pybandits.smab_simulator import SmabSimulator +@pytest.fixture(scope="module") +def monkeymodule(): + with pytest.MonkeyPatch.context() as mp: + yield mp + + def test_mismatched_probs_reward_columns(mocker: MockerFixture): smab = mocker.Mock(spec=SmabBernoulli) smab.actions = {"a1": mocker.Mock(), "a2": mocker.Mock()} smab.epsilon = 0.0 smab.default_action = None - probs_reward = pd.DataFrame({"a1": [0.5, 0.5], "a2": [0.5, 0.5]}) + probs_reward = {str(i): {"a1": 0.5, "a2": 0.5} for i in range(2)} with pytest.raises(ValueError): SmabSimulator(mab=smab, probs_reward=probs_reward) -def test_smab_e2e_simulation_with_default_args(action_ids=["a1", "a2"]): - mab = SmabBernoulli(actions={action_id: Beta() for action_id in action_ids}) +@settings(deadline=None) +@given( + action_ids=st.just(["a1", "a2"]), + models=st.lists(st.sampled_from([Beta(), SmabZoomingModel.cold_start()]), min_size=2, max_size=2), +) +def test_smab_e2e_simulation_with_default_args(action_ids, models, monkeymodule): + monkeymodule.setattr(SmabSimulator, "_maximize_prob_reward", lambda *args, **kwargs: np.random.random()) + mab = SmabBernoulli(actions=dict(zip(action_ids, models))) with TemporaryDirectory() as path: simulator = SmabSimulator(mab=mab, visualize=True, save=True, path=path) simulator.run() @@ -60,20 +72,25 @@ def test_smab_e2e_simulation_with_default_args(action_ids=["a1", "a2"]): @settings(deadline=1000) @given( - st.just(["a1", "a2"]), - st.integers(min_value=1, max_value=10), - st.integers(min_value=1, max_value=10), - st.booleans(), - st.sampled_from([None, 0, 42]), - st.booleans(), - st.booleans(), - st.sampled_from(["", "unit_test"]), + action_ids=st.just(["a1", "a2"]), + models=st.lists(st.sampled_from([Beta(), SmabZoomingModel.cold_start()]), min_size=2, max_size=2), + n_updates=st.integers(min_value=1, max_value=10), + batch_size=st.integers(min_value=1, max_value=10), + save=st.booleans(), + 
random_seed=st.sampled_from([None, 0, 42]), + verbose=st.booleans(), + visualize=st.booleans(), + file_prefix=st.sampled_from(["", "unit_test"]), ) def test_smab_e2e_simulation_with_non_default_args( - action_ids, n_updates, batch_size, save, random_seed, verbose, visualize, file_prefix + action_ids, models, n_updates, batch_size, save, random_seed, verbose, visualize, file_prefix, monkeymodule ): - probs_reward = pd.DataFrame(np.random.uniform(0, 1, (1, len(action_ids))), columns=action_ids) - mab = SmabBernoulli.cold_start(action_ids=action_ids) + monkeymodule.setattr( + SmabSimulator, + "_maximize_prob_reward", + lambda *args, **kwargs: np.random.random(), + ) + mab = SmabBernoulli(actions=dict(zip(action_ids, models))) if visualize and not save: with pytest.raises(ValueError): SmabSimulator( @@ -83,7 +100,7 @@ def test_smab_e2e_simulation_with_non_default_args( n_updates=n_updates, batch_size=batch_size, random_seed=random_seed, - probs_reward=probs_reward, + probs_reward=None, verbose=verbose, file_prefix=file_prefix, ) @@ -97,7 +114,7 @@ def test_smab_e2e_simulation_with_non_default_args( n_updates=n_updates, batch_size=batch_size, random_seed=random_seed, - probs_reward=probs_reward, + probs_reward=None, verbose=verbose, file_prefix=file_prefix, ) diff --git a/tests/test_strategy.py b/tests/test_strategy.py index 8773fe1..7ecc7c9 100644 --- a/tests/test_strategy.py +++ b/tests/test_strategy.py @@ -250,7 +250,7 @@ def test_select_action_logic_corner_cases(a_list_p, a_list_cost): # if cost factor is 0: mutated_c = c.with_subsidy_factor(subsidy_factor=0) - # get the keys of the max p.values() (there might be more max_p_values) + # get the keys of the max p.quantities() (there might be more max_p_values) max_p_values = [k for k, v in p.items() if v == max(p.values())] # if cost factor is 0 and only 1 max_value => return the action with highest p (classic bandit) @@ -351,11 +351,11 @@ def test_select_action_mo_cc(): m = MultiObjectiveCostControlBandit() actions = { - "a1": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=8), - "a2": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=2), - "a3": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=5), - "a4": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=1), - "a5": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=7), + "a1": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=8), + "a2": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=2), + "a3": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=5), + "a4": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=1), + "a5": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=7), } p = { "a1": [0.1, 0.3, 0.5], @@ -369,9 +369,9 @@ def test_select_action_mo_cc(): assert m.select_action(p=p, actions=actions) == "a4" actions = { - "a1": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=2), - "a2": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=2), - "a3": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=5), + "a1": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=2), + "a2": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=2), + "a3": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=5), } p = { "a1": [0.6, 0.1, 0.1], diff --git a/tests/test_utils.py b/tests/test_utils.py index b015cdb..4706381 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,9 +1,20 @@ -import json +from typing import Dict, get_args +import numpy as np -def is_serializable(something) -> bool: - try: - json.dumps(something) - return True - except Exception: - return False +from pybandits.base import PyBanditsBaseModel +from pybandits.model 
import UpdateMethods +from pybandits.pydantic_version_compatibility import PositiveInt + +literal_update_methods = get_args(UpdateMethods) + + +class FakeApproximation(PyBanditsBaseModel): + n_draws: PositiveInt = 10 + n_features: PositiveInt + + def sample(self, *args, **kwargs) -> Dict[str, np.ndarray]: + return { + "alpha": np.random.random(size=(1, self.n_draws)), + "betas": np.random.random(size=(self.n_features, self.n_draws)), + }
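
The batched-update assertions in the new test_update hunk above hinge on the Beta-Bernoulli conjugate update being order-independent: applying the rewards one at a time or in a single batch must leave the same posterior counts, starting from the uniform Beta(1, 1) prior (hence the "+ 1" in the asserted counts). A minimal standalone sketch of that property, in plain Python with illustrative names rather than the pybandits API:

# Illustrative sketch only; BetaCounter is a stand-in, not pybandits.model.Beta.
class BetaCounter:
    def __init__(self, n_successes=1, n_failures=1):
        self.n_successes = n_successes
        self.n_failures = n_failures

    def update(self, rewards):
        # Conjugate Beta-Bernoulli update: successes count the 1s, failures the 0s.
        self.n_successes += sum(rewards)
        self.n_failures += sum(1 - r for r in rewards)

rewards = [1, 0, 1, 1, 0]
incremental, batched = BetaCounter(), BetaCounter()
for r in rewards:
    incremental.update([r])   # one reward at a time
batched.update(rewards)       # whole batch at once
assert incremental.n_successes == batched.n_successes == sum(rewards) + 1
assert incremental.n_failures == batched.n_failures == len(rewards) - sum(rewards) + 1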
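
The monkeymodule fixture introduced in test_smab_simulator.py exists because pytest's built-in monkeypatch fixture is function-scoped; yielding from pytest.MonkeyPatch.context() gives a module-scoped patcher whose changes are reverted once the module's tests finish. A hedged usage sketch (assuming pytest >= 6.2, where MonkeyPatch.context() is available; Greeter and the patched method are placeholders, not pybandits names):

import pytest

@pytest.fixture(scope="module")
def monkeymodule():
    # Module-scoped MonkeyPatch; all patches are undone when the context exits.
    with pytest.MonkeyPatch.context() as mp:
        yield mp

class Greeter:
    def greet(self):
        return "hello"

def test_patched_for_whole_module(monkeymodule):
    # Replace a method on the class, analogous to how the simulator tests
    # stub SmabSimulator._maximize_prob_reward with a random draw.
    monkeymodule.setattr(Greeter, "greet", lambda self: "patched")
    assert Greeter().greet() == "patched"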